diff options
Diffstat (limited to 'tools/testing')
708 files changed, 51311 insertions, 12137 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 500be85729cc..0805f08af8b3 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -10,6 +10,7 @@ ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready ldflags-y += --wrap=cxl_hdm_decode_init +ldflags-y += --wrap=cxl_rcrb_to_component DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -26,6 +27,7 @@ cxl_acpi-y += config_check.o obj-m += cxl_pmem.o cxl_pmem-y := $(CXL_SRC)/pmem.o +cxl_pmem-y += $(CXL_SRC)/security.o cxl_pmem-y += config_check.o obj-m += cxl_port.o diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c index de5e5b3652fd..c4c457e59841 100644 --- a/tools/testing/cxl/config_check.c +++ b/tools/testing/cxl/config_check.c @@ -10,4 +10,6 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_BUS)); BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_ACPI)); BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)); } diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 7edce12fd2ce..30ee680d38ff 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -11,11 +11,15 @@ #include <cxlmem.h> #include "mock.h" +static int interleave_arithmetic; + #define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_SINGLE_HOST 1 +#define NR_CXL_RCH 1 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 #define NR_CXL_PORT_DECODERS 8 +#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH) static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; @@ -35,6 +39,8 @@ static struct platform_device *cxl_swd_single[NR_MEM_SINGLE]; struct platform_device *cxl_mem[NR_MEM_MULTI]; struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; +static struct platform_device *cxl_rch[NR_CXL_RCH]; +static struct platform_device *cxl_rcd[NR_CXL_RCH]; static inline bool is_multi_bridge(struct device *dev) { @@ -57,7 +63,7 @@ static inline bool is_single_bridge(struct device *dev) } static struct acpi_device acpi0017_mock; -static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = { +static struct acpi_device host_bridge[NR_BRIDGES] = { [0] = { .handle = &host_bridge[0], }, @@ -67,7 +73,9 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] [2] = { .handle = &host_bridge[2], }, - + [3] = { + .handle = &host_bridge[3], + }, }; static bool is_mock_dev(struct device *dev) @@ -80,6 +88,9 @@ static bool is_mock_dev(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) if (dev == &cxl_mem_single[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) + if (dev == &cxl_rcd[i]->dev) + return true; if (dev == &cxl_acpi->dev) return true; return false; @@ -101,7 +112,7 @@ static bool is_mock_adev(struct acpi_device *adev) static struct { struct acpi_table_cedt cedt; - struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]; + struct acpi_cedt_chbs chbs[NR_BRIDGES]; struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; @@ -122,6 +133,26 @@ static struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; } cfmws4; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws5; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws6; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[2]; + } cfmws7; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[4]; + } cfmws8; + struct { + struct acpi_cedt_cxims cxims; + u64 xormap_list[2]; + } cxims0; } __packed mock_cedt = { .cedt = { .header = { @@ -154,6 +185,14 @@ static struct { .uid = 2, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, + .chbs[3] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 3, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL11, + }, .cfmws0 = { .cfmws = { .header = { @@ -229,6 +268,81 @@ static struct { }, .target = { 2 }, }, + .cfmws5 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws5), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, + .qtg_id = 5, + .window_size = SZ_256M, + }, + .target = { 3 }, + }, + /* .cfmws6,7,8 use ACPI_CEDT_CFMWS_ARITHMETIC_XOR */ + .cfmws6 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws6), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 0, + .window_size = SZ_256M * 8UL, + }, + .target = { 0, }, + }, + .cfmws7 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws7), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 1, + .granularity = 0, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 1, + .window_size = SZ_256M * 8UL, + }, + .target = { 0, 1, }, + }, + .cfmws8 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws8), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 2, + .granularity = 0, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 0, + .window_size = SZ_256M * 16UL, + }, + .target = { 0, 1, 0, 1, }, + }, + .cxims0 = { + .cxims = { + .header = { + .type = ACPI_CEDT_TYPE_CXIMS, + .length = sizeof(mock_cedt.cxims0), + }, + .hbig = 0, + .nr_xormaps = 2, + }, + .xormap_list = { 0x404100, 0x808200, }, + }, }; struct acpi_cedt_cfmws *mock_cfmws[] = { @@ -237,6 +351,22 @@ struct acpi_cedt_cfmws *mock_cfmws[] = { [2] = &mock_cedt.cfmws2.cfmws, [3] = &mock_cedt.cfmws3.cfmws, [4] = &mock_cedt.cfmws4.cfmws, + [5] = &mock_cedt.cfmws5.cfmws, + /* Modulo Math above, XOR Math below */ + [6] = &mock_cedt.cfmws6.cfmws, + [7] = &mock_cedt.cfmws7.cfmws, + [8] = &mock_cedt.cfmws8.cfmws, +}; + +static int cfmws_start; +static int cfmws_end; +#define CFMWS_MOD_ARRAY_START 0 +#define CFMWS_MOD_ARRAY_END 5 +#define CFMWS_XOR_ARRAY_START 6 +#define CFMWS_XOR_ARRAY_END 8 + +struct acpi_cedt_cxims *mock_cxims[1] = { + [0] = &mock_cedt.cxims0.cxims, }; struct cxl_mock_res { @@ -262,11 +392,11 @@ static void depopulate_all_mock_resources(void) mutex_unlock(&mock_res_lock); } -static struct cxl_mock_res *alloc_mock_res(resource_size_t size) +static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align) { struct cxl_mock_res *res = kzalloc(sizeof(*res), GFP_KERNEL); struct genpool_data_align data = { - .align = SZ_256M, + .align = align, }; unsigned long phys; @@ -301,17 +431,17 @@ static int populate_cedt(void) else size = ACPI_CEDT_CHBS_LENGTH_CXL11; - res = alloc_mock_res(size); + res = alloc_mock_res(size, size); if (!res) return -ENOMEM; chbs->base = res->range.start; chbs->length = size; } - for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + for (i = cfmws_start; i <= cfmws_end; i++) { struct acpi_cedt_cfmws *window = mock_cfmws[i]; - res = alloc_mock_res(window->window_size); + res = alloc_mock_res(window->window_size, SZ_256M); if (!res) return -ENOMEM; window->base_hpa = res->range.start; @@ -320,10 +450,12 @@ static int populate_cedt(void) return 0; } +static bool is_mock_port(struct device *dev); + /* - * WARNING, this hack assumes the format of 'struct - * cxl_cfmws_context' and 'struct cxl_chbs_context' share the property that - * the first struct member is the device being probed by the cxl_acpi + * WARNING, this hack assumes the format of 'struct cxl_cfmws_context' + * and 'struct cxl_chbs_context' share the property that the first + * struct member is a cxl_test device being probed by the cxl_acpi * driver. */ struct cxl_cedt_context { @@ -340,7 +472,7 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, unsigned long end; int i; - if (dev != &cxl_acpi->dev) + if (!is_mock_port(dev) && !is_mock_dev(dev)) return acpi_table_parse_cedt(id, handler_arg, arg); if (id == ACPI_CEDT_TYPE_CHBS) @@ -351,12 +483,19 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, } if (id == ACPI_CEDT_TYPE_CFMWS) - for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + for (i = cfmws_start; i <= cfmws_end; i++) { h = (union acpi_subtable_headers *) mock_cfmws[i]; end = (unsigned long) h + mock_cfmws[i]->header.length; handler_arg(h, arg, end); } + if (id == ACPI_CEDT_TYPE_CXIMS) + for (i = 0; i < ARRAY_SIZE(mock_cxims); i++) { + h = (union acpi_subtable_headers *)mock_cxims[i]; + end = (unsigned long)h + mock_cxims[i]->header.length; + handler_arg(h, arg, end); + } + return 0; } @@ -370,6 +509,10 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) if (dev == &cxl_hb_single[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) + if (dev == &cxl_rch[i]->dev) + return true; + return false; } @@ -439,14 +582,21 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, return AE_OK; } -static struct pci_bus mock_pci_bus[NR_CXL_HOST_BRIDGES]; -static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { +static struct pci_bus mock_pci_bus[NR_BRIDGES]; +static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = { [0] = { .bus = &mock_pci_bus[0], }, [1] = { .bus = &mock_pci_bus[1], }, + [2] = { + .bus = &mock_pci_bus[2], + }, + [3] = { + .bus = &mock_pci_bus[3], + }, + }; static bool is_mock_bus(struct pci_bus *bus) @@ -634,7 +784,6 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) static int mock_cxl_port_enumerate_dports(struct cxl_port *port) { - struct device *dev = &port->dev; struct platform_device **array; int i, array_size; @@ -684,19 +833,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - if (IS_ERR(dport)) { - dev_err(dev, "failed to add dport: %s (%ld)\n", - dev_name(&pdev->dev), PTR_ERR(dport)); + if (IS_ERR(dport)) return PTR_ERR(dport); - } - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, - dev_name(&pdev->dev)); } return 0; } +resource_size_t mock_cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + dev_dbg(dev, "rcrb: %pa which: %d\n", &rcrb, which); + + return (resource_size_t) which + 1; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -705,6 +857,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .is_mock_dev = is_mock_dev, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, + .cxl_rcrb_to_component = mock_cxl_rcrb_to_component, .acpi_pci_find_root = mock_acpi_pci_find_root, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .devm_cxl_setup_hdm = mock_cxl_setup_hdm, @@ -729,6 +882,87 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_512G (SZ_64G * 8) #endif +static __init int cxl_rch_init(void) +{ + int rc, i; + + for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) { + int idx = NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + i; + struct acpi_device *adev = &host_bridge[idx]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", idx); + if (!pdev) + goto err_bridge; + + mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + + cxl_rch[i] = pdev; + mock_pci_bus[idx].bridge = &pdev->dev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "firmware_node"); + if (rc) + goto err_bridge; + } + + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { + int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; + struct platform_device *rch = cxl_rch[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_rcd", idx); + if (!pdev) + goto err_mem; + pdev->dev.parent = &rch->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_rcd[i] = pdev; + } + + return 0; + +err_mem: + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_rch[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "firmware_node"); + platform_device_unregister(cxl_rch[i]); + } + + return rc; +} + +static void cxl_rch_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); + for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_rch[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "firmware_node"); + platform_device_unregister(cxl_rch[i]); + } +} + static __init int cxl_single_init(void) { int i, rc; @@ -751,6 +985,7 @@ static __init int cxl_single_init(void) } cxl_hb_single[i] = pdev; + mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); if (rc) @@ -897,6 +1132,16 @@ static __init int cxl_test_init(void) if (rc) goto err_gen_pool_add; + if (interleave_arithmetic == 1) { + cfmws_start = CFMWS_XOR_ARRAY_START; + cfmws_end = CFMWS_XOR_ARRAY_END; + dev_dbg(NULL, "cxl_test loading xor math option\n"); + } else { + cfmws_start = CFMWS_MOD_ARRAY_START; + cfmws_end = CFMWS_MOD_ARRAY_END; + dev_dbg(NULL, "cxl_test loading modulo math option\n"); + } + rc = populate_cedt(); if (rc) goto err_populate; @@ -917,6 +1162,7 @@ static __init int cxl_test_init(void) } cxl_host_bridge[i] = pdev; + mock_pci_bus[i].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); if (rc) @@ -999,9 +1245,13 @@ static __init int cxl_test_init(void) if (rc) goto err_mem; + rc = cxl_rch_init(); + if (rc) + goto err_single; + cxl_acpi = platform_device_alloc("cxl_acpi", 0); if (!cxl_acpi) - goto err_single; + goto err_rch; mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; @@ -1014,6 +1264,8 @@ static __init int cxl_test_init(void) err_add: platform_device_put(cxl_acpi); +err_rch: + cxl_rch_exit(); err_single: cxl_single_exit(); err_mem: @@ -1051,6 +1303,7 @@ static __exit void cxl_test_exit(void) int i; platform_device_unregister(cxl_acpi); + cxl_rch_exit(); cxl_single_exit(); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); @@ -1073,6 +1326,8 @@ static __exit void cxl_test_exit(void) unregister_cxl_mock_ops(&cxl_mock_ops); } +module_param(interleave_arithmetic, int, 0000); +MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index aa2df3a15051..5e4ecd93f1d2 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -65,6 +65,18 @@ static struct { }, }; +#define PASS_TRY_LIMIT 3 + +struct cxl_mockmem_data { + void *lsa; + u32 security_state; + u8 user_pass[NVDIMM_PASSPHRASE_LEN]; + u8 master_pass[NVDIMM_PASSPHRASE_LEN]; + int user_limit; + int master_limit; + +}; + static int mock_gsl(struct cxl_mbox_cmd *cmd) { if (cmd->size_out < sizeof(mock_gsl_payload)) @@ -100,6 +112,24 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } +static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_identify id = { + .fw_revision = { "mock fw v1 " }, + .total_capacity = + cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + .volatile_capacity = + cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + }; + + if (cmd->size_out < sizeof(id)) + return -EINVAL; + + memcpy(cmd->payload_out, &id, sizeof(id)); + + return 0; +} + static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_identify id = { @@ -137,10 +167,334 @@ static int mock_partition_info(struct cxl_dev_state *cxlds, return 0; } +static int mock_get_security_state(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in) + return -EINVAL; + + if (cmd->size_out != sizeof(u32)) + return -EINVAL; + + memcpy(cmd->payload_out, &mdata->security_state, sizeof(u32)); + + return 0; +} + +static void master_plimit_check(struct cxl_mockmem_data *mdata) +{ + if (mdata->master_limit == PASS_TRY_LIMIT) + return; + mdata->master_limit++; + if (mdata->master_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PLIMIT; +} + +static void user_plimit_check(struct cxl_mockmem_data *mdata) +{ + if (mdata->user_limit == PASS_TRY_LIMIT) + return; + mdata->user_limit++; + if (mdata->user_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; +} + +static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_set_pass *set_pass; + + if (cmd->size_in != sizeof(*set_pass)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + set_pass = cmd->payload_in; + switch (set_pass->type) { + case CXL_PMEM_SEC_PASS_MASTER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + /* + * CXL spec rev3.0 8.2.9.8.6.2, The master pasphrase shall only be set in + * the security disabled state when the user passphrase is not set. + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (memcmp(mdata->master_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + memcpy(mdata->master_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN); + mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PASS_SET; + return 0; + + case CXL_PMEM_SEC_PASS_USER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (memcmp(mdata->user_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + memcpy(mdata->user_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN); + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PASS_SET; + return 0; + + default: + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + } + return -EINVAL; +} + +static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_disable_pass *dis_pass; + + if (cmd->size_in != sizeof(*dis_pass)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + dis_pass = cmd->payload_in; + switch (dis_pass->type) { + case CXL_PMEM_SEC_PASS_MASTER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(dis_pass->pass, mdata->master_pass, NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->master_limit = 0; + memset(mdata->master_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~CXL_PMEM_SEC_STATE_MASTER_PASS_SET; + return 0; + + case CXL_PMEM_SEC_PASS_USER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(dis_pass->pass, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->user_limit = 0; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~(CXL_PMEM_SEC_STATE_USER_PASS_SET | + CXL_PMEM_SEC_STATE_LOCKED); + return 0; + + default: + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + return 0; +} + +static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in != 0) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) + return 0; + + mdata->security_state |= CXL_PMEM_SEC_STATE_FROZEN; + return 0; +} + +static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in != NVDIMM_PASSPHRASE_LEN) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(cmd->payload_in, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) { + if (++mdata->user_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED; + return 0; +} + +static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_pass_erase *erase; + + if (cmd->size_in != sizeof(*erase)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + erase = cmd->payload_in; + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT && + erase->type == CXL_PMEM_SEC_PASS_USER) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT && + erase->type == CXL_PMEM_SEC_PASS_MASTER) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + switch (erase->type) { + case CXL_PMEM_SEC_PASS_MASTER: + /* + * The spec does not clearly define the behavior of the scenario + * where a master passphrase is passed in while the master + * passphrase is not set and user passphrase is not set. The + * code will take the assumption that it will behave the same + * as a CXL secure erase command without passphrase (0x4401). + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) { + if (memcmp(mdata->master_pass, erase->pass, + NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + mdata->master_limit = 0; + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED; + } else { + /* + * CXL rev3 8.2.9.8.6.3 Disable Passphrase + * When master passphrase is disabled, the device shall + * return Invalid Input for the Passphrase Secure Erase + * command with master passphrase. + */ + return -EINVAL; + } + /* Scramble encryption keys so that data is effectively erased */ + break; + case CXL_PMEM_SEC_PASS_USER: + /* + * The spec does not clearly define the behavior of the scenario + * where a user passphrase is passed in while the user + * passphrase is not set. The code will take the assumption that + * it will behave the same as a CXL secure erase command without + * passphrase (0x4401). + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + if (memcmp(mdata->user_pass, erase->pass, + NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + } + + /* + * CXL rev3 Table 8-118 + * If user passphrase is not set or supported by device, current + * passphrase value is ignored. Will make the assumption that + * the operation will proceed as secure erase w/o passphrase + * since spec is not explicit. + */ + + /* Scramble encryption keys so that data is effectively erased */ + break; + default: + return -EINVAL; + } + + return 0; +} + static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlds->dev); + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + void *lsa = mdata->lsa; u32 offset, length; if (sizeof(*get_lsa) > cmd->size_in) @@ -159,7 +513,8 @@ static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlds->dev); + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + void *lsa = mdata->lsa; u32 offset, length; if (sizeof(*set_lsa) > cmd->size_in) @@ -216,7 +571,10 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * rc = mock_get_log(cxlds, cmd); break; case CXL_MBOX_OP_IDENTIFY: - rc = mock_id(cxlds, cmd); + if (cxlds->rcd) + rc = mock_rcd_id(cxlds, cmd); + else + rc = mock_id(cxlds, cmd); break; case CXL_MBOX_OP_GET_LSA: rc = mock_get_lsa(cxlds, cmd); @@ -230,6 +588,24 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * case CXL_MBOX_OP_GET_HEALTH_INFO: rc = mock_health_info(cxlds, cmd); break; + case CXL_MBOX_OP_GET_SECURITY_STATE: + rc = mock_get_security_state(cxlds, cmd); + break; + case CXL_MBOX_OP_SET_PASSPHRASE: + rc = mock_set_passphrase(cxlds, cmd); + break; + case CXL_MBOX_OP_DISABLE_PASSPHRASE: + rc = mock_disable_passphrase(cxlds, cmd); + break; + case CXL_MBOX_OP_FREEZE_SECURITY: + rc = mock_freeze_security(cxlds, cmd); + break; + case CXL_MBOX_OP_UNLOCK: + rc = mock_unlock_security(cxlds, cmd); + break; + case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: + rc = mock_passphrase_secure_erase(cxlds, cmd); + break; default: break; } @@ -245,21 +621,32 @@ static void label_area_release(void *lsa) vfree(lsa); } +static bool is_rcd(struct platform_device *pdev) +{ + const struct platform_device_id *id = platform_get_device_id(pdev); + + return !!id->driver_data; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct cxl_memdev *cxlmd; struct cxl_dev_state *cxlds; - void *lsa; + struct cxl_mockmem_data *mdata; int rc; - lsa = vmalloc(LSA_SIZE); - if (!lsa) + mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); + if (!mdata) return -ENOMEM; - rc = devm_add_action_or_reset(dev, label_area_release, lsa); + dev_set_drvdata(dev, mdata); + + mdata->lsa = vmalloc(LSA_SIZE); + if (!mdata->lsa) + return -ENOMEM; + rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa); if (rc) return rc; - dev_set_drvdata(dev, lsa); cxlds = cxl_dev_state_create(dev); if (IS_ERR(cxlds)) @@ -268,6 +655,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; cxlds->payload_size = SZ_4K; + if (is_rcd(pdev)) { + cxlds->rcd = true; + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + } rc = cxl_enumerate_cmds(cxlds); if (rc) @@ -285,14 +676,51 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) - rc = devm_cxl_add_nvdimm(dev, cxlmd); - return 0; } +static ssize_t security_lock_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%u\n", + !!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)); +} + +static ssize_t security_lock_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + u32 mask = CXL_PMEM_SEC_STATE_FROZEN | CXL_PMEM_SEC_STATE_USER_PLIMIT | + CXL_PMEM_SEC_STATE_MASTER_PLIMIT; + int val; + + if (kstrtoint(buf, 0, &val) < 0) + return -EINVAL; + + if (val == 1) { + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) + return -ENXIO; + mdata->security_state |= CXL_PMEM_SEC_STATE_LOCKED; + mdata->security_state &= ~mask; + } else { + return -EINVAL; + } + return count; +} + +static DEVICE_ATTR_RW(security_lock); + +static struct attribute *cxl_mock_mem_attrs[] = { + &dev_attr_security_lock.attr, + NULL +}; +ATTRIBUTE_GROUPS(cxl_mock_mem); + static const struct platform_device_id cxl_mock_mem_ids[] = { - { .name = "cxl_mem", }, + { .name = "cxl_mem", 0 }, + { .name = "cxl_rcd", 1 }, { }, }; MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids); @@ -302,6 +730,7 @@ static struct platform_driver cxl_mock_mem_driver = { .id_table = cxl_mock_mem_ids, .driver = { .name = KBUILD_MODNAME, + .dev_groups = cxl_mock_mem_groups, }, }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index bce6a21df0d5..5dface08e0de 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -224,6 +224,25 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); +resource_size_t __wrap_cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + int index; + resource_size_t component_reg_phys; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(dev)) + component_reg_phys = + ops->cxl_rcrb_to_component(dev, rcrb, which); + else + component_reg_phys = cxl_rcrb_to_component(dev, rcrb, which); + put_cxl_mock_ops(index); + + return component_reg_phys; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcrb_to_component, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 738f24e3988a..ef33f159375e 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -15,6 +15,9 @@ struct cxl_mock_ops { acpi_string pathname, struct acpi_object_list *arguments, unsigned long long *data); + resource_size_t (*cxl_rcrb_to_component)(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 4d4663fb578b..741f15420467 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -77,11 +77,8 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree, config_start = time.time() success = linux.build_reconfig(request.build_dir, request.make_options) config_end = time.time() - if not success: - return KunitResult(KunitStatus.CONFIG_FAILURE, - config_end - config_start) - return KunitResult(KunitStatus.SUCCESS, - config_end - config_start) + status = KunitStatus.SUCCESS if success else KunitStatus.CONFIG_FAILURE + return KunitResult(status, config_end - config_start) def build_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitBuildRequest) -> KunitResult: @@ -92,14 +89,8 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree, request.build_dir, request.make_options) build_end = time.time() - if not success: - return KunitResult(KunitStatus.BUILD_FAILURE, - build_end - build_start) - if not success: - return KunitResult(KunitStatus.BUILD_FAILURE, - build_end - build_start) - return KunitResult(KunitStatus.SUCCESS, - build_end - build_start) + status = KunitStatus.SUCCESS if success else KunitStatus.BUILD_FAILURE + return KunitResult(status, build_end - build_start) def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitBuildRequest) -> KunitResult: @@ -145,7 +136,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) - tests = _list_tests(linux, request) if request.run_isolated == 'test': filter_globs = tests - if request.run_isolated == 'suite': + elif request.run_isolated == 'suite': filter_globs = _suites_from_test_list(tests) # Apply the test-part of the user's glob, if present. if '.' in request.filter_glob: @@ -192,28 +183,30 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]: parse_start = time.time() - test_result = kunit_parser.Test() - if request.raw_output: # Treat unparsed results as one passing test. - test_result.status = kunit_parser.TestStatus.SUCCESS - test_result.counts.passed = 1 + fake_test = kunit_parser.Test() + fake_test.status = kunit_parser.TestStatus.SUCCESS + fake_test.counts.passed = 1 output: Iterable[str] = input_data if request.raw_output == 'all': pass elif request.raw_output == 'kunit': - output = kunit_parser.extract_tap_lines(output, lstrip=False) + output = kunit_parser.extract_tap_lines(output) for line in output: print(line.rstrip()) + parse_time = time.time() - parse_start + return KunitResult(KunitStatus.SUCCESS, parse_time), fake_test - else: - test_result = kunit_parser.parse_run_tests(input_data) - parse_end = time.time() + + # Actually parse the test results. + test = kunit_parser.parse_run_tests(input_data) + parse_time = time.time() - parse_start if request.json: json_str = kunit_json.get_json_result( - test=test_result, + test=test, metadata=metadata) if request.json == 'stdout': print(json_str) @@ -223,10 +216,10 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input stdout.print_with_timestamp("Test results stored in %s" % os.path.abspath(request.json)) - if test_result.status != kunit_parser.TestStatus.SUCCESS: - return KunitResult(KunitStatus.TEST_FAILURE, parse_end - parse_start), test_result + if test.status != kunit_parser.TestStatus.SUCCESS: + return KunitResult(KunitStatus.TEST_FAILURE, parse_time), test - return KunitResult(KunitStatus.SUCCESS, parse_end - parse_start), test_result + return KunitResult(KunitStatus.SUCCESS, parse_time), test def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: @@ -359,14 +352,14 @@ def add_exec_opts(parser) -> None: choices=['suite', 'test']) def add_parse_opts(parser) -> None: - parser.add_argument('--raw_output', help='If set don\'t format output from kernel. ' - 'If set to --raw_output=kunit, filters to just KUnit output.', + parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. ' + 'By default, filters to just KUnit output. Use ' + '--raw_output=all to show everything', type=str, nargs='?', const='all', default=None, choices=['all', 'kunit']) parser.add_argument('--json', nargs='?', - help='Stores test results in a JSON, and either ' - 'prints to stdout or saves to file if a ' - 'filename is specified', + help='Prints parsed test results as JSON to stdout or a file if ' + 'a filename is specified. Does nothing if --raw_output is set.', type=str, const='stdout', default=None, metavar='FILE') @@ -393,6 +386,95 @@ def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree extra_qemu_args=qemu_args) +def run_handler(cli_args): + if not os.path.exists(cli_args.build_dir): + os.mkdir(cli_args.build_dir) + + linux = tree_from_args(cli_args) + request = KunitRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options, + jobs=cli_args.jobs, + raw_output=cli_args.raw_output, + json=cli_args.json, + timeout=cli_args.timeout, + filter_glob=cli_args.filter_glob, + kernel_args=cli_args.kernel_args, + run_isolated=cli_args.run_isolated) + result = run_tests(linux, request) + if result.status != KunitStatus.SUCCESS: + sys.exit(1) + + +def config_handler(cli_args): + if cli_args.build_dir and ( + not os.path.exists(cli_args.build_dir)): + os.mkdir(cli_args.build_dir) + + linux = tree_from_args(cli_args) + request = KunitConfigRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options) + result = config_tests(linux, request) + stdout.print_with_timestamp(( + 'Elapsed time: %.3fs\n') % ( + result.elapsed_time)) + if result.status != KunitStatus.SUCCESS: + sys.exit(1) + + +def build_handler(cli_args): + linux = tree_from_args(cli_args) + request = KunitBuildRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options, + jobs=cli_args.jobs) + result = config_and_build_tests(linux, request) + stdout.print_with_timestamp(( + 'Elapsed time: %.3fs\n') % ( + result.elapsed_time)) + if result.status != KunitStatus.SUCCESS: + sys.exit(1) + + +def exec_handler(cli_args): + linux = tree_from_args(cli_args) + exec_request = KunitExecRequest(raw_output=cli_args.raw_output, + build_dir=cli_args.build_dir, + json=cli_args.json, + timeout=cli_args.timeout, + filter_glob=cli_args.filter_glob, + kernel_args=cli_args.kernel_args, + run_isolated=cli_args.run_isolated) + result = exec_tests(linux, exec_request) + stdout.print_with_timestamp(( + 'Elapsed time: %.3fs\n') % (result.elapsed_time)) + if result.status != KunitStatus.SUCCESS: + sys.exit(1) + + +def parse_handler(cli_args): + if cli_args.file is None: + sys.stdin.reconfigure(errors='backslashreplace') # pytype: disable=attribute-error + kunit_output = sys.stdin + else: + with open(cli_args.file, 'r', errors='backslashreplace') as f: + kunit_output = f.read().splitlines() + # We know nothing about how the result was created! + metadata = kunit_json.Metadata() + request = KunitParseRequest(raw_output=cli_args.raw_output, + json=cli_args.json) + result, _ = parse_tests(request, metadata, kunit_output) + if result.status != KunitStatus.SUCCESS: + sys.exit(1) + + +subcommand_handlers_map = { + 'run': run_handler, + 'config': config_handler, + 'build': build_handler, + 'exec': exec_handler, + 'parse': parse_handler +} + + def main(argv): parser = argparse.ArgumentParser( description='Helps writing and running KUnit tests.') @@ -436,78 +518,14 @@ def main(argv): if get_kernel_root_path(): os.chdir(get_kernel_root_path()) - if cli_args.subcommand == 'run': - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) - - linux = tree_from_args(cli_args) - request = KunitRequest(build_dir=cli_args.build_dir, - make_options=cli_args.make_options, - jobs=cli_args.jobs, - raw_output=cli_args.raw_output, - json=cli_args.json, - timeout=cli_args.timeout, - filter_glob=cli_args.filter_glob, - kernel_args=cli_args.kernel_args, - run_isolated=cli_args.run_isolated) - result = run_tests(linux, request) - if result.status != KunitStatus.SUCCESS: - sys.exit(1) - elif cli_args.subcommand == 'config': - if cli_args.build_dir and ( - not os.path.exists(cli_args.build_dir)): - os.mkdir(cli_args.build_dir) - - linux = tree_from_args(cli_args) - request = KunitConfigRequest(build_dir=cli_args.build_dir, - make_options=cli_args.make_options) - result = config_tests(linux, request) - stdout.print_with_timestamp(( - 'Elapsed time: %.3fs\n') % ( - result.elapsed_time)) - if result.status != KunitStatus.SUCCESS: - sys.exit(1) - elif cli_args.subcommand == 'build': - linux = tree_from_args(cli_args) - request = KunitBuildRequest(build_dir=cli_args.build_dir, - make_options=cli_args.make_options, - jobs=cli_args.jobs) - result = config_and_build_tests(linux, request) - stdout.print_with_timestamp(( - 'Elapsed time: %.3fs\n') % ( - result.elapsed_time)) - if result.status != KunitStatus.SUCCESS: - sys.exit(1) - elif cli_args.subcommand == 'exec': - linux = tree_from_args(cli_args) - exec_request = KunitExecRequest(raw_output=cli_args.raw_output, - build_dir=cli_args.build_dir, - json=cli_args.json, - timeout=cli_args.timeout, - filter_glob=cli_args.filter_glob, - kernel_args=cli_args.kernel_args, - run_isolated=cli_args.run_isolated) - result = exec_tests(linux, exec_request) - stdout.print_with_timestamp(( - 'Elapsed time: %.3fs\n') % (result.elapsed_time)) - if result.status != KunitStatus.SUCCESS: - sys.exit(1) - elif cli_args.subcommand == 'parse': - if cli_args.file is None: - sys.stdin.reconfigure(errors='backslashreplace') # pytype: disable=attribute-error - kunit_output = sys.stdin - else: - with open(cli_args.file, 'r', errors='backslashreplace') as f: - kunit_output = f.read().splitlines() - # We know nothing about how the result was created! - metadata = kunit_json.Metadata() - request = KunitParseRequest(raw_output=cli_args.raw_output, - json=cli_args.json) - result, _ = parse_tests(request, metadata, kunit_output) - if result.status != KunitStatus.SUCCESS: - sys.exit(1) - else: + subcomand_handler = subcommand_handlers_map.get(cli_args.subcommand, None) + + if subcomand_handler is None: parser.print_help() + return + + subcomand_handler(cli_args) + if __name__ == '__main__': main(sys.argv[1:]) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 1ae873e3e341..a225799f6b1b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -10,8 +10,10 @@ # Author: Rae Moar <rmoar@google.com> from __future__ import annotations +from dataclasses import dataclass import re import sys +import textwrap from enum import Enum, auto from typing import Iterable, Iterator, List, Optional, Tuple @@ -58,6 +60,10 @@ class Test: self.counts.errors += 1 stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') + def ok_status(self) -> bool: + """Returns true if the status was ok, i.e. passed or skipped.""" + return self.status in (TestStatus.SUCCESS, TestStatus.SKIPPED) + class TestStatus(Enum): """An enumeration class to represent the status of a test.""" SUCCESS = auto() @@ -67,27 +73,17 @@ class TestStatus(Enum): NO_TESTS = auto() FAILURE_TO_PARSE_TESTS = auto() +@dataclass class TestCounts: """ Tracks the counts of statuses of all test cases and any errors within a Test. - - Attributes: - passed : int - the number of tests that have passed - failed : int - the number of tests that have failed - crashed : int - the number of tests that have crashed - skipped : int - the number of tests that have skipped - errors : int - the number of errors in the test and subtests - """ - def __init__(self): - """Creates TestCounts object with counts of all test - statuses and test errors set to 0. - """ - self.passed = 0 - self.failed = 0 - self.crashed = 0 - self.skipped = 0 - self.errors = 0 + """ + passed: int = 0 + failed: int = 0 + crashed: int = 0 + skipped: int = 0 + errors: int = 0 def __str__(self) -> str: """Returns the string representation of a TestCounts object.""" @@ -213,12 +209,12 @@ class LineStream: # Parsing helper methods: -KTAP_START = re.compile(r'KTAP version ([0-9]+)$') -TAP_START = re.compile(r'TAP version ([0-9]+)$') -KTAP_END = re.compile('(List of all partitions:|' +KTAP_START = re.compile(r'\s*KTAP version ([0-9]+)$') +TAP_START = re.compile(r'\s*TAP version ([0-9]+)$') +KTAP_END = re.compile(r'\s*(List of all partitions:|' 'Kernel panic - not syncing: VFS:|reboot: System halted)') -def extract_tap_lines(kernel_output: Iterable[str], lstrip=True) -> LineStream: +def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream: """Extracts KTAP lines from the kernel output.""" def isolate_ktap_output(kernel_output: Iterable[str]) \ -> Iterator[Tuple[int, str]]: @@ -244,11 +240,8 @@ def extract_tap_lines(kernel_output: Iterable[str], lstrip=True) -> LineStream: # stop extracting KTAP lines break elif started: - # remove the prefix and optionally any leading - # whitespace. Our parsing logic relies on this. + # remove the prefix, if any. line = line[prefix_len:] - if lstrip: - line = line.lstrip() yield line_num, line return LineStream(lines=isolate_ktap_output(kernel_output)) @@ -300,10 +293,10 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: check_version(version_num, TAP_VERSIONS, 'TAP', test) else: return False - test.log.append(lines.pop()) + lines.pop() return True -TEST_HEADER = re.compile(r'^# Subtest: (.*)$') +TEST_HEADER = re.compile(r'^\s*# Subtest: (.*)$') def parse_test_header(lines: LineStream, test: Test) -> bool: """ @@ -323,11 +316,11 @@ def parse_test_header(lines: LineStream, test: Test) -> bool: match = TEST_HEADER.match(lines.peek()) if not match: return False - test.log.append(lines.pop()) test.name = match.group(1) + lines.pop() return True -TEST_PLAN = re.compile(r'1\.\.([0-9]+)') +TEST_PLAN = re.compile(r'^\s*1\.\.([0-9]+)') def parse_test_plan(lines: LineStream, test: Test) -> bool: """ @@ -350,14 +343,14 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: if not match: test.expected_count = None return False - test.log.append(lines.pop()) expected_count = int(match.group(1)) test.expected_count = expected_count + lines.pop() return True -TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') +TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') -TEST_RESULT_SKIP = re.compile(r'^(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') +TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') def peek_test_name_match(lines: LineStream, test: Test) -> bool: """ @@ -414,7 +407,7 @@ def parse_test_result(lines: LineStream, test: Test, # Check if line matches test result line format if not match: return False - test.log.append(lines.pop()) + lines.pop() # Set name of test object if skip_match: @@ -446,6 +439,7 @@ def parse_diagnostic(lines: LineStream) -> List[str]: - '# Subtest: [test name]' - '[ok|not ok] [test number] [-] [test name] [optional skip directive]' + - 'KTAP version [version number]' Parameters: lines - LineStream of KTAP output to parse @@ -454,8 +448,9 @@ def parse_diagnostic(lines: LineStream) -> List[str]: Log of diagnostic lines """ log = [] # type: List[str] - while lines and not TEST_RESULT.match(lines.peek()) and not \ - TEST_HEADER.match(lines.peek()): + non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START] + while lines and not any(re.match(lines.peek()) + for re in non_diagnostic_lines): log.append(lines.pop()) return log @@ -501,17 +496,22 @@ def print_test_header(test: Test) -> None: test - Test object representing current test being printed """ message = test.name + if message != "": + # Add a leading space before the subtest counts only if a test name + # is provided using a "# Subtest" header line. + message += " " if test.expected_count: if test.expected_count == 1: - message += ' (1 subtest)' + message += '(1 subtest)' else: - message += f' ({test.expected_count} subtests)' + message += f'({test.expected_count} subtests)' stdout.print_with_timestamp(format_test_divider(message, len(message))) def print_log(log: Iterable[str]) -> None: """Prints all strings in saved log for test in yellow.""" - for m in log: - stdout.print_with_timestamp(stdout.yellow(m)) + formatted = textwrap.dedent('\n'.join(log)) + for line in formatted.splitlines(): + stdout.print_with_timestamp(stdout.yellow(line)) def format_test_result(test: Test) -> str: """ @@ -565,6 +565,40 @@ def print_test_footer(test: Test) -> None: stdout.print_with_timestamp(format_test_divider(message, len(message) - stdout.color_len())) + + +def _summarize_failed_tests(test: Test) -> str: + """Tries to summarize all the failing subtests in `test`.""" + + def failed_names(test: Test, parent_name: str) -> List[str]: + # Note: we use 'main' internally for the top-level test. + if not parent_name or parent_name == 'main': + full_name = test.name + else: + full_name = parent_name + '.' + test.name + + if not test.subtests: # this is a leaf node + return [full_name] + + # If all the children failed, just say this subtest failed. + # Don't summarize it down "the top-level test failed", though. + failed_subtests = [sub for sub in test.subtests if not sub.ok_status()] + if parent_name and len(failed_subtests) == len(test.subtests): + return [full_name] + + all_failures = [] # type: List[str] + for t in failed_subtests: + all_failures.extend(failed_names(t, full_name)) + return all_failures + + failures = failed_names(test, '') + # If there are too many failures, printing them out will just be noisy. + if len(failures) > 10: # this is an arbitrary limit + return '' + + return 'Failures: ' + ', '.join(failures) + + def print_summary_line(test: Test) -> None: """ Prints summary line of test object. Color of line is dependent on @@ -587,6 +621,15 @@ def print_summary_line(test: Test) -> None: color = stdout.red stdout.print_with_timestamp(color(f'Testing complete. {test.counts}')) + # Summarize failures that might have gone off-screen since we had a lot + # of tests (arbitrarily defined as >=100 for now). + if test.ok_status() or test.counts.total() < 100: + return + summarized = _summarize_failed_tests(test) + if not summarized: + return + stdout.print_with_timestamp(color(summarized)) + # Other methods: def bubble_up_test_results(test: Test) -> None: @@ -609,7 +652,7 @@ def bubble_up_test_results(test: Test) -> None: elif test.counts.get_status() == TestStatus.TEST_CRASHED: test.status = TestStatus.TEST_CRASHED -def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: +def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool) -> Test: """ Finds next test to parse in LineStream, creates new Test object, parses any subtests of the test, populates Test object with all @@ -627,15 +670,32 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: 1..4 [subtests] - - Subtest header line + - Subtest header (must include either the KTAP version line or + "# Subtest" header line) - Example: + Example (preferred format with both KTAP version line and + "# Subtest" line): + KTAP version 1 # Subtest: name 1..3 [subtests] ok 1 name + Example (only "# Subtest" line): + + # Subtest: name + 1..3 + [subtests] + ok 1 name + + Example (only KTAP version line, compliant with KTAP v1 spec): + + KTAP version 1 + 1..3 + [subtests] + ok 1 name + - Test result line Example: @@ -647,28 +707,29 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: expected_num - expected test number for test to be parsed log - list of strings containing any preceding diagnostic lines corresponding to the current test + is_subtest - boolean indicating whether test is a subtest Return: Test object populated with characteristics and any subtests """ test = Test() test.log.extend(log) - parent_test = False - main = parse_ktap_header(lines, test) - if main: - # If KTAP/TAP header is found, attempt to parse + if not is_subtest: + # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" + ktap_line = parse_ktap_header(lines, test) parse_test_plan(lines, test) parent_test = True else: - # If KTAP/TAP header is not found, test must be subtest - # header or test result line so parse attempt to parser - # subtest header - parent_test = parse_test_header(lines, test) + # If not the main test, attempt to parse a test header containing + # the KTAP version line and/or subtest header line + ktap_line = parse_ktap_header(lines, test) + subtest_line = parse_test_header(lines, test) + parent_test = (ktap_line or subtest_line) if parent_test: - # If subtest header is found, attempt to parse - # test plan and print header + # If KTAP version line and/or subtest header is found, attempt + # to parse test plan and print test header parse_test_plan(lines, test) print_test_header(test) expected_count = test.expected_count @@ -683,7 +744,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: sub_log = parse_diagnostic(lines) sub_test = Test() if not lines or (peek_test_name_match(lines, test) and - not main): + is_subtest): if expected_count and test_num <= expected_count: # If parser reaches end of test before # parsing expected number of subtests, print @@ -697,20 +758,19 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: test.log.extend(sub_log) break else: - sub_test = parse_test(lines, test_num, sub_log) + sub_test = parse_test(lines, test_num, sub_log, True) subtests.append(sub_test) test_num += 1 test.subtests = subtests - if not main: + if is_subtest: # If not main test, look for test result line test.log.extend(parse_diagnostic(lines)) - if (parent_test and peek_test_name_match(lines, test)) or \ - not parent_test: - parse_test_result(lines, test, expected_num) - else: + if test.name != "" and not peek_test_name_match(lines, test): test.add_error('missing subtest result line!') + else: + parse_test_result(lines, test, expected_num) - # Check for there being no tests + # Check for there being no subtests within parent test if parent_test and len(subtests) == 0: # Don't override a bad status if this test had one reported. # Assumption: no subtests means CRASHED is from Test.__init__() @@ -720,11 +780,11 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) - if parent_test and not main: + if parent_test and is_subtest: # If test has subtests and is not the main test object, print # footer. print_test_footer(test) - elif not main: + elif is_subtest: print_test_result(test) return test @@ -744,10 +804,10 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test: test = Test() if not lines: test.name = '<missing>' - test.add_error('could not find any KTAP output!') + test.add_error('Could not find any KTAP output. Did any KUnit tests run?') test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: - test = parse_test(lines, 0, []) + test = parse_test(lines, 0, [], False) if test.status != TestStatus.NO_TESTS: test.status = test.counts.get_status() stdout.print_with_timestamp(DIVIDER) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index e2cd2cc2e98f..0c2190514103 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -80,6 +80,13 @@ class KconfigTest(unittest.TestCase): self.assertEqual(actual_kconfig, expected_kconfig) class KUnitParserTest(unittest.TestCase): + def setUp(self): + self.print_mock = mock.patch('kunit_printer.Printer.print').start() + self.addCleanup(mock.patch.stopall) + + def noPrintCallContains(self, substr: str): + for call in self.print_mock.mock_calls: + self.assertNotIn(substr, call.args[0]) def assertContains(self, needle: str, haystack: kunit_parser.LineStream): # Clone the iterator so we can print the contents on failure. @@ -133,33 +140,29 @@ class KUnitParserTest(unittest.TestCase): all_passed_log = test_data_path('test_is_test_passed-all_passed.log') with open(all_passed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts.errors, 0) def test_parse_successful_nested_tests_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') with open(all_passed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts.errors, 0) def test_kselftest_nested(self): kselftest_log = test_data_path('test_is_test_passed-kselftest.log') with open(kselftest_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts.errors, 0) def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.FAILURE, - result.status) + self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) + self.assertEqual(result.counts.errors, 0) def test_no_header(self): empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') @@ -167,9 +170,8 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) self.assertEqual(0, len(result.subtests)) - self.assertEqual( - kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, - result.status) + self.assertEqual(kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) + self.assertEqual(result.counts.errors, 1) def test_missing_test_plan(self): missing_plan_log = test_data_path('test_is_test_passed-' @@ -179,12 +181,8 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.extract_tap_lines( file.readlines())) # A missing test plan is not an error. - self.assertEqual(0, result.counts.errors) - # All tests should be accounted for. - self.assertEqual(10, result.counts.total()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) def test_no_tests(self): header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') @@ -192,9 +190,8 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) self.assertEqual(0, len(result.subtests)) - self.assertEqual( - kunit_parser.TestStatus.NO_TESTS, - result.status) + self.assertEqual(kunit_parser.TestStatus.NO_TESTS, result.status) + self.assertEqual(result.counts.errors, 1) def test_no_tests_no_plan(self): no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') @@ -205,7 +202,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.subtests[0].subtests[0].status) - self.assertEqual(1, result.counts.errors) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=1)) def test_no_kunit_output(self): @@ -214,9 +211,10 @@ class KUnitParserTest(unittest.TestCase): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('could not find any KTAP output!')) + print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) print_mock.stop() self.assertEqual(0, len(result.subtests)) + self.assertEqual(result.counts.errors, 1) def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') @@ -224,18 +222,16 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=4, skipped=1)) def test_skipped_all_tests(self): skipped_log = test_data_path('test_skip_all_tests.log') with open(skipped_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SKIPPED, - result.status) + self.assertEqual(kunit_parser.TestStatus.SKIPPED, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5)) def test_ignores_hyphen(self): hyphen_log = test_data_path('test_strip_hyphen.log') @@ -243,71 +239,112 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual( "sysctl_test", result.subtests[0].name) self.assertEqual( "example", result.subtests[1].name) - file.close() - def test_ignores_prefix_printk_time(self): prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.TEST_CRASHED, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertGreaterEqual(result.counts.errors, 1) def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual( - kunit_parser.TestStatus.SUCCESS, - result.status) - self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + self.assertEqual(result.counts.errors, 0) + + def test_summarize_failures(self): + output = """ + KTAP version 1 + 1..2 + # Subtest: all_failed_suite + 1..2 + not ok 1 - test1 + not ok 2 - test2 + not ok 1 - all_failed_suite + # Subtest: some_failed_suite + 1..2 + ok 1 - test1 + not ok 2 - test2 + not ok 1 - some_failed_suite + """ + result = kunit_parser.parse_run_tests(output.splitlines()) + self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) + + self.assertEqual(kunit_parser._summarize_failed_tests(result), + 'Failures: all_failed_suite, some_failed_suite.test2') + + def test_ktap_format(self): + ktap_log = test_data_path('test_parse_ktap_output.log') + with open(ktap_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3)) + self.assertEqual('suite', result.subtests[0].name) + self.assertEqual('case_1', result.subtests[0].subtests[0].name) + self.assertEqual('case_2', result.subtests[0].subtests[1].name) + + def test_parse_subtest_header(self): + ktap_log = test_data_path('test_parse_subtest_header.log') + with open(ktap_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) + + def test_show_test_output_on_failure(self): + output = """ + KTAP version 1 + 1..1 + Test output. + Indented more. + not ok 1 test1 + """ + result = kunit_parser.parse_run_tests(output.splitlines()) + self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) + + self.print_mock.assert_any_call(StrContains('Test output.')) + self.print_mock.assert_any_call(StrContains(' Indented more.')) + self.noPrintCallContains('not ok 1 test1') def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) @@ -485,6 +522,9 @@ class LinuxSourceTreeTest(unittest.TestCase): class KUnitJsonTest(unittest.TestCase): + def setUp(self): + self.print_mock = mock.patch('kunit_printer.Printer.print').start() + self.addCleanup(mock.patch.stopall) def _json_for(self, log_file): with open(test_data_path(log_file)) as file: @@ -581,7 +621,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(e.exception.code, 1) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) - self.print_mock.assert_any_call(StrContains('could not find any KTAP output!')) + self.print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) def test_exec_no_tests(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0']) diff --git a/tools/testing/kunit/test_data/test_parse_ktap_output.log b/tools/testing/kunit/test_data/test_parse_ktap_output.log new file mode 100644 index 000000000000..ccdf244e5303 --- /dev/null +++ b/tools/testing/kunit/test_data/test_parse_ktap_output.log @@ -0,0 +1,8 @@ +KTAP version 1 +1..1 + KTAP version 1 + 1..3 + ok 1 case_1 + ok 2 case_2 + ok 3 case_3 +ok 1 suite diff --git a/tools/testing/kunit/test_data/test_parse_subtest_header.log b/tools/testing/kunit/test_data/test_parse_subtest_header.log new file mode 100644 index 000000000000..216631092e7b --- /dev/null +++ b/tools/testing/kunit/test_data/test_parse_subtest_header.log @@ -0,0 +1,7 @@ +KTAP version 1 +1..1 + KTAP version 1 + # Subtest: suite + 1..1 + ok 1 test +ok 1 suite
\ No newline at end of file diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore index 654338e0be52..4cc7cd5aac2b 100644 --- a/tools/testing/memblock/.gitignore +++ b/tools/testing/memblock/.gitignore @@ -1,4 +1,5 @@ main memblock.c linux/memblock.h +asm/asm.h asm/cmpxchg.h diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 246f7ac8489b..7a1ca694a982 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \ LDFLAGS += -fsanitize=address -fsanitize=undefined TARGETS = main TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \ - tests/basic_api.o tests/common.o + tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o OFILES = main.o $(DEP_OFILES) $(TEST_OFILES) EXTR_SRC = ../../../mm/memblock.c @@ -29,13 +29,14 @@ include: ../../../include/linux/memblock.h ../../include/linux/*.h \ @mkdir -p linux test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h + test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h memblock.c: $(EXTR_SRC) test -L memblock.c || ln -s $(EXTR_SRC) memblock.c clean: - $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h + $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h help: @echo 'Memblock simulator' diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO index 33044c634ea7..e306c90c535f 100644 --- a/tools/testing/memblock/TODO +++ b/tools/testing/memblock/TODO @@ -1,17 +1,5 @@ TODO ===== -1. Add tests trying to memblock_add() or memblock_reserve() 129th region. - This will trigger memblock_double_array(), make sure it succeeds. - *Important:* These tests require valid memory ranges, use dummy physical - memory block from common.c to implement them. It is also very - likely that the current MEM_SIZE won't be enough for these - test cases. Use realloc to adjust the size accordingly. - -2. Add test cases using this functions (implement them for both directions): - + memblock_alloc_raw() - + memblock_alloc_exact_nid_raw() - + memblock_alloc_try_nid_raw() - -3. Add tests for memblock_alloc_node() to check if the correct NUMA node is set +1. Add tests for memblock_alloc_node() to check if the correct NUMA node is set for the new region diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c index 4ca1024342b1..278f9dec5008 100644 --- a/tools/testing/memblock/main.c +++ b/tools/testing/memblock/main.c @@ -3,6 +3,7 @@ #include "tests/alloc_api.h" #include "tests/alloc_helpers_api.h" #include "tests/alloc_nid_api.h" +#include "tests/alloc_exact_nid_api.h" #include "tests/common.h" int main(int argc, char **argv) @@ -12,6 +13,7 @@ int main(int argc, char **argv) memblock_alloc_checks(); memblock_alloc_helpers_checks(); memblock_alloc_nid_checks(); + memblock_alloc_exact_nid_checks(); return 0; } diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c new file mode 100644 index 000000000000..6e14447da6e1 --- /dev/null +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c @@ -0,0 +1,1113 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include "alloc_exact_nid_api.h" +#include "alloc_nid_api.h" + +#define FUNC_NAME "memblock_alloc_exact_nid_raw" + +/* + * contains the fraction of MEM_SIZE contained in each node in basis point + * units (one hundredth of 1% or 1/10000) + */ +static const unsigned int node_fractions[] = { + 2500, /* 1/4 */ + 625, /* 1/16 */ + 1250, /* 1/8 */ + 1250, /* 1/8 */ + 625, /* 1/16 */ + 625, /* 1/16 */ + 2500, /* 1/4 */ + 625, /* 1/16 */ +}; + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+----------+ + * + * | +------------------+ +-----+ | + * | | reserved | | new | | + * +-----------+------------------+--------------+-----+----------+ + * + * Expect to allocate an aligned region at the end of the requested node. The + * region count and total size get updated. + */ +static int alloc_exact_nid_top_down_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[1]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, size + r1.size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------------------+-----------+--------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node_end - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+----------+ + * + + + * | +---------+ | + * | | rgn | | + * +----------+---------+-------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + ASSERT_LE(req_node->base, new_rgn->base); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * | +-----+ | + * | | rgn | | + * +-----+-----+----------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that ends at + * the end of the requested node. + */ +static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, region_end(req_node) - size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * has enough memory to allocate a region of the requested size. + * Expect to allocate an aligned region at the beginning of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_simple_check(void) +{ + int nid_req = 3; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved but has enough memory for the allocated region: + * + * | +---------------------------------------+ | + * | | requested | | + * +-----------+---------------------------------------+---------+ + * + * | +------------------+-----+ | + * | | reserved | new | | + * +-----------+------------------+-----+------------------------+ + * + * Expect to allocate an aligned region in the requested node that merges with + * the existing reserved region. The total size gets updated. + */ +static int alloc_exact_nid_bottom_up_numa_part_reserved_check(void) +{ + int nid_req = 4; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t total_size; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_8, req_node->size); + r1.base = req_node->base; + r1.size = req_node->size / SZ_2; + size = r1.size / SZ_4; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + total_size = size + r1.size; + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the first + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------------------+-----------+ | + * | | requested | node3 | | + * +-----------+-----------------------+-----------+--------------+ + * + + + * | +-----------+ | + * | | rgn | | + * +-----------+-----------+--------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region at the beginning + * of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_split_range_low_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + phys_addr_t req_node_end; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + req_node_end = region_end(req_node); + min_addr = req_node_end - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), req_node_end); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the requested + * node ends before min_addr: + * + * min_addr + * | max_addr + * | | + * v v + * | +---------------+ +-------------+---------+ | + * | | requested | | node1 | node2 | | + * +----+---------------+--------+-------------+---------+---------+ + * + + + * | +---------+ | + * | | rgn | | + * +----+---------+------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_no_overlap_split_check(void) +{ + int nid_req = 2; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *node2 = &memblock.memory.regions[6]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_512; + min_addr = node2->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node ends + * before min_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * |-----------+ +----------+----...----+----------+ | + * | requested | | min node | ... | max node | | + * +-----------+-----------+----------+----...----+----------+------+ + * + + + * |-----+ | + * | rgn | | + * +-----+----------------------------------------------------------+ + * + * Expect to drop the lower limit and allocate a memory region that starts at + * the beginning of the requested node. + */ +static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void) +{ + int nid_req = 0; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, size); + ASSERT_EQ(new_rgn->base, req_node->base); + ASSERT_LE(region_end(new_rgn), region_end(req_node)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size: + * + * | +-----+ | + * | | req | | + * +---+-----+----------------------------+ + * + * +---------+ + * | rgn | + * +---------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_small_node_generic_check(void) +{ + int nid_req = 1; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = SZ_2 * req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is fully reserved: + * + * | +---------+ | + * | |requested| | + * +--------------+---------+-------------+ + * + * | +---------+ | + * | | reserved| | + * +--------------+---------+-------------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_node_reserved_generic_check(void) +{ + int nid_req = 2; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + size = req_node->size; + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(req_node->base, req_node->size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * is partially reserved and does not have enough contiguous memory for the + * allocated region: + * + * | +-----------------------+ | + * | | requested | | + * +-----------+-----------------------+----+ + * + * | +----------+ | + * | | reserved | | + * +-----------------+----------+-----------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_part_reserved_fail_generic_check(void) +{ + int nid_req = 4; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + struct region r1; + phys_addr_t size; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + ASSERT_LE(SZ_4, req_node->size); + size = req_node->size / SZ_2; + r1.base = req_node->base + (size / SZ_2); + r1.size = size; + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + memblock_reserve(r1.base, r1.size); + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region that spans over the min_addr + * and max_addr range and overlaps with two different nodes, where the second + * node is the requested node: + * + * min_addr + * | max_addr + * | | + * v v + * | +--------------------------+---------+ | + * | | first node |requested| | + * +------+--------------------------+---------+----------------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_split_range_high_generic_check(void) +{ + int nid_req = 3; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_512; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = req_node->base - SZ_256; + max_addr = min_addr + size; + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range when + * the requested node and the range do not overlap, and requested node starts + * after max_addr. The range overlaps with multiple nodes along node + * boundaries: + * + * min_addr + * | max_addr + * | | + * v v + * | +----------+----...----+----------+ +-----------+ | + * | | min node | ... | max node | | requested | | + * +-----+----------+----...----+----------+--------+-----------+---+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_no_overlap_high_generic_check(void) +{ + int nid_req = 7; + struct memblock_region *min_node = &memblock.memory.regions[2]; + struct memblock_region *max_node = &memblock.memory.regions[5]; + void *allocated_ptr = NULL; + phys_addr_t size = SZ_64; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = min_node->base; + max_addr = region_end(max_node); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate a memory region in a specific NUMA node that + * does not have enough memory to allocate a region of the requested size. + * Additionally, none of the nodes have enough memory to allocate the region: + * + * +-----------------------------------+ + * | new | + * +-----------------------------------+ + * |-------+-------+-------+-------+-------+-------+-------+-------| + * | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_large_region_generic_check(void) +{ + int nid_req = 3; + void *allocated_ptr = NULL; + phys_addr_t size = MEM_SIZE / SZ_2; + phys_addr_t min_addr; + phys_addr_t max_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + min_addr = memblock_start_of_DRAM(); + max_addr = memblock_end_of_DRAM(); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_addr range when + * there are two reserved regions at the borders. The requested node starts at + * min_addr and ends at max_addr and is the same size as the region to be + * allocated: + * + * min_addr + * | max_addr + * | | + * v v + * | +-----------+-----------------------+-----------------------| + * | | node5 | requested | node7 | + * +------+-----------+-----------------------+-----------------------+ + * + + + * | +----+-----------------------+----+ | + * | | r2 | new | r1 | | + * +-------------+----+-----------------------+----+------------------+ + * + * Expect to merge all of the regions into one. The region counter and total + * size fields get updated. + */ +static int alloc_exact_nid_numa_reserved_full_merge_generic_check(void) +{ + int nid_req = 6; + int nid_next = nid_req + 1; + struct memblock_region *new_rgn = &memblock.reserved.regions[0]; + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; + struct memblock_region *next_node = &memblock.memory.regions[nid_next]; + void *allocated_ptr = NULL; + struct region r1, r2; + phys_addr_t size = req_node->size; + phys_addr_t total_size; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r1.base = next_node->base; + r1.size = SZ_128; + + r2.size = SZ_128; + r2.base = r1.base - (size + r2.size); + + total_size = r1.size + r2.size + size; + min_addr = r2.base + r2.size; + max_addr = r1.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + nid_req); + + ASSERT_NE(allocated_ptr, NULL); + ASSERT_MEM_NE(allocated_ptr, 0, size); + + ASSERT_EQ(new_rgn->size, total_size); + ASSERT_EQ(new_rgn->base, r2.base); + + ASSERT_LE(new_rgn->base, req_node->base); + ASSERT_LE(region_end(req_node), region_end(new_rgn)); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); + + return 0; +} + +/* + * A test that tries to allocate memory within min_addr and max_add range, + * where the total range can fit the region, but it is split between two nodes + * and everything else is reserved. Additionally, nid is set to NUMA_NO_NODE + * instead of requesting a specific node: + * + * +-----------+ + * | new | + * +-----------+ + * | +---------------------+-----------| + * | | prev node | next node | + * +------+---------------------+-----------+ + * + + + * |----------------------+ +-----| + * | r1 | | r2 | + * +----------------------+-----------+-----+ + * ^ ^ + * | | + * | max_addr + * | + * min_addr + * + * Expect no allocation to happen. + */ +static int alloc_exact_nid_numa_split_all_reserved_generic_check(void) +{ + void *allocated_ptr = NULL; + struct memblock_region *next_node = &memblock.memory.regions[7]; + struct region r1, r2; + phys_addr_t size = SZ_256; + phys_addr_t max_addr; + phys_addr_t min_addr; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + r2.base = next_node->base + SZ_128; + r2.size = memblock_end_of_DRAM() - r2.base; + + r1.size = MEM_SIZE - (r2.size + size); + r1.base = memblock_start_of_DRAM(); + + min_addr = r1.base + r1.size; + max_addr = r2.base; + + memblock_reserve(r1.base, r1.size); + memblock_reserve(r2.base, r2.size); + + allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); + + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); + + return 0; +} + +/* Test case wrappers for NUMA tests */ +static int alloc_exact_nid_numa_simple_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_simple_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_simple_check(); + + return 0; +} + +static int alloc_exact_nid_numa_part_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_part_reserved_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_part_reserved_check(); + + return 0; +} + +static int alloc_exact_nid_numa_split_range_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_split_range_low_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_split_range_low_check(); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_split_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_no_overlap_split_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_no_overlap_split_check(); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_low_check(void) +{ + test_print("\tRunning %s...\n", __func__); + memblock_set_bottom_up(false); + alloc_exact_nid_top_down_numa_no_overlap_low_check(); + memblock_set_bottom_up(true); + alloc_exact_nid_bottom_up_numa_no_overlap_low_check(); + + return 0; +} + +static int alloc_exact_nid_numa_small_node_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_small_node_generic_check); + run_bottom_up(alloc_exact_nid_numa_small_node_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_node_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_node_reserved_generic_check); + run_bottom_up(alloc_exact_nid_numa_node_reserved_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_part_reserved_fail_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_part_reserved_fail_generic_check); + run_bottom_up(alloc_exact_nid_numa_part_reserved_fail_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_split_range_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_split_range_high_generic_check); + run_bottom_up(alloc_exact_nid_numa_split_range_high_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_no_overlap_high_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_no_overlap_high_generic_check); + run_bottom_up(alloc_exact_nid_numa_no_overlap_high_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_large_region_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_large_region_generic_check); + run_bottom_up(alloc_exact_nid_numa_large_region_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_reserved_full_merge_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_exact_nid_numa_reserved_full_merge_generic_check); + + return 0; +} + +static int alloc_exact_nid_numa_split_all_reserved_check(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_exact_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_exact_nid_numa_split_all_reserved_generic_check); + + return 0; +} + +int __memblock_alloc_exact_nid_numa_checks(void) +{ + test_print("Running %s NUMA tests...\n", FUNC_NAME); + + alloc_exact_nid_numa_simple_check(); + alloc_exact_nid_numa_part_reserved_check(); + alloc_exact_nid_numa_split_range_low_check(); + alloc_exact_nid_numa_no_overlap_split_check(); + alloc_exact_nid_numa_no_overlap_low_check(); + + alloc_exact_nid_numa_small_node_check(); + alloc_exact_nid_numa_node_reserved_check(); + alloc_exact_nid_numa_part_reserved_fail_check(); + alloc_exact_nid_numa_split_range_high_check(); + alloc_exact_nid_numa_no_overlap_high_check(); + alloc_exact_nid_numa_large_region_check(); + alloc_exact_nid_numa_reserved_full_merge_check(); + alloc_exact_nid_numa_split_all_reserved_check(); + + return 0; +} + +int memblock_alloc_exact_nid_checks(void) +{ + prefix_reset(); + prefix_push(FUNC_NAME); + + reset_memblock_attributes(); + dummy_physical_memory_init(); + + memblock_alloc_exact_nid_range_checks(); + memblock_alloc_exact_nid_numa_checks(); + + dummy_physical_memory_cleanup(); + + prefix_pop(); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h new file mode 100644 index 000000000000..cef419d55d2a --- /dev/null +++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _MEMBLOCK_ALLOC_EXACT_NID_H +#define _MEMBLOCK_ALLOC_EXACT_NID_H + +#include "common.h" + +int memblock_alloc_exact_nid_checks(void); +int __memblock_alloc_exact_nid_numa_checks(void); + +#ifdef CONFIG_NUMA +static inline int memblock_alloc_exact_nid_numa_checks(void) +{ + __memblock_alloc_exact_nid_numa_checks(); + return 0; +} + +#else +static inline int memblock_alloc_exact_nid_numa_checks(void) +{ + return 0; +} + +#endif /* CONFIG_NUMA */ + +#endif diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 2c2d60f4e3e3..49ef68cccd6f 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -18,18 +18,29 @@ static const unsigned int node_fractions[] = { 625, /* 1/16 */ }; -static inline const char * const get_memblock_alloc_try_nid_name(int flags) +static inline const char * const get_memblock_alloc_nid_name(int flags) { + if (flags & TEST_F_EXACT) + return "memblock_alloc_exact_nid_raw"; if (flags & TEST_F_RAW) return "memblock_alloc_try_nid_raw"; return "memblock_alloc_try_nid"; } -static inline void *run_memblock_alloc_try_nid(phys_addr_t size, - phys_addr_t align, - phys_addr_t min_addr, - phys_addr_t max_addr, int nid) -{ +static inline void *run_memblock_alloc_nid(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr, + phys_addr_t max_addr, int nid) +{ + assert(!(alloc_nid_test_flags & TEST_F_EXACT) || + (alloc_nid_test_flags & TEST_F_RAW)); + /* + * TEST_F_EXACT should be checked before TEST_F_RAW since + * memblock_alloc_exact_nid_raw() performs raw allocations. + */ + if (alloc_nid_test_flags & TEST_F_EXACT) + return memblock_alloc_exact_nid_raw(size, align, min_addr, + max_addr, nid); if (alloc_nid_test_flags & TEST_F_RAW) return memblock_alloc_try_nid_raw(size, align, min_addr, max_addr, nid); @@ -50,7 +61,7 @@ static inline void *run_memblock_alloc_try_nid(phys_addr_t size, * * Expect to allocate a region that ends at max_addr. */ -static int alloc_try_nid_top_down_simple_check(void) +static int alloc_nid_top_down_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -65,9 +76,9 @@ static int alloc_try_nid_top_down_simple_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -102,7 +113,7 @@ static int alloc_try_nid_top_down_simple_check(void) * * Expect to allocate an aligned region that ends before max_addr. */ -static int alloc_try_nid_top_down_end_misaligned_check(void) +static int alloc_nid_top_down_end_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -118,9 +129,9 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512 + misalign; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -153,7 +164,7 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) * Expect to allocate a region that starts at min_addr and ends at * max_addr, given that min_addr is aligned. */ -static int alloc_try_nid_exact_address_generic_check(void) +static int alloc_nid_exact_address_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -168,9 +179,9 @@ static int alloc_try_nid_exact_address_generic_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -205,7 +216,7 @@ static int alloc_try_nid_exact_address_generic_check(void) * Expect to drop the lower limit and allocate a memory region which * ends at max_addr (if the address is aligned). */ -static int alloc_try_nid_top_down_narrow_range_check(void) +static int alloc_nid_top_down_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -219,9 +230,9 @@ static int alloc_try_nid_top_down_narrow_range_check(void) min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -257,7 +268,7 @@ static int alloc_try_nid_top_down_narrow_range_check(void) * * Expect no allocation to happen. */ -static int alloc_try_nid_low_max_generic_check(void) +static int alloc_nid_low_max_generic_check(void) { void *allocated_ptr = NULL; phys_addr_t size = SZ_1K; @@ -270,9 +281,9 @@ static int alloc_try_nid_low_max_generic_check(void) min_addr = memblock_start_of_DRAM(); max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -295,7 +306,7 @@ static int alloc_try_nid_low_max_generic_check(void) * * Expect a merge of both regions. Only the region size gets updated. */ -static int alloc_try_nid_min_reserved_generic_check(void) +static int alloc_nid_min_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -315,9 +326,9 @@ static int alloc_try_nid_min_reserved_generic_check(void) memblock_reserve(reserved_base, r1_size); - allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); @@ -347,7 +358,7 @@ static int alloc_try_nid_min_reserved_generic_check(void) * * Expect a merge of regions. Only the region size gets updated. */ -static int alloc_try_nid_max_reserved_generic_check(void) +static int alloc_nid_max_reserved_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -365,9 +376,9 @@ static int alloc_try_nid_max_reserved_generic_check(void) memblock_reserve(max_addr, r1_size); - allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags); @@ -400,7 +411,7 @@ static int alloc_try_nid_max_reserved_generic_check(void) * updated. The total size field gets updated. */ -static int alloc_try_nid_top_down_reserved_with_space_check(void) +static int alloc_nid_top_down_reserved_with_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -428,9 +439,9 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -465,7 +476,7 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) * Expect to merge all of the regions into one. The region counter and total * size fields get updated. */ -static int alloc_try_nid_reserved_full_merge_generic_check(void) +static int alloc_nid_reserved_full_merge_generic_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -491,9 +502,9 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -527,7 +538,7 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) * Expect to merge the new region with r2. The second region does not get * updated. The total size counter gets updated. */ -static int alloc_try_nid_top_down_reserved_no_space_check(void) +static int alloc_nid_top_down_reserved_no_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -555,9 +566,9 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -596,7 +607,7 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) * Expect no allocation to happen. */ -static int alloc_try_nid_reserved_all_generic_check(void) +static int alloc_nid_reserved_all_generic_check(void) { void *allocated_ptr = NULL; struct region r1, r2; @@ -620,9 +631,9 @@ static int alloc_try_nid_reserved_all_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -636,7 +647,7 @@ static int alloc_try_nid_reserved_all_generic_check(void) * bigger than the end address of the available memory. Expect to allocate * a region that ends before the end of the memory. */ -static int alloc_try_nid_top_down_cap_max_check(void) +static int alloc_nid_top_down_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -650,9 +661,9 @@ static int alloc_try_nid_top_down_cap_max_check(void) min_addr = memblock_end_of_DRAM() - SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -673,7 +684,7 @@ static int alloc_try_nid_top_down_cap_max_check(void) * smaller than the start address of the available memory. Expect to allocate * a region that ends before the end of the memory. */ -static int alloc_try_nid_top_down_cap_min_check(void) +static int alloc_nid_top_down_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -687,9 +698,9 @@ static int alloc_try_nid_top_down_cap_min_check(void) min_addr = memblock_start_of_DRAM() - SZ_256; max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -719,7 +730,7 @@ static int alloc_try_nid_top_down_cap_min_check(void) * * Expect to allocate a region that ends before max_addr. */ -static int alloc_try_nid_bottom_up_simple_check(void) +static int alloc_nid_bottom_up_simple_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -734,9 +745,9 @@ static int alloc_try_nid_bottom_up_simple_check(void) min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -771,7 +782,7 @@ static int alloc_try_nid_bottom_up_simple_check(void) * * Expect to allocate an aligned region that ends before max_addr. */ -static int alloc_try_nid_bottom_up_start_misaligned_check(void) +static int alloc_nid_bottom_up_start_misaligned_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -787,9 +798,9 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) min_addr = memblock_start_of_DRAM() + misalign; max_addr = min_addr + SZ_512; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); rgn_end = rgn->base + rgn->size; ASSERT_NE(allocated_ptr, NULL); @@ -824,7 +835,7 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) * Expect to drop the lower limit and allocate a memory region which * starts at the beginning of the available memory. */ -static int alloc_try_nid_bottom_up_narrow_range_check(void) +static int alloc_nid_bottom_up_narrow_range_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -838,9 +849,9 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) min_addr = memblock_start_of_DRAM() + SZ_512; max_addr = min_addr + SMP_CACHE_BYTES; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -873,7 +884,7 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) * updated. The total size field gets updated. */ -static int alloc_try_nid_bottom_up_reserved_with_space_check(void) +static int alloc_nid_bottom_up_reserved_with_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[1]; struct memblock_region *rgn2 = &memblock.reserved.regions[0]; @@ -901,9 +912,9 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -942,7 +953,7 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) * Other regions are not modified. */ -static int alloc_try_nid_bottom_up_reserved_no_space_check(void) +static int alloc_nid_bottom_up_reserved_no_space_check(void) { struct memblock_region *rgn1 = &memblock.reserved.regions[2]; struct memblock_region *rgn2 = &memblock.reserved.regions[1]; @@ -971,9 +982,9 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags); @@ -1000,7 +1011,7 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) * bigger than the end address of the available memory. Expect to allocate * a region that starts at the min_addr. */ -static int alloc_try_nid_bottom_up_cap_max_check(void) +static int alloc_nid_bottom_up_cap_max_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -1014,9 +1025,9 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) min_addr = memblock_start_of_DRAM() + SZ_1K; max_addr = memblock_end_of_DRAM() + SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1037,7 +1048,7 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) * smaller than the start address of the available memory. Expect to allocate * a region at the beginning of the available memory. */ -static int alloc_try_nid_bottom_up_cap_min_check(void) +static int alloc_nid_bottom_up_cap_min_check(void) { struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; @@ -1051,9 +1062,9 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM() - SZ_256; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1070,133 +1081,133 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) } /* Test case wrappers for range tests */ -static int alloc_try_nid_simple_check(void) +static int alloc_nid_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_simple_check(); + alloc_nid_top_down_simple_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_simple_check(); + alloc_nid_bottom_up_simple_check(); return 0; } -static int alloc_try_nid_misaligned_check(void) +static int alloc_nid_misaligned_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_end_misaligned_check(); + alloc_nid_top_down_end_misaligned_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_start_misaligned_check(); + alloc_nid_bottom_up_start_misaligned_check(); return 0; } -static int alloc_try_nid_narrow_range_check(void) +static int alloc_nid_narrow_range_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_narrow_range_check(); + alloc_nid_top_down_narrow_range_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_narrow_range_check(); + alloc_nid_bottom_up_narrow_range_check(); return 0; } -static int alloc_try_nid_reserved_with_space_check(void) +static int alloc_nid_reserved_with_space_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_reserved_with_space_check(); + alloc_nid_top_down_reserved_with_space_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_reserved_with_space_check(); + alloc_nid_bottom_up_reserved_with_space_check(); return 0; } -static int alloc_try_nid_reserved_no_space_check(void) +static int alloc_nid_reserved_no_space_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_reserved_no_space_check(); + alloc_nid_top_down_reserved_no_space_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_reserved_no_space_check(); + alloc_nid_bottom_up_reserved_no_space_check(); return 0; } -static int alloc_try_nid_cap_max_check(void) +static int alloc_nid_cap_max_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_cap_max_check(); + alloc_nid_top_down_cap_max_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_cap_max_check(); + alloc_nid_bottom_up_cap_max_check(); return 0; } -static int alloc_try_nid_cap_min_check(void) +static int alloc_nid_cap_min_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_cap_min_check(); + alloc_nid_top_down_cap_min_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_cap_min_check(); + alloc_nid_bottom_up_cap_min_check(); return 0; } -static int alloc_try_nid_min_reserved_check(void) +static int alloc_nid_min_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_min_reserved_generic_check); - run_bottom_up(alloc_try_nid_min_reserved_generic_check); + run_top_down(alloc_nid_min_reserved_generic_check); + run_bottom_up(alloc_nid_min_reserved_generic_check); return 0; } -static int alloc_try_nid_max_reserved_check(void) +static int alloc_nid_max_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_max_reserved_generic_check); - run_bottom_up(alloc_try_nid_max_reserved_generic_check); + run_top_down(alloc_nid_max_reserved_generic_check); + run_bottom_up(alloc_nid_max_reserved_generic_check); return 0; } -static int alloc_try_nid_exact_address_check(void) +static int alloc_nid_exact_address_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_exact_address_generic_check); - run_bottom_up(alloc_try_nid_exact_address_generic_check); + run_top_down(alloc_nid_exact_address_generic_check); + run_bottom_up(alloc_nid_exact_address_generic_check); return 0; } -static int alloc_try_nid_reserved_full_merge_check(void) +static int alloc_nid_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_reserved_full_merge_generic_check); - run_bottom_up(alloc_try_nid_reserved_full_merge_generic_check); + run_top_down(alloc_nid_reserved_full_merge_generic_check); + run_bottom_up(alloc_nid_reserved_full_merge_generic_check); return 0; } -static int alloc_try_nid_reserved_all_check(void) +static int alloc_nid_reserved_all_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_reserved_all_generic_check); - run_bottom_up(alloc_try_nid_reserved_all_generic_check); + run_top_down(alloc_nid_reserved_all_generic_check); + run_bottom_up(alloc_nid_reserved_all_generic_check); return 0; } -static int alloc_try_nid_low_max_check(void) +static int alloc_nid_low_max_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_low_max_generic_check); - run_bottom_up(alloc_try_nid_low_max_generic_check); + run_top_down(alloc_nid_low_max_generic_check); + run_bottom_up(alloc_nid_low_max_generic_check); return 0; } @@ -1204,22 +1215,22 @@ static int alloc_try_nid_low_max_check(void) static int memblock_alloc_nid_range_checks(void) { test_print("Running %s range tests...\n", - get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + get_memblock_alloc_nid_name(alloc_nid_test_flags)); - alloc_try_nid_simple_check(); - alloc_try_nid_misaligned_check(); - alloc_try_nid_narrow_range_check(); - alloc_try_nid_reserved_with_space_check(); - alloc_try_nid_reserved_no_space_check(); - alloc_try_nid_cap_max_check(); - alloc_try_nid_cap_min_check(); + alloc_nid_simple_check(); + alloc_nid_misaligned_check(); + alloc_nid_narrow_range_check(); + alloc_nid_reserved_with_space_check(); + alloc_nid_reserved_no_space_check(); + alloc_nid_cap_max_check(); + alloc_nid_cap_min_check(); - alloc_try_nid_min_reserved_check(); - alloc_try_nid_max_reserved_check(); - alloc_try_nid_exact_address_check(); - alloc_try_nid_reserved_full_merge_check(); - alloc_try_nid_reserved_all_check(); - alloc_try_nid_low_max_check(); + alloc_nid_min_reserved_check(); + alloc_nid_max_reserved_check(); + alloc_nid_exact_address_check(); + alloc_nid_reserved_full_merge_check(); + alloc_nid_reserved_all_check(); + alloc_nid_low_max_check(); return 0; } @@ -1229,7 +1240,7 @@ static int memblock_alloc_nid_range_checks(void) * has enough memory to allocate a region of the requested size. * Expect to allocate an aligned region at the end of the requested node. */ -static int alloc_try_nid_top_down_numa_simple_check(void) +static int alloc_nid_top_down_numa_simple_check(void) { int nid_req = 3; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1247,8 +1258,8 @@ static int alloc_try_nid_top_down_numa_simple_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1280,7 +1291,7 @@ static int alloc_try_nid_top_down_numa_simple_check(void) * Expect to allocate an aligned region at the end of the last node that has * enough memory (in this case, nid = 6) after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_small_node_check(void) +static int alloc_nid_top_down_numa_small_node_check(void) { int nid_req = 1; int nid_exp = 6; @@ -1299,8 +1310,8 @@ static int alloc_try_nid_top_down_numa_small_node_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1333,7 +1344,7 @@ static int alloc_try_nid_top_down_numa_small_node_check(void) * large enough and has enough unreserved memory (in this case, nid = 6) after * falling back to NUMA_NO_NODE. The region count and total size get updated. */ -static int alloc_try_nid_top_down_numa_node_reserved_check(void) +static int alloc_nid_top_down_numa_node_reserved_check(void) { int nid_req = 2; int nid_exp = 6; @@ -1353,8 +1364,8 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(req_node->base, req_node->size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1386,7 +1397,7 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void) * Expect to allocate an aligned region at the end of the requested node. The * region count and total size get updated. */ -static int alloc_try_nid_top_down_numa_part_reserved_check(void) +static int alloc_nid_top_down_numa_part_reserved_check(void) { int nid_req = 4; struct memblock_region *new_rgn = &memblock.reserved.regions[1]; @@ -1408,8 +1419,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1444,7 +1455,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void) * nid = NUMA_NODES - 1) after falling back to NUMA_NO_NODE. The region count * and total size get updated. */ -static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) +static int alloc_nid_top_down_numa_part_reserved_fallback_check(void) { int nid_req = 4; int nid_exp = NUMA_NODES - 1; @@ -1469,8 +1480,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1507,7 +1518,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void) * Expect to drop the lower limit and allocate a memory region that ends at * the end of the requested node. */ -static int alloc_try_nid_top_down_numa_split_range_low_check(void) +static int alloc_nid_top_down_numa_split_range_low_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1525,8 +1536,8 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void) min_addr = req_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1563,7 +1574,7 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void) * Expect to drop the lower limit and allocate a memory region that * ends at the end of the first node that overlaps with the range. */ -static int alloc_try_nid_top_down_numa_split_range_high_check(void) +static int alloc_nid_top_down_numa_split_range_high_check(void) { int nid_req = 3; int nid_exp = nid_req - 1; @@ -1582,8 +1593,8 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void) min_addr = exp_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1620,7 +1631,7 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void) * Expect to drop the lower limit and allocate a memory region that ends at * the end of the requested node. */ -static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) +static int alloc_nid_top_down_numa_no_overlap_split_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1638,8 +1649,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) min_addr = node2->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1677,7 +1688,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void) * Expect to allocate a memory region at the end of the final node in * the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) +static int alloc_nid_top_down_numa_no_overlap_low_check(void) { int nid_req = 0; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1694,8 +1705,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1733,7 +1744,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void) * Expect to allocate a memory region at the end of the final node in * the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) +static int alloc_nid_top_down_numa_no_overlap_high_check(void) { int nid_req = 7; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1750,8 +1761,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1773,7 +1784,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void) * has enough memory to allocate a region of the requested size. * Expect to allocate an aligned region at the beginning of the requested node. */ -static int alloc_try_nid_bottom_up_numa_simple_check(void) +static int alloc_nid_bottom_up_numa_simple_check(void) { int nid_req = 3; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1791,8 +1802,8 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1824,7 +1835,7 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void) * Expect to allocate an aligned region at the beginning of the first node that * has enough memory (in this case, nid = 0) after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_small_node_check(void) +static int alloc_nid_bottom_up_numa_small_node_check(void) { int nid_req = 1; int nid_exp = 0; @@ -1843,8 +1854,8 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1878,7 +1889,7 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void) * after falling back to NUMA_NO_NODE. The region count and total size get * updated. */ -static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) +static int alloc_nid_bottom_up_numa_node_reserved_check(void) { int nid_req = 2; int nid_exp = 0; @@ -1898,8 +1909,8 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(req_node->base, req_node->size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1931,7 +1942,7 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void) * Expect to allocate an aligned region in the requested node that merges with * the existing reserved region. The total size gets updated. */ -static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) +static int alloc_nid_bottom_up_numa_part_reserved_check(void) { int nid_req = 4; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -1955,8 +1966,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) total_size = size + r1.size; memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -1991,7 +2002,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void) * nid = 0) after falling back to NUMA_NO_NODE. The region count and total size * get updated. */ -static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) +static int alloc_nid_bottom_up_numa_part_reserved_fallback_check(void) { int nid_req = 4; int nid_exp = 0; @@ -2016,8 +2027,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) max_addr = memblock_end_of_DRAM(); memblock_reserve(r1.base, r1.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2054,7 +2065,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void) * Expect to drop the lower limit and allocate a memory region at the beginning * of the requested node. */ -static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) +static int alloc_nid_bottom_up_numa_split_range_low_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2072,8 +2083,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) min_addr = req_node_end - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2110,7 +2121,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void) * Expect to drop the lower limit and allocate a memory region at the beginning * of the first node that has enough memory. */ -static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) +static int alloc_nid_bottom_up_numa_split_range_high_check(void) { int nid_req = 3; int nid_exp = 0; @@ -2130,8 +2141,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) min_addr = req_node->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2168,7 +2179,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void) * Expect to drop the lower limit and allocate a memory region that starts at * the beginning of the requested node. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_split_check(void) { int nid_req = 2; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2186,8 +2197,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) min_addr = node2->base - SZ_256; max_addr = min_addr + size; - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2225,7 +2236,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void) * Expect to allocate a memory region at the beginning of the first node * in the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_low_check(void) { int nid_req = 0; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2242,8 +2253,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2281,7 +2292,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void) * Expect to allocate a memory region at the beginning of the first node * in the range after falling back to NUMA_NO_NODE. */ -static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) +static int alloc_nid_bottom_up_numa_no_overlap_high_check(void) { int nid_req = 7; struct memblock_region *new_rgn = &memblock.reserved.regions[0]; @@ -2298,8 +2309,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) min_addr = min_node->base; max_addr = region_end(max_node); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2330,7 +2341,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void) * * Expect no allocation to happen. */ -static int alloc_try_nid_numa_large_region_generic_check(void) +static int alloc_nid_numa_large_region_generic_check(void) { int nid_req = 3; void *allocated_ptr = NULL; @@ -2344,8 +2355,8 @@ static int alloc_try_nid_numa_large_region_generic_check(void) min_addr = memblock_start_of_DRAM(); max_addr = memblock_end_of_DRAM(); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_EQ(allocated_ptr, NULL); test_pass_pop(); @@ -2374,7 +2385,7 @@ static int alloc_try_nid_numa_large_region_generic_check(void) * Expect to merge all of the regions into one. The region counter and total * size fields get updated. */ -static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) +static int alloc_nid_numa_reserved_full_merge_generic_check(void) { int nid_req = 6; int nid_next = nid_req + 1; @@ -2404,8 +2415,8 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, nid_req); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, nid_req); ASSERT_NE(allocated_ptr, NULL); assert_mem_content(allocated_ptr, size, alloc_nid_test_flags); @@ -2448,7 +2459,7 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void) * * Expect no allocation to happen. */ -static int alloc_try_nid_numa_split_all_reserved_generic_check(void) +static int alloc_nid_numa_split_all_reserved_generic_check(void) { void *allocated_ptr = NULL; struct memblock_region *next_node = &memblock.memory.regions[7]; @@ -2472,9 +2483,9 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void) memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES, - min_addr, max_addr, - NUMA_NO_NODE); + allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES, + min_addr, max_addr, + NUMA_NO_NODE); ASSERT_EQ(allocated_ptr, NULL); @@ -2484,139 +2495,139 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void) } /* Test case wrappers for NUMA tests */ -static int alloc_try_nid_numa_simple_check(void) +static int alloc_nid_numa_simple_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_simple_check(); + alloc_nid_top_down_numa_simple_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_simple_check(); + alloc_nid_bottom_up_numa_simple_check(); return 0; } -static int alloc_try_nid_numa_small_node_check(void) +static int alloc_nid_numa_small_node_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_small_node_check(); + alloc_nid_top_down_numa_small_node_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_small_node_check(); + alloc_nid_bottom_up_numa_small_node_check(); return 0; } -static int alloc_try_nid_numa_node_reserved_check(void) +static int alloc_nid_numa_node_reserved_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_node_reserved_check(); + alloc_nid_top_down_numa_node_reserved_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_node_reserved_check(); + alloc_nid_bottom_up_numa_node_reserved_check(); return 0; } -static int alloc_try_nid_numa_part_reserved_check(void) +static int alloc_nid_numa_part_reserved_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_part_reserved_check(); + alloc_nid_top_down_numa_part_reserved_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_part_reserved_check(); + alloc_nid_bottom_up_numa_part_reserved_check(); return 0; } -static int alloc_try_nid_numa_part_reserved_fallback_check(void) +static int alloc_nid_numa_part_reserved_fallback_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_part_reserved_fallback_check(); + alloc_nid_top_down_numa_part_reserved_fallback_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(); + alloc_nid_bottom_up_numa_part_reserved_fallback_check(); return 0; } -static int alloc_try_nid_numa_split_range_low_check(void) +static int alloc_nid_numa_split_range_low_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_split_range_low_check(); + alloc_nid_top_down_numa_split_range_low_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_split_range_low_check(); + alloc_nid_bottom_up_numa_split_range_low_check(); return 0; } -static int alloc_try_nid_numa_split_range_high_check(void) +static int alloc_nid_numa_split_range_high_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_split_range_high_check(); + alloc_nid_top_down_numa_split_range_high_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_split_range_high_check(); + alloc_nid_bottom_up_numa_split_range_high_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_split_check(void) +static int alloc_nid_numa_no_overlap_split_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_split_check(); + alloc_nid_top_down_numa_no_overlap_split_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_split_check(); + alloc_nid_bottom_up_numa_no_overlap_split_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_low_check(void) +static int alloc_nid_numa_no_overlap_low_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_low_check(); + alloc_nid_top_down_numa_no_overlap_low_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_low_check(); + alloc_nid_bottom_up_numa_no_overlap_low_check(); return 0; } -static int alloc_try_nid_numa_no_overlap_high_check(void) +static int alloc_nid_numa_no_overlap_high_check(void) { test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); - alloc_try_nid_top_down_numa_no_overlap_high_check(); + alloc_nid_top_down_numa_no_overlap_high_check(); memblock_set_bottom_up(true); - alloc_try_nid_bottom_up_numa_no_overlap_high_check(); + alloc_nid_bottom_up_numa_no_overlap_high_check(); return 0; } -static int alloc_try_nid_numa_large_region_check(void) +static int alloc_nid_numa_large_region_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_large_region_generic_check); - run_bottom_up(alloc_try_nid_numa_large_region_generic_check); + run_top_down(alloc_nid_numa_large_region_generic_check); + run_bottom_up(alloc_nid_numa_large_region_generic_check); return 0; } -static int alloc_try_nid_numa_reserved_full_merge_check(void) +static int alloc_nid_numa_reserved_full_merge_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_reserved_full_merge_generic_check); - run_bottom_up(alloc_try_nid_numa_reserved_full_merge_generic_check); + run_top_down(alloc_nid_numa_reserved_full_merge_generic_check); + run_bottom_up(alloc_nid_numa_reserved_full_merge_generic_check); return 0; } -static int alloc_try_nid_numa_split_all_reserved_check(void) +static int alloc_nid_numa_split_all_reserved_check(void) { test_print("\tRunning %s...\n", __func__); - run_top_down(alloc_try_nid_numa_split_all_reserved_generic_check); - run_bottom_up(alloc_try_nid_numa_split_all_reserved_generic_check); + run_top_down(alloc_nid_numa_split_all_reserved_generic_check); + run_bottom_up(alloc_nid_numa_split_all_reserved_generic_check); return 0; } @@ -2624,22 +2635,22 @@ static int alloc_try_nid_numa_split_all_reserved_check(void) int __memblock_alloc_nid_numa_checks(void) { test_print("Running %s NUMA tests...\n", - get_memblock_alloc_try_nid_name(alloc_nid_test_flags)); + get_memblock_alloc_nid_name(alloc_nid_test_flags)); - alloc_try_nid_numa_simple_check(); - alloc_try_nid_numa_small_node_check(); - alloc_try_nid_numa_node_reserved_check(); - alloc_try_nid_numa_part_reserved_check(); - alloc_try_nid_numa_part_reserved_fallback_check(); - alloc_try_nid_numa_split_range_low_check(); - alloc_try_nid_numa_split_range_high_check(); + alloc_nid_numa_simple_check(); + alloc_nid_numa_small_node_check(); + alloc_nid_numa_node_reserved_check(); + alloc_nid_numa_part_reserved_check(); + alloc_nid_numa_part_reserved_fallback_check(); + alloc_nid_numa_split_range_low_check(); + alloc_nid_numa_split_range_high_check(); - alloc_try_nid_numa_no_overlap_split_check(); - alloc_try_nid_numa_no_overlap_low_check(); - alloc_try_nid_numa_no_overlap_high_check(); - alloc_try_nid_numa_large_region_check(); - alloc_try_nid_numa_reserved_full_merge_check(); - alloc_try_nid_numa_split_all_reserved_check(); + alloc_nid_numa_no_overlap_split_check(); + alloc_nid_numa_no_overlap_low_check(); + alloc_nid_numa_no_overlap_high_check(); + alloc_nid_numa_large_region_check(); + alloc_nid_numa_reserved_full_merge_check(); + alloc_nid_numa_split_all_reserved_check(); return 0; } @@ -2649,7 +2660,7 @@ static int memblock_alloc_nid_checks_internal(int flags) alloc_nid_test_flags = flags; prefix_reset(); - prefix_push(get_memblock_alloc_try_nid_name(flags)); + prefix_push(get_memblock_alloc_nid_name(flags)); reset_memblock_attributes(); dummy_physical_memory_init(); @@ -2671,3 +2682,12 @@ int memblock_alloc_nid_checks(void) return 0; } + +int memblock_alloc_exact_nid_range_checks(void) +{ + alloc_nid_test_flags = (TEST_F_RAW | TEST_F_EXACT); + + memblock_alloc_nid_range_checks(); + + return 0; +} diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h index 92d07d230e18..2b8cabacacb8 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.h +++ b/tools/testing/memblock/tests/alloc_nid_api.h @@ -5,6 +5,7 @@ #include "common.h" int memblock_alloc_nid_checks(void); +int memblock_alloc_exact_nid_range_checks(void); int __memblock_alloc_nid_numa_checks(void); #ifdef CONFIG_NUMA diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index a13a57ba0815..411647094cc3 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -423,6 +423,98 @@ static int memblock_add_near_max_check(void) return 0; } +/* + * A test that trying to add the 129th memory block. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as + * memory.regions. + */ +static int memblock_add_many_check(void) +{ + int i; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_memory_regions_size; + phys_addr_t base, size = SZ_64; + phys_addr_t gap_size = SZ_64; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + /* + * We allocated enough memory by using dummy_physical_memory_init(), and + * split it into small block. First we split a large enough memory block + * as the memory region which will be choosed by memblock_double_array(). + */ + base = PAGE_ALIGN(dummy_physical_memory_base()); + new_memory_regions_size = PAGE_ALIGN(INIT_MEMBLOCK_REGIONS * 2 * + sizeof(struct memblock_region)); + memblock_add(base, new_memory_regions_size); + + /* This is the base of small memory block. */ + base += new_memory_regions_size + gap_size; + + orig_region = memblock.memory.regions; + + for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) { + /* + * Add these small block to fulfill the memblock. We keep a + * gap between the nearby memory to avoid being merged. + */ + memblock_add(base, size); + base += size + gap_size; + + ASSERT_EQ(memblock.memory.cnt, i + 2); + ASSERT_EQ(memblock.memory.total_size, new_memory_regions_size + + (i + 1) * size); + } + + /* + * At there, memblock_double_array() has been succeed, check if it + * update the memory.max. + */ + ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2); + + /* memblock_double_array() will reserve the memory it used. Check it. */ + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, new_memory_regions_size); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_add() still works as normal. + */ + memblock_add(r.base, r.size); + ASSERT_EQ(memblock.memory.regions[0].base, r.base); + ASSERT_EQ(memblock.memory.regions[0].size, r.size); + + ASSERT_EQ(memblock.memory.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.memory.total_size, INIT_MEMBLOCK_REGIONS * size + + new_memory_regions_size + + r.size); + ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current memory.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.memory.regions = orig_region; + memblock.memory.cnt = INIT_MEMBLOCK_REGIONS; + + test_pass_pop(); + + return 0; +} + static int memblock_add_checks(void) { prefix_reset(); @@ -438,6 +530,7 @@ static int memblock_add_checks(void) memblock_add_twice_check(); memblock_add_between_check(); memblock_add_near_max_check(); + memblock_add_many_check(); prefix_pop(); @@ -799,6 +892,96 @@ static int memblock_reserve_near_max_check(void) return 0; } +/* + * A test that trying to reserve the 129th memory block. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as + * reserved.regions. + */ +static int memblock_reserve_many_check(void) +{ + int i; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t memory_base = SZ_128K; + phys_addr_t new_reserved_regions_size; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + /* Add a valid memory region used by double_array(). */ + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + + for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) { + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(memory_base, MEM_SIZE); + + ASSERT_EQ(memblock.reserved.cnt, i + 1); + ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE); + + /* Keep the gap so these memory region will not be merged. */ + memory_base += MEM_SIZE * 2; + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(memory_base, MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. + */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. + */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -813,6 +996,7 @@ static int memblock_reserve_checks(void) memblock_reserve_twice_check(); memblock_reserve_between_check(); memblock_reserve_near_max_check(); + memblock_reserve_many_check(); prefix_pop(); diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index 3f795047bbe1..f43b6f414983 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -5,8 +5,6 @@ #include <linux/memory_hotplug.h> #include <linux/build_bug.h> -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS #define PREFIXES_MAX 15 #define DELIM ": " #define BASIS 10000 @@ -115,6 +113,11 @@ void dummy_physical_memory_cleanup(void) free(memory_block.base); } +phys_addr_t dummy_physical_memory_base(void) +{ + return (phys_addr_t)memory_block.base; +} + static void usage(const char *prog) { BUILD_BUG_ON(ARRAY_SIZE(help_opts) != ARRAY_SIZE(long_opts) - 1); diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index d6bbbe63bfc3..4f23302ee677 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -10,14 +10,19 @@ #include <linux/printk.h> #include <../selftests/kselftest.h> -#define MEM_SIZE SZ_16K +#define MEM_SIZE SZ_32K #define NUMA_NODES 8 +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS + enum test_flags { /* No special request. */ TEST_F_NONE = 0x0, /* Perform raw allocations (no zeroing of memory). */ TEST_F_RAW = 0x1, + /* Perform allocations on the exact node specified. */ + TEST_F_EXACT = 0x2 }; /** @@ -124,6 +129,7 @@ void setup_memblock(void); void setup_numa_memblock(const unsigned int node_fracs[]); void dummy_physical_memory_init(void); void dummy_physical_memory_cleanup(void); +phys_addr_t dummy_physical_memory_base(void); void parse_args(int argc, char **argv); void test_fail(void); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 5eb5c23b062f..8153251ea389 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -79,7 +79,6 @@ libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o -libnvdimm-y += dimm_devs.o libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c deleted file mode 100644 index 57bd27dedf1f..000000000000 --- a/tools/testing/nvdimm/dimm_devs.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright Intel Corp. 2018 */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/nd.h> -#include "pmem.h" -#include "pfn.h" -#include "nd.h" -#include "nd-core.h" - -ssize_t security_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvdimm *nvdimm = to_nvdimm(dev); - - /* - * For the test version we need to poll the "hardware" in order - * to get the updated status for unlock testing. - */ - nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); - - if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) - return sprintf(buf, "disabled\n"); - if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) - return sprintf(buf, "unlocked\n"); - if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) - return sprintf(buf, "locked\n"); - return -ENOTTY; -} diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 2e91973fbaa6..81fa7ec2e66a 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * maple_tree.c: Userspace shim for maple tree test-suite - * Copyright (c) 2018 Liam R. Howlett <Liam.Howlett@Oracle.com> + * maple_tree.c: Userspace testing for maple tree test-suite + * Copyright (c) 2018-2022 Oracle Corporation + * Author: Liam R. Howlett <Liam.Howlett@Oracle.com> * * Any tests that require internal knowledge of the tree or threads and other * difficult to handle in kernel tests. diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f07aef7c592c..0ebd9cce49b9 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -26,7 +26,9 @@ TARGETS += fpu TARGETS += ftrace TARGETS += futex TARGETS += gpio +TARGETS += hid TARGETS += intel_pstate +TARGETS += iommu TARGETS += ipc TARGETS += ir TARGETS += kcmp @@ -48,6 +50,7 @@ TARGETS += nci TARGETS += net TARGETS += net/af_unix TARGETS += net/forwarding +TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += netfilter @@ -74,6 +77,7 @@ TARGETS += sync TARGETS += syscall_user_dispatch TARGETS += sysctl TARGETS += tc-testing +TARGETS += tdx TARGETS += timens ifneq (1, $(quicktest)) TARGETS += timers @@ -233,8 +237,8 @@ ifdef INSTALL_PATH @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ - echo -ne "Emit Tests for $$TARGET\n"; \ + [ ! -d $(INSTALL_PATH)/$$TARGET ] && printf "Skipping non-existent dir: $$TARGET\n" && continue; \ + printf "Emit Tests for $$TARGET\n"; \ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ -C $$TARGET emit_tests >> $(TEST_LIST); \ done; diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore index 3bb7c41266a8..2b0d11797f25 100644 --- a/tools/testing/selftests/alsa/.gitignore +++ b/tools/testing/selftests/alsa/.gitignore @@ -1 +1,2 @@ mixer-test +pcm-test diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index fd8ddce2b1a6..901949db80ad 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -6,7 +6,22 @@ LDLIBS += $(shell pkg-config --libs alsa) ifeq ($(LDLIBS),) LDLIBS += -lasound endif +CFLAGS += -L$(OUTPUT) -Wl,-rpath=./ -TEST_GEN_PROGS := mixer-test +LDLIBS+=-lpthread + +OVERRIDE_TARGETS = 1 + +TEST_GEN_PROGS := mixer-test pcm-test + +TEST_GEN_PROGS_EXTENDED := libatest.so + +TEST_FILES := conf.d pcm-test.conf include ../lib.mk + +$(OUTPUT)/libatest.so: conf.c alsa-local.h + $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ + +$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h + $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@ diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h new file mode 100644 index 000000000000..de030dc23bd1 --- /dev/null +++ b/tools/testing/selftests/alsa/alsa-local.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest configuration helpers for the hw specific configuration +// +// Original author: Jaroslav Kysela <perex@perex.cz> +// Copyright (c) 2022 Red Hat Inc. + +#ifndef __ALSA_LOCAL_H +#define __ALSA_LOCAL_H + +#include <alsa/asoundlib.h> + +snd_config_t *get_alsalib_config(void); + +snd_config_t *conf_load_from_file(const char *filename); +void conf_load(void); +void conf_free(void); +snd_config_t *conf_by_card(int card); +snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2); +int conf_get_count(snd_config_t *root, const char *key1, const char *key2); +const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def); +long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def); +int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def); +void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2, + const char **array, int array_size, const char *def); + +#endif /* __ALSA_LOCAL_H */ diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c new file mode 100644 index 000000000000..d7aafe5a1993 --- /dev/null +++ b/tools/testing/selftests/alsa/conf.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest configuration helpers for the hw specific configuration +// +// Original author: Jaroslav Kysela <perex@perex.cz> +// Copyright (c) 2022 Red Hat Inc. + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <assert.h> +#include <dirent.h> +#include <regex.h> +#include <sys/stat.h> + +#include "../kselftest.h" +#include "alsa-local.h" + +#define SYSFS_ROOT "/sys" + +struct card_data { + int card; + snd_config_t *config; + const char *filename; + struct card_data *next; +}; + +static struct card_data *conf_cards; + +static const char *alsa_config = +"ctl.hw {\n" +" @args [ CARD ]\n" +" @args.CARD.type string\n" +" type hw\n" +" card $CARD\n" +"}\n" +"pcm.hw {\n" +" @args [ CARD DEV SUBDEV ]\n" +" @args.CARD.type string\n" +" @args.DEV.type integer\n" +" @args.SUBDEV.type integer\n" +" type hw\n" +" card $CARD\n" +" device $DEV\n" +" subdevice $SUBDEV\n" +"}\n" +; + +#ifdef SND_LIB_VER +#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6) +#define LIB_HAS_LOAD_STRING +#endif +#endif + +#ifndef LIB_HAS_LOAD_STRING +static int snd_config_load_string(snd_config_t **config, const char *s, + size_t size) +{ + snd_input_t *input; + snd_config_t *dst; + int err; + + assert(config && s); + if (size == 0) + size = strlen(s); + err = snd_input_buffer_open(&input, s, size); + if (err < 0) + return err; + err = snd_config_top(&dst); + if (err < 0) { + snd_input_close(input); + return err; + } + err = snd_config_load(dst, input); + snd_input_close(input); + if (err < 0) { + snd_config_delete(dst); + return err; + } + *config = dst; + return 0; +} +#endif + +snd_config_t *get_alsalib_config(void) +{ + snd_config_t *config; + int err; + + err = snd_config_load_string(&config, alsa_config, strlen(alsa_config)); + if (err < 0) { + ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n", + snd_strerror(err)); + ksft_exit_fail(); + } + return config; +} + +static struct card_data *conf_data_by_card(int card, bool msg) +{ + struct card_data *conf; + + for (conf = conf_cards; conf; conf = conf->next) { + if (conf->card == card) { + if (msg) + ksft_print_msg("using hw card config %s for card %d\n", + conf->filename, card); + return conf; + } + } + return NULL; +} + +static int dump_config_tree(snd_config_t *top) +{ + snd_output_t *out; + int err; + + err = snd_output_stdio_attach(&out, stdout, 0); + if (err < 0) + ksft_exit_fail_msg("stdout attach\n"); + if (snd_config_save(top, out)) + ksft_exit_fail_msg("config save\n"); + snd_output_close(out); +} + +snd_config_t *conf_load_from_file(const char *filename) +{ + snd_config_t *dst; + snd_input_t *input; + int err; + + err = snd_input_stdio_open(&input, filename, "r"); + if (err < 0) + ksft_exit_fail_msg("Unable to parse filename %s\n", filename); + err = snd_config_top(&dst); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + err = snd_config_load(dst, input); + snd_input_close(input); + if (err < 0) + ksft_exit_fail_msg("Unable to parse filename %s\n", filename); + return dst; +} + +static char *sysfs_get(const char *sysfs_root, const char *id) +{ + char path[PATH_MAX], link[PATH_MAX + 1]; + struct stat sb; + ssize_t len; + char *e; + int fd; + + if (id[0] == '/') + id++; + snprintf(path, sizeof(path), "%s/%s", sysfs_root, id); + if (lstat(path, &sb) != 0) + return NULL; + if (S_ISLNK(sb.st_mode)) { + len = readlink(path, link, sizeof(link) - 1); + if (len <= 0) { + ksft_exit_fail_msg("sysfs: cannot read link '%s': %s\n", + path, strerror(errno)); + return NULL; + } + link[len] = '\0'; + e = strrchr(link, '/'); + if (e) + return strdup(e + 1); + return NULL; + } + if (S_ISDIR(sb.st_mode)) + return NULL; + if ((sb.st_mode & S_IRUSR) == 0) + return NULL; + + fd = open(path, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) + return NULL; + ksft_exit_fail_msg("sysfs: open failed for '%s': %s\n", + path, strerror(errno)); + } + len = read(fd, path, sizeof(path)-1); + close(fd); + if (len < 0) + ksft_exit_fail_msg("sysfs: unable to read value '%s': %s\n", + path, errno); + while (len > 0 && path[len-1] == '\n') + len--; + path[len] = '\0'; + e = strdup(path); + if (e == NULL) + ksft_exit_fail_msg("Out of memory\n"); + return e; +} + +static bool sysfs_match(const char *sysfs_root, snd_config_t *config) +{ + snd_config_t *node, *path_config, *regex_config; + snd_config_iterator_t i, next; + const char *path_string, *regex_string, *v; + regex_t re; + regmatch_t match[1]; + int iter = 0, ret; + + snd_config_for_each(i, next, config) { + node = snd_config_iterator_entry(i); + if (snd_config_search(node, "path", &path_config)) + ksft_exit_fail_msg("Missing path field in the sysfs block\n"); + if (snd_config_search(node, "regex", ®ex_config)) + ksft_exit_fail_msg("Missing regex field in the sysfs block\n"); + if (snd_config_get_string(path_config, &path_string)) + ksft_exit_fail_msg("Path field in the sysfs block is not a string\n"); + if (snd_config_get_string(regex_config, ®ex_string)) + ksft_exit_fail_msg("Regex field in the sysfs block is not a string\n"); + iter++; + v = sysfs_get(sysfs_root, path_string); + if (!v) + return false; + if (regcomp(&re, regex_string, REG_EXTENDED)) + ksft_exit_fail_msg("Wrong regex '%s'\n", regex_string); + ret = regexec(&re, v, 1, match, 0); + regfree(&re); + if (ret) + return false; + } + return iter > 0; +} + +static bool test_filename1(int card, const char *filename, const char *sysfs_card_root) +{ + struct card_data *data, *data2; + snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node; + snd_config_iterator_t i, next; + + config = conf_load_from_file(filename); + if (snd_config_search(config, "sysfs", &sysfs_config) || + snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename); + if (snd_config_search(config, "card", &card_config) || + snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing global card block in filename %s\n", filename); + if (!sysfs_match(SYSFS_ROOT, sysfs_config)) + return false; + snd_config_for_each(i, next, card_config) { + node = snd_config_iterator_entry(i); + if (snd_config_search(node, "sysfs", &sysfs_card_config) || + snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename); + if (!sysfs_match(sysfs_card_root, sysfs_card_config)) + continue; + data = malloc(sizeof(*data)); + if (!data) + ksft_exit_fail_msg("Out of memory\n"); + data2 = conf_data_by_card(card, false); + if (data2) + ksft_exit_fail_msg("Duplicate card '%s' <-> '%s'\n", filename, data2->filename); + data->card = card; + data->filename = filename; + data->config = node; + data->next = conf_cards; + conf_cards = data; + return true; + } + return false; +} + +static bool test_filename(const char *filename) +{ + char fn[128]; + int card; + + for (card = 0; card < 32; card++) { + snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card); + if (access(fn, R_OK) == 0 && test_filename1(card, filename, fn)) + return true; + } + return false; +} + +static int filename_filter(const struct dirent *dirent) +{ + size_t flen; + + if (dirent == NULL) + return 0; + if (dirent->d_type == DT_DIR) + return 0; + flen = strlen(dirent->d_name); + if (flen <= 5) + return 0; + if (strncmp(&dirent->d_name[flen-5], ".conf", 5) == 0) + return 1; + return 0; +} + +void conf_load(void) +{ + const char *fn = "conf.d"; + struct dirent **namelist; + int n, j; + + n = scandir(fn, &namelist, filename_filter, alphasort); + if (n < 0) + ksft_exit_fail_msg("scandir: %s\n", strerror(errno)); + for (j = 0; j < n; j++) { + size_t sl = strlen(fn) + strlen(namelist[j]->d_name) + 2; + char *filename = malloc(sl); + if (filename == NULL) + ksft_exit_fail_msg("Out of memory\n"); + sprintf(filename, "%s/%s", fn, namelist[j]->d_name); + if (test_filename(filename)) + filename = NULL; + free(filename); + free(namelist[j]); + } + free(namelist); +} + +void conf_free(void) +{ + struct card_data *conf; + + while (conf_cards) { + conf = conf_cards; + conf_cards = conf->next; + snd_config_delete(conf->config); + } +} + +snd_config_t *conf_by_card(int card) +{ + struct card_data *conf; + + conf = conf_data_by_card(card, true); + if (conf) + return conf->config; + return NULL; +} + +static int conf_get_by_keys(snd_config_t *root, const char *key1, + const char *key2, snd_config_t **result) +{ + int ret; + + if (key1) { + ret = snd_config_search(root, key1, &root); + if (ret != -ENOENT && ret < 0) + return ret; + } + if (key2) + ret = snd_config_search(root, key2, &root); + if (ret >= 0) + *result = root; + return ret; +} + +snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2) +{ + int ret; + + if (!root) + return NULL; + ret = conf_get_by_keys(root, key1, key2, &root); + if (ret == -ENOENT) + return NULL; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + return root; +} + +int conf_get_count(snd_config_t *root, const char *key1, const char *key2) +{ + snd_config_t *cfg; + snd_config_iterator_t i, next; + int count, ret; + + if (!root) + return -1; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return -1; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + if (snd_config_get_type(cfg) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("key '%s'.'%s' is not a compound\n", key1, key2); + count = 0; + snd_config_for_each(i, next, cfg) + count++; + return count; +} + +const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def) +{ + snd_config_t *cfg; + const char *s; + int ret; + + if (!root) + return def; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return def; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + if (snd_config_get_string(cfg, &s)) + ksft_exit_fail_msg("key '%s'.'%s' is not a string\n", key1, key2); + return s; +} + +long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def) +{ + snd_config_t *cfg; + long l; + int ret; + + if (!root) + return def; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return def; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + if (snd_config_get_integer(cfg, &l)) + ksft_exit_fail_msg("key '%s'.'%s' is not an integer\n", key1, key2); + return l; +} + +int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def) +{ + snd_config_t *cfg; + long l; + int ret; + + if (!root) + return def; + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + return def; + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + ret = snd_config_get_bool(cfg); + if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' is not an bool\n", key1, key2); + return !!ret; +} + +void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2, + const char **array, int array_size, const char *def) +{ + snd_config_t *cfg; + char buf[16]; + int ret, index; + + ret = conf_get_by_keys(root, key1, key2, &cfg); + if (ret == -ENOENT) + cfg = NULL; + else if (ret < 0) + ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret)); + for (index = 0; index < array_size; index++) { + if (cfg == NULL) { + array[index] = def; + } else { + sprintf(buf, "%i", index); + array[index] = conf_get_string(cfg, buf, NULL, def); + } + } +} diff --git a/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf new file mode 100644 index 000000000000..5b40a916295d --- /dev/null +++ b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf @@ -0,0 +1,84 @@ +# +# Example configuration for Lenovo ThinkPad P1 Gen2 +# + +# +# Use regex match for the string read from the given sysfs path +# +# The sysfs root directory (/sys) is hardwired in the test code +# (may be changed on demand). +# +# All strings must match. +# +sysfs [ + { + path "class/dmi/id/product_sku" + regex "LENOVO_MT_20QU_BU_Think_FM_ThinkPad P1 Gen 2" + } +] + +card.hda { + # + # Use regex match for the /sys/class/sound/card*/ tree (relative) + # + sysfs [ + { + path "device/subsystem_device" + regex "0x229e" + } + { + path "device/subsystem_vendor" + regex "0x17aa" + } + ] + + # + # PCM configuration + # + # pcm.0.0 - device 0 subdevice 0 + # + pcm.0.0 { + PLAYBACK { + test.time1 { + access RW_INTERLEAVED # can be omitted - default + format S16_LE # can be omitted - default + rate 48000 # can be omitted - default + channels 2 # can be omitted - default + period_size 512 + buffer_size 4096 + } + test.time2 { + access RW_INTERLEAVED + format S16_LE + rate 48000 + channels 2 + period_size 24000 + buffer_size 192000 + } + test.time3 { + access RW_INTERLEAVED + format S16_LE + rate 44100 + channels 2 + period_size 24000 + buffer_size 192000 + } + } + CAPTURE { + # use default tests, check for the presence + } + } + # + # uncomment to force the missing device checks + # + #pcm.0.2 { + # PLAYBACK { + # # check for the presence + # } + #} + #pcm.0.3 { + # CAPTURE { + # # check for the presence + # } + #} +} diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index a38b89c28030..05f1749ae19d 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -26,6 +26,7 @@ #include <stdint.h> #include "../kselftest.h" +#include "alsa-local.h" #define TESTS_PER_CONTROL 7 @@ -50,56 +51,11 @@ struct ctl_data { struct ctl_data *next; }; -static const char *alsa_config = -"ctl.hw {\n" -" @args [ CARD ]\n" -" @args.CARD.type string\n" -" type hw\n" -" card $CARD\n" -"}\n" -; - int num_cards = 0; int num_controls = 0; struct card_data *card_list = NULL; struct ctl_data *ctl_list = NULL; -#ifdef SND_LIB_VER -#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6) -#define LIB_HAS_LOAD_STRING -#endif -#endif - -#ifndef LIB_HAS_LOAD_STRING -static int snd_config_load_string(snd_config_t **config, const char *s, - size_t size) -{ - snd_input_t *input; - snd_config_t *dst; - int err; - - assert(config && s); - if (size == 0) - size = strlen(s); - err = snd_input_buffer_open(&input, s, size); - if (err < 0) - return err; - err = snd_config_top(&dst); - if (err < 0) { - snd_input_close(input); - return err; - } - err = snd_config_load(dst, input); - snd_input_close(input); - if (err < 0) { - snd_config_delete(dst); - return err; - } - *config = dst; - return 0; -} -#endif - static void find_controls(void) { char name[32]; @@ -112,12 +68,7 @@ static void find_controls(void) if (snd_card_next(&card) < 0 || card < 0) return; - err = snd_config_load_string(&config, alsa_config, strlen(alsa_config)); - if (err < 0) { - ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n", - snd_strerror(err)); - ksft_exit_fail(); - } + config = get_alsalib_config(); while (card >= 0) { sprintf(name, "hw:%d", card); diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c new file mode 100644 index 000000000000..58b525a4a32c --- /dev/null +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest for the ALSA PCM API +// +// Original author: Jaroslav Kysela <perex@perex.cz> +// Copyright (c) 2022 Red Hat Inc. + +// This test will iterate over all cards detected in the system, exercising +// every PCM device it can find. This may conflict with other system +// software if there is audio activity so is best run on a system with a +// minimal active userspace. + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <assert.h> +#include <pthread.h> + +#include "../kselftest.h" +#include "alsa-local.h" + +typedef struct timespec timestamp_t; + +struct card_data { + int card; + pthread_t thread; + struct card_data *next; +}; + +struct card_data *card_list = NULL; + +struct pcm_data { + snd_pcm_t *handle; + int card; + int device; + int subdevice; + snd_pcm_stream_t stream; + snd_config_t *pcm_config; + struct pcm_data *next; +}; + +struct pcm_data *pcm_list = NULL; + +int num_missing = 0; +struct pcm_data *pcm_missing = NULL; + +snd_config_t *default_pcm_config; + +/* Lock while reporting results since kselftest doesn't */ +pthread_mutex_t results_lock = PTHREAD_MUTEX_INITIALIZER; + +enum test_class { + TEST_CLASS_DEFAULT, + TEST_CLASS_SYSTEM, +}; + +void timestamp_now(timestamp_t *tstamp) +{ + if (clock_gettime(CLOCK_MONOTONIC_RAW, tstamp)) + ksft_exit_fail_msg("clock_get_time\n"); +} + +long long timestamp_diff_ms(timestamp_t *tstamp) +{ + timestamp_t now, diff; + timestamp_now(&now); + if (tstamp->tv_nsec > now.tv_nsec) { + diff.tv_sec = now.tv_sec - tstamp->tv_sec - 1; + diff.tv_nsec = (now.tv_nsec + 1000000000L) - tstamp->tv_nsec; + } else { + diff.tv_sec = now.tv_sec - tstamp->tv_sec; + diff.tv_nsec = now.tv_nsec - tstamp->tv_nsec; + } + return (diff.tv_sec * 1000) + ((diff.tv_nsec + 500000L) / 1000000L); +} + +static long device_from_id(snd_config_t *node) +{ + const char *id; + char *end; + long v; + + if (snd_config_get_id(node, &id)) + ksft_exit_fail_msg("snd_config_get_id\n"); + errno = 0; + v = strtol(id, &end, 10); + if (errno || *end) + return -1; + return v; +} + +static void missing_device(int card, int device, int subdevice, snd_pcm_stream_t stream) +{ + struct pcm_data *pcm_data; + + for (pcm_data = pcm_list; pcm_data != NULL; pcm_data = pcm_data->next) { + if (pcm_data->card != card) + continue; + if (pcm_data->device != device) + continue; + if (pcm_data->subdevice != subdevice) + continue; + if (pcm_data->stream != stream) + continue; + return; + } + pcm_data = calloc(1, sizeof(*pcm_data)); + if (!pcm_data) + ksft_exit_fail_msg("Out of memory\n"); + pcm_data->card = card; + pcm_data->device = device; + pcm_data->subdevice = subdevice; + pcm_data->stream = stream; + pcm_data->next = pcm_missing; + pcm_missing = pcm_data; + num_missing++; +} + +static void missing_devices(int card, snd_config_t *card_config) +{ + snd_config_t *pcm_config, *node1, *node2; + snd_config_iterator_t i1, i2, next1, next2; + int device, subdevice; + + pcm_config = conf_get_subtree(card_config, "pcm", NULL); + if (!pcm_config) + return; + snd_config_for_each(i1, next1, pcm_config) { + node1 = snd_config_iterator_entry(i1); + device = device_from_id(node1); + if (device < 0) + continue; + if (snd_config_get_type(node1) != SND_CONFIG_TYPE_COMPOUND) + continue; + snd_config_for_each(i2, next2, node1) { + node2 = snd_config_iterator_entry(i2); + subdevice = device_from_id(node2); + if (subdevice < 0) + continue; + if (conf_get_subtree(node2, "PLAYBACK", NULL)) + missing_device(card, device, subdevice, SND_PCM_STREAM_PLAYBACK); + if (conf_get_subtree(node2, "CAPTURE", NULL)) + missing_device(card, device, subdevice, SND_PCM_STREAM_CAPTURE); + } + } +} + +static void find_pcms(void) +{ + char name[32], key[64]; + int card, dev, subdev, count, direction, err; + snd_pcm_stream_t stream; + struct pcm_data *pcm_data; + snd_ctl_t *handle; + snd_pcm_info_t *pcm_info; + snd_config_t *config, *card_config, *pcm_config; + struct card_data *card_data; + + snd_pcm_info_alloca(&pcm_info); + + card = -1; + if (snd_card_next(&card) < 0 || card < 0) + return; + + config = get_alsalib_config(); + + while (card >= 0) { + sprintf(name, "hw:%d", card); + + err = snd_ctl_open_lconf(&handle, name, 0, config); + if (err < 0) { + ksft_print_msg("Failed to get hctl for card %d: %s\n", + card, snd_strerror(err)); + goto next_card; + } + + card_config = conf_by_card(card); + + card_data = calloc(1, sizeof(*card_data)); + if (!card_data) + ksft_exit_fail_msg("Out of memory\n"); + card_data->card = card; + card_data->next = card_list; + card_list = card_data; + + dev = -1; + while (1) { + if (snd_ctl_pcm_next_device(handle, &dev) < 0) + ksft_exit_fail_msg("snd_ctl_pcm_next_device\n"); + if (dev < 0) + break; + + for (direction = 0; direction < 2; direction++) { + stream = direction ? SND_PCM_STREAM_CAPTURE : SND_PCM_STREAM_PLAYBACK; + sprintf(key, "pcm.%d.%s", dev, snd_pcm_stream_name(stream)); + pcm_config = conf_get_subtree(card_config, key, NULL); + if (conf_get_bool(card_config, key, "skip", false)) { + ksft_print_msg("skipping pcm %d.%d.%s\n", card, dev, snd_pcm_stream_name(stream)); + continue; + } + snd_pcm_info_set_device(pcm_info, dev); + snd_pcm_info_set_subdevice(pcm_info, 0); + snd_pcm_info_set_stream(pcm_info, stream); + err = snd_ctl_pcm_info(handle, pcm_info); + if (err == -ENOENT) + continue; + if (err < 0) + ksft_exit_fail_msg("snd_ctl_pcm_info: %d:%d:%d\n", + dev, 0, stream); + count = snd_pcm_info_get_subdevices_count(pcm_info); + for (subdev = 0; subdev < count; subdev++) { + sprintf(key, "pcm.%d.%d.%s", dev, subdev, snd_pcm_stream_name(stream)); + if (conf_get_bool(card_config, key, "skip", false)) { + ksft_print_msg("skipping pcm %d.%d.%d.%s\n", card, dev, + subdev, snd_pcm_stream_name(stream)); + continue; + } + pcm_data = calloc(1, sizeof(*pcm_data)); + if (!pcm_data) + ksft_exit_fail_msg("Out of memory\n"); + pcm_data->card = card; + pcm_data->device = dev; + pcm_data->subdevice = subdev; + pcm_data->stream = stream; + pcm_data->pcm_config = conf_get_subtree(card_config, key, NULL); + pcm_data->next = pcm_list; + pcm_list = pcm_data; + } + } + } + + /* check for missing devices */ + missing_devices(card, card_config); + + next_card: + snd_ctl_close(handle); + if (snd_card_next(&card) < 0) { + ksft_print_msg("snd_card_next"); + break; + } + } + + snd_config_delete(config); +} + +static void test_pcm_time(struct pcm_data *data, enum test_class class, + const char *test_name, snd_config_t *pcm_cfg) +{ + char name[64], key[128], msg[256]; + const char *cs; + int i, err; + snd_pcm_t *handle = NULL; + snd_pcm_access_t access = SND_PCM_ACCESS_RW_INTERLEAVED; + snd_pcm_format_t format, old_format; + const char *alt_formats[8]; + unsigned char *samples = NULL; + snd_pcm_sframes_t frames; + long long ms; + long rate, channels, period_size, buffer_size; + unsigned int rrate; + snd_pcm_uframes_t rperiod_size, rbuffer_size, start_threshold; + timestamp_t tstamp; + bool pass = false; + snd_pcm_hw_params_t *hw_params; + snd_pcm_sw_params_t *sw_params; + const char *test_class_name; + bool skip = true; + const char *desc; + + switch (class) { + case TEST_CLASS_DEFAULT: + test_class_name = "default"; + break; + case TEST_CLASS_SYSTEM: + test_class_name = "system"; + break; + default: + ksft_exit_fail_msg("Unknown test class %d\n", class); + break; + } + + desc = conf_get_string(pcm_cfg, "description", NULL, NULL); + if (desc) + ksft_print_msg("%s.%s.%d.%d.%d.%s - %s\n", + test_class_name, test_name, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + desc); + + + snd_pcm_hw_params_alloca(&hw_params); + snd_pcm_sw_params_alloca(&sw_params); + + cs = conf_get_string(pcm_cfg, "format", NULL, "S16_LE"); + format = snd_pcm_format_value(cs); + if (format == SND_PCM_FORMAT_UNKNOWN) + ksft_exit_fail_msg("Wrong format '%s'\n", cs); + conf_get_string_array(pcm_cfg, "alt_formats", NULL, + alt_formats, ARRAY_SIZE(alt_formats), NULL); + rate = conf_get_long(pcm_cfg, "rate", NULL, 48000); + channels = conf_get_long(pcm_cfg, "channels", NULL, 2); + period_size = conf_get_long(pcm_cfg, "period_size", NULL, 4096); + buffer_size = conf_get_long(pcm_cfg, "buffer_size", NULL, 16384); + + samples = malloc((rate * channels * snd_pcm_format_physical_width(format)) / 8); + if (!samples) + ksft_exit_fail_msg("Out of memory\n"); + snd_pcm_format_set_silence(format, samples, rate * channels); + + sprintf(name, "hw:%d,%d,%d", data->card, data->device, data->subdevice); + err = snd_pcm_open(&handle, name, data->stream, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "Failed to get pcm handle: %s", snd_strerror(err)); + goto __close; + } + + err = snd_pcm_hw_params_any(handle, hw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_any: %s", snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params_set_rate_resample(handle, hw_params, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate_resample: %s", snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params_set_access(handle, hw_params, access); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_access %s: %s", + snd_pcm_access_name(access), snd_strerror(err)); + goto __close; + } + i = -1; +__format: + err = snd_pcm_hw_params_set_format(handle, hw_params, format); + if (err < 0) { + i++; + if (i < ARRAY_SIZE(alt_formats) && alt_formats[i]) { + old_format = format; + format = snd_pcm_format_value(alt_formats[i]); + if (format != SND_PCM_FORMAT_UNKNOWN) { + ksft_print_msg("%s.%d.%d.%d.%s.%s format %s -> %s\n", + test_name, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + snd_pcm_access_name(access), + snd_pcm_format_name(old_format), + snd_pcm_format_name(format)); + samples = realloc(samples, (rate * channels * + snd_pcm_format_physical_width(format)) / 8); + if (!samples) + ksft_exit_fail_msg("Out of memory\n"); + snd_pcm_format_set_silence(format, samples, rate * channels); + goto __format; + } + } + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_format %s: %s", + snd_pcm_format_name(format), snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params_set_channels(handle, hw_params, channels); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_channels %ld: %s", channels, snd_strerror(err)); + goto __close; + } + rrate = rate; + err = snd_pcm_hw_params_set_rate_near(handle, hw_params, &rrate, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate %ld: %s", rate, snd_strerror(err)); + goto __close; + } + if (rrate != rate) { + snprintf(msg, sizeof(msg), "rate mismatch %ld != %ld", rate, rrate); + goto __close; + } + rperiod_size = period_size; + err = snd_pcm_hw_params_set_period_size_near(handle, hw_params, &rperiod_size, 0); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_period_size %ld: %s", period_size, snd_strerror(err)); + goto __close; + } + rbuffer_size = buffer_size; + err = snd_pcm_hw_params_set_buffer_size_near(handle, hw_params, &rbuffer_size); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_buffer_size %ld: %s", buffer_size, snd_strerror(err)); + goto __close; + } + err = snd_pcm_hw_params(handle, hw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_hw_params: %s", snd_strerror(err)); + goto __close; + } + + err = snd_pcm_sw_params_current(handle, sw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params_current: %s", snd_strerror(err)); + goto __close; + } + if (data->stream == SND_PCM_STREAM_PLAYBACK) { + start_threshold = (rbuffer_size / rperiod_size) * rperiod_size; + } else { + start_threshold = rperiod_size; + } + err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, start_threshold); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_start_threshold %ld: %s", (long)start_threshold, snd_strerror(err)); + goto __close; + } + err = snd_pcm_sw_params_set_avail_min(handle, sw_params, rperiod_size); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_avail_min %ld: %s", (long)rperiod_size, snd_strerror(err)); + goto __close; + } + err = snd_pcm_sw_params(handle, sw_params); + if (err < 0) { + snprintf(msg, sizeof(msg), "snd_pcm_sw_params: %s", snd_strerror(err)); + goto __close; + } + + ksft_print_msg("%s.%s.%d.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n", + test_class_name, test_name, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + snd_pcm_access_name(access), + snd_pcm_format_name(format), + (long)rate, (long)channels, + (long)rperiod_size, (long)rbuffer_size, + (long)start_threshold); + + /* Set all the params, actually run the test */ + skip = false; + + timestamp_now(&tstamp); + for (i = 0; i < 4; i++) { + if (data->stream == SND_PCM_STREAM_PLAYBACK) { + frames = snd_pcm_writei(handle, samples, rate); + if (frames < 0) { + snprintf(msg, sizeof(msg), + "Write failed: expected %d, wrote %li", rate, frames); + goto __close; + } + if (frames < rate) { + snprintf(msg, sizeof(msg), + "expected %d, wrote %li", rate, frames); + goto __close; + } + } else { + frames = snd_pcm_readi(handle, samples, rate); + if (frames < 0) { + snprintf(msg, sizeof(msg), + "expected %d, wrote %li", rate, frames); + goto __close; + } + if (frames < rate) { + snprintf(msg, sizeof(msg), + "expected %d, wrote %li", rate, frames); + goto __close; + } + } + } + + snd_pcm_drain(handle); + ms = timestamp_diff_ms(&tstamp); + if (ms < 3900 || ms > 4100) { + snprintf(msg, sizeof(msg), "time mismatch: expected 4000ms got %lld", ms); + goto __close; + } + + msg[0] = '\0'; + pass = true; +__close: + pthread_mutex_lock(&results_lock); + + switch (class) { + case TEST_CLASS_SYSTEM: + test_class_name = "system"; + /* + * Anything specified as specific to this system + * should always be supported. + */ + ksft_test_result(!skip, "%s.%s.%d.%d.%d.%s.params\n", + test_class_name, test_name, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream)); + break; + default: + break; + } + + if (!skip) + ksft_test_result(pass, "%s.%s.%d.%d.%d.%s%s%s\n", + test_class_name, test_name, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + msg[0] ? " " : "", msg); + else + ksft_test_result_skip("%s.%s.%d.%d.%d.%s%s%s\n", + test_class_name, test_name, + data->card, data->device, data->subdevice, + snd_pcm_stream_name(data->stream), + msg[0] ? " " : "", msg); + + pthread_mutex_unlock(&results_lock); + + free(samples); + if (handle) + snd_pcm_close(handle); +} + +void run_time_tests(struct pcm_data *pcm, enum test_class class, + snd_config_t *cfg) +{ + const char *test_name, *test_type; + snd_config_t *pcm_cfg; + snd_config_iterator_t i, next; + + if (!cfg) + return; + + cfg = conf_get_subtree(cfg, "test", NULL); + if (cfg == NULL) + return; + + snd_config_for_each(i, next, cfg) { + pcm_cfg = snd_config_iterator_entry(i); + if (snd_config_get_id(pcm_cfg, &test_name) < 0) + ksft_exit_fail_msg("snd_config_get_id\n"); + test_type = conf_get_string(pcm_cfg, "type", NULL, "time"); + if (strcmp(test_type, "time") == 0) + test_pcm_time(pcm, class, test_name, pcm_cfg); + else + ksft_exit_fail_msg("unknown test type '%s'\n", test_type); + } +} + +void *card_thread(void *data) +{ + struct card_data *card = data; + struct pcm_data *pcm; + + for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) { + if (pcm->card != card->card) + continue; + + run_time_tests(pcm, TEST_CLASS_DEFAULT, default_pcm_config); + run_time_tests(pcm, TEST_CLASS_SYSTEM, pcm->pcm_config); + } + + return 0; +} + +int main(void) +{ + struct card_data *card; + struct pcm_data *pcm; + snd_config_t *global_config, *cfg, *pcm_cfg; + int num_pcm_tests = 0, num_tests, num_std_pcm_tests; + int ret; + void *thread_ret; + + ksft_print_header(); + + global_config = conf_load_from_file("pcm-test.conf"); + default_pcm_config = conf_get_subtree(global_config, "pcm", NULL); + if (default_pcm_config == NULL) + ksft_exit_fail_msg("default pcm test configuration (pcm compound) is missing\n"); + + conf_load(); + + find_pcms(); + + num_std_pcm_tests = conf_get_count(default_pcm_config, "test", NULL); + + for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) { + num_pcm_tests += num_std_pcm_tests; + cfg = pcm->pcm_config; + if (cfg == NULL) + continue; + /* Setting params is reported as a separate test */ + num_tests = conf_get_count(cfg, "test", NULL) * 2; + if (num_tests > 0) + num_pcm_tests += num_tests; + } + + ksft_set_plan(num_missing + num_pcm_tests); + + for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) { + ksft_test_result(false, "test.missing.%d.%d.%d.%s\n", + pcm->card, pcm->device, pcm->subdevice, + snd_pcm_stream_name(pcm->stream)); + } + + for (card = card_list; card != NULL; card = card->next) { + ret = pthread_create(&card->thread, NULL, card_thread, card); + if (ret != 0) { + ksft_exit_fail_msg("Failed to create card %d thread: %d (%s)\n", + card->card, ret, + strerror(errno)); + } + } + + for (card = card_list; card != NULL; card = card->next) { + ret = pthread_join(card->thread, &thread_ret); + if (ret != 0) { + ksft_exit_fail_msg("Failed to join card %d thread: %d (%s)\n", + card->card, ret, + strerror(errno)); + } + } + + snd_config_delete(global_config); + conf_free(); + + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/alsa/pcm-test.conf b/tools/testing/selftests/alsa/pcm-test.conf new file mode 100644 index 000000000000..71bd3f78a6f2 --- /dev/null +++ b/tools/testing/selftests/alsa/pcm-test.conf @@ -0,0 +1,63 @@ +pcm.test.time1 { + description "8kHz mono large periods" + format S16_LE + alt_formats [ S32_LE ] + rate 8000 + channels 1 + period_size 8000 + buffer_size 32000 +} +pcm.test.time2 { + description "8kHz stereo large periods" + format S16_LE + alt_formats [ S32_LE ] + rate 8000 + channels 2 + period_size 8000 + buffer_size 32000 +} +pcm.test.time3 { + description "44.1kHz stereo large periods" + format S16_LE + alt_formats [ S32_LE ] + rate 44100 + channels 2 + period_size 22500 + buffer_size 192000 +} +pcm.test.time4 { + description "48kHz stereo small periods" + format S16_LE + alt_formats [ S32_LE ] + rate 48000 + channels 2 + period_size 512 + buffer_size 4096 +} +pcm.test.time5 { + description "48kHz stereo large periods" + format S16_LE + alt_formats [ S32_LE ] + rate 48000 + channels 2 + period_size 24000 + buffer_size 192000 +} +pcm.test.time6 { + description "48kHz 6 channel large periods" + format S16_LE + alt_formats [ S32_LE ] + rate 48000 + channels 2 + period_size 48000 + buffer_size 576000 +} +pcm.test.time7 { + description "96kHz stereo large periods" + format S16_LE + alt_formats [ S32_LE ] + rate 96000 + channels 2 + period_size 48000 + buffer_size 192000 +} diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile index 199867f44b32..5fd1424db37d 100644 --- a/tools/testing/selftests/amd-pstate/Makefile +++ b/tools/testing/selftests/amd-pstate/Makefile @@ -4,6 +4,10 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := amd-pstate-ut.sh +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +TEST_PROGS := run.sh +TEST_FILES := basic.sh tbench.sh gitsource.sh include ../lib.mk diff --git a/tools/testing/selftests/amd-pstate/amd-pstate-ut.sh b/tools/testing/selftests/amd-pstate/amd-pstate-ut.sh deleted file mode 100755 index f8e82d91ffcf..000000000000 --- a/tools/testing/selftests/amd-pstate/amd-pstate-ut.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# amd-pstate-ut is a test module for testing the amd-pstate driver. -# It can only run on x86 architectures and current cpufreq driver -# must be amd-pstate. -# (1) It can help all users to verify their processor support -# (SBIOS/Firmware or Hardware). -# (2) Kernel can have a basic function test to avoid the kernel -# regression during the update. -# (3) We can introduce more functional or performance tests to align -# the result together, it will benefit power and performance scale optimization. - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -# amd-pstate-ut only run on x86/x86_64 AMD systems. -ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/') -VENDOR=$(cat /proc/cpuinfo | grep -m 1 'vendor_id' | awk '{print $NF}') - -if ! echo "$ARCH" | grep -q x86; then - echo "$0 # Skipped: Test can only run on x86 architectures." - exit $ksft_skip -fi - -if ! echo "$VENDOR" | grep -iq amd; then - echo "$0 # Skipped: Test can only run on AMD CPU." - echo "$0 # Current cpu vendor is $VENDOR." - exit $ksft_skip -fi - -scaling_driver=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver) -if [ "$scaling_driver" != "amd-pstate" ]; then - echo "$0 # Skipped: Test can only run on amd-pstate driver." - echo "$0 # Please set X86_AMD_PSTATE enabled." - echo "$0 # Current cpufreq scaling drvier is $scaling_driver." - exit $ksft_skip -fi - -msg="Skip all tests:" -if [ ! -w /dev ]; then - echo $msg please run this as root >&2 - exit $ksft_skip -fi - -if ! /sbin/modprobe -q -n amd-pstate-ut; then - echo "amd-pstate-ut: module amd-pstate-ut is not found [SKIP]" - exit $ksft_skip -fi -if /sbin/modprobe -q amd-pstate-ut; then - /sbin/modprobe -q -r amd-pstate-ut - echo "amd-pstate-ut: ok" -else - echo "amd-pstate-ut: [FAIL]" - exit 1 -fi diff --git a/tools/testing/selftests/amd-pstate/basic.sh b/tools/testing/selftests/amd-pstate/basic.sh new file mode 100755 index 000000000000..e4c43193e4a3 --- /dev/null +++ b/tools/testing/selftests/amd-pstate/basic.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# amd-pstate-ut is a test module for testing the amd-pstate driver. +# It can only run on x86 architectures and current cpufreq driver +# must be amd-pstate. +# (1) It can help all users to verify their processor support +# (SBIOS/Firmware or Hardware). +# (2) Kernel can have a basic function test to avoid the kernel +# regression during the update. +# (3) We can introduce more functional or performance tests to align +# the result together, it will benefit power and performance scale optimization. + +# protect against multiple inclusion +if [ $FILE_BASIC ]; then + return 0 +else + FILE_BASIC=DONE +fi + +amd_pstate_basic() +{ + printf "\n---------------------------------------------\n" + printf "*** Running AMD P-state ut ***" + printf "\n---------------------------------------------\n" + + if ! /sbin/modprobe -q -n amd-pstate-ut; then + echo "amd-pstate-ut: module amd-pstate-ut is not found [SKIP]" + exit $ksft_skip + fi + if /sbin/modprobe -q amd-pstate-ut; then + /sbin/modprobe -q -r amd-pstate-ut + echo "amd-pstate-basic: ok" + else + echo "amd-pstate-basic: [FAIL]" + exit 1 + fi +} diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh new file mode 100755 index 000000000000..dbc1fe45599d --- /dev/null +++ b/tools/testing/selftests/amd-pstate/gitsource.sh @@ -0,0 +1,354 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Testing and monitor the cpu desire performance, frequency, load, +# power consumption and throughput etc. when this script trigger +# gitsource test. +# 1) Download and tar gitsource codes. +# 2) Run gitsource benchmark on specific governors, ondemand or schedutil. +# 3) Run tbench benchmark comparative test on acpi-cpufreq kernel driver. +# 4) Get desire performance, frequency, load by perf. +# 5) Get power consumption and throughput by amd_pstate_trace.py. +# 6) Get run time by /usr/bin/time. +# 7) Analyse test results and save it in file selftest.gitsource.csv. +#8) Plot png images about time, energy and performance per watt for each test. + +# protect against multiple inclusion +if [ $FILE_GITSOURCE ]; then + return 0 +else + FILE_GITSOURCE=DONE +fi + +git_name="git-2.15.1" +git_tar="$git_name.tar.gz" +gitsource_url="https://github.com/git/git/archive/refs/tags/v2.15.1.tar.gz" +gitsource_governors=("ondemand" "schedutil") + +# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: time $7: energy, $8: PPW +store_csv_gitsource() +{ + echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_GIT.csv > /dev/null 2>&1 +} + +# clear some special lines +clear_csv_gitsource() +{ + if [ -f $OUTFILE_GIT.csv ]; then + sed -i '/Comprison(%)/d' $OUTFILE_GIT.csv + sed -i "/$(scaling_name)/d" $OUTFILE_GIT.csv + fi +} + +# find string $1 in file csv and get the number of lines +get_lines_csv_gitsource() +{ + if [ -f $OUTFILE_GIT.csv ]; then + return `grep -c "$1" $OUTFILE_GIT.csv` + else + return 0 + fi +} + +pre_clear_gitsource() +{ + post_clear_gitsource + rm -rf gitsource_*.png + clear_csv_gitsource +} + +post_clear_gitsource() +{ + rm -rf results/tracer-gitsource* + rm -rf $OUTFILE_GIT*.log + rm -rf $OUTFILE_GIT*.result +} + +install_gitsource() +{ + if [ ! -d $git_name ]; then + printf "Download gitsource, please wait a moment ...\n\n" + wget -O $git_tar $gitsource_url > /dev/null 2>&1 + + printf "Tar gitsource ...\n\n" + tar -xzf $git_tar + fi +} + +# $1: governor, $2: loop +run_gitsource() +{ + echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL" + ./amd_pstate_trace.py -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & + + printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n" + cd $git_name + perf stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o ../$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > ../$OUTFILE_GIT-perf-$1-$2.log 2>&1 + cd .. + + for job in `jobs -p` + do + echo "Waiting for job id $job" + wait $job + done +} + +# $1: governor, $2: loop +parse_gitsource() +{ + awk '{print $5}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-des-perf-$1-$2.log + avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-des-perf-$1-$2.log) + printf "Gitsource-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result + + awk '{print $7}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-freq-$1-$2.log + avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-freq-$1-$2.log) + printf "Gitsource-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result + + awk '{print $11}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-load-$1-$2.log + avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-load-$1-$2.log) + printf "Gitsource-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result + + grep user $OUTFILE_GIT.time-gitsource-$1-$2.log | awk '{print $1}' | sed -e 's/user//' > $OUTFILE_GIT-time-$1-$2.log + time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1-$2.log) + printf "Gitsource-$1-#$2 user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result + + grep Joules $OUTFILE_GIT-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_GIT-energy-$1-$2.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1-$2.log) + printf "Gitsource-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result + + # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t + # senconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # 1/t 1/t 1 + # ----- = ----- = --- + # P E/t E + # with unit given by 1 per joule. + ppw=`echo "scale=9;1/$en_sum" | bc | awk '{printf "%.9f", $0}'` + printf "Gitsource-$1-#$2 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result + printf "\n" | tee -a $OUTFILE_GIT.result + + driver_name=`echo $(scaling_name)` + store_csv_gitsource "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $time_sum $en_sum $ppw +} + +# $1: governor +loop_gitsource() +{ + printf "\nGitsource total test times is $LOOP_TIMES for $1\n\n" + for i in `seq 1 $LOOP_TIMES` + do + run_gitsource $1 $i + parse_gitsource $1 $i + done +} + +# $1: governor +gather_gitsource() +{ + printf "Gitsource test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_GIT.result + printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_GIT-des-perf-$1.log + avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-des-perf-$1.log) + printf "Gitsource-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_GIT-freq-$1.log + avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-freq-$1.log) + printf "Gitsource-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_GIT-load-$1.log + avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-load-$1.log) + printf "Gitsource-$1 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "user time(s):" | awk '{print $NF}' > $OUTFILE_GIT-time-$1.log + time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1.log) + printf "Gitsource-$1 total user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result + + avg_time=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-time-$1.log) + printf "Gitsource-$1 avg user times(s): $avg_time\n" | tee -a $OUTFILE_GIT.result + + grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_GIT-energy-$1.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1.log) + printf "Gitsource-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result + + avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-energy-$1.log) + printf "Gitsource-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_GIT.result + + # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t + # senconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # 1/t 1/t 1 + # ----- = ----- = --- + # P E/t E + # with unit given by 1 per joule. + ppw=`echo "scale=9;1/$avg_en" | bc | awk '{printf "%.9f", $0}'` + printf "Gitsource-$1 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result + printf "\n" | tee -a $OUTFILE_GIT.result + + driver_name=`echo $(scaling_name)` + store_csv_gitsource "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_time $avg_en $ppw +} + +# $1: base scaling_driver $2: base governor $3: comparison scaling_driver $4: comparison governor +__calc_comp_gitsource() +{ + base=`grep "$1-$2" $OUTFILE_GIT.csv | grep "Average"` + comp=`grep "$3-$4" $OUTFILE_GIT.csv | grep "Average"` + + if [ -n "$base" -a -n "$comp" ]; then + printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result + printf "Gitsource comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_GIT.result + printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result + + # get the base values + des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//` + freq_base=`echo "$base" | awk '{print $4}' | sed s/,//` + load_base=`echo "$base" | awk '{print $5}' | sed s/,//` + time_base=`echo "$base" | awk '{print $6}' | sed s/,//` + energy_base=`echo "$base" | awk '{print $7}' | sed s/,//` + ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//` + + # get the comparison values + des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//` + freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//` + load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//` + time_comp=`echo "$comp" | awk '{print $6}' | sed s/,//` + energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//` + ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//` + + # compare the base and comp values + des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_GIT.result + + freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_GIT.result + + load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_GIT.result + + time_drop=`echo "scale=4;($time_comp-$time_base)*100/$time_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 time base: $time_base comprison: $time_comp percent: $time_drop\n" | tee -a $OUTFILE_GIT.result + + energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_GIT.result + + ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'` + printf "Gitsource-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_GIT.result + printf "\n" | tee -a $OUTFILE_GIT.result + + store_csv_gitsource "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$time_drop" "$energy_drop" "$ppw_drop" + fi +} + +# calculate the comparison(%) +calc_comp_gitsource() +{ + # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil + __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[0]} ${gitsource_governors[1]} + + # amd-pstate-ondemand VS amd-pstate-schedutil + __calc_comp_gitsource ${all_scaling_names[1]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[1]} + + # acpi-cpufreq-ondemand VS amd-pstate-ondemand + __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[0]} + + # acpi-cpufreq-schedutil VS amd-pstate-schedutil + __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[1]} ${all_scaling_names[1]} ${gitsource_governors[1]} +} + +# $1: file_name, $2: title, $3: ylable, $4: column +plot_png_gitsource() +{ + # all_scaling_names[1] all_scaling_names[0] flag + # amd-pstate acpi-cpufreq + # N N 0 + # N Y 1 + # Y N 2 + # Y Y 3 + ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_GIT.csv` + if [ $ret -eq 0 ]; then + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv` + if [ $ret -eq 0 ]; then + flag=0 + else + flag=1 + fi + else + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv` + if [ $ret -eq 0 ]; then + flag=2 + else + flag=3 + fi + fi + + gnuplot << EOF + set term png + set output "$1" + + set title "$2" + set xlabel "Test Cycles (round)" + set ylabel "$3" + + set grid + set style data histogram + set style fill solid 0.5 border + set boxwidth 0.8 + + if ($flag == 1) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}" + } else { + if ($flag == 2) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}" + } else { + if ($flag == 3 ) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}" + } + } + } + quit +EOF +} + +amd_pstate_gitsource() +{ + printf "\n---------------------------------------------\n" + printf "*** Running gitsource ***" + printf "\n---------------------------------------------\n" + + pre_clear_gitsource + + install_gitsource + + get_lines_csv_gitsource "Governor" + if [ $? -eq 0 ]; then + # add titles and unit for csv file + store_csv_gitsource "Governor" "Round" "Des-perf" "Freq" "Load" "Time" "Energy" "Performance Per Watt" + store_csv_gitsource "Unit" "" "" "GHz" "" "s" "J" "1/J" + fi + + backup_governor + for governor in ${gitsource_governors[*]} ; do + printf "\nSpecified governor is $governor\n\n" + switch_governor $governor + loop_gitsource $governor + gather_gitsource $governor + done + restore_governor + + plot_png_gitsource "gitsource_time.png" "Gitsource Benchmark Time" "Time (s)" 6 + plot_png_gitsource "gitsource_energy.png" "Gitsource Benchmark Energy" "Energy (J)" 7 + plot_png_gitsource "gitsource_ppw.png" "Gitsource Benchmark Performance Per Watt" "Performance Per Watt (1/J)" 8 + + calc_comp_gitsource + + post_clear_gitsource +} diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh new file mode 100755 index 000000000000..57cad57e59c0 --- /dev/null +++ b/tools/testing/selftests/amd-pstate/run.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# protect against multiple inclusion +if [ $FILE_MAIN ]; then + return 0 +else + FILE_MAIN=DONE +fi + +source basic.sh +source tbench.sh +source gitsource.sh + +# amd-pstate-ut only run on x86/x86_64 AMD systems. +ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/') +VENDOR=$(cat /proc/cpuinfo | grep -m 1 'vendor_id' | awk '{print $NF}') + +msg="Skip all tests:" +FUNC=all +OUTFILE=selftest +OUTFILE_TBENCH="$OUTFILE.tbench" +OUTFILE_GIT="$OUTFILE.gitsource" + +SYSFS= +CPUROOT= +CPUFREQROOT= +MAKE_CPUS= + +TIME_LIMIT=100 +PROCESS_NUM=128 +LOOP_TIMES=3 +TRACER_INTERVAL=10 +CURRENT_TEST=amd-pstate +COMPARATIVE_TEST= + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +all_scaling_names=("acpi-cpufreq" "amd-pstate") + +# Get current cpufreq scaling driver name +scaling_name() +{ + if [ "$COMPARATIVE_TEST" = "" ]; then + echo "$CURRENT_TEST" + else + echo "$COMPARATIVE_TEST" + fi +} + +# Counts CPUs with cpufreq directories +count_cpus() +{ + count=0; + + for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do + if [ -d $CPUROOT/$cpu/cpufreq ]; then + let count=count+1; + fi + done + + echo $count; +} + +# $1: policy +find_current_governor() +{ + cat $CPUFREQROOT/$1/scaling_governor +} + +backup_governor() +{ + policies=$(ls $CPUFREQROOT| grep "policy[0-9].*") + for policy in $policies; do + cur_gov=$(find_current_governor $policy) + echo "$policy $cur_gov" >> $OUTFILE.backup_governor.log + done + + printf "Governor $cur_gov backup done.\n" +} + +restore_governor() +{ + i=0; + + policies=$(awk '{print $1}' $OUTFILE.backup_governor.log) + for policy in $policies; do + let i++; + governor=$(sed -n ''$i'p' $OUTFILE.backup_governor.log | awk '{print $2}') + + # switch governor + echo $governor > $CPUFREQROOT/$policy/scaling_governor + done + + printf "Governor restored to $governor.\n" +} + +# $1: governor +switch_governor() +{ + policies=$(ls $CPUFREQROOT| grep "policy[0-9].*") + for policy in $policies; do + filepath=$CPUFREQROOT/$policy/scaling_available_governors + + # Exit if cpu isn't managed by cpufreq core + if [ ! -f $filepath ]; then + return; + fi + + echo $1 > $CPUFREQROOT/$policy/scaling_governor + done + + printf "Switched governor to $1.\n" +} + +# All amd-pstate tests +amd_pstate_all() +{ + printf "\n=============================================\n" + printf "***** Running AMD P-state Sanity Tests *****\n" + printf "=============================================\n\n" + + count=$(count_cpus) + if [ $count = 0 ]; then + printf "No cpu is managed by cpufreq core, exiting\n" + exit; + else + printf "AMD P-state manages: $count CPUs\n" + fi + + # unit test for amd-pstate kernel driver + amd_pstate_basic + + # tbench + amd_pstate_tbench + + # gitsource + amd_pstate_gitsource +} + +help() +{ + printf "Usage: $0 [OPTION...] + [-h <help>] + [-o <output-file-for-dump>] + [-c <all: All testing, + basic: Basic testing, + tbench: Tbench testing, + gitsource: Gitsource testing.>] + [-t <tbench time limit>] + [-p <tbench process number>] + [-l <loop times for tbench>] + [-i <amd tracer interval>] + [-m <comparative test: acpi-cpufreq>] + \n" + exit 2 +} + +parse_arguments() +{ + while getopts ho:c:t:p:l:i:m: arg + do + case $arg in + h) # --help + help + ;; + + c) # --func_type (Function to perform: basic, tbench, gitsource (default: all)) + FUNC=$OPTARG + ;; + + o) # --output-file (Output file to store dumps) + OUTFILE=$OPTARG + ;; + + t) # --tbench-time-limit + TIME_LIMIT=$OPTARG + ;; + + p) # --tbench-process-number + PROCESS_NUM=$OPTARG + ;; + + l) # --tbench/gitsource-loop-times + LOOP_TIMES=$OPTARG + ;; + + i) # --amd-tracer-interval + TRACER_INTERVAL=$OPTARG + ;; + + m) # --comparative-test + COMPARATIVE_TEST=$OPTARG + ;; + + *) + help + ;; + esac + done +} + +command_perf() +{ + if ! command -v perf > /dev/null; then + echo $msg please install perf. >&2 + exit $ksft_skip + fi +} + +command_tbench() +{ + if ! command -v tbench > /dev/null; then + if apt policy dbench > /dev/null 2>&1; then + echo $msg apt install dbench >&2 + exit $ksft_skip + elif yum list available | grep dbench > /dev/null 2>&1; then + echo $msg yum install dbench >&2 + exit $ksft_skip + fi + fi + + if ! command -v tbench > /dev/null; then + echo $msg please install tbench. >&2 + exit $ksft_skip + fi +} + +prerequisite() +{ + if ! echo "$ARCH" | grep -q x86; then + echo "$0 # Skipped: Test can only run on x86 architectures." + exit $ksft_skip + fi + + if ! echo "$VENDOR" | grep -iq amd; then + echo "$0 # Skipped: Test can only run on AMD CPU." + echo "$0 # Current cpu vendor is $VENDOR." + exit $ksft_skip + fi + + scaling_driver=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver) + if [ "$COMPARATIVE_TEST" = "" ]; then + if [ "$scaling_driver" != "$CURRENT_TEST" ]; then + echo "$0 # Skipped: Test can only run on $CURRENT_TEST driver or run comparative test." + echo "$0 # Please set X86_AMD_PSTATE enabled or run comparative test." + echo "$0 # Current cpufreq scaling drvier is $scaling_driver." + exit $ksft_skip + fi + else + case "$FUNC" in + "tbench" | "gitsource") + if [ "$scaling_driver" != "$COMPARATIVE_TEST" ]; then + echo "$0 # Skipped: Comparison test can only run on $COMPARISON_TEST driver." + echo "$0 # Current cpufreq scaling drvier is $scaling_driver." + exit $ksft_skip + fi + ;; + + *) + echo "$0 # Skipped: Comparison test are only for tbench or gitsource." + echo "$0 # Current comparative test is for $FUNC." + exit $ksft_skip + ;; + esac + fi + + if [ ! -w /dev ]; then + echo $msg please run this as root >&2 + exit $ksft_skip + fi + + case "$FUNC" in + "all") + command_perf + command_tbench + ;; + + "tbench") + command_perf + command_tbench + ;; + + "gitsource") + command_perf + ;; + esac + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $msg sysfs is not mounted >&2 + exit 2 + fi + + CPUROOT=$SYSFS/devices/system/cpu + CPUFREQROOT="$CPUROOT/cpufreq" + + if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then + echo $msg cpus not available in sysfs >&2 + exit 2 + fi + + if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then + echo $msg cpufreq directory not available in sysfs >&2 + exit 2 + fi +} + +do_test() +{ + # Check if CPUs are managed by cpufreq or not + count=$(count_cpus) + MAKE_CPUS=$((count*2)) + + if [ $count = 0 ]; then + echo "No cpu is managed by cpufreq core, exiting" + exit 2; + fi + + case "$FUNC" in + "all") + amd_pstate_all + ;; + + "basic") + amd_pstate_basic + ;; + + "tbench") + amd_pstate_tbench + ;; + + "gitsource") + amd_pstate_gitsource + ;; + + *) + echo "Invalid [-f] function type" + help + ;; + esac +} + +# clear dumps +pre_clear_dumps() +{ + case "$FUNC" in + "all") + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log + rm -rf *.png + ;; + + "tbench") + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log + rm -rf tbench_*.png + ;; + + "gitsource") + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log + rm -rf gitsource_*.png + ;; + + *) + ;; + esac +} + +post_clear_dumps() +{ + rm -rf $OUTFILE.log + rm -rf $OUTFILE.backup_governor.log +} + +# Parse arguments +parse_arguments $@ + +# Make sure all requirements are met +prerequisite + +# Run requested functions +pre_clear_dumps +do_test | tee -a $OUTFILE.log +post_clear_dumps diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh new file mode 100755 index 000000000000..49c9850341f6 --- /dev/null +++ b/tools/testing/selftests/amd-pstate/tbench.sh @@ -0,0 +1,339 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Testing and monitor the cpu desire performance, frequency, load, +# power consumption and throughput etc.when this script trigger tbench +# test cases. +# 1) Run tbench benchmark on specific governors, ondemand or schedutil. +# 2) Run tbench benchmark comparative test on acpi-cpufreq kernel driver. +# 3) Get desire performance, frequency, load by perf. +# 4) Get power consumption and throughput by amd_pstate_trace.py. +# 5) Analyse test results and save it in file selftest.tbench.csv. +# 6) Plot png images about performance, energy and performance per watt for each test. + +# protect against multiple inclusion +if [ $FILE_TBENCH ]; then + return 0 +else + FILE_TBENCH=DONE +fi + +tbench_governors=("ondemand" "schedutil") + +# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: performance, $7: energy, $8: performance per watt +store_csv_tbench() +{ + echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_TBENCH.csv > /dev/null 2>&1 +} + +# clear some special lines +clear_csv_tbench() +{ + if [ -f $OUTFILE_TBENCH.csv ]; then + sed -i '/Comprison(%)/d' $OUTFILE_TBENCH.csv + sed -i "/$(scaling_name)/d" $OUTFILE_TBENCH.csv + fi +} + +# find string $1 in file csv and get the number of lines +get_lines_csv_tbench() +{ + if [ -f $OUTFILE_TBENCH.csv ]; then + return `grep -c "$1" $OUTFILE_TBENCH.csv` + else + return 0 + fi +} + +pre_clear_tbench() +{ + post_clear_tbench + rm -rf tbench_*.png + clear_csv_tbench +} + +post_clear_tbench() +{ + rm -rf results/tracer-tbench* + rm -rf $OUTFILE_TBENCH*.log + rm -rf $OUTFILE_TBENCH*.result + +} + +# $1: governor, $2: loop +run_tbench() +{ + echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL" + ./amd_pstate_trace.py -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & + + printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n" + tbench_srv > /dev/null 2>&1 & + perf stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1 + + pid=`pidof tbench_srv` + kill $pid + + for job in `jobs -p` + do + echo "Waiting for job id $job" + wait $job + done +} + +# $1: governor, $2: loop +parse_tbench() +{ + awk '{print $5}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-des-perf-$1-$2.log + avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-des-perf-$1-$2.log) + printf "Tbench-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result + + awk '{print $7}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-freq-$1-$2.log + avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-freq-$1-$2.log) + printf "Tbench-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result + + awk '{print $11}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-load-$1-$2.log + avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-load-$1-$2.log) + printf "Tbench-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result + + grep Throughput $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $2}' > $OUTFILE_TBENCH-throughput-$1-$2.log + tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1-$2.log) + printf "Tbench-$1-#$2 throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result + + grep Joules $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_TBENCH-energy-$1-$2.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1-$2.log) + printf "Tbench-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result + + # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds. + # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # T/t T/t T + # --- = --- = --- + # P E/t E + # with unit given by MB per joule. + ppw=`echo "scale=4;($TIME_LIMIT-1)*$tp_sum/$en_sum" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1-#$2 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result + printf "\n" | tee -a $OUTFILE_TBENCH.result + + driver_name=`echo $(scaling_name)` + store_csv_tbench "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $tp_sum $en_sum $ppw +} + +# $1: governor +loop_tbench() +{ + printf "\nTbench total test times is $LOOP_TIMES for $1\n\n" + for i in `seq 1 $LOOP_TIMES` + do + run_tbench $1 $i + parse_tbench $1 $i + done +} + +# $1: governor +gather_tbench() +{ + printf "Tbench test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_TBENCH.result + printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_TBENCH-des-perf-$1.log + avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-des-perf-$1.log) + printf "Tbench-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_TBENCH-freq-$1.log + avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-freq-$1.log) + printf "Tbench-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_TBENCH-load-$1.log + avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-load-$1.log) + printf "Tbench-$1 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "throughput(MB/s):" | awk '{print $NF}' > $OUTFILE_TBENCH-throughput-$1.log + tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1.log) + printf "Tbench-$1 total throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result + + avg_tp=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-throughput-$1.log) + printf "Tbench-$1 avg throughput(MB/s): $avg_tp\n" | tee -a $OUTFILE_TBENCH.result + + grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_TBENCH-energy-$1.log + en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1.log) + printf "Tbench-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result + + avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-energy-$1.log) + printf "Tbench-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_TBENCH.result + + # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds. + # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J), + # and t is time measured in seconds(s). This means that performance per watt becomes + # T/t T/t T + # --- = --- = --- + # P E/t E + # with unit given by MB per joule. + ppw=`echo "scale=4;($TIME_LIMIT-1)*$avg_tp/$avg_en" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result + printf "\n" | tee -a $OUTFILE_TBENCH.result + + driver_name=`echo $(scaling_name)` + store_csv_tbench "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_tp $avg_en $ppw +} + +# $1: base scaling_driver $2: base governor $3: comparative scaling_driver $4: comparative governor +__calc_comp_tbench() +{ + base=`grep "$1-$2" $OUTFILE_TBENCH.csv | grep "Average"` + comp=`grep "$3-$4" $OUTFILE_TBENCH.csv | grep "Average"` + + if [ -n "$base" -a -n "$comp" ]; then + printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result + printf "Tbench comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_TBENCH.result + printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result + + # get the base values + des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//` + freq_base=`echo "$base" | awk '{print $4}' | sed s/,//` + load_base=`echo "$base" | awk '{print $5}' | sed s/,//` + perf_base=`echo "$base" | awk '{print $6}' | sed s/,//` + energy_base=`echo "$base" | awk '{print $7}' | sed s/,//` + ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//` + + # get the comparative values + des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//` + freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//` + load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//` + perf_comp=`echo "$comp" | awk '{print $6}' | sed s/,//` + energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//` + ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//` + + # compare the base and comp values + des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_TBENCH.result + + freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_TBENCH.result + + load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_TBENCH.result + + perf_drop=`echo "scale=4;($perf_comp-$perf_base)*100/$perf_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 perf base: $perf_base comprison: $perf_comp percent: $perf_drop\n" | tee -a $OUTFILE_TBENCH.result + + energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_TBENCH.result + + ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'` + printf "Tbench-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_TBENCH.result + printf "\n" | tee -a $OUTFILE_TBENCH.result + + store_csv_tbench "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$perf_drop" "$energy_drop" "$ppw_drop" + fi +} + +# calculate the comparison(%) +calc_comp_tbench() +{ + # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil + __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[0]} ${tbench_governors[1]} + + # amd-pstate-ondemand VS amd-pstate-schedutil + __calc_comp_tbench ${all_scaling_names[1]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[1]} + + # acpi-cpufreq-ondemand VS amd-pstate-ondemand + __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[0]} + + # acpi-cpufreq-schedutil VS amd-pstate-schedutil + __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[1]} ${all_scaling_names[1]} ${tbench_governors[1]} +} + +# $1: file_name, $2: title, $3: ylable, $4: column +plot_png_tbench() +{ + # all_scaling_names[1] all_scaling_names[0] flag + # amd-pstate acpi-cpufreq + # N N 0 + # N Y 1 + # Y N 2 + # Y Y 3 + ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_TBENCH.csv` + if [ $ret -eq 0 ]; then + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv` + if [ $ret -eq 0 ]; then + flag=0 + else + flag=1 + fi + else + ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv` + if [ $ret -eq 0 ]; then + flag=2 + else + flag=3 + fi + fi + + gnuplot << EOF + set term png + set output "$1" + + set title "$2" + set xlabel "Test Cycles (round)" + set ylabel "$3" + + set grid + set style data histogram + set style fill solid 0.5 border + set boxwidth 0.8 + + if ($flag == 1) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}" + } else { + if ($flag == 2) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}" + } else { + if ($flag == 3 ) { + plot \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \ + "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}" + } + } + } + quit +EOF +} + +amd_pstate_tbench() +{ + printf "\n---------------------------------------------\n" + printf "*** Running tbench ***" + printf "\n---------------------------------------------\n" + + pre_clear_tbench + + get_lines_csv_tbench "Governor" + if [ $? -eq 0 ]; then + # add titles and unit for csv file + store_csv_tbench "Governor" "Round" "Des-perf" "Freq" "Load" "Performance" "Energy" "Performance Per Watt" + store_csv_tbench "Unit" "" "" "GHz" "" "MB/s" "J" "MB/J" + fi + + backup_governor + for governor in ${tbench_governors[*]} ; do + printf "\nSpecified governor is $governor\n\n" + switch_governor $governor + loop_tbench $governor + gather_tbench $governor + done + restore_governor + + plot_png_tbench "tbench_perfromance.png" "Tbench Benchmark Performance" "Performance" 6 + plot_png_tbench "tbench_energy.png" "Tbench Benchmark Energy" "Energy (J)" 7 + plot_png_tbench "tbench_ppw.png" "Tbench Benchmark Performance Per Watt" "Performance Per Watt (MB/J)" 8 + + calc_comp_tbench + + post_clear_tbench +} diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 9f1a7b5c6193..93333a90bf3a 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -33,6 +33,12 @@ */ typedef void (*sigill_fn)(void); +static void cssc_sigill(void) +{ + /* CNT x0, x0 */ + asm volatile(".inst 0xdac01c00" : : : "x0"); +} + static void rng_sigill(void) { asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); @@ -44,6 +50,78 @@ static void sme_sigill(void) asm volatile(".inst 0x04bf5800" : : : "x0"); } +static void sme2_sigill(void) +{ + /* SMSTART ZA */ + asm volatile("msr S0_3_C4_C5_3, xzr" : : : ); + + /* ZERO ZT0 */ + asm volatile(".inst 0xc0480001" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void sme2p1_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* BFCLAMP { Z0.H - Z1.H }, Z0.H, Z0.H */ + asm volatile(".inst 0xc120C000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smei16i32_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* SMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */ + asm volatile(".inst 0xa0800000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smebi32i32_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* BMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */ + asm volatile(".inst 0x80800008" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smeb16b16_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */ + asm volatile(".inst 0xC1E41C00" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smef16f16_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FADD ZA.H[W0, 0], { Z0.H-Z1.H } */ + asm volatile(".inst 0xc1a41C00" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + static void sve_sigill(void) { /* RDVL x0, #0 */ @@ -56,6 +134,12 @@ static void sve2_sigill(void) asm volatile(".inst 0x4408A000" : : : "z0"); } +static void sve2p1_sigill(void) +{ + /* BFADD Z0.H, Z0.H, Z0.H */ + asm volatile(".inst 0x65000000" : : : "z0"); +} + static void sveaes_sigill(void) { /* AESD z0.b, z0.b, z0.b */ @@ -119,6 +203,13 @@ static const struct hwcap_data { bool sigill_reliable; } hwcaps[] = { { + .name = "CSSC", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_CSSC, + .cpuinfo = "cssc", + .sigill_fn = cssc_sigill, + }, + { .name = "RNG", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_RNG, @@ -126,6 +217,12 @@ static const struct hwcap_data { .sigill_fn = rng_sigill, }, { + .name = "RPRFM", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_RPRFM, + .cpuinfo = "rprfm", + }, + { .name = "SME", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME, @@ -134,6 +231,49 @@ static const struct hwcap_data { .sigill_reliable = true, }, { + .name = "SME2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME2, + .cpuinfo = "sme2", + .sigill_fn = sme2_sigill, + .sigill_reliable = true, + }, + { + .name = "SME 2.1", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME2P1, + .cpuinfo = "sme2p1", + .sigill_fn = sme2p1_sigill, + }, + { + .name = "SME I16I32", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_I16I32, + .cpuinfo = "smei16i32", + .sigill_fn = smei16i32_sigill, + }, + { + .name = "SME BI32I32", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_BI32I32, + .cpuinfo = "smebi32i32", + .sigill_fn = smebi32i32_sigill, + }, + { + .name = "SME B16B16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_B16B16, + .cpuinfo = "smeb16b16", + .sigill_fn = smeb16b16_sigill, + }, + { + .name = "SME F16F16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_F16F16, + .cpuinfo = "smef16f16", + .sigill_fn = smef16f16_sigill, + }, + { .name = "SVE", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_SVE, @@ -149,6 +289,13 @@ static const struct hwcap_data { .sigill_fn = sve2_sigill, }, { + .name = "SVE 2.1", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVE2P1, + .cpuinfo = "sve2p1", + .sigill_fn = sve2p1_sigill, + }, + { .name = "SVE AES", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVEAES, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index b523c21c2278..df3230fdac39 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -23,6 +23,9 @@ .arch_extension sve +#define ID_AA64SMFR0_EL1_SMEver_SHIFT 56 +#define ID_AA64SMFR0_EL1_SMEver_WIDTH 4 + /* * LDR (vector to ZA array): * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] @@ -45,6 +48,26 @@ | ((\offset) & 7) .endm +/* + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + .inst 0xe11f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx + */ +.macro _str_zt nx + .inst 0xe13f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + .globl do_syscall do_syscall: // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1 @@ -64,7 +87,7 @@ do_syscall: msr S3_3_C4_C2_2, x2 1: - // Load ZA if it's enabled - uses x12 as scratch due to SME LDR + // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR tbz x2, #SVCR_ZA_SHIFT, 1f mov w12, #0 ldr x2, =za_in @@ -73,6 +96,15 @@ do_syscall: add x12, x12, #1 cmp x1, x12 bne 2b + + // ZT0 + mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ + #ID_AA64SMFR0_EL1_SMEver_WIDTH + cbz x2, 1f + adrp x2, zt_in + add x2, x2, :lo12:zt_in + _ldr_zt 2 1: // Load GPRs x8-x28, and save our SP/FP for later comparison @@ -92,8 +124,11 @@ do_syscall: str x29, [x2], #8 // FP str x30, [x2], #8 // LR - // Load FPRs if we're not doing SVE + // Load FPRs if we're not doing neither SVE nor streaming SVE cbnz x0, 1f + ldr x2, =svcr_in + tbnz x2, #SVCR_SM_SHIFT, 1f + ldr x2, =fpr_in ldp q0, q1, [x2] ldp q2, q3, [x2, #16 * 2] @@ -111,10 +146,11 @@ do_syscall: ldp q26, q27, [x2, #16 * 26] ldp q28, q29, [x2, #16 * 28] ldp q30, q31, [x2, #16 * 30] + + b 2f 1: // Load the SVE registers if we're doing SVE/SME - cbz x0, 1f ldr x2, =z_in ldr z0, [x2, #0, MUL VL] @@ -153,11 +189,11 @@ do_syscall: // Only set a non-zero FFR, test patterns must be zero since the // syscall should clear it - this lets us handle FA64. ldr x2, =ffr_in - ldr p0, [x2, #0] + ldr p0, [x2] ldr x2, [x2, #0] - cbz x2, 2f + cbz x2, 1f wrffr p0.b -2: +1: ldr x2, =p_in ldr p0, [x2, #0, MUL VL] @@ -176,7 +212,7 @@ do_syscall: ldr p13, [x2, #13, MUL VL] ldr p14, [x2, #14, MUL VL] ldr p15, [x2, #15, MUL VL] -1: +2: // Do the syscall svc #0 @@ -235,6 +271,15 @@ do_syscall: add x12, x12, #1 cmp x1, x12 bne 2b + + // ZT0 + mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ + #ID_AA64SMFR0_EL1_SMEver_WIDTH + cbz x2, 1f + adrp x2, zt_out + add x2, x2, :lo12:zt_out + _str_zt 2 1: // Save the SVE state if we have some @@ -298,7 +343,7 @@ do_syscall: cbz x2, 1f ldr x2, =ffr_out rdffr p0.b - str p0, [x2, #0] + str p0, [x2] 1: // Restore callee saved registers x19-x30 diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index dd7ebe536d05..18cc123e2347 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -20,10 +20,13 @@ #include "syscall-abi.h" -#define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) - static int default_sme_vl; +static int sve_vl_count; +static unsigned int sve_vls[SVE_VQ_MAX]; +static int sme_vl_count; +static unsigned int sme_vls[SVE_VQ_MAX]; + extern void do_syscall(int sve_vl, int sme_vl); static void fill_random(void *buf, size_t size) @@ -83,6 +86,7 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t s #define NUM_FPR 32 uint64_t fpr_in[NUM_FPR * 2]; uint64_t fpr_out[NUM_FPR * 2]; +uint64_t fpr_zero[NUM_FPR * 2]; static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -97,7 +101,7 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, int errors = 0; int i; - if (!sve_vl) { + if (!sve_vl && !(svcr & SVCR_SM_MASK)) { for (i = 0; i < ARRAY_SIZE(fpr_in); i++) { if (fpr_in[i] != fpr_out[i]) { ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n", @@ -109,6 +113,18 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, } } + /* + * In streaming mode the whole register set should be cleared + * by the transition out of streaming mode. + */ + if (svcr & SVCR_SM_MASK) { + if (memcmp(fpr_zero, fpr_out, sizeof(fpr_out)) != 0) { + ksft_print_msg("%s FPSIMD registers non-zero exiting SM\n", + cfg->name); + errors++; + } + } + return errors; } @@ -284,8 +300,8 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t za_in[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; +uint8_t za_out[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -311,6 +327,35 @@ static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } +uint8_t zt_in[ZT_SIG_REG_BYTES] __attribute__((aligned(16))); +uint8_t zt_out[ZT_SIG_REG_BYTES] __attribute__((aligned(16))); + +static void setup_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + fill_random(zt_in, sizeof(zt_in)); + memset(zt_out, 0, sizeof(zt_out)); +} + +static int check_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + int errors = 0; + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) + return 0; + + if (!(svcr & SVCR_ZA_MASK)) + return 0; + + if (memcmp(zt_in, zt_out, sizeof(zt_in)) != 0) { + ksft_print_msg("SME VL %d ZT does not match\n", sme_vl); + errors++; + } + + return errors; +} + typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr); typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, @@ -334,6 +379,7 @@ static struct { { setup_ffr, check_ffr }, { setup_svcr, check_svcr }, { setup_za, check_za }, + { setup_zt, check_zt }, }; static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl, @@ -355,73 +401,78 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl, static void test_one_syscall(struct syscall_cfg *cfg) { - int sve_vq, sve_vl; - int sme_vq, sme_vl; + int sve, sme; + int ret; /* FPSIMD only case */ ksft_test_result(do_test(cfg, 0, default_sme_vl, 0), "%s FPSIMD\n", cfg->name); - if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) - return; - - for (sve_vq = SVE_VQ_MAX; sve_vq > 0; --sve_vq) { - sve_vl = prctl(PR_SVE_SET_VL, sve_vq * 16); - if (sve_vl == -1) + for (sve = 0; sve < sve_vl_count; sve++) { + ret = prctl(PR_SVE_SET_VL, sve_vls[sve]); + if (ret == -1) ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", strerror(errno), errno); - sve_vl &= PR_SVE_VL_LEN_MASK; - - if (sve_vq != sve_vq_from_vl(sve_vl)) - sve_vq = sve_vq_from_vl(sve_vl); - - ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0), - "%s SVE VL %d\n", cfg->name, sve_vl); - - if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) - continue; + ksft_test_result(do_test(cfg, sve_vls[sve], default_sme_vl, 0), + "%s SVE VL %d\n", cfg->name, sve_vls[sve]); - for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) { - sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16); - if (sme_vl == -1) + for (sme = 0; sme < sme_vl_count; sme++) { + ret = prctl(PR_SME_SET_VL, sme_vls[sme]); + if (ret == -1) ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", strerror(errno), errno); - sme_vl &= PR_SME_VL_LEN_MASK; - - if (sme_vq != sve_vq_from_vl(sme_vl)) - sme_vq = sve_vq_from_vl(sme_vl); - - ksft_test_result(do_test(cfg, sve_vl, sme_vl, + ksft_test_result(do_test(cfg, sve_vls[sve], + sme_vls[sme], SVCR_ZA_MASK | SVCR_SM_MASK), "%s SVE VL %d/SME VL %d SM+ZA\n", - cfg->name, sve_vl, sme_vl); - ksft_test_result(do_test(cfg, sve_vl, sme_vl, - SVCR_SM_MASK), + cfg->name, sve_vls[sve], + sme_vls[sme]); + ksft_test_result(do_test(cfg, sve_vls[sve], + sme_vls[sme], SVCR_SM_MASK), "%s SVE VL %d/SME VL %d SM\n", - cfg->name, sve_vl, sme_vl); - ksft_test_result(do_test(cfg, sve_vl, sme_vl, - SVCR_ZA_MASK), + cfg->name, sve_vls[sve], + sme_vls[sme]); + ksft_test_result(do_test(cfg, sve_vls[sve], + sme_vls[sme], SVCR_ZA_MASK), "%s SVE VL %d/SME VL %d ZA\n", - cfg->name, sve_vl, sme_vl); + cfg->name, sve_vls[sve], + sme_vls[sme]); } } + + for (sme = 0; sme < sme_vl_count; sme++) { + ret = prctl(PR_SME_SET_VL, sme_vls[sme]); + if (ret == -1) + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + ksft_test_result(do_test(cfg, 0, sme_vls[sme], + SVCR_ZA_MASK | SVCR_SM_MASK), + "%s SME VL %d SM+ZA\n", + cfg->name, sme_vls[sme]); + ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_SM_MASK), + "%s SME VL %d SM\n", + cfg->name, sme_vls[sme]); + ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_ZA_MASK), + "%s SME VL %d ZA\n", + cfg->name, sme_vls[sme]); + } } -int sve_count_vls(void) +void sve_count_vls(void) { unsigned int vq; - int vl_count = 0; int vl; if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) - return 0; + return; /* * Enumerate up to SVE_VQ_MAX vector lengths */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { + for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(PR_SVE_SET_VL, vq * 16); if (vl == -1) ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", @@ -432,28 +483,22 @@ int sve_count_vls(void) if (vq != sve_vq_from_vl(vl)) vq = sve_vq_from_vl(vl); - vl_count++; + sve_vls[sve_vl_count++] = vl; } - - return vl_count; } -int sme_count_vls(void) +void sme_count_vls(void) { unsigned int vq; - int vl_count = 0; int vl; if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) - return 0; - - /* Ensure we configure a SME VL, used to flag if SVCR is set */ - default_sme_vl = 16; + return; /* * Enumerate up to SVE_VQ_MAX vector lengths */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { + for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(PR_SME_SET_VL, vq * 16); if (vl == -1) ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", @@ -461,31 +506,47 @@ int sme_count_vls(void) vl &= PR_SME_VL_LEN_MASK; + /* Found lowest VL */ + if (sve_vq_from_vl(vl) > vq) + break; + if (vq != sve_vq_from_vl(vl)) vq = sve_vq_from_vl(vl); - vl_count++; + sme_vls[sme_vl_count++] = vl; } - return vl_count; + /* Ensure we configure a SME VL, used to flag if SVCR is set */ + default_sme_vl = sme_vls[0]; } int main(void) { int i; int tests = 1; /* FPSIMD */ + int sme_ver; srandom(getpid()); ksft_print_header(); - tests += sve_count_vls(); - tests += (sve_count_vls() * sme_count_vls()) * 3; + + sve_count_vls(); + sme_count_vls(); + + tests += sve_vl_count; + tests += sme_vl_count * 3; + tests += (sve_vl_count * sme_vl_count) * 3; ksft_set_plan(ARRAY_SIZE(syscalls) * tests); + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) + sme_ver = 2; + else + sme_ver = 1; + if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) - ksft_print_msg("SME with FA64\n"); + ksft_print_msg("SME%d with FA64\n", sme_ver); else if (getauxval(AT_HWCAP2) & HWCAP2_SME) - ksft_print_msg("SME without FA64\n"); + ksft_print_msg("SME%d without FA64\n", sme_ver); for (i = 0; i < ARRAY_SIZE(syscalls); i++) test_one_syscall(&syscalls[i]); diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 67b77ab83c20..2cd8dcee5aec 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -6,6 +6,7 @@ #include "system.h" +#include <stdbool.h> #include <stddef.h> #include <linux/errno.h> #include <linux/auxvec.h> @@ -101,7 +102,8 @@ static void handler(int n, siginfo_t *si __always_unused, uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK; } -static int skip_all; +/* Does the system have BTI? */ +static bool have_bti; static void __do_test(void (*trampoline)(void (*)(void)), void (*fn)(void), @@ -109,19 +111,11 @@ static void __do_test(void (*trampoline)(void (*)(void)), const char *name, int expect_sigill) { - if (skip_all) { - test_skipped++; - putstr("ok "); - putnum(test_num); - putstr(" "); - puttestname(name, trampoline_name); - putstr(" # SKIP\n"); - - return; - } - - /* Branch Target exceptions should only happen in BTI binaries: */ - if (!BTI) + /* + * Branch Target exceptions should only happen for BTI + * binaries running on a system with BTI: + */ + if (!BTI || !have_bti) expect_sigill = 0; sigill_expected = expect_sigill; @@ -199,9 +193,10 @@ void start(int *argcp) putstr("# HWCAP2_BTI present\n"); if (!(hwcap & HWCAP_PACA)) putstr("# Bad hardware? Expect problems.\n"); + have_bti = true; } else { putstr("# HWCAP2_BTI not present\n"); - skip_all = 1; + have_bti = false; } putstr("# Test binary"); diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index df79d29664a1..ebc86757bdd8 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -12,3 +12,5 @@ vlset za-fork za-ptrace za-test +zt-ptrace +zt-test diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 36db61358ed5..48f56c86ad45 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -3,7 +3,7 @@ # A proper top_srcdir is needed by KSFT(lib.mk) top_srcdir = $(realpath ../../../../../) -CFLAGS += -I$(top_srcdir)/usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := fp-stress \ sve-ptrace sve-probe-vls \ @@ -14,6 +14,8 @@ TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ sve-test \ ssve-test \ za-test \ + zt-ptrace \ + zt-test \ vlset TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress @@ -41,5 +43,8 @@ $(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o $(OUTPUT)/za-ptrace: za-ptrace.c $(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o $(CC) -nostdlib $^ -o $@ +$(OUTPUT)/zt-ptrace: zt-ptrace.c +$(OUTPUT)/zt-test: zt-test.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h index 90bd433d2665..9b38a0da407d 100644 --- a/tools/testing/selftests/arm64/fp/assembler.h +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -57,7 +57,7 @@ endfunction // Utility macro to print a literal string // Clobbers x0-x4,x8 .macro puts string - .pushsection .rodata.str1.1, "aMS", 1 + .pushsection .rodata.str1.1, "aMS", @progbits, 1 .L__puts_literal\@: .string "\string" .popsection diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S index 16a436389bfc..73830f6bc99b 100644 --- a/tools/testing/selftests/arm64/fp/fp-pidbench.S +++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S @@ -31,7 +31,6 @@ // Main program entry point .globl _start function _start -_start: puts "Iterations per test: " mov x20, #10000 lsl x20, x20, #8 diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 4e62a9199f97..dd31647b00a2 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -39,10 +39,12 @@ struct child_data { static int epoll_fd; static struct child_data *children; +static struct epoll_event *evs; +static int tests; static int num_children; static bool terminate; -static void drain_output(bool flush); +static int startup_pipe[2]; static int num_processors(void) { @@ -82,12 +84,36 @@ static void child_start(struct child_data *child, const char *program) } /* + * Duplicate the read side of the startup pipe to + * FD 3 so we can close everything else. + */ + ret = dup2(startup_pipe[0], 3); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* * Very dumb mechanism to clean open FDs other than * stdio. We don't want O_CLOEXEC for the pipes... */ - for (i = 3; i < 8192; i++) + for (i = 4; i < 8192; i++) close(i); + /* + * Read from the startup pipe, there should be no data + * and we should block until it is closed. We just + * carry on on error since this isn't super critical. + */ + ret = read(3, &i, sizeof(i)); + if (ret < 0) + fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); + if (ret > 0) + fprintf(stderr, "%d bytes of data on startup pipe\n", + ret); + close(3); + ret = execl(program, program, NULL); fprintf(stderr, "execl(%s) failed: %d (%s)\n", program, errno, strerror(errno)); @@ -112,12 +138,6 @@ static void child_start(struct child_data *child, const char *program) ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", child->name, strerror(errno), errno); } - - /* - * Keep output flowing during child startup so logs - * are more timely, can help debugging. - */ - drain_output(false); } } @@ -290,12 +310,12 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy) { int ret; - child_start(child, "./fpsimd-test"); - ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy); if (ret == -1) ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./fpsimd-test"); + ksft_print_msg("Started %s\n", child->name); } @@ -307,12 +327,12 @@ static void start_sve(struct child_data *child, int vl, int cpu) if (ret < 0) ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl); - child_start(child, "./sve-test"); - ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu); if (ret == -1) ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./sve-test"); + ksft_print_msg("Started %s\n", child->name); } @@ -320,16 +340,16 @@ static void start_ssve(struct child_data *child, int vl, int cpu) { int ret; + ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT); if (ret < 0) ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); child_start(child, "./ssve-test"); - ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu); - if (ret == -1) - ksft_exit_fail_msg("asprintf() failed\n"); - ksft_print_msg("Started %s\n", child->name); } @@ -341,12 +361,25 @@ static void start_za(struct child_data *child, int vl, int cpu) if (ret < 0) ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); + ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./za-test"); - ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu); + ksft_print_msg("Started %s\n", child->name); +} + +static void start_zt(struct child_data *child, int cpu) +{ + int ret; + + ret = asprintf(&child->name, "ZT-%d", cpu); if (ret == -1) ksft_exit_fail_msg("asprintf() failed\n"); + child_start(child, "./zt-test"); + ksft_print_msg("Started %s\n", child->name); } @@ -357,7 +390,7 @@ static void probe_vls(int vls[], int *vl_count, int set_vl) *vl_count = 0; - for (vq = SVE_VQ_MAX; vq > 0; --vq) { + for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(set_vl, vq * 16); if (vl == -1) ksft_exit_fail_msg("SET_VL failed: %s (%d)\n", @@ -365,6 +398,9 @@ static void probe_vls(int vls[], int *vl_count, int set_vl) vl &= PR_SVE_VL_LEN_MASK; + if (*vl_count && (vl == vls[*vl_count - 1])) + break; + vq = sve_vq_from_vl(vl); vls[*vl_count] = vl; @@ -375,11 +411,11 @@ static void probe_vls(int vls[], int *vl_count, int set_vl) /* Handle any pending output without blocking */ static void drain_output(bool flush) { - struct epoll_event ev; int ret = 1; + int i; while (ret > 0) { - ret = epoll_wait(epoll_fd, &ev, 1, 0); + ret = epoll_wait(epoll_fd, evs, tests, 0); if (ret < 0) { if (errno == EINTR) continue; @@ -387,8 +423,8 @@ static void drain_output(bool flush) strerror(errno), errno); } - if (ret == 1) - child_output(ev.data.ptr, ev.events, flush); + for (i = 0; i < ret; i++) + child_output(evs[i].data.ptr, evs[i].events, flush); } } @@ -401,10 +437,12 @@ int main(int argc, char **argv) { int ret; int timeout = 10; - int cpus, tests, i, j, c; + int cpus, i, j, c; int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + bool all_children_started = false; + int seen_children; int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; - struct epoll_event ev; + bool have_sme2; struct sigaction sa; while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { @@ -437,6 +475,13 @@ int main(int argc, char **argv) sme_vl_count = 0; } + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) { + tests += cpus; + have_sme2 = true; + } else { + have_sme2 = false; + } + /* Force context switching if we only have FPSIMD */ if (!sve_vl_count && !sme_vl_count) fpsimd_per_cpu = 2; @@ -447,8 +492,9 @@ int main(int argc, char **argv) ksft_print_header(); ksft_set_plan(tests); - ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n", - cpus, sve_vl_count, sme_vl_count); + ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs, SME2 %s\n", + cpus, sve_vl_count, sme_vl_count, + have_sme2 ? "present" : "absent"); if (timeout > 0) ksft_print_msg("Will run for %ds\n", timeout); @@ -465,6 +511,12 @@ int main(int argc, char **argv) strerror(errno), ret); epoll_fd = ret; + /* Create a pipe which children will block on before execing */ + ret = pipe(startup_pipe); + if (ret != 0) + ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n", + strerror(errno), errno); + /* Get signal handers ready before we start any children */ memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = handle_exit_signal; @@ -484,6 +536,11 @@ int main(int argc, char **argv) ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", strerror(errno), errno); + evs = calloc(tests, sizeof(*evs)); + if (!evs) + ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + tests); + for (i = 0; i < cpus; i++) { for (j = 0; j < fpsimd_per_cpu; j++) start_fpsimd(&children[num_children++], i, j); @@ -495,8 +552,18 @@ int main(int argc, char **argv) start_ssve(&children[num_children++], sme_vls[j], i); start_za(&children[num_children++], sme_vls[j], i); } + + if (have_sme2) + start_zt(&children[num_children++], i); } + /* + * All children started, close the startup pipe and let them + * run. + */ + close(startup_pipe[0]); + close(startup_pipe[1]); + for (;;) { /* Did we get a signal asking us to exit? */ if (terminate) @@ -510,7 +577,7 @@ int main(int argc, char **argv) * useful in emulation where we will both be slow and * likely to have a large set of VLs. */ - ret = epoll_wait(epoll_fd, &ev, 1, 1000); + ret = epoll_wait(epoll_fd, evs, tests, 1000); if (ret < 0) { if (errno == EINTR) continue; @@ -519,13 +586,40 @@ int main(int argc, char **argv) } /* Output? */ - if (ret == 1) { - child_output(ev.data.ptr, ev.events, false); + if (ret > 0) { + for (i = 0; i < ret; i++) { + child_output(evs[i].data.ptr, evs[i].events, + false); + } continue; } /* Otherwise epoll_wait() timed out */ + /* + * If the child processes have not produced output they + * aren't actually running the tests yet . + */ + if (!all_children_started) { + seen_children = 0; + + for (i = 0; i < num_children; i++) + if (children[i].output_seen || + children[i].exited) + seen_children++; + + if (seen_children != num_children) { + ksft_print_msg("Waiting for %d children\n", + num_children - seen_children); + continue; + } + + all_children_started = true; + } + + ksft_print_msg("Sending signals, timeout remaining: %d\n", + timeout); + for (i = 0; i < num_children; i++) child_tickle(&children[i]); diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 918d04885a33..8b960d01ed2e 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -215,7 +215,6 @@ endfunction // Main program entry point .globl _start function _start -_start: mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h index 7191e53ca1c0..9292bba5400b 100644 --- a/tools/testing/selftests/arm64/fp/sme-inst.h +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -48,4 +48,24 @@ | ((\offset) & 7) .endm +/* + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + .inst 0xe11f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx + */ +.macro _str_zt nx + .inst 0xe13f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + #endif diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 8c4847977583..6d61992fe8a0 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -30,6 +30,16 @@ #define NT_ARM_SSVE 0x40b #endif +/* + * The architecture defines the maximum VQ as 16 but for extensibility + * the kernel specifies the SVE_VQ_MAX as 512 resulting in us running + * a *lot* more tests than are useful if we use it. Until the + * architecture is extended let's limit our coverage to what is + * currently allowed, plus one extra to ensure we cover constraining + * the VL as expected. + */ +#define TEST_VQ_MAX 17 + struct vec_type { const char *name; unsigned long hwcap_type; @@ -55,7 +65,7 @@ static const struct vec_type vec_types[] = { }, }; -#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4) +#define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4) #define FLAG_TESTS 2 #define FPSIMD_TESTS 2 @@ -689,7 +699,7 @@ static int do_parent(pid_t child) } /* Step through every possible VQ */ - for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); /* First, try to set this vector length */ diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 2a18cb4c528c..4328895dfc87 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -378,7 +378,6 @@ endfunction // Main program entry point .globl _start function _start -_start: mov x23, #0 // Irritation signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c index bf6158654056..ac27d87396fc 100644 --- a/tools/testing/selftests/arm64/fp/za-ptrace.c +++ b/tools/testing/selftests/arm64/fp/za-ptrace.c @@ -25,7 +25,17 @@ #define NT_ARM_ZA 0x40c #endif -#define EXPECTED_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) +/* + * The architecture defines the maximum VQ as 16 but for extensibility + * the kernel specifies the SVE_VQ_MAX as 512 resulting in us running + * a *lot* more tests than are useful if we use it. Until the + * architecture is extended let's limit our coverage to what is + * currently allowed, plus one extra to ensure we cover constraining + * the VL as expected. + */ +#define TEST_VQ_MAX 17 + +#define EXPECTED_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 3) static void fill_buf(char *buf, size_t size) { @@ -301,7 +311,7 @@ static int do_parent(pid_t child) ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); /* Step through every possible VQ */ - for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); /* First, try to set this vector length */ diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 53c54af65704..9dcd70911397 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -231,7 +231,6 @@ endfunction // Main program entry point .globl _start function _start -_start: mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c new file mode 100644 index 000000000000..996d9614a131 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + */ +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_ZA +#define NT_ARM_ZA 0x40c +#endif +#ifndef NT_ARM_ZT +#define NT_ARM_ZT 0x40d +#endif + +#define EXPECTED_TESTS 3 + +static int sme_vl; + +static void fill_buf(char *buf, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + buf[i] = random(); +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size) +{ + struct user_za_header *za; + void *p; + size_t sz = sizeof(*za); + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov)) + goto error; + + za = *buf; + if (za->size <= sz) + break; + + sz = za->size; + } + + return za; + +error: + return NULL; +} + +static int set_za(pid_t pid, const struct user_za_header *za) +{ + struct iovec iov; + + iov.iov_base = (void *)za; + iov.iov_len = za->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov); +} + +static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES]) +{ + struct iovec iov; + + iov.iov_base = zt; + iov.iov_len = ZT_SIG_REG_BYTES; + return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov); +} + + +static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES]) +{ + struct iovec iov; + + iov.iov_base = (void *)zt; + iov.iov_len = ZT_SIG_REG_BYTES; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZT, &iov); +} + +/* Reading with ZA disabled returns all zeros */ +static void ptrace_za_disabled_read_zt(pid_t child) +{ + struct user_za_header za; + char zt[ZT_SIG_REG_BYTES]; + int ret, i; + bool fail = false; + + /* Disable PSTATE.ZA using the ZA interface */ + memset(&za, 0, sizeof(za)); + za.vl = sme_vl; + za.size = sizeof(za); + + ret = set_za(child, &za); + if (ret != 0) { + ksft_print_msg("Failed to disable ZA\n"); + fail = true; + } + + /* Read back ZT */ + ret = get_zt(child, zt); + if (ret != 0) { + ksft_print_msg("Failed to read ZT\n"); + fail = true; + } + + for (i = 0; i < ARRAY_SIZE(zt); i++) { + if (zt[i]) { + ksft_print_msg("zt[%d]: 0x%x != 0\n", i, zt[i]); + fail = true; + } + } + + ksft_test_result(!fail, "ptrace_za_disabled_read_zt\n"); +} + +/* Writing then reading ZT should return the data written */ +static void ptrace_set_get_zt(pid_t child) +{ + char zt_in[ZT_SIG_REG_BYTES]; + char zt_out[ZT_SIG_REG_BYTES]; + int ret, i; + bool fail = false; + + fill_buf(zt_in, sizeof(zt_in)); + + ret = set_zt(child, zt_in); + if (ret != 0) { + ksft_print_msg("Failed to set ZT\n"); + fail = true; + } + + ret = get_zt(child, zt_out); + if (ret != 0) { + ksft_print_msg("Failed to read ZT\n"); + fail = true; + } + + for (i = 0; i < ARRAY_SIZE(zt_in); i++) { + if (zt_in[i] != zt_out[i]) { + ksft_print_msg("zt[%d]: 0x%x != 0x%x\n", i, + zt_in[i], zt_out[i]); + fail = true; + } + } + + ksft_test_result(!fail, "ptrace_set_get_zt\n"); +} + +/* Writing ZT should set PSTATE.ZA */ +static void ptrace_enable_za_via_zt(pid_t child) +{ + struct user_za_header za_in; + struct user_za_header *za_out; + char zt[ZT_SIG_REG_BYTES]; + char *za_data; + size_t za_out_size; + int ret, i, vq; + bool fail = false; + + /* Disable PSTATE.ZA using the ZA interface */ + memset(&za_in, 0, sizeof(za_in)); + za_in.vl = sme_vl; + za_in.size = sizeof(za_in); + + ret = set_za(child, &za_in); + if (ret != 0) { + ksft_print_msg("Failed to disable ZA\n"); + fail = true; + } + + /* Write ZT */ + fill_buf(zt, sizeof(zt)); + ret = set_zt(child, zt); + if (ret != 0) { + ksft_print_msg("Failed to set ZT\n"); + fail = true; + } + + /* Read back ZA and check for register data */ + za_out = NULL; + za_out_size = 0; + if (get_za(child, (void **)&za_out, &za_out_size)) { + /* Should have an unchanged VL */ + if (za_out->vl != sme_vl) { + ksft_print_msg("VL changed from %d to %d\n", + sme_vl, za_out->vl); + fail = true; + } + vq = __sve_vq_from_vl(za_out->vl); + za_data = (char *)za_out + ZA_PT_ZA_OFFSET; + + /* Should have register data */ + if (za_out->size < ZA_PT_SIZE(vq)) { + ksft_print_msg("ZA data less than expected: %u < %u\n", + za_out->size, ZA_PT_SIZE(vq)); + fail = true; + vq = 0; + } + + /* That register data should be non-zero */ + for (i = 0; i < ZA_PT_ZA_SIZE(vq); i++) { + if (za_data[i]) { + ksft_print_msg("ZA byte %d is %x\n", + i, za_data[i]); + fail = true; + } + } + } else { + ksft_print_msg("Failed to read ZA\n"); + fail = true; + } + + ksft_test_result(!fail, "ptrace_enable_za_via_zt\n"); +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + + ptrace_za_disabled_read_zt(child); + ptrace_set_get_zt(child); + ptrace_enable_za_via_zt(child); + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + pid_t child; + + srandom(getpid()); + + ksft_print_header(); + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) { + ksft_set_plan(1); + ksft_exit_skip("SME2 not available\n"); + } + + /* We need a valid SME VL to enable/disable ZA */ + sme_vl = prctl(PR_SME_GET_VL); + if (sme_vl == -1) { + ksft_set_plan(1); + ksft_exit_skip("Failed to read SME VL: %d (%s)\n", + errno, strerror(errno)); + } + + ksft_set_plan(EXPECTED_TESTS); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return ret; +} diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S new file mode 100644 index 000000000000..d63286397638 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021-2 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Scalable Matrix Extension ZT context switch test +// Repeatedly writes unique test patterns into ZT0 +// and reads them back to verify integrity. + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" +#include "sme-inst.h" + +.arch_extension sve + +#define ZT_SZ 512 +#define ZT_B (ZT_SZ / 8) + +// Declare some storage space to shadow ZT register contents and a +// scratch buffer. +.pushsection .text +.data +.align 4 +ztref: + .space ZT_B +scratch: + .space ZT_B +.popsection + + +// Generate a test pattern for storage in ZT +// x0: pid +// x1: generation + +// These values are used to construct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:24 generation number (increments once per test_loop) +// bits 23: 8 pid +// bits 7: 0 32-bit lane index + +function pattern + mov w3, wzr + bfi w3, w0, #8, #16 // PID + bfi w3, w1, #24, #8 // Generation + + ldr x0, =scratch + mov w1, #ZT_B / 4 + +0: str w3, [x0], #4 + add w3, w3, #1 // Lane + subs w1, w1, #1 + b.ne 0b + + ret +endfunction + +// Set up test pattern in a ZT horizontal vector +// x0: pid +// x1: generation +function setup_zt + mov x4, x30 + + bl pattern // Get pattern in scratch buffer + ldr x0, =ztref + ldr x1, =scratch + mov x2, #ZT_B + bl memcpy + + ldr x0, =ztref + _ldr_zt 0 // load zt0 from pointer x0 + + ret x4 +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a ZT vector matches its shadow in memory, else abort +// Clobbers x0-x3 +function check_zt + mov x3, x30 + + ldr x0, =scratch // Poison scratch + mov x1, #ZT_B + bl memfill_ae + + ldr x0, =scratch + _str_zt 0 + + ldr x0, =ztref + ldr x1, =scratch + mov x2, #ZT_B + mov x30, x3 + b memcmp +endfunction + +// Any SME register modified here can cause corruption in the main +// thread -- but *only* the locations modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random ZT data +#if 0 + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#endif + + ret +endfunction + +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start + mov x23, #0 // signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + smstart_za + + // Obtain our PID, to ensure test pattern uniqueness between processes + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + mov x0, x20 + mov x1, x22 + bl setup_zt + + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0 + and x1, x0, #3 + cmp x1, #2 + b.ne svcr_barf + + bl check_zt + + add x22, x22, #1 // Everything still working + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + smstop + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function svcr_barf + mov x10, x0 + + puts "Bad SVCR: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 037046f5784e..0d7ac3db8390 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,24 +1,33 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited -# preserve CC value from top level Makefile -ifeq ($(CC),cc) -CC := $(CROSS_COMPILE)gcc -endif - CFLAGS += -std=gnu99 -I. -pthread LDFLAGS += -pthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) +ifeq ($(LLVM),) +# For GCC check that the toolchain has MTE support. + +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + #check if the compiler works well mte_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.5-a+memtag -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) +else + +# All supported clang versions also support MTE. +mte_cc_support := 1 + +endif + ifeq ($(mte_cc_support),1) # Generated binaries to be installed by top KSFT script TEST_GEN_PROGS := $(PROGS) -# Get Kernel headers installed and use them. else $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.) $(warning test program "mte" will not be created.) diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 75fc482d63b6..1dbbbd47dd50 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -32,7 +32,7 @@ static int check_buffer_by_byte(int mem_type, int mode) bool err; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true); @@ -69,7 +69,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, char *und_ptr = NULL; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, underflow_range, 0); @@ -165,7 +165,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, char *over_ptr = NULL; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, 0, overflow_range); @@ -338,7 +338,7 @@ static int check_buffer_by_block(int mem_type, int mode) int i, item, result = KSFT_PASS; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); - item = sizeof(sizes)/sizeof(int); + item = ARRAY_SIZE(sizes); cur_mte_cxt.fault_valid = false; for (i = 0; i < item; i++) { result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]); @@ -366,7 +366,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping) { char *ptr; int run, fd; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); for (run = 0; run < total; run++) { @@ -404,7 +404,7 @@ int main(int argc, char *argv[]) { int err; size_t page_size = getpagesize(); - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); sizes[item - 3] = page_size - 1; sizes[item - 2] = page_size; diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index a04b12c21ac9..17694caaff53 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -61,9 +61,8 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i { char *ptr, *map_ptr; int run, result, map_size; - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); - item = sizeof(sizes)/sizeof(int); mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); for (run = 0; run < item; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; @@ -93,7 +92,7 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta { char *ptr, *map_ptr; int run, fd, map_size; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); int result = KSFT_PASS; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); @@ -132,7 +131,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) { char *ptr, *map_ptr; int run, prot_flag, result, fd, map_size; - int total = sizeof(sizes)/sizeof(int); + int total = ARRAY_SIZE(sizes); prot_flag = PROT_READ | PROT_WRITE; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); @@ -187,7 +186,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) int main(int argc, char *argv[]) { int err; - int item = sizeof(sizes)/sizeof(int); + int item = ARRAY_SIZE(sizes); err = mte_default_setup(); if (err) diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index e8d2b57f73ec..8ab4c86837fd 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -4,5 +4,7 @@ fake_sigreturn_* sme_* ssve_* sve_* +tpidr2_siginfo za_* +zt_* !*.[ch] diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index be7520a863b0..8f5febaf1a9a 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -22,6 +22,10 @@ $(TEST_GEN_PROGS): $(PROGS) # Common test-unit targets to build common-layout test-cases executables # Needs secondary expansion to properly include the testcase c-file in pre-reqs +COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \ + signals.S +COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h + .SECONDEXPANSION: -$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h - $(CC) $(CFLAGS) $^ -o $@ +$(PROGS): $$@.c ${COMMON_SOURCES} ${COMMON_HEADERS} + $(CC) $(CFLAGS) ${@}.c ${COMMON_SOURCES} -o $@ diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index 416b1ff43199..00051b40d71e 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -12,12 +12,10 @@ #include "test_signals.h" #include "test_signals_utils.h" -struct tdescr *current; +struct tdescr *current = &tde; int main(int argc, char *argv[]) { - current = &tde; - ksft_print_msg("%s :: %s\n", current->name, current->descr); if (test_setup(current) && test_init(current)) { test_run(current); diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index 0c645834ddc3..1e6273d81575 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -34,6 +34,7 @@ enum { FSVE_BIT, FSME_BIT, FSME_FA64_BIT, + FSME2_BIT, FMAX_END }; @@ -41,6 +42,7 @@ enum { #define FEAT_SVE (1UL << FSVE_BIT) #define FEAT_SME (1UL << FSME_BIT) #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) +#define FEAT_SME2 (1UL << FSME2_BIT) /* * A descriptor used to describe and configure a test case. diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 308e229e58ab..40be8443949d 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -29,6 +29,7 @@ static char const *const feats_names[FMAX_END] = { " SVE ", " SME ", " FA64 ", + " SME2 ", }; #define MAX_FEATS_SZ 128 @@ -192,8 +193,10 @@ static bool handle_signal_copyctx(struct tdescr *td, * in the copy, this was previously validated in * ASSERT_GOOD_CONTEXT(). */ - to_copy = offset + sizeof(struct extra_context) + 16 + - extra->size; + to_copy = __builtin_offsetof(ucontext_t, + uc_mcontext.__reserved); + to_copy += offset + sizeof(struct extra_context) + 16; + to_copy += extra->size; copied_extra = (struct extra_context *)&(td->live_uc->uc_mcontext.__reserved[offset]); } else { copied_extra = NULL; @@ -323,6 +326,8 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SME; if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) td->feats_supported |= FEAT_SME_FA64; + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) + td->feats_supported |= FEAT_SME2; if (feats_ok(td)) { if (td->feats_required & td->feats_supported) fprintf(stderr, diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO index 110ff9fd195d..1f7fba8194fe 100644 --- a/tools/testing/selftests/arm64/signal/testcases/TODO +++ b/tools/testing/selftests/arm64/signal/testcases/TODO @@ -1,2 +1 @@ - Validate that register contents are saved and restored as expected. -- Support and validate extra_context. diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index d0a178945b1a..3d37daafcff5 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -34,6 +34,10 @@ static bool sme_get_vls(struct tdescr *td) vl &= PR_SME_VL_LEN_MASK; + /* Did we find the lowest supported VL? */ + if (vq < sve_vq_from_vl(vl)) + break; + /* Skip missing VLs */ vq = sve_vq_from_vl(vl); @@ -92,6 +96,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, return 1; } + if (!(ssve->flags & SVE_SIG_FLAG_SM)) { + fprintf(stderr, "SVE_SIG_FLAG_SM not set in SVE record\n"); + return 1; + } + /* The actual size validation is done in get_current_context() */ fprintf(stderr, "Got expected size %u and VL %d\n", head->size, ssve->vl); @@ -116,12 +125,7 @@ static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) struct tdescr tde = { .name = "Streaming SVE registers", .descr = "Check that we get the right Streaming SVE registers reported", - /* - * We shouldn't require FA64 but things like memset() used in the - * helpers might use unsupported instructions so for now disable - * the test unless we've got the full instruction set. - */ - .feats_required = FEAT_SME | FEAT_SME_FA64, + .feats_required = FEAT_SME, .timeout = 3, .init = sme_get_vls, .run = sme_regs, diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c new file mode 100644 index 000000000000..9dc5f128bbc0 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that both the streaming SVE and ZA register context in + * signal frames is set up as expected when enabled simultaneously. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; +static unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls = 0; + +static bool sme_get_vls(struct tdescr *td) +{ + int vq, vl; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SME_SET_VL, vq * 16); + if (vl == -1) + return false; + + vl &= PR_SME_VL_LEN_MASK; + + /* Did we find the lowest supported VL? */ + if (vq < sve_vq_from_vl(vl)) + break; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + /* We need at least one VL */ + if (nvls < 1) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return false; + } + + return true; +} + +static void setup_regs(void) +{ + /* smstart sm; real data is TODO */ + asm volatile(".inst 0xd503437f" : : : ); + + /* smstart za; real data is TODO */ + asm volatile(".inst 0xd503457f" : : : ); +} + +static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; + +static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, + unsigned int vl) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct _aarch64_ctx *regs; + struct sve_context *ssve; + struct za_context *za; + int ret; + + fprintf(stderr, "Testing VL %d\n", vl); + + ret = prctl(PR_SME_SET_VL, vl); + if (ret != vl) { + fprintf(stderr, "Failed to set VL, got %d\n", ret); + return 1; + } + + /* + * Get a signal context which should have the SVE and ZA + * frames in it. + */ + setup_regs(); + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + regs = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context), + &offset); + if (!regs) { + fprintf(stderr, "No SVE context\n"); + return 1; + } + + ssve = (struct sve_context *)regs; + if (ssve->vl != vl) { + fprintf(stderr, "Got SSVE VL %d, expected %d\n", ssve->vl, vl); + return 1; + } + + if (!(ssve->flags & SVE_SIG_FLAG_SM)) { + fprintf(stderr, "SVE_SIG_FLAG_SM not set in SVE record\n"); + return 1; + } + + fprintf(stderr, "Got expected SSVE size %u and VL %d\n", + regs->size, ssve->vl); + + regs = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), + &offset); + if (!regs) { + fprintf(stderr, "No ZA context\n"); + return 1; + } + + za = (struct za_context *)regs; + if (za->vl != vl) { + fprintf(stderr, "Got ZA VL %d, expected %d\n", za->vl, vl); + return 1; + } + + fprintf(stderr, "Got expected ZA size %u and VL %d\n", + regs->size, za->vl); + + /* We didn't load any data into ZA so it should be all zeros */ + if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET, + ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) { + fprintf(stderr, "ZA data invalid\n"); + return 1; + } + + return 0; +} + +static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + int i; + + for (i = 0; i < nvls; i++) { + if (do_one_sme_vl(td, si, uc, vls[i])) + return 1; + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "Streaming SVE registers", + .descr = "Check that we get the right Streaming SVE registers reported", + .feats_required = FEAT_SME, + .timeout = 3, + .init = sme_get_vls, + .run = sme_regs, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index e1c625b20ac4..9f580b55b388 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -1,5 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2019 ARM Limited */ + +#include <ctype.h> +#include <string.h> + #include "testcases.h" struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, @@ -104,19 +108,41 @@ bool validate_za_context(struct za_context *za, char **err) return true; } +bool validate_zt_context(struct zt_context *zt, char **err) +{ + if (!zt || !err) + return false; + + /* If the context is present there should be at least one register */ + if (zt->nregs == 0) { + *err = "no registers"; + return false; + } + + /* Size should agree with the number of registers */ + if (zt->head.size != ZT_SIG_CONTEXT_SIZE(zt->nregs)) { + *err = "register count does not match size"; + return false; + } + + return true; +} + bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) { bool terminated = false; size_t offs = 0; int flags = 0; - int new_flags; + int new_flags, i; struct extra_context *extra = NULL; struct sve_context *sve = NULL; struct za_context *za = NULL; + struct zt_context *zt = NULL; struct _aarch64_ctx *head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; void *extra_data = NULL; size_t extra_sz = 0; + char magic[4]; if (!err) return false; @@ -158,6 +184,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) if (head->size != sizeof(struct esr_context)) *err = "Bad size for esr_context"; break; + case TPIDR2_MAGIC: + if (head->size != sizeof(struct tpidr2_context)) + *err = "Bad size for tpidr2_context"; + break; case SVE_MAGIC: if (flags & SVE_CTX) *err = "Multiple SVE_MAGIC"; @@ -172,6 +202,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) za = (struct za_context *)head; new_flags |= ZA_CTX; break; + case ZT_MAGIC: + if (flags & ZT_CTX) + *err = "Multiple ZT_MAGIC"; + /* Size is validated in validate_za_context() */ + zt = (struct zt_context *)head; + new_flags |= ZT_CTX; + break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) *err = "Multiple EXTRA_MAGIC"; @@ -194,11 +231,19 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) /* * A still unknown Magic: potentially freshly added * to the Kernel code and still unknown to the - * tests. + * tests. Magic numbers are supposed to be allocated + * as somewhat meaningful ASCII strings so try to + * print as such as well as the raw number. */ + memcpy(magic, &head->magic, sizeof(magic)); + for (i = 0; i < sizeof(magic); i++) + if (!isalnum(magic[i])) + magic[i] = '?'; + fprintf(stdout, - "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n", - head->magic); + "SKIP Unknown MAGIC: 0x%X (%c%c%c%c) - Is KSFT arm64/signal up to date ?\n", + head->magic, + magic[3], magic[2], magic[1], magic[0]); break; } @@ -221,6 +266,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) if (new_flags & ZA_CTX) if (!validate_za_context(za, err)) return false; + if (new_flags & ZT_CTX) + if (!validate_zt_context(zt, err)) + return false; flags |= new_flags; @@ -232,6 +280,11 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) return false; } + if (terminated && (flags & ZT_CTX) && !(flags & ZA_CTX)) { + *err = "ZT context but no ZA context"; + return false; + } + return true; } diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 040afded0b76..a08ab0d6207a 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -18,6 +18,7 @@ #define SVE_CTX (1 << 1) #define ZA_CTX (1 << 2) #define EXTRA_CTX (1 << 3) +#define ZT_CTX (1 << 4) #define KSFT_BAD_MAGIC 0xdeadbeef diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c new file mode 100644 index 000000000000..6a2c82bf7ead --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 ARM Limited + * + * Verify that the TPIDR2 register context in signal frames is set up as + * expected. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <unistd.h> +#include <asm/sigcontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +#define SYS_TPIDR2 "S3_3_C13_C0_5" + +static uint64_t get_tpidr2(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, " SYS_TPIDR2 "\n" + : "=r"(val) + : + : "cc"); + + return val; +} + +int tpidr2_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct tpidr2_context *tpidr2_ctx; + size_t offset; + bool in_sigframe; + bool have_sme; + __u64 orig_tpidr2; + + have_sme = getauxval(AT_HWCAP2) & HWCAP2_SME; + if (have_sme) + orig_tpidr2 = get_tpidr2(); + + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + tpidr2_ctx = (struct tpidr2_context *) + get_header(head, TPIDR2_MAGIC, td->live_sz, &offset); + + in_sigframe = tpidr2_ctx != NULL; + + fprintf(stderr, "TPIDR2 sigframe %s on system %s SME\n", + in_sigframe ? "present" : "absent", + have_sme ? "with" : "without"); + + td->pass = (in_sigframe == have_sme); + + /* + * Check that the value we read back was the one present at + * the time that the signal was triggered. TPIDR2 is owned by + * libc so we can't safely choose the value and it is possible + * that we may need to revisit this in future if something + * starts deciding to set a new TPIDR2 between us reading and + * the signal. + */ + if (have_sme && tpidr2_ctx) { + if (tpidr2_ctx->tpidr2 != orig_tpidr2) { + fprintf(stderr, "TPIDR2 in frame is %llx, was %llx\n", + tpidr2_ctx->tpidr2, orig_tpidr2); + td->pass = false; + } + } + + return 0; +} + +struct tdescr tde = { + .name = "TPIDR2", + .descr = "Validate that TPIDR2 is present as expected", + .timeout = 3, + .run = tpidr2_present, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index ea45acb115d5..174ad6656696 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -34,6 +34,10 @@ static bool sme_get_vls(struct tdescr *td) vl &= PR_SME_VL_LEN_MASK; + /* Did we find the lowest supported VL? */ + if (vq < sve_vq_from_vl(vl)) + break; + /* Skip missing VLs */ vq = sve_vq_from_vl(vl); diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c new file mode 100644 index 000000000000..34f69bcf821e --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that using an instruction not supported in streaming mode + * traps when in streaming mode. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +int zt_no_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + + /* + * Get a signal context which should not have a ZT frame and + * registers in it. + */ + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset); + if (head) { + fprintf(stderr, "Got unexpected ZT context\n"); + return 1; + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "ZT register data not present", + .descr = "Validate that ZT is not present when ZA is disabled", + .feats_required = FEAT_SME2, + .timeout = 3, + .sanity_disabled = true, + .run = zt_no_regs_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c new file mode 100644 index 000000000000..e1eb4d5c027a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that using an instruction not supported in streaming mode + * traps when in streaming mode. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +static void enable_za(void) +{ + /* smstart za; real data is TODO */ + asm volatile(".inst 0xd503457f" : : : ); +} + +int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct zt_context *zt; + char *zeros; + + /* + * Get a signal context which should have a ZT frame and registers + * in it. + */ + enable_za(); + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset); + if (!head) { + fprintf(stderr, "No ZT context\n"); + return 1; + } + + zt = (struct zt_context *)head; + if (zt->nregs == 0) { + fprintf(stderr, "Got context with no registers\n"); + return 1; + } + + fprintf(stderr, "Got expected size %u for %d registers\n", + head->size, zt->nregs); + + /* We didn't load any data into ZT so it should be all zeros */ + zeros = malloc(ZT_SIG_REGS_SIZE(zt->nregs)); + if (!zeros) { + fprintf(stderr, "Out of memory, nregs=%u\n", zt->nregs); + return 1; + } + memset(zeros, 0, ZT_SIG_REGS_SIZE(zt->nregs)); + + if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET, + ZT_SIG_REGS_SIZE(zt->nregs)) != 0) { + fprintf(stderr, "ZT data invalid\n"); + return 1; + } + + free(zeros); + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "ZT register data", + .descr = "Validate that ZT is present and has data when ZA is enabled", + .feats_required = FEAT_SME2, + .timeout = 3, + .sanity_disabled = true, + .run = zt_regs_run, +}; diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile index 41cb75070511..6d29cfde43a2 100644 --- a/tools/testing/selftests/arm64/tags/Makefile +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := tags_test TEST_PROGS := run_tags_test.sh diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 07d2d0a8c5cb..116fecf80ca1 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -36,6 +36,7 @@ test_cpp *.lskel.h /no_alu32 /bpf_gcc +/host-tools /tools /runqslower /bench @@ -46,3 +47,5 @@ test_cpp xskxceiver xdp_redirect_multi xdp_synproxy +xdp_hw_metadata +xdp_features diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index 939de574fc7f..f748f2c33b22 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,6 +1,7 @@ # TEMPORARY +# Alphabetical order get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge -stacktrace_build_id_nmi stacktrace_build_id +stacktrace_build_id_nmi task_fd_query_rawtp varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 new file mode 100644 index 000000000000..99cc33c51eaa --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -0,0 +1,84 @@ +bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 +bpf_cookie/lsm +bpf_cookie/multi_kprobe_attach_api +bpf_cookie/multi_kprobe_link_api +bpf_cookie/trampoline +bpf_loop/check_callback_fn_stop # link unexpected error: -524 +bpf_loop/check_invalid_flags +bpf_loop/check_nested_calls +bpf_loop/check_non_constant_callback +bpf_loop/check_nr_loops +bpf_loop/check_null_callback_ctx +bpf_loop/check_stack +bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) +bpf_tcp_ca/dctcp_fallback +btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 +cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) +d_path/basic # setup attach failed: -524 +deny_namespace # attach unexpected error: -524 (errno 524) +fentry_fexit # fentry_attach unexpected error: -1 (errno 524) +fentry_test # fentry_attach unexpected error: -1 (errno 524) +fexit_sleep # fexit_attach fexit attach failed: -1 +fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 +fexit_test # fexit_attach unexpected error: -1 (errno 524) +get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) +htab_update/reenter_update +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF +kfunc_call/subprog_lskel # skel unexpected error: -2 +kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 +kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) +ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF +ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 +libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) +linked_list +lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) +lru_bug # lru_bug__attach unexpected error: -524 (errno 524) +modify_return # modify_return__attach failed unexpected error: -524 (errno 524) +module_attach # skel_attach skeleton attach failed: -524 +mptcp/base # run_test mptcp unexpected error: -524 (errno 524) +netcnt # packets unexpected packets: actual 10001 != expected 10000 +rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 +recursion # skel_attach unexpected error: -524 (errno 524) +ringbuf # skel_attach skeleton attachment failed: -1 +setget_sockopt # attach_cgroup unexpected error: -524 +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) +socket_cookie # prog_attach unexpected error: -524 +stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2 +task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) +task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) +test_bprm_opts # attach attach failed: -524 +test_ima # attach attach failed: -524 +test_local_storage # attach lsm attach failed: -524 +test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) +test_overhead # attach_fentry unexpected error: -524 +timer # timer unexpected error: -524 (errno 524) +timer_crash # timer_crash__attach unexpected error: -524 (errno 524) +timer_mim # timer_mim unexpected error: -524 (errno 524) +trace_printk # trace_printk__attach unexpected error: -1 (errno 524) +trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) +tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) +trampoline_count # attach_prog unexpected error: -524 +unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_producer_wrong_offset +user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz +user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_sample_full_ring_buffer +user_ringbuf/test_user_ringbuf_post_alignment_autoadjust +user_ringbuf/test_user_ringbuf_overfill +user_ringbuf/test_user_ringbuf_discards_properly_ignored +user_ringbuf/test_user_ringbuf_loop +user_ringbuf/test_user_ringbuf_msg_protocol +user_ringbuf/test_user_ringbuf_blocking_reserve +verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) +vmlinux # skel_attach skeleton attach failed: -524 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 17e074eb42b8..b89eb87034e4 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,77 +1,24 @@ # TEMPORARY -atomics # attach(add): actual -524 <= expected 0 (trampoline) -bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) +# Alphabetical order bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) -bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_cookie # failed to open_and_load program: -524 (trampoline) bpf_loop # attaches to __x64_sys_nanosleep -bpf_mod_race # BPF trampoline -bpf_nf # JIT does not support calling kernel function -core_read_macros # unknown func bpf_probe_read#4 (overlapping) -d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) -dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) -fentry_fexit # fentry attach failed: -524 (trampoline) -fentry_test # fentry_first_attach unexpected error: -524 (trampoline) -fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) +cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) -fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) -fexit_test # fexit_first_attach unexpected error: -524 (trampoline) -get_func_args_test # trampoline -get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) -kfree_skb # attach fentry unexpected error: -524 (trampoline) -kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test # relies on fentry ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) -modify_return # modify_return attach failed: -524 (trampoline) module_attach # skel_attach skeleton attach failed: -524 (trampoline) -mptcp -kprobe_multi_test # relies on fentry -netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) -probe_user # check_kprobe_res wrong kprobe res from probe read (?) -recursion # skel_attach unexpected error: -524 (trampoline) ringbuf # skel_load skeleton load failed (?) -sk_assign # Can't read on server: Invalid argument (?) -sk_lookup # endianness problem -sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) -skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) -socket_cookie # prog_attach unexpected error: -524 (trampoline) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) -task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) -test_bpffs # bpffs test failed 255 (iterator) -test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) -test_ima # failed to auto-attach program 'ima': -524 (trampoline) -test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) test_lsm # attach unexpected error: -524 (trampoline) -test_overhead # attach_fentry unexpected error: -524 (trampoline) -test_profiler # unknown func bpf_probe_read_str#45 (overlapping) -timer # failed to auto-attach program 'test1': -524 (trampoline) -timer_crash # trampoline -timer_mim # failed to auto-attach program 'test1': -524 (trampoline) -trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) -trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) -verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) -vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) -xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) -xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) -xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) -map_kptr # failed to open_and_load program: -524 (trampoline) -bpf_cookie # failed to open_and_load program: -524 (trampoline) -xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) -send_signal # intermittently fails to receive signal -select_reuseport # intermittently fails on new s390x setup -xdp_synproxy # JIT does not support calling kernel function (kfunc) unpriv_bpf_disabled # fentry -lru_bug # prog 'printk': failed to auto-attach: -524 -setget_sockopt # attach unexpected error: -524 (trampoline) -cb_refs # expected error message unexpected error: -524 (trampoline) -cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) -htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) -tracing_struct # failed to auto-attach: -524 (trampoline) user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) -lookup_key # JIT does not support calling kernel function (kfunc) -verify_pkcs7_sig # JIT does not support calling kernel function (kfunc) -kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) +verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) +xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +xdp_metadata # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e6cf21fad69f..b677dcd0b77a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -22,10 +22,11 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= +SAN_LDFLAGS ?= $(SAN_CFLAGS) CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) -LDFLAGS += $(SAN_CFLAGS) +LDFLAGS += $(SAN_LDFLAGS) LDLIBS += -lelf -lz -lrt -lpthread # Silence some warnings when compiled with clang @@ -73,7 +74,8 @@ TEST_PROGS := test_kmod.sh \ test_bpftool.sh \ test_bpftool_metadata.sh \ test_doc_build.sh \ - test_xsk.sh + test_xsk.sh \ + test_xdp_features.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh ima_setup.sh verify_sig_setup.sh \ @@ -83,7 +85,8 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xskxceiver xdp_redirect_multi xdp_synproxy veristat + xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ + xdp_features TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file TEST_GEN_FILES += liburandom_read.so @@ -149,14 +152,13 @@ endif # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. $(notdir $(TEST_GEN_PROGS) \ - $(TEST_PROGS) \ - $(TEST_PROGS_EXTENDED) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; # sort removes libbpf duplicates when not cross-building -MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ - $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ +MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ + $(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \ + $(HOST_BUILD_DIR)/resolve_btfids \ $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) @@ -181,15 +183,17 @@ endif # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ - -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ + $^ $(filter-out -static,$(LDLIBS)) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - liburandom_read.so $(LDLIBS) \ - -fuse-ld=$(LLD) -Wl,-znoseparate-code \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -Wl,-rpath=. -o $@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) @@ -200,20 +204,30 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_testmod + $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool +ifneq ($(CROSS_COMPILE),) +CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool +TRUNNER_BPFTOOL := $(CROSS_BPFTOOL) +USE_BOOTSTRAP := "" +else +TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL) +USE_BOOTSTRAP := "bootstrap/" +endif $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ - BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ + EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ -TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) +TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL) $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) @@ -239,19 +253,30 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(OUTPUT)/xsk.o: $(BPFOBJ) -$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ + ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin +ifneq ($(CROSS_COMPILE),) +$(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ + $(BPFOBJ) | $(BUILD_DIR)/bpftool + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ + ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ + EXTRA_CFLAGS='-g -O0' \ + OUTPUT=$(BUILD_DIR)/bpftool/ \ + LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ + LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ + prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin +endif + all: docs docs: @@ -268,7 +293,8 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) @@ -277,7 +303,8 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ - OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ + CC="$(HOSTCC)" LD="$(HOSTLD)" \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif @@ -298,7 +325,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/ctype.c \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ - CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) @@ -309,9 +336,9 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. define get_sys_includes -$(shell $(1) -v -E - </dev/null 2>&1 \ +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef # Determine target endianness. @@ -319,7 +346,11 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) -CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) @@ -359,9 +390,11 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c \ - test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ - map_ptr_kern.c core_kern.c core_kern_overflow.c +LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ + trace_printk.c trace_vprintk.c map_ptr_kern.c \ + core_kern.c core_kern_overflow.c test_ringbuf.c \ + test_ringbuf_map_key.c + # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ kfunc_call_test_subprog.c @@ -376,6 +409,9 @@ linked_maps.skel.h-deps := linked_maps1.bpf.o linked_maps2.bpf.o test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_subskeleton.bpf.o test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o +xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o +xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o +xdp_features.skel.h-deps := xdp_features.bpf.o LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) @@ -506,11 +542,13 @@ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ $(RESOLVE_BTFIDS) \ + $(TRUNNER_BPFTOOL) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@ - $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool $(if $2,$2/)bpftool + $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \ + $(OUTPUT)/$(if $2,$2/)bpftool endef @@ -520,13 +558,15 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c + cap_helpers.c test_loader.c xsk.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ - ima_setup.sh verify_sig_setup.sh \ - $(wildcard progs/btf_dump_test_case_*.c) + ima_setup.sh \ + verify_sig_setup.sh \ + $(wildcard progs/btf_dump_test_case_*.c) \ + $(wildcard progs/*.bpf.o) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) @@ -539,7 +579,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc,) $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc)) endif @@ -567,6 +607,18 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ +$(OUTPUT)/xskxceiver: xskxceiver.c $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) + $(call msg,BINARY,,$@) + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + +$(OUTPUT)/xdp_hw_metadata: xdp_hw_metadata.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xdp_hw_metadata.skel.h | $(OUTPUT) + $(call msg,BINARY,,$@) + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + +$(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp_features.skel.h | $(OUTPUT) + $(call msg,BINARY,,$@) + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) @@ -586,6 +638,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h +$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -600,7 +653,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_strncmp.o \ $(OUTPUT)/bench_bpf_hashmap_full_update.o \ $(OUTPUT)/bench_local_storage.o \ - $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o + $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \ + $(OUTPUT)/bench_bpf_hashmap_lookup.o \ + # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ @@ -617,3 +672,6 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ liburandom_read.so) .PHONY: docs docs-clean + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index d3c6b3da0bb1..cb9b95702ac6 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -6,18 +6,59 @@ General instructions on running selftests can be found in __ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests +============= +BPF CI System +============= + +BPF employs a continuous integration (CI) system to check patch submission in an +automated fashion. The system runs selftests for each patch in a series. Results +are propagated to patchwork, where failures are highlighted similar to +violations of other checks (such as additional warnings being emitted or a +``scripts/checkpatch.pl`` reported deficiency): + + https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173 + +The CI system executes tests on multiple architectures. It uses a kernel +configuration derived from both the generic and architecture specific config +file fragments below ``tools/testing/selftests/bpf/`` (e.g., ``config`` and +``config.x86_64``). + +Denylisting Tests +================= + +It is possible for some architectures to not have support for all BPF features. +In such a case tests in CI may fail. An example of such a shortcoming is BPF +trampoline support on IBM's s390x architecture. For cases like this, an in-tree +deny list file, located at ``tools/testing/selftests/bpf/DENYLIST.<arch>``, can +be used to prevent the test from running on such an architecture. + +In addition to that, the generic ``tools/testing/selftests/bpf/DENYLIST`` is +honored on every architecture running tests. + +These files are organized in three columns. The first column lists the test in +question. This can be the name of a test suite or of an individual test. The +remaining two columns provide additional meta data that helps identify and +classify the entry: column two is a copy and paste of the error being reported +when running the test in the setting in question. The third column, if +available, summarizes the underlying problem. A value of ``trampoline``, for +example, indicates that lack of trampoline support is causing the test to fail. +This last entry helps identify tests that can be re-enabled once such support is +added. + ========================= Running Selftests in a VM ========================= It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``. The script tries to ensure that the tests are run with the same environment as they -would be run post-submit in the CI used by the Maintainers. +would be run post-submit in the CI used by the Maintainers, with the exception +that deny lists are not automatically honored. -This script downloads a suitable Kconfig and VM userspace image from the system used by -the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles the -bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and -saves the resulting output (by default in ``~/.bpf_selftests``). +This script uses the in-tree kernel configuration and downloads a VM userspace +image from the system used by the CI. It builds the kernel (without overwriting +your existing Kconfig), recompiles the bpf selftests, runs them (by default +``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by +default in ``~/.bpf_selftests``). Script dependencies: - clang (preferably built from sources, https://github.com/llvm/llvm-project); @@ -26,7 +67,7 @@ Script dependencies: - docutils (for ``rst2man``); - libcap-devel. -For more information on about using the script, run: +For more information about using the script, run: .. code-block:: console diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index c1f20a147462..0b2a53bb8460 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -16,6 +16,7 @@ struct env env = { .warmup_sec = 1, .duration_sec = 5, .affinity = false, + .quiet = false, .consumer_cnt = 1, .producer_cnt = 1, }; @@ -262,6 +263,7 @@ static const struct argp_option opts[] = { { "consumers", 'c', "NUM", 0, "Number of consumer threads"}, { "verbose", 'v', NULL, 0, "Verbose debug output"}, { "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"}, + { "quiet", 'q', NULL, 0, "Be more quiet"}, { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0, "Set of CPUs for producer threads; implies --affinity"}, { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0, @@ -275,6 +277,7 @@ extern struct argp bench_bpf_loop_argp; extern struct argp bench_local_storage_argp; extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; +extern struct argp bench_hashmap_lookup_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -284,13 +287,15 @@ static const struct argp_child bench_parsers[] = { { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, { &bench_local_storage_rcu_tasks_trace_argp, 0, "local_storage RCU Tasks Trace slowdown benchmark", 0 }, + { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 }, {}, }; +/* Make pos_args global, so that we can run argp_parse twice, if necessary */ +static int pos_args; + static error_t parse_arg(int key, char *arg, struct argp_state *state) { - static int pos_args; - switch (key) { case 'v': env.verbose = true; @@ -329,6 +334,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'a': env.affinity = true; break; + case 'q': + env.quiet = true; + break; case ARG_PROD_AFFINITY_SET: env.affinity = true; if (parse_num_list(arg, &env.prod_cpus.cpus, @@ -359,7 +367,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } -static void parse_cmdline_args(int argc, char **argv) +static void parse_cmdline_args_init(int argc, char **argv) { static const struct argp argp = { .options = opts, @@ -369,9 +377,25 @@ static void parse_cmdline_args(int argc, char **argv) }; if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) exit(1); - if (!env.list && !env.bench_name) { - argp_help(&argp, stderr, ARGP_HELP_DOC, "bench"); - exit(1); +} + +static void parse_cmdline_args_final(int argc, char **argv) +{ + struct argp_child bench_parsers[2] = {}; + const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + .children = bench_parsers, + }; + + /* Parse arguments the second time with the correct set of parsers */ + if (bench->argp) { + bench_parsers[0].argp = bench->argp; + bench_parsers[0].header = bench->name; + pos_args = 0; + if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) + exit(1); } } @@ -490,6 +514,7 @@ extern const struct bench bench_local_storage_cache_seq_get; extern const struct bench bench_local_storage_cache_interleaved_get; extern const struct bench bench_local_storage_cache_hashmap_control; extern const struct bench bench_local_storage_tasks_trace; +extern const struct bench bench_bpf_hashmap_lookup; static const struct bench *benchs[] = { &bench_count_global, @@ -529,17 +554,17 @@ static const struct bench *benchs[] = { &bench_local_storage_cache_interleaved_get, &bench_local_storage_cache_hashmap_control, &bench_local_storage_tasks_trace, + &bench_bpf_hashmap_lookup, }; -static void setup_benchmark() +static void find_benchmark(void) { - int i, err; + int i; if (!env.bench_name) { fprintf(stderr, "benchmark name is not specified\n"); exit(1); } - for (i = 0; i < ARRAY_SIZE(benchs); i++) { if (strcmp(benchs[i]->name, env.bench_name) == 0) { bench = benchs[i]; @@ -550,8 +575,14 @@ static void setup_benchmark() fprintf(stderr, "benchmark '%s' not found\n", env.bench_name); exit(1); } +} - printf("Setting up benchmark '%s'...\n", bench->name); +static void setup_benchmark(void) +{ + int i, err; + + if (!env.quiet) + printf("Setting up benchmark '%s'...\n", bench->name); state.producers = calloc(env.producer_cnt, sizeof(*state.producers)); state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers)); @@ -597,7 +628,8 @@ static void setup_benchmark() next_cpu(&env.prod_cpus)); } - printf("Benchmark '%s' started.\n", bench->name); + if (!env.quiet) + printf("Benchmark '%s' started.\n", bench->name); } static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER; @@ -621,7 +653,7 @@ static void collect_measurements(long delta_ns) { int main(int argc, char **argv) { - parse_cmdline_args(argc, argv); + parse_cmdline_args_init(argc, argv); if (env.list) { int i; @@ -633,6 +665,9 @@ int main(int argc, char **argv) return 0; } + find_benchmark(); + parse_cmdline_args_final(argc, argv); + setup_benchmark(); setup_timer(); diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index d748255877e2..402729c6a3ac 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -24,6 +24,7 @@ struct env { bool verbose; bool list; bool affinity; + bool quiet; int consumer_cnt; int producer_cnt; struct cpu_set prod_cpus; @@ -47,6 +48,7 @@ struct bench_res { struct bench { const char *name; + const struct argp *argp; void (*validate)(void); void (*setup)(void); void *(*producer_thread)(void *ctx); diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c index 5bcb8a8cdeb2..7c8ccc108313 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -428,6 +428,7 @@ static void *consumer(void *input) const struct bench bench_bloom_lookup = { .name = "bloom-lookup", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = bloom_lookup_setup, .producer_thread = producer, @@ -439,6 +440,7 @@ const struct bench bench_bloom_lookup = { const struct bench bench_bloom_update = { .name = "bloom-update", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = bloom_update_setup, .producer_thread = producer, @@ -450,6 +452,7 @@ const struct bench bench_bloom_update = { const struct bench bench_bloom_false_positive = { .name = "bloom-false-positive", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = false_positive_setup, .producer_thread = producer, @@ -461,6 +464,7 @@ const struct bench bench_bloom_false_positive = { const struct bench bench_hashmap_without_bloom = { .name = "hashmap-without-bloom", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = hashmap_no_bloom_setup, .producer_thread = producer, @@ -472,6 +476,7 @@ const struct bench bench_hashmap_without_bloom = { const struct bench bench_hashmap_with_bloom = { .name = "hashmap-with-bloom", + .argp = &bench_bloom_map_argp, .validate = validate, .setup = hashmap_with_bloom_setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c index cec51e0ff4b8..75abe8137b6c 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Bytedance */ -#include <argp.h> #include "bench.h" #include "bpf_hashmap_full_update_bench.skel.h" #include "bpf_util.h" @@ -68,7 +67,7 @@ static void setup(void) bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); } -void hashmap_report_final(struct bench_res res[], int res_cnt) +static void hashmap_report_final(struct bench_res res[], int res_cnt) { unsigned int nr_cpus = bpf_num_possible_cpus(); int i; @@ -85,7 +84,7 @@ void hashmap_report_final(struct bench_res res[], int res_cnt) } const struct bench bench_bpf_hashmap_full_update = { - .name = "bpf-hashmap-ful-update", + .name = "bpf-hashmap-full-update", .validate = validate, .setup = setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c new file mode 100644 index 000000000000..8dbb02f75cff --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include <sys/random.h> +#include <argp.h> +#include "bench.h" +#include "bpf_hashmap_lookup.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_lookup *skel; +} ctx; + +/* only available to kernel, so define it here */ +#define BPF_MAX_LOOPS (1<<23) + +#define MAX_KEY_SIZE 1024 /* the size of the key map */ + +static struct { + __u32 key_size; + __u32 map_flags; + __u32 max_entries; + __u32 nr_entries; + __u32 nr_loops; +} args = { + .key_size = 4, + .map_flags = 0, + .max_entries = 1000, + .nr_entries = 500, + .nr_loops = 1000000, +}; + +enum { + ARG_KEY_SIZE = 8001, + ARG_MAP_FLAGS, + ARG_MAX_ENTRIES, + ARG_NR_ENTRIES, + ARG_NR_LOOPS, +}; + +static const struct argp_option opts[] = { + { "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0, + "The hashmap key size (max 1024)"}, + { "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0, + "The hashmap flags passed to BPF_MAP_CREATE"}, + { "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0, + "The hashmap max entries"}, + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "The number of entries to insert/lookup"}, + { "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0, + "The number of loops for the benchmark"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_KEY_SIZE: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_KEY_SIZE) { + fprintf(stderr, "invalid key_size"); + argp_usage(state); + } + args.key_size = ret; + break; + case ARG_MAP_FLAGS: + ret = strtol(arg, NULL, 0); + if (ret < 0 || ret > UINT_MAX) { + fprintf(stderr, "invalid map_flags"); + argp_usage(state); + } + args.map_flags = ret; + break; + case ARG_MAX_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid max_entries"); + argp_usage(state); + } + args.max_entries = ret; + break; + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_entries"); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_NR_LOOPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > BPF_MAX_LOOPS) { + fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n", + ret, BPF_MAX_LOOPS); + argp_usage(state); + } + args.nr_loops = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_hashmap_lookup_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_entries > args.max_entries) { + fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n", + args.max_entries, args.nr_entries); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static inline void patch_key(u32 i, u32 *key) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *key = i + 1; +#else + *key = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random */ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd; + int ret; + int i; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_lookup__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries); + bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size); + bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8); + bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags); + + ctx.skel->bss->nr_entries = args.nr_entries; + ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries; + + if (args.key_size > 4) { + for (i = 1; i < args.key_size/4; i++) + ctx.skel->bss->key[i] = 2654435761 * i; + } + + ret = bpf_hashmap_lookup__load(ctx.skel); + if (ret) { + bpf_hashmap_lookup__destroy(ctx.skel); + fprintf(stderr, "failed to load map: %s", strerror(-ret)); + exit(1); + } + + /* fill in the hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + for (u64 i = 0; i < args.nr_entries; i++) { + patch_key(i, ctx.skel->bss->key); + bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static inline double events_from_time(u64 time) +{ + if (time) + return args.nr_loops * 1000000000llu / time / 1000000.0L; + + return 0; +} + +static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time) +{ + int i, n = 0; + + *events_mean = 0; + *events_stddev = 0; + *mean_time = 0; + + for (i = 0; i < 32; i++) { + if (!times[i]) + break; + *mean_time += times[i]; + *events_mean += events_from_time(times[i]); + n += 1; + } + if (!n) + return 0; + + *mean_time /= n; + *events_mean /= n; + + if (n > 1) { + for (i = 0; i < n; i++) { + double events_i = *events_mean - events_from_time(times[i]); + *events_stddev += events_i * events_i / (n - 1); + } + *events_stddev = sqrt(*events_stddev); + } + + return n; +} + +static void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + double events_mean, events_stddev; + u64 mean_time; + int i, n; + + for (i = 0; i < nr_cpus; i++) { + n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean, + &events_stddev, &mean_time); + if (n == 0) + continue; + + if (env.quiet) { + /* we expect only one cpu to be present */ + if (env.affinity) + printf("%.3lf\n", events_mean); + else + printf("cpu%02d %.3lf\n", i, events_mean); + } else { + printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec" + " (approximated from %d samples of ~%lums)\n", + i, events_mean, 2*events_stddev, + n, mean_time / 1000000); + } + } +} + +const struct bench bench_bpf_hashmap_lookup = { + .name = "bpf-hashmap-lookup", + .argp = &bench_hashmap_lookup_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c index d0a6572bfab6..d8a0394e10b1 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -95,6 +95,7 @@ static void setup(void) const struct bench bench_bpf_loop = { .name = "bpf-loop", + .argp = &bench_bpf_loop_argp, .validate = validate, .setup = setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c index 5a378c84e81f..d4b2817306d4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -255,6 +255,7 @@ static void *producer(void *input) */ const struct bench bench_local_storage_cache_seq_get = { .name = "local-storage-cache-seq-get", + .argp = &bench_local_storage_argp, .validate = validate, .setup = local_storage_cache_get_setup, .producer_thread = producer, @@ -266,6 +267,7 @@ const struct bench bench_local_storage_cache_seq_get = { const struct bench bench_local_storage_cache_interleaved_get = { .name = "local-storage-cache-int-get", + .argp = &bench_local_storage_argp, .validate = validate, .setup = local_storage_cache_get_interleaved_setup, .producer_thread = producer, @@ -277,6 +279,7 @@ const struct bench bench_local_storage_cache_interleaved_get = { const struct bench bench_local_storage_cache_hashmap_control = { .name = "local-storage-cache-hashmap-control", + .argp = &bench_local_storage_argp, .validate = validate, .setup = hashmap_setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c index 43f109d93130..d5eb5587f2aa 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -12,17 +12,14 @@ static struct { __u32 nr_procs; __u32 kthread_pid; - bool quiet; } args = { .nr_procs = 1000, .kthread_pid = 0, - .quiet = false, }; enum { ARG_NR_PROCS = 7000, ARG_KTHREAD_PID = 7001, - ARG_QUIET = 7002, }; static const struct argp_option opts[] = { @@ -30,8 +27,6 @@ static const struct argp_option opts[] = { "Set number of user processes to spin up"}, { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, "Pid of rcu_tasks_trace kthread for ticks tracking"}, - { "quiet", ARG_QUIET, "{0,1}", 0, - "If true, don't report progress"}, {}, }; @@ -56,14 +51,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } args.kthread_pid = ret; break; - case ARG_QUIET: - ret = strtol(arg, NULL, 10); - if (ret < 0 || ret > 1) { - fprintf(stderr, "invalid quiet %ld\n", ret); - argp_usage(state); - } - args.quiet = ret; - break; break; default: return ARGP_ERR_UNKNOWN; @@ -230,7 +217,7 @@ static void report_progress(int iter, struct bench_res *res, long delta_ns) exit(1); } - if (args.quiet) + if (env.quiet) return; printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", @@ -271,6 +258,7 @@ static void report_final(struct bench_res res[], int res_cnt) */ const struct bench bench_local_storage_tasks_trace = { .name = "local-storage-tasks-trace", + .argp = &bench_local_storage_rcu_tasks_trace_argp, .validate = validate, .setup = local_storage_tasks_trace_setup, .producer_thread = producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index c2554f9695ff..fc91fdac4faa 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -518,6 +518,7 @@ static void *perfbuf_custom_consumer(void *input) const struct bench bench_rb_libbpf = { .name = "rb-libbpf", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = ringbuf_libbpf_setup, .producer_thread = bufs_sample_producer, @@ -529,6 +530,7 @@ const struct bench bench_rb_libbpf = { const struct bench bench_rb_custom = { .name = "rb-custom", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = ringbuf_custom_setup, .producer_thread = bufs_sample_producer, @@ -540,6 +542,7 @@ const struct bench bench_rb_custom = { const struct bench bench_pb_libbpf = { .name = "pb-libbpf", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = perfbuf_libbpf_setup, .producer_thread = bufs_sample_producer, @@ -551,6 +554,7 @@ const struct bench bench_pb_libbpf = { const struct bench bench_pb_custom = { .name = "pb-custom", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = perfbuf_libbpf_setup, .producer_thread = bufs_sample_producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c index 494b591c0289..d3fad2ba6916 100644 --- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -140,6 +140,7 @@ static void strncmp_measure(struct bench_res *res) const struct bench bench_strncmp_no_helper = { .name = "strncmp-no-helper", + .argp = &bench_strncmp_argp, .validate = strncmp_validate, .setup = strncmp_no_helper_setup, .producer_thread = strncmp_producer, @@ -151,6 +152,7 @@ const struct bench bench_strncmp_no_helper = { const struct bench bench_strncmp_helper = { .name = "strncmp-helper", + .argp = &bench_strncmp_argp, .validate = strncmp_validate, .setup = strncmp_helper_setup, .producer_thread = strncmp_producer, diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh index 1e2de838f9fa..cd2efd3fdef3 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -6,6 +6,6 @@ source ./benchs/run_common.sh set -eufo pipefail nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` -summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update) +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update) printf "$summary" printf "\n" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh index 5dac1f02892c..3e8a969f2096 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -8,4 +8,4 @@ if [ -z $kthread_pid ]; then exit 1 fi -./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h new file mode 100644 index 000000000000..dbd2c729781a --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -0,0 +1,92 @@ +#ifndef __BPF_EXPERIMENTAL__ +#define __BPF_EXPERIMENTAL__ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node))) + +/* Description + * Allocates an object of the type represented by 'local_type_id' in + * program BTF. User may use the bpf_core_type_id_local macro to pass the + * type ID of a struct in program BTF. + * + * The 'local_type_id' parameter must be a known constant. + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * A pointer to an object of the type corresponding to the passed in + * 'local_type_id', or NULL on failure. + */ +extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_new_impl */ +#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) + +/* Description + * Free an allocated object. All fields of the object that require + * destruction will be destructed before the storage is freed. + * + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * Void. + */ +extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_drop_impl */ +#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) + +/* Description + * Add a new entry to the beginning of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Add a new entry to the end of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Remove the entry at the beginning of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; + +/* Description + * Remove the entry at the end of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; + +/* Description + * Remove 'node' from rbtree with root 'root' + * Returns + * Pointer to the removed node, or NULL if 'root' didn't contain 'node' + */ +extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, + struct bpf_rb_node *node) __ksym; + +/* Description + * Add 'node' to rbtree with root 'root' using comparator 'less' + * Returns + * Nothing + */ +extern void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) __ksym; + +/* Description + * Return the first (leftmost) node in input tree + * Returns + * Pointer to the node, which is _not_ removed from the tree. If the tree + * contains no nodes, returns NULL. + */ +extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym; + +#endif diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h index 845209581440..bc4555a003a7 100644 --- a/tools/testing/selftests/bpf/bpf_legacy.h +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -2,15 +2,22 @@ #ifndef __BPF_LEGACY__ #define __BPF_LEGACY__ +#if __GNUC__ && !__clang__ +/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We + * provide the "standard" names as synonyms of the corresponding GCC + * builtins. Note how the SKB argument is ignored. + */ +#define load_byte(skb, off) __builtin_bpf_load_byte(off) +#define load_half(skb, off) __builtin_bpf_load_half(off) +#define load_word(skb, off) __builtin_bpf_load_word(off) +#else /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ -unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); -unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); -unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); +unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word"); +#endif #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a6021d6117b5..46500636d8cd 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -59,7 +59,7 @@ bpf_testmod_test_struct_arg_5(void) { return bpf_testmod_test_struct_arg_result; } -noinline void +__bpf_kfunc void bpf_testmod_test_mod_kfunc(int i) { *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; @@ -128,6 +128,23 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg) } } +noinline int bpf_testmod_fentry_test1(int a) +{ + return a + 1; +} + +noinline int bpf_testmod_fentry_test2(int a, u64 b) +{ + return a + b; +} + +noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +int bpf_testmod_fentry_ok; + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -167,6 +184,13 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, return snprintf(buf, len, "%d\n", writable.val); } + if (bpf_testmod_fentry_test1(1) != 2 || + bpf_testmod_fentry_test2(2, 3) != 5 || + bpf_testmod_fentry_test3(4, 5, 6) != 15) + goto out; + + bpf_testmod_fentry_ok = 1; +out: return -EIO; /* always fail */ } EXPORT_SYMBOL(bpf_testmod_test_read); diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index a3352a64c067..10587a29b967 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -20,6 +20,25 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. + */ +static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + #define __bpf_percpu_val_align __attribute__((__aligned__(8))) #define BPF_DECLARE_PERCPU(type, name) \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e914cc45b766..9e95b37a7dff 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -13,6 +13,7 @@ #include <ftw.h> #include "cgroup_helpers.h" +#include "bpf_util.h" /* * To avoid relying on the system setup, when setup_cgroup_env is called @@ -77,7 +78,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers enable[len] = 0; close(fd); } else { - strncpy(enable, controllers, sizeof(enable)); + bpf_strlcpy(enable, controllers, sizeof(enable)); } snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); @@ -332,6 +333,25 @@ int get_root_cgroup(void) return fd; } +/* + * remove_cgroup() - Remove a cgroup + * @relative_path: The cgroup path, relative to the workdir, to remove + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup(const char *relative_path) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 3358734356ab..f099a166c94d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -18,6 +18,7 @@ int write_cgroup_file_parent(const char *relative_path, const char *file, int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); +void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); int join_cgroup(const char *relative_path); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 9213565c0311..63cd4ab70171 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,4 +1,6 @@ CONFIG_BLK_DEV_LOOP=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y @@ -6,6 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_SYSCALL=y +# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y @@ -13,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FUNCTION_TRACER=y CONFIG_GENEVE=y CONFIG_IKCONFIG=y @@ -20,6 +24,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IMA=y CONFIG_IMA_READ_POLICY=y CONFIG_IMA_WRITE_POLICY=y +CONFIG_INET_ESP=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_RAW=y CONFIG_IP_NF_TARGET_SYNPROXY=y @@ -67,7 +72,8 @@ CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y -CONFIG_TEST_BPF=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y +CONFIG_XFRM_INTERFACE=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 new file mode 100644 index 000000000000..1f0437644186 --- /dev/null +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -0,0 +1,181 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_ARM_SMMU_V3=y +CONFIG_ATA=y +CONFIG_AUDIT=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_SD=y +CONFIG_BONDING=y +CONFIG_BPFILTER=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_BRIDGE=m +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CHR_DEV_SG=y +CONFIG_COMPAT=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM=y +CONFIG_DUMMY=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FANOTIFY=y +CONFIG_FB=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_FUSE_FS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET_ESP=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MAILBOX=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_NAMESPACES=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NF_TABLES=y +CONFIG_NLMON=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_OVERLAY_FS=y +CONFIG_PACKET_DIAG=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_PL320_MBOX=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SCSI=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TCG_TIS=y +CONFIG_TCG_TPM=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index f8a7a258a718..d49f6170e7bd 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -82,9 +82,6 @@ CONFIG_MARCH_Z196_TUNE=y CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULES=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_9P=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 21ce5ea4304e..dd97d61d325c 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_KPROBE_OVERRIDE=y diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index f472d28ad11a..16f1671e4bde 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -18,7 +18,7 @@ static __u32 get_map_id_from_fd(int map_fd) uint32_t info_len = sizeof(map_info); int ret; - ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + ret = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); CHECK(ret < 0, "Finding map info failed", "error:%s\n", strerror(errno)); diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h index 0ab1c88041cd..2d4a58e4e39c 100644 --- a/tools/testing/selftests/bpf/netcnt_common.h +++ b/tools/testing/selftests/bpf/netcnt_common.h @@ -8,11 +8,11 @@ /* sizeof(struct bpf_local_storage_elem): * - * It really is about 128 bytes on x86_64, but allocate more to account for - * possible layout changes, different architectures, etc. + * It is about 128 bytes on x86_64 and 512 bytes on s390x, but allocate more to + * account for possible layout changes, different architectures, etc. * The kernel will wrap up to PAGE_SIZE internally anyway. */ -#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256 +#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 768 /* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */ #define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \ diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index bec15558fd93..01de33191226 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -390,45 +390,6 @@ struct nstoken { int orig_netns_fd; }; -static int setns_by_fd(int nsfd) -{ - int err; - - err = setns(nsfd, CLONE_NEWNET); - close(nsfd); - - if (!ASSERT_OK(err, "setns")) - return err; - - /* Switch /sys to the new namespace so that e.g. /sys/class/net - * reflects the devices in the new namespace. - */ - err = unshare(CLONE_NEWNS); - if (!ASSERT_OK(err, "unshare")) - return err; - - /* Make our /sys mount private, so the following umount won't - * trigger the global umount in case it's shared. - */ - err = mount("none", "/sys", NULL, MS_PRIVATE, NULL); - if (!ASSERT_OK(err, "remount private /sys")) - return err; - - err = umount2("/sys", MNT_DETACH); - if (!ASSERT_OK(err, "umount2 /sys")) - return err; - - err = mount("sysfs", "/sys", "sysfs", 0, NULL); - if (!ASSERT_OK(err, "mount /sys")) - return err; - - err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL); - if (!ASSERT_OK(err, "mount /sys/fs/bpf")) - return err; - - return 0; -} - struct nstoken *open_netns(const char *name) { int nsfd; @@ -449,8 +410,9 @@ struct nstoken *open_netns(const char *name) if (!ASSERT_GE(nsfd, 0, "open netns fd")) goto fail; - err = setns_by_fd(nsfd); - if (!ASSERT_OK(err, "setns_by_fd")) + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + if (!ASSERT_OK(err, "setns")) goto fail; return token; @@ -461,6 +423,7 @@ fail: void close_netns(struct nstoken *token) { - ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd"); + ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns"); + close(token->orig_netns_fd); free(token); } diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 970f09156eb4..4666f88f2bb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -2,7 +2,7 @@ #include <test_progs.h> #define MAX_INSNS 512 -#define MAX_MATCHES 16 +#define MAX_MATCHES 24 struct bpf_reg_match { unsigned int line; @@ -267,6 +267,7 @@ static struct bpf_align_test tests[] = { */ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), @@ -280,6 +281,7 @@ static struct bpf_align_test tests[] = { BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), @@ -311,44 +313,52 @@ static struct bpf_align_test tests[] = { {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Variable offset is added to R5 packet pointer, - * resulting in auxiliary alignment of 4. + * resulting in auxiliary alignment of 4. To avoid BPF + * verifier's precision backtracking logging + * interfering we also have a no-op R4 = R5 + * instruction to validate R5 state. We also check + * that R4 is what it should be in such case. */ - {17, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {18, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {23, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {23, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {25, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w=pkt(off=14,r=8"}, /* Variable offset is added to R5, resulting in a - * variable offset of (4n). + * variable offset of (4n). See comment for insn #18 + * for R4 = R5 trick. */ - {26, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {27, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {28, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {33, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {33, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, }, }, { @@ -681,6 +691,6 @@ void test_align(void) if (!test__start_subtest(test->descr)) continue; - CHECK_FAIL(do_test_single(test)); + ASSERT_OK(do_test_single(test), test->descr); } } diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 9566d9d2f6ee..56374c8b5436 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -33,8 +33,8 @@ void test_attach_probe(void) struct test_attach_probe* skel; ssize_t uprobe_offset, ref_ctr_offset; struct bpf_link *uprobe_err_link; + FILE *devnull; bool legacy; - char *mem; /* Check if new-style kprobe/uprobe API is supported. * Kernels that support new FD-based kprobe and uprobe BPF attachment @@ -147,7 +147,7 @@ void test_attach_probe(void) /* test attach by name for a library function, using the library * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo(). */ - uprobe_opts.func_name = "malloc"; + uprobe_opts.func_name = "fopen"; uprobe_opts.retprobe = false; skel->links.handle_uprobe_byname2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2, @@ -157,7 +157,7 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2")) goto cleanup; - uprobe_opts.func_name = "free"; + uprobe_opts.func_name = "fclose"; uprobe_opts.retprobe = true; skel->links.handle_uretprobe_byname2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2, @@ -195,8 +195,8 @@ void test_attach_probe(void) usleep(1); /* trigger & validate shared library u[ret]probes attached by name */ - mem = malloc(1); - free(mem); + devnull = fopen("/dev/null", "r"); + fclose(devnull); /* trigger & validate uprobe & uretprobe */ trigger_func(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 2be2d61954bc..26b2d1bffdfd 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -472,6 +472,7 @@ static void lsm_subtest(struct test_bpf_cookie *skel) int prog_fd; int lsm_fd = -1; LIBBPF_OPTS(bpf_link_create_opts, link_opts); + int err; skel->bss->lsm_res = 0; @@ -482,8 +483,9 @@ static void lsm_subtest(struct test_bpf_cookie *skel) if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create")) goto cleanup; - stack_mprotect(); - if (!ASSERT_EQ(errno, EPERM, "stack_mprotect")) + err = stack_mprotect(); + if (!ASSERT_EQ(err, -1, "stack_mprotect") || + !ASSERT_EQ(errno, EPERM, "stack_mprotect")) goto cleanup; usleep(1); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 3369c5ec3a17..1f02168103dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -3,6 +3,7 @@ #include <test_progs.h> #include <unistd.h> #include <sys/syscall.h> +#include <task_local_storage_helpers.h> #include "bpf_iter_ipv6_route.skel.h" #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" @@ -175,11 +176,6 @@ static void test_bpf_map(void) bpf_iter_bpf_map__destroy(skel); } -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(SYS_pidfd_open, pid, flags); -} - static void check_bpf_link_info(const struct bpf_program *prog) { LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -199,8 +195,8 @@ static void check_bpf_link_info(const struct bpf_program *prog) return; info_len = sizeof(info); - err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); - ASSERT_OK(err, "bpf_obj_get_info_by_fd"); + err = bpf_link_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid"); bpf_link__destroy(link); @@ -295,8 +291,8 @@ static void test_task_pidfd(void) union bpf_iter_link_info linfo; int pidfd; - pidfd = pidfd_open(getpid(), 0); - if (!ASSERT_GT(pidfd, 0, "pidfd_open")) + pidfd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_GT(pidfd, 0, "sys_pidfd_open")) return; memset(&linfo, 0, sizeof(linfo)); @@ -688,13 +684,13 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); - err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len); + err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); if (CHECK(err, "get_map_info", "get map info failed: %s\n", strerror(errno))) goto free_map2; skel->bss->map1_id = map_info.id; - err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len); + err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); if (CHECK(err, "get_map_info", "get map info failed: %s\n", strerror(errno))) goto free_map2; @@ -945,10 +941,10 @@ static void test_bpf_array_map(void) { __u64 val, expected_val = 0, res_first_val, first_val = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); - __u32 expected_key = 0, res_first_key; + __u32 key, expected_key = 0, res_first_key; + int err, i, map_fd, hash_fd, iter_fd; struct bpf_iter_bpf_array_map *skel; union bpf_iter_link_info linfo; - int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; int len, start; @@ -1005,12 +1001,20 @@ static void test_bpf_array_map(void) if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; + hash_fd = bpf_map__fd(skel->maps.hashmap1); for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { err = bpf_map_lookup_elem(map_fd, &i, &val); - if (!ASSERT_OK(err, "map_lookup")) - goto out; - if (!ASSERT_EQ(i, val, "invalid_val")) - goto out; + if (!ASSERT_OK(err, "map_lookup arraymap1")) + goto close_iter; + if (!ASSERT_EQ(i, val, "invalid_val arraymap1")) + goto close_iter; + + val = i + 4; + err = bpf_map_lookup_elem(hash_fd, &val, &key); + if (!ASSERT_OK(err, "map_lookup hashmap1")) + goto close_iter; + if (!ASSERT_EQ(key, val - 4, "invalid_val hashmap1")) + goto close_iter; } close_iter: @@ -1461,6 +1465,77 @@ out: bpf_iter_task_vma__destroy(skel); } +static void test_task_vma_dead_task(void) +{ + struct bpf_iter_task_vma *skel; + int wstatus, child_pid = -1; + time_t start_tm, cur_tm; + int err, iter_fd = -1; + int wait_sec = 3; + + skel = bpf_iter_task_vma__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + return; + + skel->bss->pid = getpid(); + + err = bpf_iter_task_vma__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + goto out; + + skel->links.proc_maps = bpf_program__attach_iter( + skel->progs.proc_maps, NULL); + + if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) { + skel->links.proc_maps = NULL; + goto out; + } + + start_tm = time(NULL); + cur_tm = start_tm; + + child_pid = fork(); + if (child_pid == 0) { + /* Fork short-lived processes in the background. */ + while (cur_tm < start_tm + wait_sec) { + system("echo > /dev/null"); + cur_tm = time(NULL); + } + exit(0); + } + + if (!ASSERT_GE(child_pid, 0, "fork_child")) + goto out; + + while (cur_tm < start_tm + wait_sec) { + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps)); + if (!ASSERT_GE(iter_fd, 0, "create_iter")) + goto out; + + /* Drain all data from iter_fd. */ + while (cur_tm < start_tm + wait_sec) { + err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE); + if (!ASSERT_GE(err, 0, "read_iter_fd")) + goto out; + + cur_tm = time(NULL); + + if (err == 0) + break; + } + + close(iter_fd); + iter_fd = -1; + } + + check_bpf_link_info(skel->progs.proc_maps); + +out: + waitpid(child_pid, &wstatus, 0); + close(iter_fd); + bpf_iter_task_vma__destroy(skel); +} + void test_bpf_sockmap_map_iter_fd(void) { struct bpf_iter_sockmap *skel; @@ -1498,7 +1573,6 @@ static noinline int trigger_func(int arg) static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc) { struct bpf_iter_vma_offset *skel; - struct bpf_link *link; char buf[16] = {}; int iter_fd, len; int pgsz, shift; @@ -1513,11 +1587,11 @@ static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool ; skel->bss->page_shift = shift; - link = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); - if (!ASSERT_OK_PTR(link, "attach_iter")) - return; + skel->links.get_vma_offset = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); + if (!ASSERT_OK_PTR(skel->links.get_vma_offset, "attach_iter")) + goto exit; - iter_fd = bpf_iter_create(bpf_link__fd(link)); + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.get_vma_offset)); if (!ASSERT_GT(iter_fd, 0, "create_iter")) goto exit; @@ -1535,7 +1609,7 @@ static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool close(iter_fd); exit: - bpf_link__destroy(link); + bpf_iter_vma_offset__destroy(skel); } static void test_task_vma_offset(void) @@ -1583,6 +1657,8 @@ void test_bpf_iter(void) test_task_file(); if (test__start_subtest("task_vma")) test_task_vma(); + if (test__start_subtest("task_vma_dead_task")) + test_task_vma_dead_task(); if (test__start_subtest("task_btf")) test_task_btf(); if (test__start_subtest("tcp4")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 8a838ea8bdf3..c8ba4009e4ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -49,14 +49,14 @@ out: static void test_bpf_nf_ct(int mode) { - const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; + const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; int srv_fd = -1, client_fd = -1, srv_client_fd = -1; struct sockaddr_in peer_addr = {}; struct test_bpf_nf *skel; int prog_fd, err; socklen_t len; u16 srv_port; - char cmd[64]; + char cmd[128]; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), @@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode) /* Enable connection tracking */ snprintf(cmd, sizeof(cmd), iptables, "-A"); - if (!ASSERT_OK(system(cmd), "iptables")) + if (!ASSERT_OK(system(cmd), cmd)) goto end; srv_port = (mode == TEST_XDP) ? 5005 : 5006; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index e1c1e521cca2..675b90b15280 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -44,7 +44,7 @@ void serial_test_bpf_obj_id(void) CHECK(err >= 0 || errno != ENOENT, "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); - /* Check bpf_obj_get_info_by_fd() */ + /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { now = time(NULL); @@ -79,7 +79,7 @@ void serial_test_bpf_obj_id(void) /* Check getting map info */ info_len = sizeof(struct bpf_map_info) * 2; bzero(&map_infos[i], info_len); - err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], + err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); if (CHECK(err || map_infos[i].type != BPF_MAP_TYPE_ARRAY || @@ -118,8 +118,8 @@ void serial_test_bpf_obj_id(void) err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); if (CHECK_FAIL(err)) goto done; - err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], - &info_len); + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], + &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); if (CHECK(err || @@ -161,8 +161,8 @@ void serial_test_bpf_obj_id(void) bzero(&link_infos[i], info_len); link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name); link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); - err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]), - &link_infos[i], &info_len); + err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), + &link_infos[i], &info_len); if (CHECK(err || link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || link_infos[i].prog_id != prog_infos[i].id || @@ -217,7 +217,7 @@ void serial_test_bpf_obj_id(void) * prog_info.map_ids = NULL */ prog_info.nr_map_ids = 1; - err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); if (CHECK(!err || errno != EFAULT, "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", err, errno, EFAULT)) @@ -228,7 +228,7 @@ void serial_test_bpf_obj_id(void) saved_map_id = *(int *)((long)prog_infos[i].map_ids); prog_info.map_ids = prog_infos[i].map_ids; prog_info.nr_map_ids = 2; - err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || @@ -277,7 +277,7 @@ void serial_test_bpf_obj_id(void) if (CHECK_FAIL(err)) goto done; - err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); CHECK(err || info_len != sizeof(struct bpf_map_info) || memcmp(&map_info, &map_infos[i], info_len) || array_value != array_magic_value, @@ -322,7 +322,7 @@ void serial_test_bpf_obj_id(void) nr_id_found++; - err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len); + err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); CHECK(err || info_len != sizeof(link_info) || cmp_res, diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 24dd6214394e..cbb600be943d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3949,6 +3949,20 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid return type", }, { + .descr = "decl_tag test #17, func proto, argument", + .raw_types = { + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag1\0var"), + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -4408,7 +4422,7 @@ static int test_big_btf_info(unsigned int test_num) info->btf = ptr_to_u64(user_btf); info->btf_size = raw_btf_size; - err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); + err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len); if (CHECK(!err, "!err")) { err = -1; goto done; @@ -4421,7 +4435,7 @@ static int test_big_btf_info(unsigned int test_num) * to userspace. */ info_garbage.garbage = 0; - err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len); + err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len); if (CHECK(err || info_len != sizeof(*info), "err:%d errno:%d info_len:%u sizeof(*info):%zu", err, errno, info_len, sizeof(*info))) { @@ -4485,7 +4499,7 @@ static int test_btf_id(unsigned int test_num) /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */ info_len = sizeof(info[0]); - err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len); + err = bpf_btf_get_info_by_fd(btf_fd[0], &info[0], &info_len); if (CHECK(err, "errno:%d", errno)) { err = -1; goto done; @@ -4498,7 +4512,7 @@ static int test_btf_id(unsigned int test_num) } ret = 0; - err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len); + err = bpf_btf_get_info_by_fd(btf_fd[1], &info[1], &info_len); if (CHECK(err || info[0].id != info[1].id || info[0].btf_size != info[1].btf_size || (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)), @@ -4521,7 +4535,7 @@ static int test_btf_id(unsigned int test_num) } info_len = sizeof(map_info); - err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); if (CHECK(err || map_info.btf_id != info[0].id || map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2, "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u", @@ -4624,7 +4638,7 @@ static void do_test_get_info(unsigned int test_num) info.btf_size = user_btf_size; ret = 0; - err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len); + err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len); if (CHECK(err || !info.id || info_len != sizeof(info) || info.btf_size != raw_btf_size || (ret = memcmp(raw_btf, user_btf, expected_nbytes)), @@ -4741,7 +4755,7 @@ static void do_test_file(unsigned int test_num) /* get necessary program info */ info_len = sizeof(struct bpf_prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); @@ -4773,7 +4787,7 @@ static void do_test_file(unsigned int test_num) info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); @@ -6391,7 +6405,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, /* get necessary lens */ info_len = sizeof(struct bpf_prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); return -1; @@ -6421,7 +6435,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, info.nr_func_info = nr_func_info; info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; @@ -6485,7 +6499,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, nr_jited_func_lens = nr_jited_ksyms; info_len = sizeof(struct bpf_prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err < 0, "err:%d errno:%d", err, errno)) { err = -1; goto done; @@ -6559,7 +6573,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, info.jited_func_lens = ptr_to_u64(jited_func_lens); } - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); /* * Only recheck the info.*line_info* fields. @@ -7133,7 +7147,7 @@ static struct btf_dedup_test dedup_tests[] = { BTF_ENUM_ENC(NAME_NTH(4), 456), /* [4] fwd enum 'e2' after full enum */ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), - /* [5] incompatible fwd enum with different size */ + /* [5] fwd enum with different size, size does not matter for fwd */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), /* [6] incompatible full enum with different value */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), @@ -7150,9 +7164,7 @@ static struct btf_dedup_test dedup_tests[] = { /* [2] full enum 'e2' */ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), BTF_ENUM_ENC(NAME_NTH(4), 456), - /* [3] incompatible fwd enum with different size */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), - /* [4] incompatible full enum with different value */ + /* [3] incompatible full enum with different value */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), BTF_ENUM_ENC(NAME_NTH(2), 321), BTF_END_RAW, @@ -7611,7 +7623,263 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0e1\0e1_val"), }, }, - +{ + .descr = "dedup: enum of different size: no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2), + BTF_ENUM_ENC(NAME_NTH(2), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2), + BTF_ENUM_ENC(NAME_NTH(2), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum fwd to enum64", + .input = { + .raw_types = { + /* [1] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + /* [2] enum 'e1' fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [3] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, + .expect = { + .raw_types = { + /* [1] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + /* [2] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, +}, +{ + .descr = "dedup: enum64 fwd to enum", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [3] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration struct", + /* + * Verify that CU1:foo and CU2:foo would be unified and that + * typedef/ptr would be updated to point to CU1:foo. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * struct foo; + * typedef struct foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_PTR_ENC(1), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration union", + /* + * Verify that CU1:foo and CU2:foo would be unified and that + * typedef/ptr would be updated to point to CU1:foo. + * Same as "dedup: standalone fwd declaration struct" but for unions. + * + * // CU 1: + * union foo { int x; }; + * + * // CU 2: + * union foo; + * typedef union foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_TBD, 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_PTR_ENC(1), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration wrong kind", + /* + * Negative test for btf_dedup_resolve_fwds: + * - CU1:foo is a struct, C2:foo is a union, thus CU2:foo is not deduped; + * - typedef/ptr should remain unchanged as well. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * union foo; + * typedef union foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration name conflict", + /* + * Negative test for btf_dedup_resolve_fwds: + * - two candidates for CU2:foo dedup, thus it is unchanged; + * - typedef/ptr should remain unchanged as well. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * struct foo; + * typedef struct foo *foo_ptr; + * + * // CU 3: + * struct foo { int x; int y; }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */ + /* CU 3 */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */ + /* CU 3 */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"), + }, +}, }; static int btf_type_size(const struct btf_type *t) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 90aac437576d..d9024c7a892a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -143,6 +143,10 @@ static void test_split_fwd_resolve() { btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ /* } */ + /* keep this not a part of type the graph to test btf_dedup_resolve_fwds */ + btf__add_struct(btf1, "s3", 4); /* [6] struct s3 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ VALIDATE_RAW_BTF( btf1, @@ -153,20 +157,24 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=2 bits_offset=0\n" "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[6] STRUCT 's3' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0"); btf2 = btf__new_empty_split(btf1); if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) goto cleanup; - btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ - btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ - btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ - btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */ - btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ - btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ - btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [7] int */ + btf__add_ptr(btf2, 11); /* [8] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [9] fwd for struct s2 */ + btf__add_ptr(btf2, 9); /* [10] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [11] struct s1 { */ + btf__add_field(btf2, "f1", 8, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 10, 64, 0); /* struct s2 *f2; */ /* } */ + btf__add_fwd(btf2, "s3", BTF_FWD_STRUCT); /* [12] fwd for struct s3 */ + btf__add_ptr(btf2, 12); /* [13] ptr to struct s1 */ VALIDATE_RAW_BTF( btf2, @@ -178,13 +186,17 @@ static void test_split_fwd_resolve() { "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0", - "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[7] PTR '(anon)' type_id=10", - "[8] FWD 's2' fwd_kind=struct", - "[9] PTR '(anon)' type_id=8", - "[10] STRUCT 's1' size=16 vlen=2\n" - "\t'f1' type_id=7 bits_offset=0\n" - "\t'f2' type_id=9 bits_offset=64"); + "[6] STRUCT 's3' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[7] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[8] PTR '(anon)' type_id=11", + "[9] FWD 's2' fwd_kind=struct", + "[10] PTR '(anon)' type_id=9", + "[11] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=8 bits_offset=0\n" + "\t'f2' type_id=10 bits_offset=64", + "[12] FWD 's3' fwd_kind=struct", + "[13] PTR '(anon)' type_id=12"); err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) @@ -199,7 +211,10 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=2 bits_offset=0\n" "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" - "\t'f1' type_id=1 bits_offset=0"); + "\t'f1' type_id=1 bits_offset=0", + "[6] STRUCT 's3' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[7] PTR '(anon)' type_id=6"); cleanup: btf__free(btf2); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 24da335482d4..e9ea38aa8248 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -791,17 +791,17 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, sizeof(struct bpf_sock_ops) - 1, "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", - { .op = 1, .skb_tcp_flags = 2}); + { .op = 1, .skb_hwtstamp = 2}); TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, sizeof(struct bpf_sock_ops) - 1, "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", - { .op = 1, .skb_tcp_flags = 0}); + { .op = 1, .skb_hwtstamp = 0}); } static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, char *str) { -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if 0 TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index eb90a6b8850d..a8b53b8736f0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -14,7 +14,7 @@ static __u32 bpf_map_id(struct bpf_map *map) int err; memset(&info, 0, info_len); - err = bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, &info_len); + err = bpf_map_get_info_by_fd(bpf_map__fd(map), &info, &info_len); if (err) return 0; return info.id; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 7a277035c275..ef4d6a3ae423 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -9,6 +9,7 @@ #include <string.h> #include <errno.h> #include <sched.h> +#include <net/if.h> #include <linux/compiler.h> #include <bpf/libbpf.h> @@ -20,10 +21,12 @@ static struct test_btf_skc_cls_ingress *skel; static struct sockaddr_in6 srv_sa6; static __u32 duration; -#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" - static int prepare_netns(void) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach, + .prog_fd = bpf_program__fd(skel->progs.cls_ingress)); + if (CHECK(unshare(CLONE_NEWNET), "create netns", "unshare(CLONE_NEWNET): %s (%d)", strerror(errno), errno)) @@ -33,12 +36,12 @@ static int prepare_netns(void) "ip link set dev lo up", "failed\n")) return -1; - if (CHECK(system("tc qdisc add dev lo clsact"), - "tc qdisc add dev lo clsact", "failed\n")) + qdisc_lo.ifindex = if_nametoindex("lo"); + if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) return -1; - if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE), - "install tc cls-prog at ingress", "failed\n")) + if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), + "filter add dev lo ingress")) return -1; /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the @@ -195,19 +198,12 @@ static struct test tests[] = { void test_btf_skc_cls_ingress(void) { - int i, err; + int i; skel = test_btf_skc_cls_ingress__open_and_load(); if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) return; - err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE); - if (CHECK(err, "bpf_program__pin", - "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) { - test_btf_skc_cls_ingress__destroy(skel); - return; - } - for (i = 0; i < ARRAY_SIZE(tests); i++) { if (!test__start_subtest(tests[i].desc)) continue; @@ -221,6 +217,5 @@ void test_btf_skc_cls_ingress(void) reset_test(); } - bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE); test_btf_skc_cls_ingress__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c index c4a2adb38da1..e02feb5fae97 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -189,6 +189,80 @@ static void test_walk_self_only(struct cgroup_iter *skel) BPF_CGROUP_ITER_SELF_ONLY, "self_only"); } +static void test_walk_dead_self_only(struct cgroup_iter *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + char expected_output[128], buf[128]; + const char *cgrp_name = "/dead"; + union bpf_iter_link_info linfo; + int len, cgrp_fd, iter_fd; + struct bpf_link *link; + size_t left; + char *p; + + cgrp_fd = create_and_get_cgroup(cgrp_name); + if (!ASSERT_GE(cgrp_fd, 0, "create cgrp")) + return; + + /* The cgroup will be dead during read() iteration, so it only has + * epilogue in the output + */ + snprintf(expected_output, sizeof(expected_output), EPILOGUE); + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgrp_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto close_cgrp; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto free_link; + + /* Close link fd and cgroup fd */ + bpf_link__destroy(link); + close(cgrp_fd); + + /* Remove cgroup to mark it as dead */ + remove_cgroup(cgrp_name); + + /* Two kern_sync_rcu() and usleep() pairs are used to wait for the + * releases of cgroup css, and the last kern_sync_rcu() and usleep() + * pair is used to wait for the free of cgroup itself. + */ + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(1000); + + memset(buf, 0, sizeof(buf)); + left = ARRAY_SIZE(buf); + p = buf; + while ((len = read(iter_fd, p, left)) > 0) { + p += len; + left -= len; + } + + ASSERT_STREQ(buf, expected_output, "dead cgroup output"); + + /* read() after iter finishes should be ok. */ + if (len == 0) + ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read"); + + close(iter_fd); + return; +free_link: + bpf_link__destroy(link); +close_cgrp: + close(cgrp_fd); +} + void test_cgroup_iter(void) { struct cgroup_iter *skel = NULL; @@ -217,6 +291,8 @@ void test_cgroup_iter(void) test_early_termination(skel); if (test__start_subtest("cgroup_iter__self_only")) test_walk_self_only(skel); + if (test__start_subtest("cgroup_iter__dead_self_only")) + test_walk_dead_self_only(skel); out: cgroup_iter__destroy(skel); cleanup_cgroups(); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c new file mode 100644 index 000000000000..b3f7985c8504 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <cgroup_helpers.h> +#include <test_progs.h> + +#include "cgrp_kfunc_failure.skel.h" +#include "cgrp_kfunc_success.skel.h" + +static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void) +{ + struct cgrp_kfunc_success *skel; + int err; + + skel = cgrp_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = cgrp_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + cgrp_kfunc_success__destroy(skel); + return NULL; +} + +static int mkdir_rm_test_dir(void) +{ + int fd; + const char *cgrp_path = "cgrp_kfunc"; + + fd = create_and_get_cgroup(cgrp_path); + if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd")) + return -1; + + close(fd); + remove_cgroup(cgrp_path); + + return 0; +} + +static void run_success_test(const char *prog_name) +{ + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count"); + if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count"); + ASSERT_OK(skel->bss->err, "post_rmdir_err"); + +cleanup: + bpf_link__destroy(link); + cgrp_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_cgrp_acquire_release_argument", + "test_cgrp_acquire_leave_in_map", + "test_cgrp_xchg_release", + "test_cgrp_get_release", + "test_cgrp_get_ancestors", +}; + +void test_cgrp_kfunc(void) +{ + int i, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "cgrp_env_setup")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + RUN_TESTS(cgrp_kfunc_failure); + +cleanup: + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c new file mode 100644 index 000000000000..2cc759956e3b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include "cgrp_ls_tp_btf.skel.h" +#include "cgrp_ls_recursion.skel.h" +#include "cgrp_ls_attach_cgroup.skel.h" +#include "cgrp_ls_negative.skel.h" +#include "cgrp_ls_sleepable.skel.h" +#include "network_helpers.h" +#include "cgroup_helpers.h" + +struct socket_cookie { + __u64 cookie_key; + __u64 cookie_value; +}; + +static void test_tp_btf(int cgroup_fd) +{ + struct cgrp_ls_tp_btf *skel; + long val1 = 1, val2 = 0; + int err; + + skel = cgrp_ls_tp_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + /* populate a value in map_b */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); + if (!ASSERT_OK(err, "map_update_elem")) + goto out; + + /* check value */ + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val2); + if (!ASSERT_OK(err, "map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val2, 1, "map_lookup_elem, invalid val")) + goto out; + + /* delete value */ + err = bpf_map_delete_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd); + if (!ASSERT_OK(err, "map_delete_elem")) + goto out; + + skel->bss->target_pid = syscall(SYS_gettid); + + err = cgrp_ls_tp_btf__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_gettid); + syscall(SYS_gettid); + + skel->bss->target_pid = 0; + + /* 3x syscalls: 1x attach and 2x gettid */ + ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); + ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt"); + ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); +out: + cgrp_ls_tp_btf__destroy(skel); +} + +static void test_attach_cgroup(int cgroup_fd) +{ + int server_fd = 0, client_fd = 0, err = 0; + socklen_t addr_len = sizeof(struct sockaddr_in6); + struct cgrp_ls_attach_cgroup *skel; + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + struct socket_cookie val; + + skel = cgrp_ls_attach_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->links.set_cookie = bpf_program__attach_cgroup( + skel->progs.set_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) + goto out; + + skel->links.update_cookie_sockops = bpf_program__attach_cgroup( + skel->progs.update_cookie_sockops, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach")) + goto out; + + skel->links.update_cookie_tracing = bpf_program__attach( + skel->progs.update_cookie_tracing); + if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach")) + goto out; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto close_server_fd; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies), + &cgroup_fd, &val); + if (!ASSERT_OK(err, "map_lookup(socket_cookies)")) + goto close_client_fd; + + err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len); + if (!ASSERT_OK(err, "getsockname")) + goto close_client_fd; + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value"); + +close_client_fd: + close(client_fd); +close_server_fd: + close(server_fd); +out: + cgrp_ls_attach_cgroup__destroy(skel); +} + +static void test_recursion(int cgroup_fd) +{ + struct cgrp_ls_recursion *skel; + int err; + + skel = cgrp_ls_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = cgrp_ls_recursion__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* trigger sys_enter, make sure it does not cause deadlock */ + syscall(SYS_gettid); + +out: + cgrp_ls_recursion__destroy(skel); +} + +static void test_negative(void) +{ + struct cgrp_ls_negative *skel; + + skel = cgrp_ls_negative__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "skel_open_and_load")) { + cgrp_ls_negative__destroy(skel); + return; + } +} + +static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct cgrp_ls_sleepable *skel; + struct bpf_link *link; + int err, iter_fd; + char buf[16]; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.cgroup_iter, true); + err = cgrp_ls_sleepable__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgroup_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto out; + + /* trigger the program run */ + (void)read(iter_fd, buf, sizeof(buf)); + + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); + + close(iter_fd); +out: + cgrp_ls_sleepable__destroy(skel); +} + +static void test_no_rcu_lock(__u64 cgroup_id) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = cgrp_ls_sleepable__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + cgrp_ls_sleepable__destroy(skel); +} + +static void test_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_ERR(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + +void test_cgrp_local_storage(void) +{ + __u64 cgroup_id; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/cgrp_local_storage"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) + return; + + cgroup_id = get_cgroup_id("/cgrp_local_storage"); + if (test__start_subtest("tp_btf")) + test_tp_btf(cgroup_fd); + if (test__start_subtest("attach_cgroup")) + test_attach_cgroup(cgroup_fd); + if (test__start_subtest("recursion")) + test_recursion(cgroup_fd); + if (test__start_subtest("negative")) + test_negative(); + if (test__start_subtest("cgroup_iter_sleepable")) + test_cgroup_iter_sleepable(cgroup_fd, cgroup_id); + if (test__start_subtest("no_rcu_lock")) + test_no_rcu_lock(cgroup_id); + if (test__start_subtest("rcu_lock")) + test_rcu_lock(); + + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 12f4395f18b3..5338d2ea0460 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -59,7 +59,7 @@ static void test_check_mtu_xdp_attach(void) memset(&link_info, 0, sizeof(link_info)); fd = bpf_link__fd(link); - err = bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); if (CHECK(err, "link_info", "failed: %d\n", err)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c new file mode 100644 index 000000000000..5fbe457c4ebe --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "cpumask_failure.skel.h" +#include "cpumask_success.skel.h" + +static const char * const cpumask_success_testcases[] = { + "test_alloc_free_cpumask", + "test_set_clear_cpu", + "test_setall_clear_cpu", + "test_first_firstzero_cpu", + "test_test_and_set_clear", + "test_and_or_xor", + "test_intersects_subset", + "test_copy_any_anyand", + "test_insert_leave", + "test_insert_remove_release", + "test_insert_kptr_get_release", +}; + +static void verify_success(const char *prog_name) +{ + struct cpumask_success *skel; + struct bpf_program *prog; + struct bpf_link *link = NULL; + pid_t child_pid; + int status; + + skel = cpumask_success__open(); + if (!ASSERT_OK_PTR(skel, "cpumask_success__open")) + return; + + skel->bss->pid = getpid(); + skel->bss->nr_cpus = libbpf_num_possible_cpus(); + + cpumask_success__load(skel); + if (!ASSERT_OK_PTR(skel, "cpumask_success__load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + if (child_pid == 0) + _exit(0); + waitpid(child_pid, &status, 0); + ASSERT_OK(skel->bss->err, "post_wait_err"); + +cleanup: + bpf_link__destroy(link); + cpumask_success__destroy(skel); +} + +void test_cpumask(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cpumask_success_testcases); i++) { + if (!test__start_subtest(cpumask_success_testcases[i])) + continue; + + verify_success(cpumask_success_testcases[i]); + } + + RUN_TESTS(cpumask_failure); +} diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c new file mode 100644 index 000000000000..2853883b7cbb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <net/if.h> +#include <linux/in6.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "decap_sanity.skel.h" + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define NS_TEST "decap_sanity_ns" +#define IPV6_IFACE_ADDR "face::1" +#define UDP_TEST_PORT 7777 + +void test_decap_sanity(void) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach); + struct nstoken *nstoken = NULL; + struct decap_sanity *skel; + struct sockaddr_in6 addr; + socklen_t addrlen; + char buf[128] = {}; + int sockfd, err; + + skel = decap_sanity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + SYS("ip netns add %s", NS_TEST); + SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); + SYS("ip -net %s link set dev lo up", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + + qdisc_hook.ifindex = if_nametoindex("lo"); + if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo")) + goto fail; + + err = bpf_tc_hook_create(&qdisc_hook); + if (!ASSERT_OK(err, "create qdisc hook")) + goto fail; + + tc_attach.prog_fd = bpf_program__fd(skel->progs.decap_sanity); + err = bpf_tc_attach(&qdisc_hook, &tc_attach); + if (!ASSERT_OK(err, "attach filter")) + goto fail; + + addrlen = sizeof(addr); + err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, UDP_TEST_PORT, + (void *)&addr, &addrlen); + if (!ASSERT_OK(err, "make_sockaddr")) + goto fail; + sockfd = socket(AF_INET6, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "socket")) + goto fail; + err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(buf), "send")) + goto fail; + + ASSERT_TRUE(skel->bss->init_csum_partial, "init_csum_partial"); + ASSERT_TRUE(skel->bss->final_csum_none, "final_csum_none"); + ASSERT_FALSE(skel->bss->broken_csum_start, "broken_csum_start"); + +fail: + if (nstoken) { + bpf_tc_hook_destroy(&qdisc_hook); + close_netns(nstoken); + } + system("ip netns del " NS_TEST " &> /dev/null"); + decap_sanity__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c index c11832657d2b..f43fcb13d2c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2021. Huawei Technologies Co., Ltd */ #include <test_progs.h> -#include "dummy_st_ops.skel.h" +#include "dummy_st_ops_success.skel.h" +#include "dummy_st_ops_fail.skel.h" #include "trace_dummy_st_ops.skel.h" /* Need to keep consistent with definition in include/linux/bpf.h */ @@ -11,17 +12,17 @@ struct bpf_dummy_ops_state { static void test_dummy_st_ops_attach(void) { - struct dummy_st_ops *skel; + struct dummy_st_ops_success *skel; struct bpf_link *link; - skel = dummy_st_ops__open_and_load(); + skel = dummy_st_ops_success__open_and_load(); if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) return; link = bpf_map__attach_struct_ops(skel->maps.dummy_1); ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach"); - dummy_st_ops__destroy(skel); + dummy_st_ops_success__destroy(skel); } static void test_dummy_init_ret_value(void) @@ -31,10 +32,10 @@ static void test_dummy_init_ret_value(void) .ctx_in = args, .ctx_size_in = sizeof(args), ); - struct dummy_st_ops *skel; + struct dummy_st_ops_success *skel; int fd, err; - skel = dummy_st_ops__open_and_load(); + skel = dummy_st_ops_success__open_and_load(); if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) return; @@ -43,7 +44,7 @@ static void test_dummy_init_ret_value(void) ASSERT_OK(err, "test_run"); ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret"); - dummy_st_ops__destroy(skel); + dummy_st_ops_success__destroy(skel); } static void test_dummy_init_ptr_arg(void) @@ -58,10 +59,10 @@ static void test_dummy_init_ptr_arg(void) .ctx_size_in = sizeof(args), ); struct trace_dummy_st_ops *trace_skel; - struct dummy_st_ops *skel; + struct dummy_st_ops_success *skel; int fd, err; - skel = dummy_st_ops__open_and_load(); + skel = dummy_st_ops_success__open_and_load(); if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) return; @@ -91,7 +92,7 @@ static void test_dummy_init_ptr_arg(void) ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val"); done: - dummy_st_ops__destroy(skel); + dummy_st_ops_success__destroy(skel); trace_dummy_st_ops__destroy(trace_skel); } @@ -102,12 +103,12 @@ static void test_dummy_multiple_args(void) .ctx_in = args, .ctx_size_in = sizeof(args), ); - struct dummy_st_ops *skel; + struct dummy_st_ops_success *skel; int fd, err; size_t i; char name[8]; - skel = dummy_st_ops__open_and_load(); + skel = dummy_st_ops_success__open_and_load(); if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) return; @@ -119,7 +120,28 @@ static void test_dummy_multiple_args(void) ASSERT_EQ(skel->bss->test_2_args[i], args[i], name); } - dummy_st_ops__destroy(skel); + dummy_st_ops_success__destroy(skel); +} + +static void test_dummy_sleepable(void) +{ + __u64 args[1] = {0}; + LIBBPF_OPTS(bpf_test_run_opts, attr, + .ctx_in = args, + .ctx_size_in = sizeof(args), + ); + struct dummy_st_ops_success *skel; + int fd, err; + + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_sleepable); + err = bpf_prog_test_run_opts(fd, &attr); + ASSERT_OK(err, "test_run"); + + dummy_st_ops_success__destroy(skel); } void test_dummy_st_ops(void) @@ -132,4 +154,8 @@ void test_dummy_st_ops(void) test_dummy_init_ptr_arg(); if (test__start_subtest("dummy_multiple_args")) test_dummy_multiple_args(); + if (test__start_subtest("dummy_sleepable")) + test_dummy_sleepable(); + + RUN_TESTS(dummy_st_ops_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 8fc4e6c02bfd..b99264ec0d9c 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -5,86 +5,12 @@ #include "dynptr_fail.skel.h" #include "dynptr_success.skel.h" -static size_t log_buf_sz = 1048576; /* 1 MB */ -static char obj_log_buf[1048576]; - -static struct { - const char *prog_name; - const char *expected_err_msg; -} dynptr_tests[] = { - /* failure cases */ - {"ringbuf_missing_release1", "Unreleased reference id=1"}, - {"ringbuf_missing_release2", "Unreleased reference id=2"}, - {"ringbuf_missing_release_callback", "Unreleased reference id"}, - {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, - {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, - {"add_dynptr_to_map1", "invalid indirect read from stack"}, - {"add_dynptr_to_map2", "invalid indirect read from stack"}, - {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, - {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, - {"data_slice_use_after_release1", "invalid mem access 'scalar'"}, - {"data_slice_use_after_release2", "invalid mem access 'scalar'"}, - {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, - {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, - {"invalid_helper1", "invalid indirect read from stack"}, - {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write1", "Expected an initialized dynptr as arg #1"}, - {"invalid_write2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write3", "Expected an initialized dynptr as arg #1"}, - {"invalid_write4", "arg 1 is an unacquired reference"}, - {"invalid_read1", "invalid read from stack"}, - {"invalid_read2", "cannot pass in dynptr at an offset"}, - {"invalid_read3", "invalid read from stack"}, - {"invalid_read4", "invalid read from stack"}, - {"invalid_offset", "invalid write to stack"}, - {"global", "type=map_value expected=fp"}, - {"release_twice", "arg 1 is an unacquired reference"}, - {"release_twice_callback", "arg 1 is an unacquired reference"}, - {"dynptr_from_mem_invalid_api", - "Unsupported reg type fp for bpf_dynptr_from_mem data"}, - - /* success cases */ - {"test_read_write", NULL}, - {"test_data_slice", NULL}, - {"test_ringbuf", NULL}, +static const char * const success_tests[] = { + "test_read_write", + "test_data_slice", + "test_ringbuf", }; -static void verify_fail(const char *prog_name, const char *expected_err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct bpf_program *prog; - struct dynptr_fail *skel; - int err; - - opts.kernel_log_buf = obj_log_buf; - opts.kernel_log_size = log_buf_sz; - opts.kernel_log_level = 1; - - skel = dynptr_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) - goto cleanup; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto cleanup; - - bpf_program__set_autoload(prog, true); - - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - - err = dynptr_fail__load(skel); - if (!ASSERT_ERR(err, "unexpected load success")) - goto cleanup; - - if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { - fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); - fprintf(stderr, "Verifier output: %s\n", obj_log_buf); - } - -cleanup: - dynptr_fail__destroy(skel); -} - static void verify_success(const char *prog_name) { struct dynptr_success *skel; @@ -97,8 +23,6 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - dynptr_success__load(skel); if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) goto cleanup; @@ -125,14 +49,12 @@ void test_dynptr(void) { int i; - for (i = 0; i < ARRAY_SIZE(dynptr_tests); i++) { - if (!test__start_subtest(dynptr_tests[i].prog_name)) + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) continue; - if (dynptr_tests[i].expected_err_msg) - verify_fail(dynptr_tests[i].prog_name, - dynptr_tests[i].expected_err_msg); - else - verify_success(dynptr_tests[i].prog_name); + verify_success(success_tests[i]); } + + RUN_TESTS(dynptr_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c new file mode 100644 index 000000000000..32dd731e9070 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include <net/if.h> +#include "empty_skb.skel.h" + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +void test_empty_skb(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, tattr); + struct empty_skb *bpf_obj = NULL; + struct nstoken *tok = NULL; + struct bpf_program *prog; + char eth_hlen_pp[15]; + char eth_hlen[14]; + int veth_ifindex; + int ipip_ifindex; + int err; + int i; + + struct { + const char *msg; + const void *data_in; + __u32 data_size_in; + int *ifindex; + int err; + int ret; + bool success_on_tc; + } tests[] = { + /* Empty packets are always rejected. */ + + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "veth empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &veth_ifindex, + .err = -EINVAL, + }, + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "ipip empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &ipip_ifindex, + .err = -EINVAL, + }, + + /* ETH_HLEN-sized packets: + * - can not be redirected at LWT_XMIT + * - can be redirected at TC to non-tunneling dest + */ + + { + /* __bpf_redirect_common */ + .msg = "veth ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &veth_ifindex, + .ret = -ERANGE, + .success_on_tc = true, + }, + { + /* __bpf_redirect_no_mac + * + * lwt: skb->len=0 <= skb_network_offset=0 + * tc: skb->len=14 <= skb_network_offset=14 + */ + .msg = "ipip ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &ipip_ifindex, + .ret = -ERANGE, + }, + + /* ETH_HLEN+1-sized packet should be redirected. */ + + { + .msg = "veth ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &veth_ifindex, + }, + { + .msg = "ipip ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &ipip_ifindex, + }, + }; + + SYS("ip netns add empty_skb"); + tok = open_netns("empty_skb"); + SYS("ip link add veth0 type veth peer veth1"); + SYS("ip link set dev veth0 up"); + SYS("ip link set dev veth1 up"); + SYS("ip addr add 10.0.0.1/8 dev veth0"); + SYS("ip addr add 10.0.0.2/8 dev veth1"); + veth_ifindex = if_nametoindex("veth0"); + + SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS("ip link set ipip0 up"); + SYS("ip addr add 192.168.1.1/16 dev ipip0"); + ipip_ifindex = if_nametoindex("ipip0"); + + bpf_obj = empty_skb__open_and_load(); + if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) + goto out; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + bpf_object__for_each_program(prog, bpf_obj->obj) { + char buf[128]; + bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + + tattr.data_in = tests[i].data_in; + tattr.data_size_in = tests[i].data_size_in; + + tattr.data_size_out = 0; + bpf_obj->bss->ifindex = *tests[i].ifindex; + bpf_obj->bss->ret = 0; + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr); + sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog)); + + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(err, 0, buf); + else + ASSERT_EQ(err, tests[i].err, buf); + sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog)); + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(bpf_obj->bss->ret, 0, buf); + else + ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + } + } + +out: + if (bpf_obj) + empty_skb__destroy(bpf_obj); + if (tok) + close_netns(tok); + system("ip netns del empty_skb"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c index 2cb2085917e7..75f85d0fe74a 100644 --- a/tools/testing/selftests/bpf/prog_tests/enable_stats.c +++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c @@ -28,7 +28,7 @@ void test_enable_stats(void) prog_fd = bpf_program__fd(skel->progs.test_enable_stats); memset(&info, 0, info_len); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index d1e32e792536..8ec73fdfcdab 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -4,6 +4,8 @@ #include <network_helpers.h> #include <bpf/btf.h> #include "bind4_prog.skel.h" +#include "freplace_progmap.skel.h" +#include "xdp_dummy.skel.h" typedef int (*test_cb)(struct bpf_object *obj); @@ -77,7 +79,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, return; info_len = sizeof(prog_info); - err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len); + err = bpf_prog_get_info_by_fd(tgt_fd, &prog_info, &info_len); if (!ASSERT_OK(err, "tgt_fd_get_info")) goto close_prog; @@ -134,8 +136,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, info_len = sizeof(link_info); memset(&link_info, 0, sizeof(link_info)); - err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]), - &link_info, &info_len); + err = bpf_link_get_info_by_fd(bpf_link__fd(link[i]), + &link_info, &info_len); ASSERT_OK(err, "link_fd_get_info"); ASSERT_EQ(link_info.tracing.attach_type, bpf_program__expected_attach_type(prog[i]), @@ -415,7 +417,7 @@ static int find_prog_btf_id(const char *name, __u32 attach_prog_fd) struct btf *btf; int ret; - ret = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); + ret = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); if (ret) return ret; @@ -481,12 +483,12 @@ static void test_fentry_to_cgroup_bpf(void) if (!ASSERT_GE(fentry_fd, 0, "load_fentry")) goto cleanup; - /* Make sure bpf_obj_get_info_by_fd works correctly when attaching + /* Make sure bpf_prog_get_info_by_fd works correctly when attaching * to another BPF program. */ - ASSERT_OK(bpf_obj_get_info_by_fd(fentry_fd, &info, &info_len), - "bpf_obj_get_info_by_fd"); + ASSERT_OK(bpf_prog_get_info_by_fd(fentry_fd, &info, &info_len), + "bpf_prog_get_info_by_fd"); ASSERT_EQ(info.btf_id, 0, "info.btf_id"); ASSERT_EQ(info.attach_btf_id, btf_id, "info.attach_btf_id"); @@ -500,6 +502,50 @@ cleanup: bind4_prog__destroy(skel); } +static void test_func_replace_progmap(void) +{ + struct bpf_cpumap_val value = { .qsize = 1 }; + struct freplace_progmap *skel = NULL; + struct xdp_dummy *tgt_skel = NULL; + __u32 key = 0; + int err; + + skel = freplace_progmap__open(); + if (!ASSERT_OK_PTR(skel, "prog_open")) + return; + + tgt_skel = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(tgt_skel, "tgt_prog_load")) + goto out; + + err = bpf_program__set_attach_target(skel->progs.xdp_cpumap_prog, + bpf_program__fd(tgt_skel->progs.xdp_dummy_prog), + "xdp_dummy_prog"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = freplace_progmap__load(skel); + if (!ASSERT_OK(err, "obj_load")) + goto out; + + /* Prior to fixing the kernel, loading the PROG_TYPE_EXT 'redirect' + * program above will cause the map owner type of 'cpumap' to be set to + * PROG_TYPE_EXT. This in turn will cause the bpf_map_update_elem() + * below to fail, because the program we are inserting into the map is + * of PROG_TYPE_XDP. After fixing the kernel, the initial ownership will + * be correctly resolved to the *target* of the PROG_TYPE_EXT program + * (i.e., PROG_TYPE_XDP) and the map update will succeed. + */ + value.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_drop_prog); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map), + &key, &value, 0); + ASSERT_OK(err, "map_update"); + +out: + xdp_dummy__destroy(tgt_skel); + freplace_progmap__destroy(skel); +} + /* NOTE: affect other tests, must run in serial mode */ void serial_test_fexit_bpf2bpf(void) { @@ -525,4 +571,6 @@ void serial_test_fexit_bpf2bpf(void) test_func_replace_global_func(); if (test__start_subtest("fentry_to_cgroup_bpf")) test_fentry_to_cgroup_bpf(); + if (test__start_subtest("func_replace_progmap")) + test_func_replace_progmap(); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 5a7e6011f6bf..596536def43d 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -2,14 +2,19 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -/* that's kernel internal BPF_MAX_TRAMP_PROGS define */ -#define CNT 38 - void serial_test_fexit_stress(void) { - int fexit_fd[CNT] = {}; - int link_fd[CNT] = {}; - int err, i; + int bpf_max_tramp_links, err, i; + int *fd, *fexit_fd, *link_fd; + + bpf_max_tramp_links = get_bpf_max_tramp_links(); + if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links")) + return; + fd = calloc(bpf_max_tramp_links * 2, sizeof(*fd)); + if (!ASSERT_OK_PTR(fd, "fd")) + return; + fexit_fd = fd; + link_fd = fd + bpf_max_tramp_links; const struct bpf_insn trace_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -28,7 +33,7 @@ void serial_test_fexit_stress(void) goto out; trace_opts.attach_btf_id = err; - for (i = 0; i < CNT; i++) { + for (i = 0; i < bpf_max_tramp_links; i++) { fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", trace_program, sizeof(trace_program) / sizeof(struct bpf_insn), @@ -44,10 +49,11 @@ void serial_test_fexit_stress(void) ASSERT_OK(err, "bpf_prog_test_run_opts"); out: - for (i = 0; i < CNT; i++) { + for (i = 0; i < bpf_max_tramp_links; i++) { if (link_fd[i]) close(link_fd[i]); if (fexit_fd[i]) close(fexit_fd[i]); } + free(fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c new file mode 100644 index 000000000000..61ccddccf485 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <sys/types.h> +#include <net/if.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "fib_lookup.skel.h" + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define NS_TEST "fib_lookup_ns" +#define IPV6_IFACE_ADDR "face::face" +#define IPV6_NUD_FAILED_ADDR "face::1" +#define IPV6_NUD_STALE_ADDR "face::2" +#define IPV4_IFACE_ADDR "10.0.0.254" +#define IPV4_NUD_FAILED_ADDR "10.0.0.1" +#define IPV4_NUD_STALE_ADDR "10.0.0.2" +#define DMAC "11:11:11:11:11:11" +#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, } + +struct fib_lookup_test { + const char *desc; + const char *daddr; + int expected_ret; + int lookup_flags; + __u8 dmac[6]; +}; + +static const struct fib_lookup_test tests[] = { + { .desc = "IPv6 failed neigh", + .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, }, + { .desc = "IPv6 stale neigh", + .daddr = IPV6_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .dmac = DMAC_INIT, }, + { .desc = "IPv6 skip neigh", + .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 failed neigh", + .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, }, + { .desc = "IPv4 stale neigh", + .daddr = IPV4_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .dmac = DMAC_INIT, }, + { .desc = "IPv4 skip neigh", + .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, }, +}; + +static int ifindex; + +static int setup_netns(void) +{ + int err; + + SYS("ip link add veth1 type veth peer name veth2"); + SYS("ip link set dev veth1 up"); + + SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); + SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); + SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); + + SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); + SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); + SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + + err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1"); + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)")) + goto fail; + + err = write_sysctl("/proc/sys/net/ipv6/conf/veth1/forwarding", "1"); + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)")) + goto fail; + + return 0; +fail: + return -1; +} + +static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) +{ + int ret; + + memset(params, 0, sizeof(*params)); + + params->l4_protocol = IPPROTO_TCP; + params->ifindex = ifindex; + + if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) { + params->family = AF_INET6; + ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) + return -1; + return 0; + } + + ret = inet_pton(AF_INET, daddr, ¶ms->ipv4_dst); + if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) + return -1; + params->family = AF_INET; + ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) + return -1; + + return 0; +} + +static void mac_str(char *b, const __u8 *mac) +{ + sprintf(b, "%02X:%02X:%02X:%02X:%02X:%02X", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); +} + +void test_fib_lookup(void) +{ + struct bpf_fib_lookup *fib_params; + struct nstoken *nstoken = NULL; + struct __sk_buff skb = { }; + struct fib_lookup *skel; + int prog_fd, err, ret, i; + + /* The test does not use the skb->data, so + * use pkt_v6 for both v6 and v4 test. + */ + LIBBPF_OPTS(bpf_test_run_opts, run_opts, + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + skel = fib_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + prog_fd = bpf_program__fd(skel->progs.fib_lookup); + + SYS("ip netns add %s", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + + if (setup_netns()) + goto fail; + + ifindex = if_nametoindex("veth1"); + skb.ifindex = ifindex; + fib_params = &skel->bss->fib_params; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + printf("Testing %s\n", tests[i].desc); + + if (set_lookup_params(fib_params, tests[i].daddr)) + continue; + skel->bss->fib_lookup_ret = -1; + skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT | + tests[i].lookup_flags; + + err = bpf_prog_test_run_opts(prog_fd, &run_opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + continue; + + ASSERT_EQ(tests[i].expected_ret, skel->bss->fib_lookup_ret, + "fib_lookup_ret"); + + ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); + if (!ASSERT_EQ(ret, 0, "dmac not match")) { + char expected[18], actual[18]; + + mac_str(expected, tests[i].dmac); + mac_str(actual, fib_params->dmac); + printf("dmac expected %s actual %s\n", expected, actual); + } + } + +fail: + if (nstoken) + close_netns(nstoken); + system("ip netns del " NS_TEST " &> /dev/null"); + fib_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 7c79462d2702..9333f7346d15 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -60,9 +60,9 @@ static __u32 query_prog_id(int prog) __u32 info_len = sizeof(info); int err; - err = bpf_obj_get_info_by_fd(prog, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog, &info, &info_len); if (CHECK_FAIL(err || info_len != sizeof(info))) { - perror("bpf_obj_get_info_by_fd"); + perror("bpf_prog_get_info_by_fd"); return 0; } @@ -497,7 +497,7 @@ static void test_link_get_info(int netns, int prog1, int prog2) } info_len = sizeof(info); - err = bpf_obj_get_info_by_fd(link, &info, &info_len); + err = bpf_link_get_info_by_fd(link, &info, &info_len); if (CHECK_FAIL(err)) { perror("bpf_obj_get_info"); goto out_unlink; @@ -521,7 +521,7 @@ static void test_link_get_info(int netns, int prog1, int prog2) link_id = info.id; info_len = sizeof(info); - err = bpf_obj_get_info_by_fd(link, &info, &info_len); + err = bpf_link_get_info_by_fd(link, &info, &info_len); if (CHECK_FAIL(err)) { perror("bpf_obj_get_info"); goto out_unlink; @@ -546,7 +546,7 @@ static void test_link_get_info(int netns, int prog1, int prog2) netns = -1; info_len = sizeof(info); - err = bpf_obj_get_info_by_fd(link, &info, &info_len); + err = bpf_link_get_info_by_fd(link, &info, &info_len); if (CHECK_FAIL(err)) { perror("bpf_obj_get_info"); goto out_unlink; diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c index 4747ab18f97f..d358a223fd2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/hashmap.c +++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c @@ -7,17 +7,18 @@ */ #include "test_progs.h" #include "bpf/hashmap.h" +#include <stddef.h> static int duration = 0; -static size_t hash_fn(const void *k, void *ctx) +static size_t hash_fn(long k, void *ctx) { - return (long)k; + return k; } -static bool equal_fn(const void *a, const void *b, void *ctx) +static bool equal_fn(long a, long b, void *ctx) { - return (long)a == (long)b; + return a == b; } static inline size_t next_pow_2(size_t n) @@ -52,8 +53,8 @@ static void test_hashmap_generic(void) return; for (i = 0; i < ELEM_CNT; i++) { - const void *oldk, *k = (const void *)(long)i; - void *oldv, *v = (void *)(long)(1024 + i); + long oldk, k = i; + long oldv, v = 1024 + i; err = hashmap__update(map, k, v, &oldk, &oldv); if (CHECK(err != -ENOENT, "hashmap__update", @@ -64,20 +65,18 @@ static void test_hashmap_generic(void) err = hashmap__add(map, k, v); } else { err = hashmap__set(map, k, v, &oldk, &oldv); - if (CHECK(oldk != NULL || oldv != NULL, "check_kv", - "unexpected k/v: %p=%p\n", oldk, oldv)) + if (CHECK(oldk != 0 || oldv != 0, "check_kv", + "unexpected k/v: %ld=%ld\n", oldk, oldv)) goto cleanup; } - if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) + if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", k, v, err)) goto cleanup; if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", - "failed to find key %ld\n", (long)k)) + "failed to find key %ld\n", k)) goto cleanup; - if (CHECK(oldv != v, "elem_val", - "found value is wrong: %ld\n", (long)oldv)) + if (CHECK(oldv != v, "elem_val", "found value is wrong: %ld\n", oldv)) goto cleanup; } @@ -91,8 +90,8 @@ static void test_hashmap_generic(void) found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { - long k = (long)entry->key; - long v = (long)entry->value; + long k = entry->key; + long v = entry->value; found_msk |= 1ULL << k; if (CHECK(v - k != 1024, "check_kv", @@ -104,8 +103,8 @@ static void test_hashmap_generic(void) goto cleanup; for (i = 0; i < ELEM_CNT; i++) { - const void *oldk, *k = (const void *)(long)i; - void *oldv, *v = (void *)(long)(256 + i); + long oldk, k = i; + long oldv, v = 256 + i; err = hashmap__add(map, k, v); if (CHECK(err != -EEXIST, "hashmap__add", @@ -119,13 +118,13 @@ static void test_hashmap_generic(void) if (CHECK(err, "elem_upd", "failed to update k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) + k, v, err)) goto cleanup; if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", - "failed to find key %ld\n", (long)k)) + "failed to find key %ld\n", k)) goto cleanup; if (CHECK(oldv != v, "elem_val", - "found value is wrong: %ld\n", (long)oldv)) + "found value is wrong: %ld\n", oldv)) goto cleanup; } @@ -139,8 +138,8 @@ static void test_hashmap_generic(void) found_msk = 0; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { - long k = (long)entry->key; - long v = (long)entry->value; + long k = entry->key; + long v = entry->value; found_msk |= 1ULL << k; if (CHECK(v - k != 256, "elem_check", @@ -152,7 +151,7 @@ static void test_hashmap_generic(void) goto cleanup; found_cnt = 0; - hashmap__for_each_key_entry(map, entry, (void *)0) { + hashmap__for_each_key_entry(map, entry, 0) { found_cnt++; } if (CHECK(!found_cnt, "found_cnt", @@ -161,27 +160,25 @@ static void test_hashmap_generic(void) found_msk = 0; found_cnt = 0; - hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) { - const void *oldk, *k; - void *oldv, *v; + hashmap__for_each_key_entry_safe(map, entry, tmp, 0) { + long oldk, k; + long oldv, v; k = entry->key; v = entry->value; found_cnt++; - found_msk |= 1ULL << (long)k; + found_msk |= 1ULL << k; if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "failed to delete k/v %ld = %ld\n", - (long)k, (long)v)) + "failed to delete k/v %ld = %ld\n", k, v)) goto cleanup; if (CHECK(oldk != k || oldv != v, "check_old", "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n", - (long)k, (long)v, (long)oldk, (long)oldv)) + k, v, oldk, oldv)) goto cleanup; if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "unexpectedly deleted k/v %ld = %ld\n", - (long)oldk, (long)oldv)) + "unexpectedly deleted k/v %ld = %ld\n", oldk, oldv)) goto cleanup; } @@ -198,26 +195,24 @@ static void test_hashmap_generic(void) goto cleanup; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { - const void *oldk, *k; - void *oldv, *v; + long oldk, k; + long oldv, v; k = entry->key; v = entry->value; found_cnt++; - found_msk |= 1ULL << (long)k; + found_msk |= 1ULL << k; if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "failed to delete k/v %ld = %ld\n", - (long)k, (long)v)) + "failed to delete k/v %ld = %ld\n", k, v)) goto cleanup; if (CHECK(oldk != k || oldv != v, "elem_check", "invalid old k/v: expect %ld = %ld, got %ld = %ld\n", - (long)k, (long)v, (long)oldk, (long)oldv)) + k, v, oldk, oldv)) goto cleanup; if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "unexpectedly deleted k/v %ld = %ld\n", - (long)k, (long)v)) + "unexpectedly deleted k/v %ld = %ld\n", k, v)) goto cleanup; } @@ -235,7 +230,7 @@ static void test_hashmap_generic(void) hashmap__for_each_entry(map, entry, bkt) { CHECK(false, "elem_exists", "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); + entry->key, entry->value); goto cleanup; } @@ -243,22 +238,107 @@ static void test_hashmap_generic(void) hashmap__for_each_entry(map, entry, bkt) { CHECK(false, "elem_exists", "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); + entry->key, entry->value); + goto cleanup; + } + +cleanup: + hashmap__free(map); +} + +static size_t str_hash_fn(long a, void *ctx) +{ + return str_hash((char *)a); +} + +static bool str_equal_fn(long a, long b, void *ctx) +{ + return strcmp((char *)a, (char *)b) == 0; +} + +/* Verify that hashmap interface works with pointer keys and values */ +static void test_hashmap_ptr_iface(void) +{ + const char *key, *value, *old_key, *old_value; + struct hashmap_entry *cur; + struct hashmap *map; + int err, i, bkt; + + map = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (CHECK(!map, "hashmap__new", "can't allocate hashmap\n")) goto cleanup; + +#define CHECK_STR(fn, var, expected) \ + CHECK(strcmp(var, (expected)), (fn), \ + "wrong value of " #var ": '%s' instead of '%s'\n", var, (expected)) + + err = hashmap__insert(map, "a", "apricot", HASHMAP_ADD, NULL, NULL); + if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err)) + goto cleanup; + + err = hashmap__insert(map, "a", "apple", HASHMAP_SET, &old_key, &old_value); + if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__update", old_key, "a"); + CHECK_STR("hashmap__update", old_value, "apricot"); + + err = hashmap__add(map, "b", "banana"); + if (CHECK(err, "hashmap__add", "unexpected error: %d\n", err)) + goto cleanup; + + err = hashmap__set(map, "b", "breadfruit", &old_key, &old_value); + if (CHECK(err, "hashmap__set", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__set", old_key, "b"); + CHECK_STR("hashmap__set", old_value, "banana"); + + err = hashmap__update(map, "b", "blueberry", &old_key, &old_value); + if (CHECK(err, "hashmap__update", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__update", old_key, "b"); + CHECK_STR("hashmap__update", old_value, "breadfruit"); + + err = hashmap__append(map, "c", "cherry"); + if (CHECK(err, "hashmap__append", "unexpected error: %d\n", err)) + goto cleanup; + + if (CHECK(!hashmap__delete(map, "c", &old_key, &old_value), + "hashmap__delete", "expected to have entry for 'c'\n")) + goto cleanup; + CHECK_STR("hashmap__delete", old_key, "c"); + CHECK_STR("hashmap__delete", old_value, "cherry"); + + CHECK(!hashmap__find(map, "b", &value), "hashmap__find", "can't find value for 'b'\n"); + CHECK_STR("hashmap__find", value, "blueberry"); + + if (CHECK(!hashmap__delete(map, "b", NULL, NULL), + "hashmap__delete", "expected to have entry for 'b'\n")) + goto cleanup; + + i = 0; + hashmap__for_each_entry(map, cur, bkt) { + if (CHECK(i != 0, "hashmap__for_each_entry", "too many entries")) + goto cleanup; + key = cur->pkey; + value = cur->pvalue; + CHECK_STR("entry", key, "a"); + CHECK_STR("entry", value, "apple"); + i++; } +#undef CHECK_STR cleanup: hashmap__free(map); } -static size_t collision_hash_fn(const void *k, void *ctx) +static size_t collision_hash_fn(long k, void *ctx) { return 0; } static void test_hashmap_multimap(void) { - void *k1 = (void *)0, *k2 = (void *)1; + long k1 = 0, k2 = 1; struct hashmap_entry *entry; struct hashmap *map; long found_msk; @@ -273,23 +353,23 @@ static void test_hashmap_multimap(void) * [0] -> 1, 2, 4; * [1] -> 8, 16, 32; */ - err = hashmap__append(map, k1, (void *)1); + err = hashmap__append(map, k1, 1); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k1, (void *)2); + err = hashmap__append(map, k1, 2); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k1, (void *)4); + err = hashmap__append(map, k1, 4); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)8); + err = hashmap__append(map, k2, 8); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)16); + err = hashmap__append(map, k2, 16); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)32); + err = hashmap__append(map, k2, 32); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; @@ -300,7 +380,7 @@ static void test_hashmap_multimap(void) /* verify global iteration still works and sees all values */ found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (1 << 6) - 1, "found_msk", "not all keys iterated: %lx\n", found_msk)) @@ -309,7 +389,7 @@ static void test_hashmap_multimap(void) /* iterate values for key 1 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k1) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (1 | 2 | 4), "found_msk", "invalid k1 values: %lx\n", found_msk)) @@ -318,7 +398,7 @@ static void test_hashmap_multimap(void) /* iterate values for key 2 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k2) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (8 | 16 | 32), "found_msk", "invalid k2 values: %lx\n", found_msk)) @@ -333,7 +413,7 @@ static void test_hashmap_empty() struct hashmap_entry *entry; int bkt; struct hashmap *map; - void *k = (void *)0; + long k = 0; /* force collisions */ map = hashmap__new(hash_fn, equal_fn, NULL); @@ -374,4 +454,6 @@ void test_hashmap() test_hashmap_multimap(); if (test__start_subtest("empty")) test_hashmap_empty(); + if (test__start_subtest("ptr_iface")) + test_hashmap_ptr_iface(); } diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c new file mode 100644 index 000000000000..a742dd994d60 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <stdbool.h> +#include <test_progs.h> +#include "htab_reuse.skel.h" + +struct htab_op_ctx { + int fd; + int loop; + bool stop; +}; + +struct htab_val { + unsigned int lock; + unsigned int data; +}; + +static void *htab_lookup_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + struct htab_val value; + unsigned int key; + + /* Use BPF_F_LOCK to use spin-lock in map value. */ + key = 7; + bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK); + } + + return NULL; +} + +static void *htab_update_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + struct htab_val value; + unsigned int key; + + key = 7; + value.lock = 0; + value.data = key; + bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK); + bpf_map_delete_elem(ctx->fd, &key); + + key = 24; + value.lock = 0; + value.data = key; + bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK); + bpf_map_delete_elem(ctx->fd, &key); + } + + return NULL; +} + +void test_htab_reuse(void) +{ + unsigned int i, wr_nr = 1, rd_nr = 4; + pthread_t tids[wr_nr + rd_nr]; + struct htab_reuse *skel; + struct htab_op_ctx ctx; + int err; + + skel = htab_reuse__open_and_load(); + if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load")) + return; + + ctx.fd = bpf_map__fd(skel->maps.htab); + ctx.loop = 500; + ctx.stop = false; + + memset(tids, 0, sizeof(tids)); + for (i = 0; i < wr_nr; i++) { + err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + for (i = 0; i < rd_nr; i++) { + err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + +reap: + for (i = 0; i < wr_nr + rd_nr; i++) { + if (!tids[i]) + continue; + pthread_join(tids[i], NULL); + } + htab_reuse__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c new file mode 100644 index 000000000000..3add34df5767 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "jeq_infer_not_null_fail.skel.h" + +void test_jeq_infer_not_null(void) +{ + RUN_TESTS(jeq_infer_not_null_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c new file mode 100644 index 000000000000..5639428607e6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> + +#include "jit_probe_mem.skel.h" + +void test_jit_probe_mem(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct jit_probe_mem *skel; + int ret; + + skel = jit_probe_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "jit_probe_mem__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_jit_probe_mem), &opts); + ASSERT_OK(ret, "jit_probe_mem ret"); + ASSERT_OK(opts.retval, "jit_probe_mem opts.retval"); + ASSERT_EQ(skel->data->total_sum, 192, "jit_probe_mem total_sum"); + + jit_probe_mem__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index 73579370bfbd..c07991544a78 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -36,7 +36,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) "cb32_0 %x != %x\n", meta->cb32_0, cb.cb32[0])) return; - if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth", + if (CHECK(pkt_v6->eth.h_proto != htons(ETH_P_IPV6), "check_eth", "h_proto %x\n", pkt_v6->eth.h_proto)) return; if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip", diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 5af1ee8f0e6e..a543742cd7bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -72,10 +72,12 @@ static struct kfunc_test_params kfunc_tests[] = { /* success cases */ TC_TEST(kfunc_call_test1, 12), TC_TEST(kfunc_call_test2, 3), + TC_TEST(kfunc_call_test4, -1234), TC_TEST(kfunc_call_test_ref_btf_id, 0), TC_TEST(kfunc_call_test_get_mem, 42), SYSCALL_TEST(kfunc_syscall_test, 0), SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0), + TC_TEST(kfunc_call_test_static_unused_arg, 0), }; struct syscall_test_args { diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index c210657d4d0a..8cd298b78e44 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -10,20 +10,11 @@ #include <test_progs.h> #include "test_kfunc_dynptr_param.skel.h" -static size_t log_buf_sz = 1048576; /* 1 MB */ -static char obj_log_buf[1048576]; - static struct { const char *prog_name; - const char *expected_verifier_err_msg; int expected_runtime_err; } kfunc_dynptr_tests[] = { - {"dynptr_type_not_supp", - "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0}, - {"not_valid_dynptr", - "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0}, - {"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0}, - {"dynptr_data_null", NULL, -EBADMSG}, + {"dynptr_data_null", -EBADMSG}, }; static bool kfunc_not_supported; @@ -41,29 +32,15 @@ static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, return 0; } -static void verify_fail(const char *prog_name, const char *expected_err_msg) +static bool has_pkcs7_kfunc_support(void) { struct test_kfunc_dynptr_param *skel; - LIBBPF_OPTS(bpf_object_open_opts, opts); libbpf_print_fn_t old_print_cb; - struct bpf_program *prog; int err; - opts.kernel_log_buf = obj_log_buf; - opts.kernel_log_size = log_buf_sz; - opts.kernel_log_level = 1; - - skel = test_kfunc_dynptr_param__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open_opts")) - goto cleanup; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto cleanup; - - bpf_program__set_autoload(prog, true); - - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + skel = test_kfunc_dynptr_param__open(); + if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open")) + return false; kfunc_not_supported = false; @@ -75,26 +52,18 @@ static void verify_fail(const char *prog_name, const char *expected_err_msg) fprintf(stderr, "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n", __func__); - test__skip(); - goto cleanup; - } - - if (!ASSERT_ERR(err, "unexpected load success")) - goto cleanup; - - if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { - fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); - fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + test_kfunc_dynptr_param__destroy(skel); + return false; } -cleanup: test_kfunc_dynptr_param__destroy(skel); + + return true; } static void verify_success(const char *prog_name, int expected_runtime_err) { struct test_kfunc_dynptr_param *skel; - libbpf_print_fn_t old_print_cb; struct bpf_program *prog; struct bpf_link *link; __u32 next_id; @@ -106,21 +75,7 @@ static void verify_success(const char *prog_name, int expected_runtime_err) skel->bss->pid = getpid(); - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - - kfunc_not_supported = false; - - old_print_cb = libbpf_set_print(libbpf_print_cb); err = test_kfunc_dynptr_param__load(skel); - libbpf_set_print(old_print_cb); - - if (err < 0 && kfunc_not_supported) { - fprintf(stderr, - "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n", - __func__); - test__skip(); - goto cleanup; - } if (!ASSERT_OK(err, "test_kfunc_dynptr_param__load")) goto cleanup; @@ -150,15 +105,15 @@ void test_kfunc_dynptr_param(void) { int i; + if (!has_pkcs7_kfunc_support()) + return; + for (i = 0; i < ARRAY_SIZE(kfunc_dynptr_tests); i++) { if (!test__start_subtest(kfunc_dynptr_tests[i].prog_name)) continue; - if (kfunc_dynptr_tests[i].expected_verifier_err_msg) - verify_fail(kfunc_dynptr_tests[i].prog_name, - kfunc_dynptr_tests[i].expected_verifier_err_msg); - else - verify_success(kfunc_dynptr_tests[i].prog_name, - kfunc_dynptr_tests[i].expected_runtime_err); + verify_success(kfunc_dynptr_tests[i].prog_name, + kfunc_dynptr_tests[i].expected_runtime_err); } + RUN_TESTS(test_kfunc_dynptr_param); } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index a4b4133d39e9..113dba349a57 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -312,20 +312,20 @@ static inline __u64 get_time_ns(void) return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; } -static size_t symbol_hash(const void *key, void *ctx __maybe_unused) +static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); } -static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused) +static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) { return strcmp((const char *) key1, (const char *) key2) == 0; } -static int get_syms(char ***symsp, size_t *cntp) +static int get_syms(char ***symsp, size_t *cntp, bool kernel) { size_t cap = 0, cnt = 0, i; - char *name, **syms = NULL; + char *name = NULL, **syms = NULL; struct hashmap *map; char buf[256]; FILE *f; @@ -349,9 +349,12 @@ static int get_syms(char ***symsp, size_t *cntp) } while (fgets(buf, sizeof(buf), f)) { - /* skip modules */ - if (strchr(buf, '[')) + if (kernel && strchr(buf, '[')) continue; + if (!kernel && !strchr(buf, '[')) + continue; + + free(name); if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) continue; /* @@ -371,38 +374,38 @@ static int get_syms(char ***symsp, size_t *cntp) if (!strncmp(name, "__ftrace_invalid_address__", sizeof("__ftrace_invalid_address__") - 1)) continue; - err = hashmap__add(map, name, NULL); - if (err) { - free(name); - if (err == -EEXIST) - continue; + + err = hashmap__add(map, name, 0); + if (err == -EEXIST) + continue; + if (err) goto error; - } + err = libbpf_ensure_mem((void **) &syms, &cap, sizeof(*syms), cnt + 1); - if (err) { - free(name); + if (err) goto error; - } - syms[cnt] = name; - cnt++; + + syms[cnt++] = name; + name = NULL; } *symsp = syms; *cntp = cnt; error: + free(name); fclose(f); hashmap__free(map); if (err) { for (i = 0; i < cnt; i++) - free(syms[cnt]); + free(syms[i]); free(syms); } return err; } -void serial_test_kprobe_multi_bench_attach(void) +static void test_kprobe_multi_bench_attach(bool kernel) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -413,7 +416,7 @@ void serial_test_kprobe_multi_bench_attach(void) char **syms = NULL; size_t cnt = 0, i; - if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms")) + if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) return; skel = kprobe_multi_empty__open_and_load(); @@ -451,6 +454,14 @@ cleanup: } } +void serial_test_kprobe_multi_bench_attach(void) +{ + if (test__start_subtest("kernel")) + test_kprobe_multi_bench_attach(true); + if (test__start_subtest("modules")) + test_kprobe_multi_bench_attach(false); +} + void test_kprobe_multi_test(void) { if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c new file mode 100644 index 000000000000..1fbe7e4ac00a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "kprobe_multi.skel.h" +#include "trace_helpers.h" +#include "bpf/libbpf_internal.h" + +static void kprobe_multi_testmod_check(struct kprobe_multi *skel) +{ + ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test2_result, 1, "kprobe_test2_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test3_result, 1, "kprobe_test3_result"); + + ASSERT_EQ(skel->bss->kretprobe_testmod_test1_result, 1, "kretprobe_test1_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test2_result, 1, "kretprobe_test2_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test3_result, 1, "kretprobe_test3_result"); +} + +static void test_testmod_attach_api(struct bpf_kprobe_multi_opts *opts) +{ + struct kprobe_multi *skel = NULL; + + skel = kprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load")) + return; + + skel->bss->pid = getpid(); + + skel->links.test_kprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kprobe_testmod, + NULL, opts); + if (!skel->links.test_kprobe_testmod) + goto cleanup; + + opts->retprobe = true; + skel->links.test_kretprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kretprobe_testmod, + NULL, opts); + if (!skel->links.test_kretprobe_testmod) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + kprobe_multi_testmod_check(skel); + +cleanup: + kprobe_multi__destroy(skel); +} + +static void test_testmod_attach_api_addrs(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + unsigned long long addrs[3]; + + addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1"); + ASSERT_NEQ(addrs[0], 0, "ksym_get_addr"); + addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2"); + ASSERT_NEQ(addrs[1], 0, "ksym_get_addr"); + addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3"); + ASSERT_NEQ(addrs[2], 0, "ksym_get_addr"); + + opts.addrs = (const unsigned long *) addrs; + opts.cnt = ARRAY_SIZE(addrs); + + test_testmod_attach_api(&opts); +} + +static void test_testmod_attach_api_syms(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *syms[3] = { + "bpf_testmod_fentry_test1", + "bpf_testmod_fentry_test2", + "bpf_testmod_fentry_test3", + }; + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + test_testmod_attach_api(&opts); +} + +void serial_test_kprobe_multi_testmod_test(void) +{ + if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + return; + + if (test__start_subtest("testmod_attach_api_syms")) + test_testmod_attach_api_syms(); + if (test__start_subtest("testmod_attach_api_addrs")) + test_testmod_attach_api_addrs(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c new file mode 100644 index 000000000000..a3f238f51d05 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include <test_progs.h> + +#include "test_libbpf_get_fd_by_id_opts.skel.h" + +void test_libbpf_get_fd_by_id_opts(void) +{ + struct test_libbpf_get_fd_by_id_opts *skel; + struct bpf_map_info info_m = {}; + __u32 len = sizeof(info_m), value; + int ret, zero = 0, fd = -1; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, fd_opts_rdonly, + .open_flags = BPF_F_RDONLY, + ); + + skel = test_libbpf_get_fd_by_id_opts__open_and_load(); + if (!ASSERT_OK_PTR(skel, + "test_libbpf_get_fd_by_id_opts__open_and_load")) + return; + + ret = test_libbpf_get_fd_by_id_opts__attach(skel); + if (!ASSERT_OK(ret, "test_libbpf_get_fd_by_id_opts__attach")) + goto close_prog; + + ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.data_input), + &info_m, &len); + if (!ASSERT_OK(ret, "bpf_map_get_info_by_fd")) + goto close_prog; + + fd = bpf_map_get_fd_by_id(info_m.id); + if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id")) + goto close_prog; + + fd = bpf_map_get_fd_by_id_opts(info_m.id, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id_opts")) + goto close_prog; + + fd = bpf_map_get_fd_by_id_opts(info_m.id, &fd_opts_rdonly); + if (!ASSERT_GE(fd, 0, "bpf_map_get_fd_by_id_opts")) + goto close_prog; + + /* Map lookup should work with read-only fd. */ + ret = bpf_map_lookup_elem(fd, &zero, &value); + if (!ASSERT_OK(ret, "bpf_map_lookup_elem")) + goto close_prog; + + if (!ASSERT_EQ(value, 0, "map value mismatch")) + goto close_prog; + + /* Map update should not work with read-only fd. */ + ret = bpf_map_update_elem(fd, &zero, &len, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem")) + goto close_prog; + + /* Map update should work with read-write fd. */ + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.data_input), &zero, + &len, BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem")) + goto close_prog; + + /* Prog get fd with opts set should not work (no kernel support). */ + ret = bpf_prog_get_fd_by_id_opts(0, &fd_opts_rdonly); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_prog_get_fd_by_id_opts")) + goto close_prog; + + /* Link get fd with opts set should not work (no kernel support). */ + ret = bpf_link_get_fd_by_id_opts(0, &fd_opts_rdonly); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_link_get_fd_by_id_opts")) + goto close_prog; + + /* BTF get fd with opts set should not work (no kernel support). */ + ret = bpf_btf_get_fd_by_id_opts(0, &fd_opts_rdonly); + ASSERT_EQ(ret, -EINVAL, "bpf_btf_get_fd_by_id_opts"); + +close_prog: + if (fd >= 0) + close(fd); + + test_libbpf_get_fd_by_id_opts__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 93e9cddaadcf..efb8bd43653c 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -139,6 +139,14 @@ static void test_libbpf_bpf_map_type_str(void) snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); uppercase(buf); + /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED + * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value + * (map_type). For this enum value, libbpf_bpf_map_type_str() picks + * BPF_MAP_TYPE_CGROUP_STORAGE. + */ + if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0) + continue; + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c new file mode 100644 index 000000000000..0ed8132ce1c3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -0,0 +1,775 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/btf.h> +#include <test_btf.h> +#include <linux/btf.h> +#include <test_progs.h> +#include <network_helpers.h> + +#include "linked_list.skel.h" +#include "linked_list_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} linked_list_fail_tests[] = { +#define TEST(test, off) \ + { #test "_missing_lock_push_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_push_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, + TEST(kptr, 32) + TEST(global, 16) + TEST(map, 0) + TEST(inner_map, 0) +#undef TEST +#define TEST(test, op) \ + { #test "_kptr_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ + { #test "_global_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ + { #test "_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \ + { #test "_inner_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, + TEST(kptr, push_front) + TEST(kptr, push_back) + TEST(kptr, pop_front) + TEST(kptr, pop_back) + TEST(global, push_front) + TEST(global, push_back) + TEST(global, pop_front) + TEST(global, pop_back) + TEST(map, push_front) + TEST(map, push_back) + TEST(map, pop_front) + TEST(map, pop_back) + TEST(inner_map, push_front) + TEST(inner_map, push_back) + TEST(inner_map, pop_front) + TEST(inner_map, pop_back) +#undef TEST + { "map_compat_kprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, + { "map_compat_kretprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, + { "map_compat_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, + { "map_compat_perf", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, + { "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, + { "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, + { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, + { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" }, + { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, + { "obj_new_acq", "Unreleased reference id=" }, + { "use_after_drop", "invalid mem access 'scalar'" }, + { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_read_head", "direct access to bpf_list_head is disallowed" }, + { "direct_write_head", "direct access to bpf_list_head is disallowed" }, + { "direct_read_node", "direct access to bpf_list_node is disallowed" }, + { "direct_write_node", "direct access to bpf_list_node is disallowed" }, + { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, + { "use_after_unlock_push_back", "invalid mem access 'scalar'" }, + { "double_push_front", "arg#1 expected pointer to allocated object" }, + { "double_push_back", "arg#1 expected pointer to allocated object" }, + { "no_node_value_type", "bpf_list_node not found at offset=0" }, + { "incorrect_value_type", + "operation on bpf_list_head expects arg#1 bpf_list_node at offset=0 in struct foo, " + "but arg is at offset=0 in struct bar" }, + { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_node_off1", "bpf_list_node not found at offset=1" }, + { "incorrect_node_off2", "arg#1 offset=40, but expected bpf_list_node at offset=0 in struct foo" }, + { "no_head_type", "bpf_list_head not found at offset=0" }, + { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, + { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, + { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, + { "pop_front_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, + { "pop_back_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, +}; + +static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct linked_list_fail *skel; + struct bpf_program *prog; + int ret; + + skel = linked_list_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = linked_list_fail__load(skel); + if (!ASSERT_ERR(ret, "linked_list_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + linked_list_fail__destroy(skel); +} + +static void clear_fields(struct bpf_map *map) +{ + char buf[24]; + int key = 0; + + memset(buf, 0xff, sizeof(buf)); + ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields"); +} + +enum { + TEST_ALL, + PUSH_POP, + PUSH_POP_MULT, + LIST_IN_LIST, +}; + +static void test_linked_list_success(int mode, bool leave_in_map) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct linked_list *skel; + int ret; + + skel = linked_list__open_and_load(); + if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load")) + return; + + if (mode == LIST_IN_LIST) + goto lil; + if (mode == PUSH_POP_MULT) + goto ppm; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts); + ASSERT_OK(ret, "map_list_push_pop"); + ASSERT_OK(opts.retval, "map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); + ASSERT_OK(ret, "global_list_push_pop"); + ASSERT_OK(opts.retval, "global_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + if (mode == PUSH_POP) + goto end; + +ppm: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "global_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + if (mode == PUSH_POP_MULT) + goto end; + +lil: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts); + ASSERT_OK(ret, "map_list_in_list"); + ASSERT_OK(opts.retval, "map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts); + ASSERT_OK(ret, "inner_map_list_in_list"); + ASSERT_OK(opts.retval, "inner_map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); + ASSERT_OK(ret, "global_list_in_list"); + ASSERT_OK(opts.retval, "global_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); +end: + linked_list__destroy(skel); +} + +#define SPIN_LOCK 2 +#define LIST_HEAD 3 +#define LIST_NODE 4 + +static struct btf *init_btf(void) +{ + int id, lid, hid, nid; + struct btf *btf; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "btf__new_empty")) + return NULL; + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + if (!ASSERT_EQ(id, 1, "btf__add_int")) + goto end; + lid = btf__add_struct(btf, "bpf_spin_lock", 4); + if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock")) + goto end; + hid = btf__add_struct(btf, "bpf_list_head", 16); + if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head")) + goto end; + nid = btf__add_struct(btf, "bpf_list_node", 16); + if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node")) + goto end; + return btf; +end: + btf__free(btf); + return NULL; +} + +static void test_btf(void) +{ + struct btf *btf = NULL; + int id, err; + + while (test__start_subtest("btf: too many locks")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + err = btf__add_field(btf, "c", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -E2BIG, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing lock")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 16); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: bad offset")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EEXIST, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing contains:")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing struct")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing node")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c")) + break; + + err = btf__load_into_kernel(btf); + btf__free(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + break; + } + + while (test__start_subtest("btf: node incorrect type")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 4); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: multiple bpf_list_node with name b")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 52); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0); + if (!ASSERT_OK(err, "btf__add_field foo::d")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned AA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned ABA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar:a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar:b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar:c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag")) + break; + id = btf__add_struct(btf, "baz", 36); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0); + if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a")) + break; + id = btf__add_struct(btf, "bam", 16); + if (!ASSERT_EQ(id, 11, "btf__add_struct bam")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bam::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: list_node and rb_node in same struct")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + + id = btf__add_struct(btf, "bpf_rb_node", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct bpf_rb_node")) + break; + id = btf__add_struct(btf, "bar", 40); + if (!ASSERT_EQ(id, 6, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "c", 5, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 7, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:bar:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } +} + +void test_linked_list(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) { + if (!test__start_subtest(linked_list_fail_tests[i].prog_name)) + continue; + test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name, + linked_list_fail_tests[i].err_msg); + } + test_btf(); + test_linked_list_success(PUSH_POP, false); + test_linked_list_success(PUSH_POP, true); + test_linked_list_success(PUSH_POP_MULT, false); + test_linked_list_success(PUSH_POP_MULT, true); + test_linked_list_success(LIST_IN_LIST, false); + test_linked_list_success(LIST_IN_LIST, true); + test_linked_list_success(TEST_ALL, false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 1102e4f42d2d..130a3b21e467 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -47,7 +47,8 @@ static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) fd = bpf_prog_get_fd_by_id(p.prog_ids[i]); ASSERT_GE(fd, 0, "prog_get_fd_by_id"); - ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd"); + ASSERT_OK(bpf_prog_get_info_by_fd(fd, &info, &info_len), + "prog_info_by_fd"); close(fd); if (info.attach_btf_id == @@ -173,10 +174,12 @@ static void test_lsm_cgroup_functional(void) ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); - /* AF_UNIX is prohibited. */ - fd = socket(AF_UNIX, SOCK_STREAM, 0); - ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK)) + /* AF_UNIX is prohibited. */ + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); close(fd); /* AF_INET6 gets default policy (sk_priority). */ @@ -233,11 +236,18 @@ static void test_lsm_cgroup_functional(void) /* AF_INET6+SOCK_STREAM * AF_PACKET+SOCK_RAW + * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed * listen_fd * client_fd * accepted_fd */ - ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + if (skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK) + /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2"); + else + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); /* start_server * bind(ETH_P_ALL) diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index fdcea7a61491..3533a4ecad01 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -5,83 +5,6 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" -static char log_buf[1024 * 1024]; - -struct { - const char *prog_name; - const char *err_msg; -} map_kptr_fail_tests[] = { - { "size_not_bpf_dw", "kptr access size must be BPF_DW" }, - { "non_const_var_off", "kptr access cannot have variable offset" }, - { "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" }, - { "misaligned_access_write", "kptr access misaligned expected=8 off=7" }, - { "misaligned_access_read", "kptr access misaligned expected=8 off=1" }, - { "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" }, - { "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" }, - { "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, - { "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" }, - { "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" }, - { "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" }, - { "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" }, - { "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" }, - { "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" }, - { "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" }, - { "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" }, - { "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, - { "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" }, - { "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" }, - { "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" }, - { "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" }, - { "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" }, - { "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" }, - { "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" }, - { "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" }, -}; - -static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, - .kernel_log_size = sizeof(log_buf), - .kernel_log_level = 1); - struct map_kptr_fail *skel; - struct bpf_program *prog; - int ret; - - skel = map_kptr_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts")) - return; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto end; - - bpf_program__set_autoload(prog, true); - - ret = map_kptr_fail__load(skel); - if (!ASSERT_ERR(ret, "map_kptr__load must fail")) - goto end; - - if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { - fprintf(stderr, "Expected: %s\n", err_msg); - fprintf(stderr, "Verifier: %s\n", log_buf); - } - -end: - map_kptr_fail__destroy(skel); -} - -static void test_map_kptr_fail(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) { - if (!test__start_subtest(map_kptr_fail_tests[i].prog_name)) - continue; - test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name, - map_kptr_fail_tests[i].err_msg); - } -} - static void test_map_kptr_success(bool test_run) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -105,7 +28,7 @@ static void test_map_kptr_success(bool test_run) ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); if (test_run) - return; + goto exit; ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); @@ -132,6 +55,7 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); +exit: map_kptr__destroy(skel); } @@ -144,5 +68,6 @@ void test_map_kptr(void) */ test_map_kptr_success(true); } - test_map_kptr_fail(); + + RUN_TESTS(map_kptr_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c index 2c53eade88e3..8b67dfc10f5c 100644 --- a/tools/testing/selftests/bpf/prog_tests/metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/metadata.c @@ -16,7 +16,7 @@ static int duration; static int prog_holds_map(int prog_fd, int map_fd) { struct bpf_prog_info prog_info = {}; - struct bpf_prog_info map_info = {}; + struct bpf_map_info map_info = {}; __u32 prog_info_len; __u32 map_info_len; __u32 *map_ids; @@ -25,12 +25,12 @@ static int prog_holds_map(int prog_fd, int map_fd) int i; map_info_len = sizeof(map_info); - ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + ret = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); if (ret) return -errno; prog_info_len = sizeof(prog_info); - ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); if (ret) return -errno; @@ -44,7 +44,7 @@ static int prog_holds_map(int prog_fd, int map_fd) prog_info.map_ids = ptr_to_u64(map_ids); prog_info_len = sizeof(prog_info); - ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); if (ret) { ret = -errno; goto free_map_ids; diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index eb2feaac81fe..653b0a20fab9 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -488,7 +488,7 @@ static void run_test(struct migrate_reuseport_test_case *test_case, goto close_servers; } - /* Tie requests to the first four listners */ + /* Tie requests to the first four listeners */ err = start_clients(test_case); if (!ASSERT_OK(err, "start_clients")) goto close_clients; diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 37b002ca1167..a271d5a0f7ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -64,7 +64,7 @@ void test_mmap(void) /* get map's ID */ memset(&map_info, 0, map_info_sz); - err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz); + err = bpf_map_get_info_by_fd(data_map_fd, &map_info, &map_info_sz); if (CHECK(err, "map_get_info", "failed %d\n", errno)) goto cleanup; data_map_id = map_info.id; diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d0e50dcf47c..7fc01ff490db 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -103,6 +103,13 @@ void test_module_attach(void) ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); bpf_link__destroy(link); + link = bpf_program__attach(skel->progs.kprobe_multi); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) + goto cleanup; + + ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + bpf_link__destroy(link); + cleanup: test_module_attach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/nested_trust.c b/tools/testing/selftests/bpf/prog_tests/nested_trust.c new file mode 100644 index 000000000000..39886f58924e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/nested_trust.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "nested_trust_failure.skel.h" +#include "nested_trust_success.skel.h" + +void test_nested_trust(void) +{ + RUN_TESTS(nested_trust_success); + RUN_TESTS(nested_trust_failure); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index 224eba6fef2e..3a25f1c743a1 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -54,7 +54,7 @@ void serial_test_perf_link(void) goto cleanup; memset(&info, 0, sizeof(info)); - err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len); + err = bpf_link_get_info_by_fd(link_fd, &info, &info_len); if (!ASSERT_OK(err, "link_get_info")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index d95cee5867b7..c799a3c5ad1f 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -18,7 +18,7 @@ __u32 get_map_id(struct bpf_object *obj, const char *name) if (CHECK(!map, "find map", "NULL map")) return 0; - err = bpf_obj_get_info_by_fd(bpf_map__fd(map), + err = bpf_map_get_info_by_fd(bpf_map__fd(map), &map_info, &map_info_len); CHECK(err, "get map info", "err %d errno %d", err, errno); return map_info.id; diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c index 1ccd2bdf8fa8..01f1d1b6715a 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c @@ -12,7 +12,7 @@ static void check_run_cnt(int prog_fd, __u64 run_cnt) __u32 info_len = sizeof(info); int err; - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c new file mode 100644 index 000000000000..156fa95c42f6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include <network_helpers.h> + +#include "rbtree.skel.h" +#include "rbtree_fail.skel.h" +#include "rbtree_btf_fail__wrong_node_type.skel.h" +#include "rbtree_btf_fail__add_wrong_type.skel.h" + +static void test_rbtree_add_nodes(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes), &opts); + ASSERT_OK(ret, "rbtree_add_nodes run"); + ASSERT_OK(opts.retval, "rbtree_add_nodes retval"); + ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes less_callback_ran"); + + rbtree__destroy(skel); +} + +static void test_rbtree_add_and_remove(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove), &opts); + ASSERT_OK(ret, "rbtree_add_and_remove"); + ASSERT_OK(opts.retval, "rbtree_add_and_remove retval"); + ASSERT_EQ(skel->data->removed_key, 5, "rbtree_add_and_remove first removed key"); + + rbtree__destroy(skel); +} + +static void test_rbtree_first_and_remove(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_first_and_remove), &opts); + ASSERT_OK(ret, "rbtree_first_and_remove"); + ASSERT_OK(opts.retval, "rbtree_first_and_remove retval"); + ASSERT_EQ(skel->data->first_data[0], 2, "rbtree_first_and_remove first rbtree_first()"); + ASSERT_EQ(skel->data->removed_key, 1, "rbtree_first_and_remove first removed key"); + ASSERT_EQ(skel->data->first_data[1], 4, "rbtree_first_and_remove second rbtree_first()"); + + rbtree__destroy(skel); +} + +void test_rbtree_success(void) +{ + if (test__start_subtest("rbtree_add_nodes")) + test_rbtree_add_nodes(); + if (test__start_subtest("rbtree_add_and_remove")) + test_rbtree_add_and_remove(); + if (test__start_subtest("rbtree_first_and_remove")) + test_rbtree_first_and_remove(); +} + +#define BTF_FAIL_TEST(suffix) \ +void test_rbtree_btf_fail__##suffix(void) \ +{ \ + struct rbtree_btf_fail__##suffix *skel; \ + \ + skel = rbtree_btf_fail__##suffix##__open_and_load(); \ + if (!ASSERT_ERR_PTR(skel, \ + "rbtree_btf_fail__" #suffix "__open_and_load unexpected success")) \ + rbtree_btf_fail__##suffix##__destroy(skel); \ +} + +#define RUN_BTF_FAIL_TEST(suffix) \ + if (test__start_subtest("rbtree_btf_fail__" #suffix)) \ + test_rbtree_btf_fail__##suffix(); + +BTF_FAIL_TEST(wrong_node_type); +BTF_FAIL_TEST(add_wrong_type); + +void test_rbtree_btf_fail(void) +{ + RUN_BTF_FAIL_TEST(wrong_node_type); + RUN_BTF_FAIL_TEST(add_wrong_type); +} + +void test_rbtree_fail(void) +{ + RUN_TESTS(rbtree_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c new file mode 100644 index 000000000000..447d8560ecb6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include <bpf/btf.h> +#include "rcu_read_lock.skel.h" +#include "cgroup_helpers.h" + +static unsigned long long cgroup_id; + +static void test_success(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.get_cgroup_id, true); + bpf_program__set_autoload(skel->progs.task_succ, true); + bpf_program__set_autoload(skel->progs.no_lock, true); + bpf_program__set_autoload(skel->progs.two_regions, true); + bpf_program__set_autoload(skel->progs.non_sleepable_1, true); + bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val"); + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + rcu_read_lock__destroy(skel); +} + +static void test_rcuptr_acquire(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.task_acquire, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + ASSERT_OK(err, "skel_attach"); +out: + rcu_read_lock__destroy(skel); +} + +static const char * const inproper_region_tests[] = { + "miss_lock", + "miss_unlock", + "non_sleepable_rcu_mismatch", + "inproper_sleepable_helper", + "inproper_sleepable_kfunc", + "nested_rcu_region", +}; + +static void test_inproper_region(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +static const char * const rcuptr_misuse_tests[] = { + "task_untrusted_non_rcuptr", + "task_untrusted_rcuptr", + "cross_rcu_region", +}; + +static void test_rcuptr_misuse(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +void test_rcu_read_lock(void) +{ + struct btf *vmlinux_btf; + int cgroup_fd; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { + test__skip(); + goto out; + } + + cgroup_fd = test__join_cgroup("/rcu_read_lock"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) + goto out; + + cgroup_id = get_cgroup_id("/rcu_read_lock"); + if (test__start_subtest("success")) + test_success(); + if (test__start_subtest("rcuptr_acquire")) + test_rcuptr_acquire(); + if (test__start_subtest("negative_tests_inproper_region")) + test_inproper_region(); + if (test__start_subtest("negative_tests_rcuptr_misuse")) + test_rcuptr_misuse(); + close(cgroup_fd); +out: + btf__free(vmlinux_btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c index f3af2627b599..23552d3e3365 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursion.c +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -31,8 +31,8 @@ void test_recursion(void) bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); - err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete), - &prog_info, &prog_info_len); + err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.on_delete), + &prog_info, &prog_info_len); if (!ASSERT_OK(err, "get_prog_info")) goto out; ASSERT_EQ(prog_info.recursion_misses, 2, "recursion_misses"); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 9a80fe8a6427..ac104dc652e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -13,6 +13,7 @@ #include <linux/perf_event.h> #include <linux/ring_buffer.h> #include "test_ringbuf.lskel.h" +#include "test_ringbuf_map_key.lskel.h" #define EDONE 7777 @@ -58,6 +59,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } +static struct test_ringbuf_map_key_lskel *skel_map_key; static struct test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; @@ -81,7 +83,7 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } -void test_ringbuf(void) +static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; @@ -297,3 +299,65 @@ cleanup: ring_buffer__free(ringbuf); test_ringbuf_lskel__destroy(skel); } + +static int process_map_key_sample(void *ctx, void *data, size_t len) +{ + struct sample *s; + int err, val; + + s = data; + switch (s->seq) { + case 1: + ASSERT_EQ(s->value, 42, "sample_value"); + err = bpf_map_lookup_elem(skel_map_key->maps.hash_map.map_fd, + s, &val); + ASSERT_OK(err, "hash_map bpf_map_lookup_elem"); + ASSERT_EQ(val, 1, "hash_map val"); + return -EDONE; + default: + return 0; + } +} + +static void ringbuf_map_key_subtest(void) +{ + int err; + + skel_map_key = test_ringbuf_map_key_lskel__open(); + if (!ASSERT_OK_PTR(skel_map_key, "test_ringbuf_map_key_lskel__open")) + return; + + skel_map_key->maps.ringbuf.max_entries = getpagesize(); + skel_map_key->bss->pid = getpid(); + + err = test_ringbuf_map_key_lskel__load(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__load")) + goto cleanup; + + ringbuf = ring_buffer__new(skel_map_key->maps.ringbuf.map_fd, + process_map_key_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + err = test_ringbuf_map_key_lskel__attach(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__attach")) + goto cleanup_ringbuf; + + syscall(__NR_getpgid); + ASSERT_EQ(skel_map_key->bss->seq, 1, "skel_map_key->bss->seq"); + err = ring_buffer__poll(ringbuf, -1); + ASSERT_EQ(err, -EDONE, "ring_buffer__poll"); + +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_map_key_lskel__destroy(skel_map_key); +} + +void test_ringbuf(void) +{ + if (test__start_subtest("ringbuf")) + ringbuf_subtest(); + if (test__start_subtest("ringbuf_map_key")) + ringbuf_map_key_subtest(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index 018611e6b248..7d4a9b3d3722 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -4,6 +4,7 @@ #define _GNU_SOURCE #include <sched.h> #include <linux/socket.h> +#include <linux/tls.h> #include <net/if.h> #include "test_progs.h" @@ -83,6 +84,76 @@ static void test_udp(int family) ASSERT_EQ(bss->nr_binddev, 1, "nr_bind"); } +static void test_ktls(int family) +{ + struct tls12_crypto_info_aes_gcm_128 aes128; + struct setget_sockopt__bss *bss = skel->bss; + int cfd = -1, sfd = -1, fd = -1, ret; + char buf; + + memset(bss, 0, sizeof(*bss)); + + sfd = start_server(family, SOCK_STREAM, + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); + if (!ASSERT_GE(sfd, 0, "start_server")) + return; + fd = connect_to_fd(sfd, 0); + if (!ASSERT_GE(fd, 0, "connect_to_fd")) + goto err_out; + + cfd = accept(sfd, NULL, 0); + if (!ASSERT_GE(cfd, 0, "accept")) + goto err_out; + + close(sfd); + sfd = -1; + + /* Setup KTLS */ + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (!ASSERT_OK(ret, "setsockopt")) + goto err_out; + ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (!ASSERT_OK(ret, "setsockopt")) + goto err_out; + + memset(&aes128, 0, sizeof(aes128)); + aes128.info.version = TLS_1_2_VERSION; + aes128.info.cipher_type = TLS_CIPHER_AES_GCM_128; + + ret = setsockopt(fd, SOL_TLS, TLS_TX, &aes128, sizeof(aes128)); + if (!ASSERT_OK(ret, "setsockopt")) + goto err_out; + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &aes128, sizeof(aes128)); + if (!ASSERT_OK(ret, "setsockopt")) + goto err_out; + + /* KTLS is enabled */ + + close(fd); + /* At this point, the cfd socket is at the CLOSE_WAIT state + * and still run TLS protocol. The test for + * BPF_TCP_CLOSE_WAIT should be run at this point. + */ + ret = read(cfd, &buf, sizeof(buf)); + ASSERT_EQ(ret, 0, "read"); + close(cfd); + + ASSERT_EQ(bss->nr_listen, 1, "nr_listen"); + ASSERT_EQ(bss->nr_connect, 1, "nr_connect"); + ASSERT_EQ(bss->nr_active, 1, "nr_active"); + ASSERT_EQ(bss->nr_passive, 1, "nr_passive"); + ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create"); + ASSERT_EQ(bss->nr_binddev, 2, "nr_bind"); + ASSERT_EQ(bss->nr_fin_wait1, 1, "nr_fin_wait1"); + return; + +err_out: + close(fd); + close(cfd); + close(sfd); +} + void test_setget_sockopt(void) { cg_fd = test__join_cgroup(CG_NAME); @@ -118,6 +189,8 @@ void test_setget_sockopt(void) test_tcp(AF_INET); test_udp(AF_INET6); test_udp(AF_INET); + test_ktls(AF_INET6); + test_ktls(AF_INET); done: setget_sockopt__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 3e190ed63976..1374b626a985 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -29,7 +29,23 @@ static int stop, duration; static bool configure_stack(void) { + char tc_version[128]; char tc_cmd[BUFSIZ]; + char *prog; + FILE *tc; + + /* Check whether tc is built with libbpf. */ + tc = popen("tc -V", "r"); + if (CHECK_FAIL(!tc)) + return false; + if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) + return false; + if (strstr(tc_version, ", libbpf ")) + prog = "test_sk_assign_libbpf.bpf.o"; + else + prog = "test_sk_assign.bpf.o"; + if (CHECK_FAIL(pclose(tc))) + return false; /* Move to a new networking namespace */ if (CHECK_FAIL(unshare(CLONE_NEWNET))) @@ -46,8 +62,8 @@ configure_stack(void) /* Load qdisc, BPF program */ if (CHECK_FAIL(system("tc qdisc add dev lo clsact"))) return false; - sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", - "direct-action object-file ./test_sk_assign.bpf.o", + sprintf(tc_cmd, "%s %s %s %s %s", "tc filter add dev lo ingress bpf", + "direct-action object-file", prog, "section tc", (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose"); if (CHECK(system(tc_cmd), "BPF load failed;", @@ -129,15 +145,12 @@ get_port(int fd) static ssize_t rcv_msg(int srv_client, int type) { - struct sockaddr_storage ss; char buf[BUFSIZ]; - socklen_t slen; if (type == SOCK_STREAM) return read(srv_client, &buf, sizeof(buf)); else - return recvfrom(srv_client, &buf, sizeof(buf), 0, - (struct sockaddr *)&ss, &slen); + return recvfrom(srv_client, &buf, sizeof(buf), 0, NULL, NULL); } static int diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 99dac5292b41..bc6817aee9aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include <sys/mman.h> struct s { int a; @@ -22,7 +23,8 @@ void test_skeleton(void) struct test_skeleton__kconfig *kcfg; const void *elf_bytes; size_t elf_bytes_sz = 0; - int i; + void *m; + int i, fd; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -124,6 +126,13 @@ void test_skeleton(void) ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + fd = bpf_map__fd(skel->maps.data_non_mmapable); + m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success")) + munmap(m, getpagesize()); + + ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 0aa088900699..0ce25a967481 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -299,9 +299,9 @@ static __u32 query_prog_id(int prog_fd) __u32 info_len = sizeof(info); int err; - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") || - !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd")) + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(info_len, sizeof(info), "bpf_prog_get_info_by_fd")) return 0; return info.id; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 2cf0c7a3fe23..567e07c19ecc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -30,6 +30,8 @@ #define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 +#define __always_unused __attribute__((__unused__)) + #define _FAIL(errnum, fmt...) \ ({ \ error_at_line(0, (errnum), __func__, __LINE__, fmt); \ @@ -321,7 +323,8 @@ static int socket_loopback(int family, int sotype) return socket_loopback_reuseport(family, sotype, -1); } -static void test_insert_invalid(int family, int sotype, int mapfd) +static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u32 key = 0; u64 value; @@ -338,7 +341,8 @@ static void test_insert_invalid(int family, int sotype, int mapfd) FAIL_ERRNO("map_update: expected EBADF"); } -static void test_insert_opened(int family, int sotype, int mapfd) +static void test_insert_opened(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u32 key = 0; u64 value; @@ -359,7 +363,8 @@ static void test_insert_opened(int family, int sotype, int mapfd) xclose(s); } -static void test_insert_bound(int family, int sotype, int mapfd) +static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -386,7 +391,8 @@ close: xclose(s); } -static void test_insert(int family, int sotype, int mapfd) +static void test_insert(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u64 value; u32 key; @@ -402,7 +408,8 @@ static void test_insert(int family, int sotype, int mapfd) xclose(s); } -static void test_delete_after_insert(int family, int sotype, int mapfd) +static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u64 value; u32 key; @@ -419,7 +426,8 @@ static void test_delete_after_insert(int family, int sotype, int mapfd) xclose(s); } -static void test_delete_after_close(int family, int sotype, int mapfd) +static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { int err, s; u64 value; @@ -442,7 +450,8 @@ static void test_delete_after_close(int family, int sotype, int mapfd) FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); } -static void test_lookup_after_insert(int family, int sotype, int mapfd) +static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u64 cookie, value; socklen_t len; @@ -470,7 +479,8 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd) xclose(s); } -static void test_lookup_after_delete(int family, int sotype, int mapfd) +static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { int err, s; u64 value; @@ -493,7 +503,8 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd) xclose(s); } -static void test_lookup_32_bit_value(int family, int sotype, int mapfd) +static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u32 key, value32; int err, s; @@ -523,7 +534,8 @@ close: xclose(s); } -static void test_update_existing(int family, int sotype, int mapfd) +static void test_update_existing(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { int s1, s2; u64 value; @@ -551,7 +563,7 @@ close_s1: /* Exercise the code path where we destroy child sockets that never * got accept()'ed, aka orphans, when parent socket gets closed. */ -static void test_destroy_orphan_child(int family, int sotype, int mapfd) +static void do_destroy_orphan_child(int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -582,10 +594,38 @@ close_srv: xclose(s); } +static void test_destroy_orphan_child(struct test_sockmap_listen *skel, + int family, int sotype, int mapfd) +{ + int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + const struct test { + int progfd; + enum bpf_attach_type atype; + } tests[] = { + { -1, -1 }, + { msg_verdict, BPF_SK_MSG_VERDICT }, + { skb_verdict, BPF_SK_SKB_VERDICT }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + if (t->progfd != -1 && + xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0) + return; + + do_destroy_orphan_child(family, sotype, mapfd); + + if (t->progfd != -1) + xbpf_prog_detach2(t->progfd, mapfd, t->atype); + } +} + /* Perform a passive open after removing listening socket from SOCKMAP * to ensure that callbacks get restored properly. */ -static void test_clone_after_delete(int family, int sotype, int mapfd) +static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -621,7 +661,8 @@ close_srv: * SOCKMAP, but got accept()'ed only after the parent has been removed * from SOCKMAP, gets cloned without parent psock state or callbacks. */ -static void test_accept_after_delete(int family, int sotype, int mapfd) +static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; const u32 zero = 0; @@ -675,7 +716,8 @@ close_srv: /* Check that child socket that got created and accepted while parent * was in a SOCKMAP is cloned without parent psock state or callbacks. */ -static void test_accept_before_delete(int family, int sotype, int mapfd) +static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; const u32 zero = 0, one = 1; @@ -784,7 +826,8 @@ done: return NULL; } -static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) +static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct connect_accept_ctx ctx = { 0 }; struct sockaddr_storage addr; @@ -847,7 +890,8 @@ static void *listen_thread(void *arg) return NULL; } -static void test_race_insert_listen(int family, int socktype, int mapfd) +static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused, + int family, int socktype, int mapfd) { struct connect_accept_ctx ctx = { 0 }; const u32 zero = 0; @@ -1473,7 +1517,8 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { const struct op_test { - void (*fn)(int family, int sotype, int mapfd); + void (*fn)(struct test_sockmap_listen *skel, + int family, int sotype, int mapfd); const char *name; int sotype; } tests[] = { @@ -1520,7 +1565,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, if (!test__start_subtest(s)) continue; - t->fn(family, sotype, map_fd); + t->fn(skel, family, sotype, map_fd); test_ops_cleanup(map); } } diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c new file mode 100644 index 000000000000..d9270bd3d920 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +#include "test_spin_lock.skel.h" +#include "test_spin_lock_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} spin_lock_fail_tests[] = { + { "lock_id_kptr_preserve", + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " + "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=ptr_ expected=percpu_ptr_" }, + { "lock_id_global_zero", + "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_mapval_preserve", + "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) " + "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_innermapval_preserve", + "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) " + "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, +}; + +static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_spin_lock_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_spin_lock_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_spin_lock_fail__load(skel); + if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail")) + goto end; + + /* Skip check if JIT does not support kfuncs */ + if (strstr(log_buf, "JIT does not support calling kernel function")) { + test__skip(); + goto end; + } + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_spin_lock_fail__destroy(skel); +} + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + pthread_exit(arg); +} + +void test_spin_lock_success(void) +{ + struct test_spin_lock *skel; + pthread_t thread_id[4]; + int prog_fd, i; + void *ret; + + skel = test_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load")) + return; + prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test); + for (i = 0; i < 4; i++) { + int err; + + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end; + } + + for (i = 0; i < 4; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end; + } +end: + test_spin_lock__destroy(skel); +} + +void test_spin_lock(void) +{ + int i; + + test_spin_lock_success(); + + for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) { + if (!test__start_subtest(spin_lock_fail_tests[i].prog_name)) + continue; + test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name, + spin_lock_fail_tests[i].err_msg); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c deleted file mode 100644 index 15eb1372d771..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include <network_helpers.h> - -static void *spin_lock_thread(void *arg) -{ - int err, prog_fd = *(u32 *) arg; - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .repeat = 10000, - ); - - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); - pthread_exit(arg); -} - -void test_spinlock(void) -{ - const char *file = "./test_spin_lock.bpf.o"; - pthread_t thread_id[4]; - struct bpf_object *obj = NULL; - int prog_fd; - int err = 0, i; - void *ret; - - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (CHECK_FAIL(err)) { - printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); - goto close_prog; - } - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd))) - goto close_prog; - - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || - ret != (void *)&prog_fd)) - goto close_prog; -close_prog: - bpf_object__close(obj); -} diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c new file mode 100644 index 000000000000..f79fa5bc9a8d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <sys/wait.h> +#include <test_progs.h> +#include <unistd.h> + +#include "task_kfunc_failure.skel.h" +#include "task_kfunc_success.skel.h" + +static struct task_kfunc_success *open_load_task_kfunc_skel(void) +{ + struct task_kfunc_success *skel; + int err; + + skel = task_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = task_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + task_kfunc_success__destroy(skel); + return NULL; +} + +static void run_success_test(const char *prog_name) +{ + struct task_kfunc_success *skel; + int status; + pid_t child_pid; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_task_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_spawn_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + if (child_pid == 0) + _exit(0); + waitpid(child_pid, &status, 0); + + ASSERT_OK(skel->bss->err, "post_wait_err"); + +cleanup: + bpf_link__destroy(link); + task_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_task_acquire_release_argument", + "test_task_acquire_release_current", + "test_task_acquire_leave_in_map", + "test_task_xchg_release", + "test_task_get_release", + "test_task_current_acquire_release", + "test_task_from_pid_arg", + "test_task_from_pid_current", + "test_task_from_pid_invalid", +}; + +void test_task_kfunc(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + RUN_TESTS(task_kfunc_failure); +} diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 035c263aab1b..ea8537c54413 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -3,12 +3,16 @@ #define _GNU_SOURCE /* See feature_test_macros(7) */ #include <unistd.h> +#include <sched.h> +#include <pthread.h> #include <sys/syscall.h> /* For SYS_xxx definitions */ #include <sys/types.h> #include <test_progs.h> +#include "task_local_storage_helpers.h" #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" +#include "task_storage_nodeadlock.skel.h" static void test_sys_enter_exit(void) { @@ -39,7 +43,8 @@ out: static void test_exit_creds(void) { struct task_local_storage_exit_creds *skel; - int err; + int err, run_count, sync_rcu_calls = 0; + const int MAX_SYNC_RCU_CALLS = 1000; skel = task_local_storage_exit_creds__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) @@ -53,8 +58,19 @@ static void test_exit_creds(void) if (CHECK_FAIL(system("ls > /dev/null"))) goto out; - /* sync rcu to make sure exit_creds() is called for "ls" */ - kern_sync_rcu(); + /* kern_sync_rcu is not enough on its own as the read section we want + * to wait for may start after we enter synchronize_rcu, so our call + * won't wait for the section to finish. Loop on the run counter + * as well to ensure the program has run. + */ + do { + kern_sync_rcu(); + run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST); + } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS); + + ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS, + "sync_rcu count too high"); + ASSERT_NEQ(run_count, 0, "run_count"); ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count"); ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count"); out: @@ -63,24 +79,160 @@ out: static void test_recursion(void) { + int err, map_fd, prog_fd, task_fd; struct task_ls_recursion *skel; - int err; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + long value; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open")) + return; skel = task_ls_recursion__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) - return; + goto out; err = task_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ + skel->bss->test_pid = getpid(); syscall(SYS_gettid); + skel->bss->test_pid = 0; + task_ls_recursion__detach(skel); + + /* Refer to the comment in BPF_PROG(on_update) for + * the explanation on the value 201 and 100. + */ + map_fd = bpf_map__fd(skel->maps.map_a); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_a"); + ASSERT_EQ(value, 201, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + + map_fd = bpf_map__fd(skel->maps.map_b); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_b"); + ASSERT_EQ(value, 100, "map_b value"); + + prog_fd = bpf_program__fd(skel->progs.on_lookup); + memset(&info, 0, sizeof(info)); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_update); + memset(&info, 0, sizeof(info)); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_enter); + memset(&info, 0, sizeof(info)); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion"); out: + close(task_fd); task_ls_recursion__destroy(skel); } +static bool stop; + +static void waitall(const pthread_t *tids, int nr) +{ + int i; + + stop = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +static void *sock_create_loop(void *arg) +{ + struct task_storage_nodeadlock *skel = arg; + int fd; + + while (!stop) { + fd = socket(AF_INET, SOCK_STREAM, 0); + close(fd); + if (skel->bss->nr_get_errs || skel->bss->nr_del_errs) + stop = true; + } + + return NULL; +} + +static void test_nodeadlock(void) +{ + struct task_storage_nodeadlock *skel; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const int nr_threads = 32; + pthread_t tids[nr_threads]; + int i, prog_fd, err; + cpu_set_t old, new; + + /* Pin all threads to one cpu to increase the chance of preemption + * in a sleepable bpf prog. + */ + CPU_ZERO(&new); + CPU_SET(0, &new); + err = sched_getaffinity(getpid(), sizeof(old), &old); + if (!ASSERT_OK(err, "getaffinity")) + return; + err = sched_setaffinity(getpid(), sizeof(new), &new); + if (!ASSERT_OK(err, "setaffinity")) + return; + + skel = task_storage_nodeadlock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto done; + + /* Unnecessary recursion and deadlock detection are reproducible + * in the preemptible kernel. + */ + if (!skel->kconfig->CONFIG_PREEMPT) { + test__skip(); + goto done; + } + + err = task_storage_nodeadlock__attach(skel); + ASSERT_OK(err, "attach prog"); + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&tids[i], NULL, sock_create_loop, skel); + if (err) { + /* Only assert once here to avoid excessive + * PASS printing during test failure. + */ + ASSERT_OK(err, "pthread_create"); + waitall(tids, i); + goto done; + } + } + + /* With 32 threads, 1s is enough to reproduce the issue */ + sleep(1); + waitall(tids, nr_threads); + + info_len = sizeof(info); + prog_fd = bpf_program__fd(skel->progs.socket_post_create); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "prog recursion"); + + ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); + +done: + task_storage_nodeadlock__destroy(skel); + sched_setaffinity(getpid(), sizeof(old), &old); +} + void test_task_local_storage(void) { if (test__start_subtest("sys_enter_exit")) @@ -89,4 +241,6 @@ void test_task_local_storage(void) test_exit_creds(); if (test__start_subtest("recursion")) test_recursion(); + if (test__start_subtest("nodeadlock")) + test_nodeadlock(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c index 4a505a5adf4d..e873766276d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c @@ -29,8 +29,8 @@ static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd) __u32 info_len = sizeof(info); int ret; - ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd")) + ret = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(ret, "bpf_prog_get_info_by_fd")) return ret; ret = bpf_tc_attach(hook, &opts); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index cb6a53b3e023..bca5e6839ac4 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -11,12 +11,12 @@ */ #include <arpa/inet.h> -#include <linux/if.h> #include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> #include <linux/time_types.h> #include <linux/net_tstamp.h> +#include <net/if.h> #include <stdbool.h> #include <stdio.h> #include <sys/stat.h> @@ -59,10 +59,6 @@ #define IFADDR_STR_LEN 18 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" -#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" -#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" -#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" - #define TIMEOUT_MILLIS 10000 #define NSEC_PER_SEC 1000000000ULL @@ -115,7 +111,9 @@ static void netns_setup_namespaces_nofail(const char *verb) } struct netns_setup_result { + int ifindex_veth_src; int ifindex_veth_src_fwd; + int ifindex_veth_dst; int ifindex_veth_dst_fwd; }; @@ -139,27 +137,6 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } -static int get_ifindex(const char *name) -{ - char path[PATH_MAX]; - char buf[32]; - FILE *f; - int ret; - - snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); - f = fopen(path, "r"); - if (!ASSERT_OK_PTR(f, path)) - return -1; - - ret = fread(buf, 1, sizeof(buf), f); - if (!ASSERT_GT(ret, 0, "fread ifindex")) { - fclose(f); - return -1; - } - fclose(f); - return atoi(buf); -} - #define SYS(fmt, ...) \ ({ \ char cmd[1024]; \ @@ -182,11 +159,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; - result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); - if (result->ifindex_veth_src_fwd < 0) + result->ifindex_veth_src = if_nametoindex("veth_src"); + if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) + goto fail; + + result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); + if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) goto fail; - result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); - if (result->ifindex_veth_dst_fwd < 0) + + result->ifindex_veth_dst = if_nametoindex("veth_dst"); + if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) + goto fail; + + result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); + if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) goto fail; SYS("ip link set veth_src netns " NS_SRC); @@ -260,19 +246,78 @@ fail: return -1; } -static int netns_load_bpf(void) +static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex) +{ + char err_str[128], ifname[16]; + int err; + + qdisc_hook->ifindex = ifindex; + qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + err = bpf_tc_hook_create(qdisc_hook); + snprintf(err_str, sizeof(err_str), + "qdisc add dev %s clsact", + if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>"); + err_str[sizeof(err_str) - 1] = 0; + ASSERT_OK(err, err_str); + + return err; +} + +static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook, + enum bpf_tc_attach_point xgress, + const struct bpf_program *prog, int priority) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_attach); + char err_str[128], ifname[16]; + int err; + + qdisc_hook->attach_point = xgress; + tc_attach.prog_fd = bpf_program__fd(prog); + tc_attach.priority = priority; + err = bpf_tc_attach(qdisc_hook, &tc_attach); + snprintf(err_str, sizeof(err_str), + "filter add dev %s %s prio %d bpf da %s", + if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>", + xgress == BPF_TC_INGRESS ? "ingress" : "egress", + priority, bpf_program__name(prog)); + err_str[sizeof(err_str) - 1] = 0; + ASSERT_OK(err, err_str); + + return err; +} + +#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \ + if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \ + goto fail; \ +}) + +#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \ + if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \ + goto fail; \ +}) + +static int netns_load_bpf(const struct bpf_program *src_prog, + const struct bpf_program *dst_prog, + const struct bpf_program *chk_prog, + const struct netns_setup_result *setup_result) { - SYS("tc qdisc add dev veth_src_fwd clsact"); - SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " - SRC_PROG_PIN_FILE); - SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); - - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " - DST_PROG_PIN_FILE); - SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + int err; + + /* tc qdisc add dev veth_src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); + /* tc filter add dev veth_src_fwd ingress bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -499,78 +544,79 @@ done: close(client_fd); } -static int netns_load_dtime_bpf(struct test_tc_dtime *skel) +static int netns_load_dtime_bpf(struct test_tc_dtime *skel, + const struct netns_setup_result *setup_result) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); struct nstoken *nstoken; - -#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file -#define PIN(__prog) ({ \ - int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \ - if (!ASSERT_OK(err, "pin " #__prog)) \ - goto fail; \ - }) + int err; /* setup ns_src tc progs */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - PIN(egress_host); - PIN(ingress_host); - SYS("tc qdisc add dev veth_src clsact"); - SYS("tc filter add dev veth_src ingress bpf da object-pinned " - PIN_FNAME(ingress_host)); - SYS("tc filter add dev veth_src egress bpf da object-pinned " - PIN_FNAME(egress_host)); + /* tc qdisc add dev veth_src clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); + /* tc filter add dev veth_src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev veth_src egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - PIN(egress_host); - PIN(ingress_host); - SYS("tc qdisc add dev veth_dst clsact"); - SYS("tc filter add dev veth_dst ingress bpf da object-pinned " - PIN_FNAME(ingress_host)); - SYS("tc filter add dev veth_dst egress bpf da object-pinned " - PIN_FNAME(egress_host)); + /* tc qdisc add dev veth_dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); + /* tc filter add dev veth_dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev veth_dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - PIN(ingress_fwdns_prio100); - PIN(egress_fwdns_prio100); - PIN(ingress_fwdns_prio101); - PIN(egress_fwdns_prio101); - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio100)); - SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio101)); - SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio100)); - SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio101)); - SYS("tc qdisc add dev veth_src_fwd clsact"); - SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio100)); - SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio101)); - SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio100)); - SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio101)); + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio100, 100); + /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio101, 101); + /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio100, 100); + /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio101, 101); + + /* tc qdisc add dev veth_src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); + /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio100, 100); + /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio101, 101); + /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio100, 100); + /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); - -#undef PIN - return 0; fail: close_netns(nstoken); - return -1; + return err; } enum { @@ -746,7 +792,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_dtime__load")) goto done; - if (netns_load_dtime_bpf(skel)) + if (netns_load_dtime_bpf(skel, setup_result)) goto done; nstoken = open_netns(NS_FWD); @@ -788,7 +834,6 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) { struct nstoken *nstoken = NULL; struct test_tc_neigh_fib *skel = NULL; - int err; nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns fwd")) @@ -801,19 +846,8 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; /* bpf_fib_lookup() checks if forwarding is enabled */ @@ -849,19 +883,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_neigh__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) @@ -896,19 +919,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_peer__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) @@ -991,6 +1003,8 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1034,8 +1048,8 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) goto fail; - ifindex = get_ifindex("tun_fwd"); - if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) + ifindex = if_nametoindex("tun_fwd"); + if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd")) goto fail; skel->rodata->IFINDEX_SRC = ifindex; @@ -1045,31 +1059,21 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_peer__load")) goto fail; - err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto fail; - - err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto fail; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto fail; - /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect packets * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. */ - SYS("tc qdisc add dev tun_fwd clsact"); - SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " - SRC_PROG_PIN_FILE); - - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " - DST_PROG_PIN_FILE); - SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); + /* tc qdisc add dev tun_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); + /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ + XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); + + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); @@ -1134,7 +1138,7 @@ static void *test_tc_redirect_run_tests(void *arg) return NULL; } -void serial_test_tc_redirect(void) +void test_tc_redirect(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 617bbce6ef8f..5cf85d0f9827 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -485,7 +485,7 @@ static void misc(void) goto check_linum; ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); - if (ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) + if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) goto check_linum; } @@ -505,6 +505,8 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin"); + ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); @@ -539,7 +541,7 @@ void test_tcp_hdr_options(void) goto skel_destroy; cg_fd = test__join_cgroup(CG_NAME); - if (ASSERT_GE(cg_fd, 0, "join_cgroup")) + if (!ASSERT_GE(cg_fd, 0, "join_cgroup")) goto skel_destroy; for (i = 0; i < ARRAY_SIZE(tests); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c index c381faaae741..2900c5e9a016 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright 2022 Sony Group Corporation */ +#define _GNU_SOURCE +#include <fcntl.h> #include <sys/prctl.h> #include <test_progs.h> #include "bpf_syscall_macro.skel.h" @@ -13,6 +15,8 @@ void test_bpf_syscall_macro(void) unsigned long exp_arg3 = 13; unsigned long exp_arg4 = 14; unsigned long exp_arg5 = 15; + loff_t off_in, off_out; + ssize_t r; /* check whether it can open program */ skel = bpf_syscall_macro__open(); @@ -33,6 +37,7 @@ void test_bpf_syscall_macro(void) /* check whether args of syscall are copied correctly */ prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5); + #if defined(__aarch64__) || defined(__s390__) ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1"); #else @@ -68,6 +73,18 @@ void test_bpf_syscall_macro(void) ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4"); ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5"); + r = splice(-42, &off_in, 42, &off_out, 0x12340000, SPLICE_F_NONBLOCK); + err = -errno; + ASSERT_EQ(r, -1, "splice_res"); + ASSERT_EQ(err, -EBADF, "splice_err"); + + ASSERT_EQ(skel->bss->splice_fd_in, -42, "splice_arg1"); + ASSERT_EQ(skel->bss->splice_off_in, (__u64)&off_in, "splice_arg2"); + ASSERT_EQ(skel->bss->splice_fd_out, 42, "splice_arg3"); + ASSERT_EQ(skel->bss->splice_off_out, (__u64)&off_out, "splice_arg4"); + ASSERT_EQ(skel->bss->splice_len, 0x12340000, "splice_arg5"); + ASSERT_EQ(skel->bss->splice_flags, SPLICE_F_NONBLOCK, "splice_arg6"); + cleanup: bpf_syscall_macro__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 7295cc60f724..e0879df38639 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -1,104 +1,43 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #include <test_progs.h> - -const char *err_str; -bool found; - -static int libbpf_debug_print(enum libbpf_print_level level, - const char *format, va_list args) -{ - char *log_buf; - - if (level != LIBBPF_WARN || - strcmp(format, "libbpf: \n%s\n")) { - vprintf(format, args); - return 0; - } - - log_buf = va_arg(args, char *); - if (!log_buf) - goto out; - if (err_str && strstr(log_buf, err_str) == 0) - found = true; -out: - printf(format, log_buf); - return 0; -} - -extern int extra_prog_load_log_flags; - -static int check_load(const char *file) -{ - struct bpf_object *obj = NULL; - struct bpf_program *prog; - int err; - - found = false; - - obj = bpf_object__open_file(file, NULL); - err = libbpf_get_error(obj); - if (err) - return err; - - prog = bpf_object__next_program(obj, NULL); - if (!prog) { - err = -ENOENT; - goto err_out; - } - - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); - bpf_program__set_log_level(prog, extra_prog_load_log_flags); - - err = bpf_object__load(obj); - -err_out: - bpf_object__close(obj); - return err; -} - -struct test_def { - const char *file; - const char *err_str; -}; +#include "test_global_func1.skel.h" +#include "test_global_func2.skel.h" +#include "test_global_func3.skel.h" +#include "test_global_func4.skel.h" +#include "test_global_func5.skel.h" +#include "test_global_func6.skel.h" +#include "test_global_func7.skel.h" +#include "test_global_func8.skel.h" +#include "test_global_func9.skel.h" +#include "test_global_func10.skel.h" +#include "test_global_func11.skel.h" +#include "test_global_func12.skel.h" +#include "test_global_func13.skel.h" +#include "test_global_func14.skel.h" +#include "test_global_func15.skel.h" +#include "test_global_func16.skel.h" +#include "test_global_func17.skel.h" +#include "test_global_func_ctx_args.skel.h" void test_test_global_funcs(void) { - struct test_def tests[] = { - { "test_global_func1.bpf.o", "combined stack size of 4 calls is 544" }, - { "test_global_func2.bpf.o" }, - { "test_global_func3.bpf.o", "the call stack of 8 frames" }, - { "test_global_func4.bpf.o" }, - { "test_global_func5.bpf.o", "expected pointer to ctx, but got PTR" }, - { "test_global_func6.bpf.o", "modified ctx ptr R2" }, - { "test_global_func7.bpf.o", "foo() doesn't return scalar" }, - { "test_global_func8.bpf.o" }, - { "test_global_func9.bpf.o" }, - { "test_global_func10.bpf.o", "invalid indirect read from stack" }, - { "test_global_func11.bpf.o", "Caller passes invalid args into func#1" }, - { "test_global_func12.bpf.o", "invalid mem access 'mem_or_null'" }, - { "test_global_func13.bpf.o", "Caller passes invalid args into func#1" }, - { "test_global_func14.bpf.o", "reference type('FWD S') size cannot be determined" }, - { "test_global_func15.bpf.o", "At program exit the register R0 has value" }, - { "test_global_func16.bpf.o", "invalid indirect read from stack" }, - { "test_global_func17.bpf.o", "Caller passes invalid args into func#1" }, - }; - libbpf_print_fn_t old_print_fn = NULL; - int err, i, duration = 0; - - old_print_fn = libbpf_set_print(libbpf_debug_print); - - for (i = 0; i < ARRAY_SIZE(tests); i++) { - const struct test_def *test = &tests[i]; - - if (!test__start_subtest(test->file)) - continue; - - err_str = test->err_str; - err = check_load(test->file); - CHECK_FAIL(!!err ^ !!err_str); - if (err_str) - CHECK(found, "", "expected string '%s'", err_str); - } - libbpf_set_print(old_print_fn); + RUN_TESTS(test_global_func1); + RUN_TESTS(test_global_func2); + RUN_TESTS(test_global_func3); + RUN_TESTS(test_global_func4); + RUN_TESTS(test_global_func5); + RUN_TESTS(test_global_func6); + RUN_TESTS(test_global_func7); + RUN_TESTS(test_global_func8); + RUN_TESTS(test_global_func9); + RUN_TESTS(test_global_func10); + RUN_TESTS(test_global_func11); + RUN_TESTS(test_global_func12); + RUN_TESTS(test_global_func13); + RUN_TESTS(test_global_func14); + RUN_TESTS(test_global_func15); + RUN_TESTS(test_global_func16); + RUN_TESTS(test_global_func17); + RUN_TESTS(test_global_func_ctx_args); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 244c01125126..16175d579bc7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -75,7 +75,8 @@ static int test_lsm(struct lsm *skel) skel->bss->monitored_pid = getpid(); err = stack_mprotect(); - if (!ASSERT_EQ(errno, EPERM, "stack_mprotect")) + if (!ASSERT_EQ(err, -1, "stack_mprotect") || + !ASSERT_EQ(errno, EPERM, "stack_mprotect")) return err; ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count"); diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index eea274110267..07ad457f3370 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -421,7 +421,7 @@ static void *test_tunnel_run_tests(void *arg) return NULL; } -void serial_test_tunnel(void) +void test_tunnel(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index a479080533db..770fcc3bb1ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -45,8 +45,9 @@ void serial_test_tp_attach_query(void) prog_info.xlated_prog_len = 0; prog_info.nr_map_ids = 0; info_len = sizeof(prog_info); - err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len); - if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n", + err = bpf_prog_get_info_by_fd(prog_fd[i], &prog_info, + &info_len); + if (CHECK(err, "bpf_prog_get_info_by_fd", "err %d errno %d\n", err, errno)) goto cleanup1; saved_prog_ids[i] = prog_info.id; diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index d5022b91d1e4..48dc9472e160 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -15,7 +15,7 @@ static void test_fentry(void) err = tracing_struct__attach(skel); if (!ASSERT_OK(err, "tracing_struct__attach")) - return; + goto destroy_skel; ASSERT_OK(trigger_module_test_read(256), "trigger_read"); @@ -54,6 +54,7 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret"); tracing_struct__detach(skel); +destroy_skel: tracing_struct__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 564b75bc087f..e91d0d1769f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -2,8 +2,6 @@ #define _GNU_SOURCE #include <test_progs.h> -#define MAX_TRAMP_PROGS 38 - struct inst { struct bpf_object *obj; struct bpf_link *link; @@ -37,14 +35,21 @@ void serial_test_trampoline_count(void) { char *file = "test_trampoline_count.bpf.o"; char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" }; - struct inst inst[MAX_TRAMP_PROGS + 1] = {}; + int bpf_max_tramp_links, err, i, prog_fd; struct bpf_program *prog; struct bpf_link *link; - int prog_fd, err, i; + struct inst *inst; LIBBPF_OPTS(bpf_test_run_opts, opts); + bpf_max_tramp_links = get_bpf_max_tramp_links(); + if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links")) + return; + inst = calloc(bpf_max_tramp_links + 1, sizeof(*inst)); + if (!ASSERT_OK_PTR(inst, "inst")) + return; + /* attach 'allowed' trampoline programs */ - for (i = 0; i < MAX_TRAMP_PROGS; i++) { + for (i = 0; i < bpf_max_tramp_links; i++) { prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]); if (!prog) goto cleanup; @@ -74,7 +79,7 @@ void serial_test_trampoline_count(void) if (!ASSERT_EQ(link, NULL, "ptr_is_null")) goto cleanup; - /* and finaly execute the probe */ + /* and finally execute the probe */ prog_fd = bpf_program__fd(prog); if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) goto cleanup; @@ -91,4 +96,5 @@ cleanup: bpf_link__destroy(inst[i].link); bpf_object__close(inst[i].obj); } + free(inst); } diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c new file mode 100644 index 000000000000..9317d5fa2635 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> +#include "type_cast.skel.h" + +static void test_xdp(void) +{ + struct type_cast *skel; + int err, prog_fd; + char buf[128]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_xdp, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_xdp); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval"); + + ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex"); + ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex"); + ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name"); + ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum"); + +out: + type_cast__destroy(skel); +} + +static void test_tc(void) +{ + struct type_cast *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_skb, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_skb); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "tc test_run retval"); + + ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len"); + ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len"); + ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len"); + ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len"); + ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare"); + +out: + type_cast__destroy(skel); +} + +static const char * const negative_tests[] = { + "untrusted_ptr", + "kctx_u64", +}; + +static void test_negative(void) +{ + struct bpf_program *prog; + struct type_cast *skel; + int i, err; + + for (i = 0; i < ARRAY_SIZE(negative_tests); i++) { + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = type_cast__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + type_cast__destroy(skel); + } +} + +void test_type_cast(void) +{ + if (test__start_subtest("xdp")) + test_xdp(); + if (test__start_subtest("tc")) + test_tc(); + if (test__start_subtest("negative")) + test_negative(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c index 1ed3cc2092db..8383a99f610f 100644 --- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -179,7 +179,7 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails"); ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails"); - if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len), + if (ASSERT_OK(bpf_map_get_info_by_fd(map_fds[0], &map_info, &map_info_len), "obj_get_info_by_fd")) { ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails"); ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM, @@ -187,8 +187,8 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s } ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails"); - if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter), - &link_info, &link_info_len), + if (ASSERT_OK(bpf_link_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter), + &link_info, &link_info_len), "obj_get_info_by_fd")) { ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails"); ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM, @@ -269,7 +269,7 @@ void test_unpriv_bpf_disabled(void) } prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter); - ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len), + ASSERT_OK(bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len), "obj_get_info_by_fd"); prog_id = prog_info.id; ASSERT_GT(prog_id, 0, "valid_prog_id"); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c index 35b87c7ba5be..6558c857e620 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c @@ -3,20 +3,23 @@ #include <test_progs.h> #include "test_uprobe_autoattach.skel.h" +#include "progs/bpf_misc.h" /* uprobe attach point */ -static noinline int autoattach_trigger_func(int arg) +static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3, + int arg4, int arg5, int arg6, + int arg7, int arg8) { asm volatile (""); - return arg + 1; + return arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 + 1; } void test_uprobe_autoattach(void) { + const char *devnull_str = "/dev/null"; struct test_uprobe_autoattach *skel; - int trigger_val = 100, trigger_ret; - size_t malloc_sz = 1; - char *mem; + int trigger_ret; + FILE *devnull; skel = test_uprobe_autoattach__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -28,23 +31,45 @@ void test_uprobe_autoattach(void) skel->bss->test_pid = getpid(); /* trigger & validate uprobe & uretprobe */ - trigger_ret = autoattach_trigger_func(trigger_val); + trigger_ret = autoattach_trigger_func(1, 2, 3, 4, 5, 6, 7, 8); skel->bss->test_pid = getpid(); /* trigger & validate shared library u[ret]probes attached by name */ - mem = malloc(malloc_sz); + devnull = fopen(devnull_str, "r"); - ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1"); + ASSERT_EQ(skel->bss->uprobe_byname_parm1, 1, "check_uprobe_byname_parm1"); ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran"); ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc"); + ASSERT_EQ(skel->bss->uretprobe_byname_ret, trigger_ret, "check_uretprobe_byname_ret"); ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran"); - ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1"); + ASSERT_EQ(skel->bss->uprobe_byname2_parm1, (__u64)(long)devnull_str, + "check_uprobe_byname2_parm1"); ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran"); - ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc"); + ASSERT_EQ(skel->bss->uretprobe_byname2_rc, (__u64)(long)devnull, + "check_uretprobe_byname2_rc"); ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran"); - free(mem); + ASSERT_EQ(skel->bss->a[0], 1, "arg1"); + ASSERT_EQ(skel->bss->a[1], 2, "arg2"); + ASSERT_EQ(skel->bss->a[2], 3, "arg3"); +#if FUNC_REG_ARG_CNT > 3 + ASSERT_EQ(skel->bss->a[3], 4, "arg4"); +#endif +#if FUNC_REG_ARG_CNT > 4 + ASSERT_EQ(skel->bss->a[4], 5, "arg5"); +#endif +#if FUNC_REG_ARG_CNT > 5 + ASSERT_EQ(skel->bss->a[5], 6, "arg6"); +#endif +#if FUNC_REG_ARG_CNT > 6 + ASSERT_EQ(skel->bss->a[6], 7, "arg7"); +#endif +#if FUNC_REG_ARG_CNT > 7 + ASSERT_EQ(skel->bss->a[7], 8, "arg8"); +#endif + + fclose(devnull); cleanup: test_uprobe_autoattach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9ad9da0f215e..56ed1eb9b527 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -314,6 +314,7 @@ static FILE *urand_spawn(int *pid) if (fscanf(f, "%d", pid) != 1) { pclose(f); + errno = EINVAL; return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index 02b18d018b36..3a13e102c149 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -19,8 +19,6 @@ #include "../progs/test_user_ringbuf.h" -static size_t log_buf_sz = 1 << 20; /* 1 MB */ -static char obj_log_buf[1048576]; static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; static const long c_ringbuf_size = 1 << 12; /* 1 small page */ static const long c_max_entries = c_ringbuf_size / c_sample_size; @@ -663,21 +661,6 @@ cleanup: user_ringbuf_success__destroy(skel); } -static struct { - const char *prog_name; - const char *expected_err_msg; -} failure_tests[] = { - /* failure cases */ - {"user_ringbuf_callback_bad_access1", "negative offset dynptr_ptr ptr"}, - {"user_ringbuf_callback_bad_access2", "dereference of modified dynptr_ptr ptr"}, - {"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"}, - {"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"}, - {"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"}, - {"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"}, - {"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"}, - {"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"}, -}; - #define SUCCESS_TEST(_func) { _func, #_func } static struct { @@ -698,42 +681,6 @@ static struct { SUCCESS_TEST(test_user_ringbuf_blocking_reserve), }; -static void verify_fail(const char *prog_name, const char *expected_err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct bpf_program *prog; - struct user_ringbuf_fail *skel; - int err; - - opts.kernel_log_buf = obj_log_buf; - opts.kernel_log_size = log_buf_sz; - opts.kernel_log_level = 1; - - skel = user_ringbuf_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) - goto cleanup; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto cleanup; - - bpf_program__set_autoload(prog, true); - - bpf_map__set_max_entries(skel->maps.user_ringbuf, getpagesize()); - - err = user_ringbuf_fail__load(skel); - if (!ASSERT_ERR(err, "unexpected load success")) - goto cleanup; - - if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { - fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); - fprintf(stderr, "Verifier output: %s\n", obj_log_buf); - } - -cleanup: - user_ringbuf_fail__destroy(skel); -} - void test_user_ringbuf(void) { int i; @@ -745,10 +692,5 @@ void test_user_ringbuf(void) success_tests[i].test_callback(); } - for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { - if (!test__start_subtest(failure_tests[i].prog_name)) - continue; - - verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); - } + RUN_TESTS(user_ringbuf_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c index a47e7c0e1ffd..af4b95f57ac1 100644 --- a/tools/testing/selftests/bpf/prog_tests/verif_stats.c +++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c @@ -16,8 +16,9 @@ void test_verif_stats(void) if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) goto cleanup; - err = bpf_obj_get_info_by_fd(skel->progs.sys_enter.prog_fd, &info, &len); - if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + err = bpf_prog_get_info_by_fd(skel->progs.sys_enter.prog_fd, + &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) goto cleanup; if (!ASSERT_GT(info.verified_insns, 0, "verified_insns")) diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index 579d6ee83ce0..dd7f2bc70048 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -61,6 +61,9 @@ static bool kfunc_not_supported; static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, va_list args) { + if (level == LIBBPF_WARN) + vprintf(fmt, args); + if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n")) return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 9b9cf8458adf..f09505f8b038 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -18,7 +18,7 @@ static void test_xdp_adjust_tail_shrink(void) ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) + if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) return; err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -53,7 +53,7 @@ static void test_xdp_adjust_tail_grow(void) ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) + if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -63,6 +63,7 @@ static void test_xdp_adjust_tail_grow(void) expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ topts.data_in = &pkt_v6; topts.data_size_in = sizeof(pkt_v6); + topts.data_size_out = sizeof(buf); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6"); ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval"); @@ -75,10 +76,15 @@ static void test_xdp_adjust_tail_grow2(void) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; char buf[4096]; /* avoid segfault: large buf to hold grow results */ - int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/; struct bpf_object *obj; int err, cnt, i; int max_grow, prog_fd; + /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */ +#if defined(__s390x__) + int tailroom = 512; +#else + int tailroom = 320; +#endif LIBBPF_OPTS(bpf_test_run_opts, tattr, .repeat = 1, @@ -89,7 +95,7 @@ static void test_xdp_adjust_tail_grow2(void) ); err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) + if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; /* Test case-64 */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index 062fbc8c8e5e..d4cd9f873c14 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -18,7 +18,7 @@ void serial_test_xdp_attach(void) err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); if (CHECK_FAIL(err)) return; - err = bpf_obj_get_info_by_fd(fd1, &info, &len); + err = bpf_prog_get_info_by_fd(fd1, &info, &len); if (CHECK_FAIL(err)) goto out_1; id1 = info.id; @@ -28,7 +28,7 @@ void serial_test_xdp_attach(void) goto out_1; memset(&info, 0, sizeof(info)); - err = bpf_obj_get_info_by_fd(fd2, &info, &len); + err = bpf_prog_get_info_by_fd(fd2, &info, &len); if (CHECK_FAIL(err)) goto out_2; id2 = info.id; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index f775a1613833..481626a875d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -33,8 +33,8 @@ static void test_xdp_with_cpumap_helpers(void) prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); map_fd = bpf_map__fd(skel->maps.cpu_map); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); - if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) goto out_close; val.bpf_prog.fd = prog_fd; @@ -85,8 +85,8 @@ static void test_xdp_with_cpumap_frags_helpers(void) frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); map_fd = bpf_map__fd(skel->maps.cpu_map); - err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len); - if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + err = bpf_prog_get_info_by_fd(frags_prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) goto out_close; val.bpf_prog.fd = frags_prog_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index ead40016c324..ce6812558287 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -35,8 +35,8 @@ static void test_xdp_with_devmap_helpers(void) dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); map_fd = bpf_map__fd(skel->maps.dm_ports); - err = bpf_obj_get_info_by_fd(dm_fd, &info, &len); - if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + err = bpf_prog_get_info_by_fd(dm_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) goto out_close; val.bpf_prog.fd = dm_fd; @@ -98,8 +98,8 @@ static void test_xdp_with_devmap_frags_helpers(void) dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); map_fd = bpf_map__fd(skel->maps.dm_ports); - err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len); - if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + err = bpf_prog_get_info_by_fd(dm_fd_frags, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) goto out_close; val.bpf_prog.fd = dm_fd_frags; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index a50971c6cf4a..2666c84dbd01 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -4,10 +4,12 @@ #include <net/if.h> #include <linux/if_ether.h> #include <linux/if_packet.h> +#include <linux/if_link.h> #include <linux/ipv6.h> #include <linux/in6.h> #include <linux/udp.h> #include <bpf/bpf_endian.h> +#include <uapi/linux/netdev.h> #include "test_xdp_do_redirect.skel.h" #define SYS(fmt, ...) \ @@ -65,7 +67,11 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) - * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes */ +#if defined(__s390x__) +#define MAX_PKT_SIZE 3176 +#else #define MAX_PKT_SIZE 3368 +#endif static void test_max_pkt_size(int fd) { char data[MAX_PKT_SIZE + 1] = {}; @@ -92,7 +98,7 @@ void test_xdp_do_redirect(void) struct test_xdp_do_redirect *skel = NULL; struct nstoken *nstoken = NULL; struct bpf_link *link; - + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); struct xdp_md ctx_in = { .data = sizeof(__u32), .data_end = sizeof(data) }; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -153,6 +159,29 @@ void test_xdp_do_redirect(void) !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst")) goto out; + /* Check xdp features supported by veth driver */ + err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "veth_src bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "veth_src query_opts.feature_flags")) + goto out; + + err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "veth_dst bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "veth_dst query_opts.feature_flags")) + goto out; + memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN); skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */ skel->rodata->ifindex_in = ifindex_src; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index cd3aa340e65e..1dbddcab87a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -8,6 +8,7 @@ void serial_test_xdp_info(void) { __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id; const char *file = "./xdp_dummy.bpf.o"; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); struct bpf_prog_info info = {}; struct bpf_object *obj; int err, prog_fd; @@ -33,7 +34,7 @@ void serial_test_xdp_info(void) if (CHECK_FAIL(err)) return; - err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); if (CHECK(err, "get_prog_info", "errno=%d\n", errno)) goto out_close; @@ -61,6 +62,13 @@ void serial_test_xdp_info(void) if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id)) goto out; + /* Check xdp features supported by lo device */ + opts.feature_flags = ~0; + err = bpf_xdp_query(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &opts); + if (!ASSERT_OK(err, "bpf_xdp_query")) + goto out; + + ASSERT_EQ(opts.feature_flags, 0, "opts.feature_flags"); out: bpf_xdp_detach(IFINDEX_LO, 0, NULL); out_close: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 3e9d5c5521f0..e7e9f3c22edf 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -29,13 +29,13 @@ void serial_test_xdp_link(void) prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); memset(&prog_info, 0, sizeof(prog_info)); - err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); + err = bpf_prog_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); if (!ASSERT_OK(err, "fd_info1")) goto cleanup; id1 = prog_info.id; memset(&prog_info, 0, sizeof(prog_info)); - err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); + err = bpf_prog_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); if (!ASSERT_OK(err, "fd_info2")) goto cleanup; id2 = prog_info.id; @@ -119,7 +119,8 @@ void serial_test_xdp_link(void) goto cleanup; memset(&link_info, 0, sizeof(link_info)); - err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); + err = bpf_link_get_info_by_fd(bpf_link__fd(link), + &link_info, &link_info_len); if (!ASSERT_OK(err, "link_info")) goto cleanup; @@ -137,7 +138,8 @@ void serial_test_xdp_link(void) goto cleanup; memset(&link_info, 0, sizeof(link_info)); - err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); + err = bpf_link_get_info_by_fd(bpf_link__fd(link), + &link_info, &link_info_len); ASSERT_OK(err, "link_info"); ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c new file mode 100644 index 000000000000..aa4beae99f4f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include "xdp_metadata.skel.h" +#include "xdp_metadata2.skel.h" +#include "xdp_metadata.h" +#include "xsk.h" + +#include <bpf/btf.h> +#include <linux/errqueue.h> +#include <linux/if_link.h> +#include <linux/net_tstamp.h> +#include <linux/udp.h> +#include <sys/mman.h> +#include <net/if.h> +#include <poll.h> + +#define TX_NAME "veTX" +#define RX_NAME "veRX" + +#define UDP_PAYLOAD_BYTES 4 + +#define AF_XDP_SOURCE_PORT 1234 +#define AF_XDP_CONSUMER_PORT 8080 + +#define UMEM_NUM 16 +#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE +#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) +#define XDP_FLAGS XDP_FLAGS_DRV_MODE +#define QUEUE_ID 0 + +#define TX_ADDR "10.0.0.1" +#define RX_ADDR "10.0.0.2" +#define PREFIX_LEN "8" +#define FAMILY AF_INET + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +struct xsk { + void *umem_area; + struct xsk_umem *umem; + struct xsk_ring_prod fill; + struct xsk_ring_cons comp; + struct xsk_ring_prod tx; + struct xsk_ring_cons rx; + struct xsk_socket *socket; +}; + +static int open_xsk(int ifindex, struct xsk *xsk) +{ + int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + const struct xsk_socket_config socket_config = { + .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .bind_flags = XDP_COPY, + }; + const struct xsk_umem_config umem_config = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + }; + __u32 idx; + u64 addr; + int ret; + int i; + + xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap")) + return -1; + + ret = xsk_umem__create(&xsk->umem, + xsk->umem_area, UMEM_SIZE, + &xsk->fill, + &xsk->comp, + &umem_config); + if (!ASSERT_OK(ret, "xsk_umem__create")) + return ret; + + ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID, + xsk->umem, + &xsk->rx, + &xsk->tx, + &socket_config); + if (!ASSERT_OK(ret, "xsk_socket__create")) + return ret; + + /* First half of umem is for TX. This way address matches 1-to-1 + * to the completion queue index. + */ + + for (i = 0; i < UMEM_NUM / 2; i++) { + addr = i * UMEM_FRAME_SIZE; + printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); + } + + /* Second half of umem is for RX. */ + + ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); + if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve")) + return ret; + if (!ASSERT_EQ(idx, 0, "fill idx != 0")) + return -1; + + for (i = 0; i < UMEM_NUM / 2; i++) { + addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; + printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); + *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; + } + xsk_ring_prod__submit(&xsk->fill, ret); + + return 0; +} + +static void close_xsk(struct xsk *xsk) +{ + if (xsk->umem) + xsk_umem__delete(xsk->umem); + if (xsk->socket) + xsk_socket__delete(xsk->socket); + munmap(xsk->umem_area, UMEM_SIZE); +} + +static void ip_csum(struct iphdr *iph) +{ + __u32 sum = 0; + __u16 *p; + int i; + + iph->check = 0; + p = (void *)iph; + for (i = 0; i < sizeof(*iph) / sizeof(*p); i++) + sum += p[i]; + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + iph->check = ~sum; +} + +static int generate_packet(struct xsk *xsk, __u16 dst_port) +{ + struct xdp_desc *tx_desc; + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + void *data; + __u32 idx; + int ret; + + ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); + if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve")) + return -1; + + tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE; + printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); + data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + + eth = data; + iph = (void *)(eth + 1); + udph = (void *)(iph + 1); + + memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN); + memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN); + eth->h_proto = htons(ETH_P_IP); + + iph->version = 0x4; + iph->ihl = 0x5; + iph->tos = 0x9; + iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES); + iph->id = 0; + iph->frag_off = 0; + iph->ttl = 0; + iph->protocol = IPPROTO_UDP; + ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); + ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); + ip_csum(iph); + + udph->source = htons(AF_XDP_SOURCE_PORT); + udph->dest = htons(dst_port); + udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); + udph->check = 0; + + memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); + + tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; + xsk_ring_prod__submit(&xsk->tx, 1); + + ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (!ASSERT_GE(ret, 0, "sendto")) + return ret; + + return 0; +} + +static void complete_tx(struct xsk *xsk) +{ + __u32 idx; + __u64 addr; + + if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { + addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); + + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + xsk_ring_cons__release(&xsk->comp, 1); + } +} + +static void refill_rx(struct xsk *xsk, __u64 addr) +{ + __u32 idx; + + if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) { + printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); + *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; + xsk_ring_prod__submit(&xsk->fill, 1); + } +} + +static int verify_xsk_metadata(struct xsk *xsk) +{ + const struct xdp_desc *rx_desc; + struct pollfd fds = {}; + struct xdp_meta *meta; + struct ethhdr *eth; + struct iphdr *iph; + __u64 comp_addr; + void *data; + __u64 addr; + __u32 idx; + int ret; + + ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (!ASSERT_EQ(ret, 0, "recvfrom")) + return -1; + + fds.fd = xsk_socket__fd(xsk->socket); + fds.events = POLLIN; + + ret = poll(&fds, 1, 1000); + if (!ASSERT_GT(ret, 0, "poll")) + return -1; + + ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); + if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek")) + return -2; + + rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); + comp_addr = xsk_umem__extract_addr(rx_desc->addr); + addr = xsk_umem__add_offset_to_addr(rx_desc->addr); + printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", + xsk, idx, rx_desc->addr, addr, comp_addr); + data = xsk_umem__get_data(xsk->umem_area, addr); + + /* Make sure we got the packet offset correctly. */ + + eth = data; + ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); + iph = (void *)(eth + 1); + ASSERT_EQ((int)iph->version, 4, "iph->version"); + + /* custom metadata */ + + meta = data - sizeof(struct xdp_meta); + + if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp")) + return -1; + + if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) + return -1; + + xsk_ring_cons__release(&xsk->rx, 1); + refill_rx(xsk, comp_addr); + + return 0; +} + +void test_xdp_metadata(void) +{ + struct xdp_metadata2 *bpf_obj2 = NULL; + struct xdp_metadata *bpf_obj = NULL; + struct bpf_program *new_prog, *prog; + struct nstoken *tok = NULL; + __u32 queue_id = QUEUE_ID; + struct bpf_map *prog_arr; + struct xsk tx_xsk = {}; + struct xsk rx_xsk = {}; + __u32 val, key = 0; + int retries = 10; + int rx_ifindex; + int tx_ifindex; + int sock_fd; + int ret; + + /* Setup new networking namespace, with a veth pair. */ + + SYS("ip netns add xdp_metadata"); + tok = open_netns("xdp_metadata"); + SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME + " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); + SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01"); + SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS("ip link set dev " TX_NAME " up"); + SYS("ip link set dev " RX_NAME " up"); + SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); + + rx_ifindex = if_nametoindex(RX_NAME); + tx_ifindex = if_nametoindex(TX_NAME); + + /* Setup separate AF_XDP for TX and RX interfaces. */ + + ret = open_xsk(tx_ifindex, &tx_xsk); + if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) + goto out; + + ret = open_xsk(rx_ifindex, &rx_xsk); + if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) + goto out; + + bpf_obj = xdp_metadata__open(); + if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) + goto out; + + prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); + bpf_program__set_ifindex(prog, rx_ifindex); + bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); + + if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) + goto out; + + /* Make sure we can't add dev-bound programs to prog maps. */ + prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr"); + if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map")) + goto out; + + val = bpf_program__fd(prog); + if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key), + &val, sizeof(val), BPF_ANY), + "update prog_arr")) + goto out; + + /* Attach BPF program to RX interface. */ + + ret = bpf_xdp_attach(rx_ifindex, + bpf_program__fd(bpf_obj->progs.rx), + XDP_FLAGS, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto out; + + sock_fd = xsk_socket__fd(rx_xsk.socket); + ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); + if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) + goto out; + + /* Send packet destined to RX AF_XDP socket. */ + if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, + "generate AF_XDP_CONSUMER_PORT")) + goto out; + + /* Verify AF_XDP RX packet has proper metadata. */ + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, + "verify_xsk_metadata")) + goto out; + + complete_tx(&tx_xsk); + + /* Make sure freplace correctly picks up original bound device + * and doesn't crash. + */ + + bpf_obj2 = xdp_metadata2__open(); + if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton")) + goto out; + + new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx"); + bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx"); + + if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton")) + goto out; + + if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) + goto out; + + /* Send packet to trigger . */ + if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, + "generate freplace packet")) + goto out; + + while (!retries--) { + if (bpf_obj2->bss->called) + break; + usleep(10); + } + ASSERT_GT(bpf_obj2->bss->called, 0, "not called"); + +out: + close_xsk(&rx_xsk); + close_xsk(&tx_xsk); + xdp_metadata2__destroy(bpf_obj2); + xdp_metadata__destroy(bpf_obj); + if (tok) + close_netns(tok); + system("ip netns del xdp_metadata"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index 75550a40e029..c72083885b6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -94,12 +94,12 @@ static void test_synproxy(bool xdp) SYS("sysctl -w net.ipv4.tcp_syncookies=2"); SYS("sysctl -w net.ipv4.tcp_timestamps=1"); SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); - SYS("iptables -t raw -I PREROUTING \ + SYS("iptables-legacy -t raw -I PREROUTING \ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); - SYS("iptables -t filter -A INPUT \ + SYS("iptables-legacy -t filter -A INPUT \ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); - SYS("iptables -t filter -A INPUT \ + SYS("iptables-legacy -t filter -A INPUT \ -i tmp1 -m state --state INVALID -j DROP"); ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c new file mode 100644 index 000000000000..8b03c9bb4862 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Topology: + * --------- + * NS0 namespace | NS1 namespace | NS2 namespace + * | | + * +---------------+ | +---------------+ | + * | ipsec0 |---------| ipsec0 | | + * | 192.168.1.100 | | | 192.168.1.200 | | + * | if_id: bpf | | +---------------+ | + * +---------------+ | | + * | | | +---------------+ + * | | | | ipsec0 | + * \------------------------------------------| 192.168.1.200 | + * | | +---------------+ + * | | + * | | (overlay network) + * ------------------------------------------------------ + * | | (underlay network) + * +--------------+ | +--------------+ | + * | veth01 |----------| veth10 | | + * | 172.16.1.100 | | | 172.16.1.200 | | + * ---------------+ | +--------------+ | + * | | + * +--------------+ | | +--------------+ + * | veth02 |-----------------------------------| veth20 | + * | 172.16.2.100 | | | | 172.16.2.200 | + * +--------------+ | | +--------------+ + * + * + * Test Packet flow + * ----------- + * The tests perform 'ping 192.168.1.200' from the NS0 namespace: + * 1) request is routed to NS0 ipsec0 + * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based + * on the requested value. This makes the ipsec0 device in external mode + * select the destination tunnel + * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was + * used) and response is sent + * 4) response is received on NS0 ipsec0, tc ingress program is triggered and + * records the response if_id + * 5) requested if_id is compared with received if_id + */ + +#include <net/if.h> +#include <linux/rtnetlink.h> +#include <linux/if_link.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "xfrm_info.skel.h" + +#define NS0 "xfrm_test_ns0" +#define NS1 "xfrm_test_ns1" +#define NS2 "xfrm_test_ns2" + +#define IF_ID_0_TO_1 1 +#define IF_ID_0_TO_2 2 +#define IF_ID_1 3 +#define IF_ID_2 4 + +#define IP4_ADDR_VETH01 "172.16.1.100" +#define IP4_ADDR_VETH10 "172.16.1.200" +#define IP4_ADDR_VETH02 "172.16.2.100" +#define IP4_ADDR_VETH20 "172.16.2.200" + +#define ESP_DUMMY_PARAMS \ + "proto esp aead 'rfc4106(gcm(aes))' " \ + "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + +static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{ + LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, + .prog_fd = igr_fd); + LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1, + .prog_fd = egr_fd); + int ret; + + ret = bpf_tc_hook_create(hook); + if (!ASSERT_OK(ret, "create tc hook")) + return ret; + + if (igr_fd >= 0) { + hook->attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(hook, &opts1); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + if (egr_fd >= 0) { + hook->attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(hook, &opts2); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + return 0; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1); + SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2); +} + +static int config_underlay(void) +{ + SYS("ip netns add " NS0); + SYS("ip netns add " NS1); + SYS("ip netns add " NS2); + + /* NS0 <-> NS1 [veth01 <-> veth10] */ + SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS("ip -net " NS0 " link set dev veth01 up"); + SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS("ip -net " NS1 " link set dev veth10 up"); + + /* NS0 <-> NS2 [veth02 <-> veth20] */ + SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS("ip -net " NS0 " link set dev veth02 up"); + SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS("ip -net " NS2 " link set dev veth20 up"); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, + const char *ipv4_remote, int if_id) +{ + /* State: local -> remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); + + /* State: local <- remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); + + /* Policy: local -> remote */ + SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_local, ipv4_remote, if_id); + + /* Policy: local <- remote */ + SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_remote, ipv4_local, if_id); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b, + const char *ipv4_a, const char *ipv4_b, + int if_id_a, int if_id_b) +{ + return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) || + setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b); +} + +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type, + unsigned short len) +{ + struct rtattr *rta = + (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + return rta; +} + +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type, + const char *s) +{ + struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + + memcpy(RTA_DATA(rta), s, strlen(s)); + return rta; +} + +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type) +{ + return rtattr_add(nh, type, 0); +} + +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + uint8_t *end = (uint8_t *)nh + nh->nlmsg_len; + + attr->rta_len = end - (uint8_t *)attr; +} + +static int setup_xfrmi_external_dev(const char *ns) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + unsigned char data[128]; + } req; + struct rtattr *link_info, *info_data; + struct nstoken *nstoken; + int ret = -1, sock = -1; + struct nlmsghdr *nh; + + memset(&req, 0, sizeof(req)); + nh = &req.nh; + nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + nh->nlmsg_type = RTM_NEWLINK; + nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST; + + rtattr_add_str(nh, IFLA_IFNAME, "ipsec0"); + link_info = rtattr_begin(nh, IFLA_LINKINFO); + rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm"); + info_data = rtattr_begin(nh, IFLA_INFO_DATA); + rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0); + rtattr_end(nh, info_data); + rtattr_end(nh, link_info); + + nstoken = open_netns(ns); + if (!ASSERT_OK_PTR(nstoken, "setns")) + goto done; + + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); + if (!ASSERT_GE(sock, 0, "netlink socket")) + goto done; + ret = send(sock, nh, nh->nlmsg_len, 0); + if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length")) + goto done; + + ret = 0; +done: + if (sock != -1) + close(sock); + if (nstoken) + close_netns(nstoken); + return ret; +} + +static int config_overlay(void) +{ + if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10, + IF_ID_0_TO_1, IF_ID_1)) + goto fail; + if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20, + IF_ID_0_TO_2, IF_ID_2)) + goto fail; + + /* Older iproute2 doesn't support this option */ + if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) + goto fail; + + SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS("ip -net " NS0 " link set dev ipsec0 up"); + + SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS1 " link set dev ipsec0 up"); + + SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS2 " link set dev ipsec0 up"); + + return 0; +fail: + return -1; +} + +static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) +{ + skel->bss->req_if_id = if_id; + + SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); + + if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) + goto fail; + + return 0; +fail: + return -1; +} + +static void _test_xfrm_info(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd; + struct nstoken *nstoken = NULL; + struct xfrm_info *skel; + int ifindex; + + /* load and attach bpf progs to ipsec dev tc hook point */ + skel = xfrm_info__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load")) + goto done; + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto done; + ifindex = if_nametoindex("ipsec0"); + if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info); + get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info); + if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd, + set_xfrm_info_prog_fd)) + goto done; + + /* perform test */ + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1)) + goto done; + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2)) + goto done; + +done: + if (nstoken) + close_netns(nstoken); + xfrm_info__destroy(skel); +} + +void test_xfrm_info(void) +{ + cleanup(); + + if (!ASSERT_OK(config_underlay(), "config_underlay")) + goto done; + if (!ASSERT_OK(config_overlay(), "config_overlay")) + goto done; + + if (test__start_subtest("xfrm_info")) + _test_xfrm_info(); + +done: + cleanup(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c new file mode 100644 index 000000000000..1eb74ddca414 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); +} hash_map_bench SEC(".maps"); + +/* The number of slots to store times */ +#define NR_SLOTS 32 +#define NR_CPUS 256 +#define CPU_MASK (NR_CPUS-1) + +/* Configured by userspace */ +u64 nr_entries; +u64 nr_loops; +u32 __attribute__((__aligned__(8))) key[NR_CPUS]; + +/* Filled by us */ +u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS]; +u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS]; + +static inline void patch_key(u32 i) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + key[0] = i + 1; +#else + key[0] = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random and is configured by userspace */ +} + +static int lookup_callback(__u32 index, u32 *unused) +{ + patch_key(index); + return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1; +} + +static int loop_lookup_callback(__u32 index, u32 *unused) +{ + return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 times_index; + u64 start_time; + + times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS; + start_time = bpf_ktime_get_ns(); + bpf_loop(nr_loops, loop_lookup_callback, NULL, 0); + percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time; + percpu_times_index[cpu & CPU_MASK] += 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index 6286023fd62b..c5969ca6f26b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -19,13 +19,20 @@ struct { __type(value, __u64); } arraymap1 SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, __u64); + __type(value, __u32); +} hashmap1 SEC(".maps"); + __u32 key_sum = 0; __u64 val_sum = 0; SEC("iter/bpf_map_elem") int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) { - __u32 *key = ctx->key; + __u32 *hmap_val, *key = ctx->key; __u64 *val = ctx->value; if (key == (void *)0 || val == (void *)0) @@ -35,6 +42,18 @@ int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); key_sum += *key; val_sum += *val; + + /* workaround - It's necessary to do this convoluted (val, key) + * write into hashmap1, instead of simply doing + * bpf_map_update_elem(&hashmap1, val, key, BPF_ANY); + * because key has MEM_RDONLY flag and bpf_map_update elem expects + * types without this flag + */ + bpf_map_update_elem(&hashmap1, val, val, BPF_ANY); + hmap_val = bpf_map_lookup_elem(&hashmap1, val); + if (hmap_val) + *hmap_val = *key; + *val = *key; return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 285c008cbf9c..9ba14c37bbcc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -7,14 +7,14 @@ char _license[] SEC("license") = "GPL"; unsigned long last_sym_value = 0; -static inline char tolower(char c) +static inline char to_lower(char c) { if (c >= 'A' && c <= 'Z') c += ('a' - 'A'); return c; } -static inline char toupper(char c) +static inline char to_upper(char c) { if (c >= 'a' && c <= 'z') c -= ('a' - 'A'); @@ -54,7 +54,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx) type = iter->type; if (iter->module_name[0]) { - type = iter->exported ? toupper(type) : tolower(type); + type = iter->exported ? to_upper(type) : to_lower(type); BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", value, type, iter->name, iter->module_name); } else { diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 5bb11fe595a4..14e28f991451 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -2,6 +2,18 @@ #ifndef __BPF_MISC_H__ #define __BPF_MISC_H__ +#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) +#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) +#define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) +#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) + +/* Convenience macro for use with 'asm volatile' blocks */ +#define __naked __attribute__((naked)) +#define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory" +#define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory" +#define __imm(name) [name]"i"(name) +#define __imm_addr(name) [name]"i"(&name) + #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__x64_" @@ -16,4 +28,29 @@ #define SYS_PREFIX "__se_" #endif +/* How many arguments are passed to function in register */ +#if defined(__TARGET_ARCH_x86) || defined(__x86_64__) +#define FUNC_REG_ARG_CNT 6 +#elif defined(__i386__) +#define FUNC_REG_ARG_CNT 3 +#elif defined(__TARGET_ARCH_s390) || defined(__s390x__) +#define FUNC_REG_ARG_CNT 5 +#elif defined(__TARGET_ARCH_arm) || defined(__arm__) +#define FUNC_REG_ARG_CNT 4 +#elif defined(__TARGET_ARCH_arm64) || defined(__aarch64__) +#define FUNC_REG_ARG_CNT 8 +#elif defined(__TARGET_ARCH_mips) || defined(__mips__) +#define FUNC_REG_ARG_CNT 8 +#elif defined(__TARGET_ARCH_powerpc) || defined(__powerpc__) || defined(__powerpc64__) +#define FUNC_REG_ARG_CNT 8 +#elif defined(__TARGET_ARCH_sparc) || defined(__sparc__) +#define FUNC_REG_ARG_CNT 6 +#elif defined(__TARGET_ARCH_riscv) || defined(__riscv__) +#define FUNC_REG_ARG_CNT 8 +#else +/* default to 5 for others */ +#define FUNC_REG_ARG_CNT 5 +#endif + + #endif diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c index e1e11897e99b..1a476d8ed354 100644 --- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -81,4 +81,30 @@ int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2, return 0; } +__u64 splice_fd_in; +__u64 splice_off_in; +__u64 splice_fd_out; +__u64 splice_off_out; +__u64 splice_len; +__u64 splice_flags; + +SEC("ksyscall/splice") +int BPF_KSYSCALL(splice_enter, int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != filter_pid) + return 0; + + splice_fd_in = fd_in; + splice_off_in = (__u64)off_in; + splice_fd_out = fd_out; + splice_off_out = (__u64)off_out; + splice_len = len; + splice_flags = flags; + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index adb087aecc9e..cfed4df490f3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -25,6 +25,9 @@ #define IPV6_TCLASS 67 #define IPV6_AUTOFLOWLABEL 70 +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_SHOT 2 + #define SOL_TCP 6 #define TCP_NODELAY 1 #define TCP_MAXSEG 2 @@ -47,6 +50,12 @@ #define ICSK_TIME_LOSS_PROBE 5 #define ICSK_TIME_REO_TIMEOUT 6 +#define ETH_HLEN 14 +#define ETH_P_IPV6 0x86DD + +#define CHECKSUM_NONE 0 +#define CHECKSUM_PARTIAL 3 + #define IFNAMSIZ 16 #define RTF_GATEWAY 0x0002 diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c index e5560a656030..e01690618e1e 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -53,7 +53,7 @@ struct bitfields_only_mixed_types { */ /* ------ END-EXPECTED-OUTPUT ------ */ struct bitfield_mixed_with_others { - long: 4; /* char is enough as a backing field */ + char: 4; /* char is enough as a backing field */ int a: 4; /* 8-bit implicit padding */ short b; /* combined with previous bitfield */ diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c index e304b6204bd9..7998f27df7dd 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -58,7 +58,81 @@ union jump_code_union { } __attribute__((packed)); }; -/*------ END-EXPECTED-OUTPUT ------ */ +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct nested_packed_but_aligned_struct { + * int x1; + * int x2; + *}; + * + *struct outer_implicitly_packed_struct { + * char y1; + * struct nested_packed_but_aligned_struct y2; + *} __attribute__((packed)); + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct nested_packed_but_aligned_struct { + int x1; + int x2; +} __attribute__((packed)); + +struct outer_implicitly_packed_struct { + char y1; + struct nested_packed_but_aligned_struct y2; +}; +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct usb_ss_ep_comp_descriptor { + * char: 8; + * char bDescriptorType; + * char bMaxBurst; + * short wBytesPerInterval; + *}; + * + *struct usb_host_endpoint { + * long: 64; + * char: 8; + * struct usb_ss_ep_comp_descriptor ss_ep_comp; + * long: 0; + *} __attribute__((packed)); + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct usb_ss_ep_comp_descriptor { + char: 8; + char bDescriptorType; + char bMaxBurst; + int: 0; + short wBytesPerInterval; +} __attribute__((packed)); + +struct usb_host_endpoint { + long: 64; + char: 8; + struct usb_ss_ep_comp_descriptor ss_ep_comp; + long: 0; +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +struct nested_packed_struct { + int a; + char b; +} __attribute__((packed)); + +struct outer_nonpacked_struct { + short a; + struct nested_packed_struct b; +}; + +struct outer_packed_struct { + short a; + struct nested_packed_struct b; +} __attribute__((packed)); + +/* ------ END-EXPECTED-OUTPUT ------ */ int f(struct { struct packed_trailing_space _1; @@ -69,6 +143,10 @@ int f(struct { union union_is_never_packed _6; union union_does_not_need_packing _7; union jump_code_union _8; + struct outer_implicitly_packed_struct _9; + struct usb_host_endpoint _10; + struct outer_nonpacked_struct _11; + struct outer_packed_struct _12; } *_) { return 0; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index f2661c8d2d90..79276fbe454a 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -19,7 +19,7 @@ struct padded_implicitly { /* *struct padded_explicitly { * int a; - * int: 32; + * long: 0; * int b; *}; * @@ -28,41 +28,28 @@ struct padded_implicitly { struct padded_explicitly { int a; - int: 1; /* algo will explicitly pad with full 32 bits here */ + int: 1; /* algo will emit aligning `long: 0;` here */ int b; }; /* ----- START-EXPECTED-OUTPUT ----- */ -/* - *struct padded_a_lot { - * int a; - * long: 32; - * long: 64; - * long: 64; - * int b; - *}; - * - */ -/* ------ END-EXPECTED-OUTPUT ------ */ - struct padded_a_lot { int a; - /* 32 bit of implicit padding here, which algo will make explicit */ long: 64; long: 64; int b; }; +/* ------ END-EXPECTED-OUTPUT ------ */ + /* ----- START-EXPECTED-OUTPUT ----- */ /* *struct padded_cache_line { * int a; - * long: 32; * long: 64; * long: 64; * long: 64; * int b; - * long: 32; * long: 64; * long: 64; * long: 64; @@ -85,7 +72,7 @@ struct padded_cache_line { *struct zone { * int a; * short b; - * short: 16; + * long: 0; * struct zone_padding __pad__; *}; * @@ -102,12 +89,160 @@ struct zone { struct zone_padding __pad__; }; +/* ----- START-EXPECTED-OUTPUT ----- */ +struct padding_wo_named_members { + long: 64; + long: 64; +}; + +struct padding_weird_1 { + int a; + long: 64; + short: 16; + short b; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct padding_weird_2 { + * long: 56; + * char a; + * long: 56; + * char b; + * char: 8; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ +struct padding_weird_2 { + int: 32; /* these paddings will be collapsed into `long: 56;` */ + short: 16; + char: 8; + char a; + int: 32; /* these paddings will be collapsed into `long: 56;` */ + short: 16; + char: 8; + char b; + char: 8; +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +struct exact_1byte { + char x; +}; + +struct padded_1byte { + char: 8; +}; + +struct exact_2bytes { + short x; +}; + +struct padded_2bytes { + short: 16; +}; + +struct exact_4bytes { + int x; +}; + +struct padded_4bytes { + int: 32; +}; + +struct exact_8bytes { + long x; +}; + +struct padded_8bytes { + long: 64; +}; + +struct ff_periodic_effect { + int: 32; + short magnitude; + long: 0; + short phase; + long: 0; + int: 32; + int custom_len; + short *custom_data; +}; + +struct ib_wc { + long: 64; + long: 64; + int: 32; + int byte_len; + void *qp; + union {} ex; + long: 64; + int slid; + int wc_flags; + long: 64; + char smac[6]; + long: 0; + char network_hdr_type; +}; + +struct acpi_object_method { + long: 64; + char: 8; + char type; + short reference_count; + char flags; + short: 0; + char: 8; + char sync_level; + long: 64; + void *node; + void *aml_start; + union {} dispatch; + long: 64; + int aml_length; +}; + +struct nested_unpacked { + int x; +}; + +struct nested_packed { + struct nested_unpacked a; + char c; +} __attribute__((packed)); + +struct outer_mixed_but_unpacked { + struct nested_packed b1; + short a1; + struct nested_packed b2; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + int f(struct { struct padded_implicitly _1; struct padded_explicitly _2; struct padded_a_lot _3; struct padded_cache_line _4; struct zone _5; + struct padding_wo_named_members _6; + struct padding_weird_1 _7; + struct padding_weird_2 _8; + struct exact_1byte _100; + struct padded_1byte _101; + struct exact_2bytes _102; + struct padded_2bytes _103; + struct exact_4bytes _104; + struct padded_4bytes _105; + struct exact_8bytes _106; + struct padded_8bytes _107; + struct ff_periodic_effect _200; + struct ib_wc _201; + struct acpi_object_method _202; + struct outer_mixed_but_unpacked _203; } *_) { return 0; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 4ee4748133fe..ad21ee8c7e23 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -25,6 +25,39 @@ typedef enum { H = 2, } e3_t; +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *enum e_byte { + * EBYTE_1 = 0, + * EBYTE_2 = 1, + *} __attribute__((mode(byte))); + * + */ +/* ----- END-EXPECTED-OUTPUT ----- */ +enum e_byte { + EBYTE_1, + EBYTE_2, +} __attribute__((mode(byte))); + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *enum e_word { + * EWORD_1 = 0LL, + * EWORD_2 = 1LL, + *} __attribute__((mode(word))); + * + */ +/* ----- END-EXPECTED-OUTPUT ----- */ +enum e_word { + EWORD_1, + EWORD_2, +} __attribute__((mode(word))); /* force to use 8-byte backing for this enum */ + +/* ----- START-EXPECTED-OUTPUT ----- */ +enum e_big { + EBIG_1 = 1000000000000ULL, +}; + typedef int int_t; typedef volatile const int * volatile const crazy_ptr_t; @@ -51,7 +84,7 @@ typedef void (*printf_fn_t)(const char *, ...); * typedef int (*fn_t)(int); * typedef char * const * (*fn_ptr2_t)(s_t, fn_t); * - * - `fn_complext_t`: pointer to a function returning struct and accepting + * - `fn_complex_t`: pointer to a function returning struct and accepting * union and struct. All structs and enum are anonymous and defined inline. * * - `signal_t: pointer to a function accepting a pointer to a function as an @@ -224,6 +257,9 @@ struct root_struct { enum e2 _2; e2_t _2_1; e3_t _2_2; + enum e_byte _100; + enum e_word _101; + enum e_big _102; struct struct_w_typedefs _3; anon_struct_t _7; struct struct_fwd *_8; diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c index 8feddb8289cf..38f78d9345de 100644 --- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c +++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c @@ -64,3 +64,4 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path) return 0; } +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h new file mode 100644 index 000000000000..7d30855bfe78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _CGRP_KFUNC_COMMON_H +#define _CGRP_KFUNC_COMMON_H + +#include <errno.h> +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct __cgrps_kfunc_map_value { + struct cgroup __kptr_ref * cgrp; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __cgrps_kfunc_map_value); + __uint(max_entries, 1); +} __cgrps_kfunc_map SEC(".maps"); + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; + +static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) +{ + s32 id; + long status; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); +} + +static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return status; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) { + bpf_map_delete_elem(&__cgrps_kfunc_map, &id); + return -ENOENT; + } + + acquired = bpf_cgroup_acquire(cgrp); + old = bpf_kptr_xchg(&v->cgrp, acquired); + if (old) { + bpf_cgroup_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _CGRP_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c new file mode 100644 index 000000000000..4ad7fe24966d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp) +{ + int status; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + return NULL; + + return cgrps_kfunc_map_value_lookup(cgrp); +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */ + acquired = bpf_cgroup_acquire(v->cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 pointer type STRUCT cgroup must point") +int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path; + + /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */ + acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("kretprobe/cgroup_destroy_locked") +__failure __msg("reg type unsupported for arg#0 function") +int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) +{ + struct cgroup *acquired; + + /* Can't acquire an untrusted struct cgroup * pointer. */ + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("cgrp_kfunc_acquire_trusted_walked") +int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */ + acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */ + acquired = bpf_cgroup_acquire(NULL); + if (!acquired) + return 0; + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("Unreleased reference") +int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Acquired cgroup is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 expected pointer to map value") +int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */ + kptr = bpf_cgroup_kptr_get(&cgrp); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 expected pointer to map value") +int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr, *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */ + kptr = bpf_cgroup_kptr_get(&acquired); + bpf_cgroup_release(acquired); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 expected pointer to map value") +int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. */ + kptr = bpf_cgroup_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("Unreleased reference") +int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("Unreleased reference") +int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket") +int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */ + bpf_cgroup_release(v->cgrp); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 pointer type STRUCT cgroup must point") +int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired = (struct cgroup *)&path; + + /* Cannot release random frame pointer. */ + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket") +int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return 0; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) + return -ENOENT; + + acquired = bpf_cgroup_acquire(cgrp); + + old = bpf_kptr_xchg(&v->cgrp, acquired); + + /* old cannot be passed to bpf_cgroup_release() without a NULL check. */ + bpf_cgroup_release(old); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +__failure __msg("release kernel function bpf_cgroup_release expects") +int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path) +{ + /* Cannot release trusted cgroup pointer which was not acquired. */ + bpf_cgroup_release(cgrp); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c new file mode 100644 index 000000000000..0c23ea32df9f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid, invocations; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + bool same = pid == cur_pid; + + if (same) + __sync_fetch_and_add(&invocations, 1); + + return same; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + if (!is_test_kfunc_task()) + return 0; + + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) +{ + struct cgroup *self, *ancestor1, *invalid; + + if (!is_test_kfunc_task()) + return 0; + + self = bpf_cgroup_ancestor(cgrp, cgrp->level); + if (!self) { + err = 1; + return 0; + } + + if (self->self.id != cgrp->self.id) { + bpf_cgroup_release(self); + err = 2; + return 0; + } + bpf_cgroup_release(self); + + ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!ancestor1) { + err = 3; + return 0; + } + bpf_cgroup_release(ancestor1); + + invalid = bpf_cgroup_ancestor(cgrp, 10000); + if (invalid) { + bpf_cgroup_release(invalid); + err = 4; + return 0; + } + + invalid = bpf_cgroup_ancestor(cgrp, -1); + if (invalid) { + bpf_cgroup_release(invalid); + err = 5; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c new file mode 100644 index 000000000000..6652d18465b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +struct socket_cookie { + __u64 cookie_key; + __u64 cookie_value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct socket_cookie); +} socket_cookies SEC(".maps"); + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!p) + return 1; + + p->cookie_value = 0xF; + p->cookie_key = bpf_get_socket_cookie(ctx); + return 1; +} + +SEC("sockops") +int update_cookie_sockops(struct bpf_sock_ops *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 1; + + if (p->cookie_key != bpf_get_socket_cookie(ctx)) + return 1; + + p->cookie_value |= (ctx->local_port << 8); + return 1; +} + +SEC("fexit/inet_stream_connect") +int BPF_PROG(update_cookie_tracing, struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + + if (uaddr->sa_family != AF_INET6) + return 0; + + p = bpf_cgrp_storage_get(&socket_cookies, sock->sk->sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 0; + + if (p->cookie_key != bpf_get_socket_cookie(sock->sk)) + return 0; + + p->cookie_value |= 0xF0; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c new file mode 100644 index 000000000000..d41f90e2ab64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + (void)bpf_cgrp_storage_get(&map_a, (struct cgroup *)task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c new file mode 100644 index 000000000000..a043d8fefdac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +SEC("fentry/bpf_local_storage_lookup") +int BPF_PROG(on_lookup) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + return 0; +} + +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 200; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 100; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c new file mode 100644 index 000000000000..2d11ed528b6f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 target_pid; +__u64 cgroup_id; + +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +SEC("?iter.s/cgroup") +int cgroup_iter(struct bpf_iter__cgroup *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct cgroup *cgrp = ctx->cgroup; + long *ptr; + + if (cgrp == NULL) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; + struct cgroup *cgrp; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* ptr_to_btf_id semantics. should work. */ + cgrp = task->cgroups->dfl_cgrp; + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int yes_rcu_lock(void *ctx) +{ + struct task_struct *task; + struct cgroup *cgrp; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = task->cgroups->dfl_cgrp; + /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c new file mode 100644 index 000000000000..9ebb8e2fe541 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +#define MAGIC_VALUE 0xabcd1234 + +pid_t target_pid = 0; +int mismatch_cnt = 0; +int enter_cnt = 0; +int exit_cnt = 0; + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + int err; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* populate value 0 */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + /* delete value 0 */ + err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + if (err) + return 0; + + /* value is not available */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + if (ptr) + return 0; + + /* re-populate the value */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + __sync_fetch_and_add(&enter_cnt, 1); + *ptr = MAGIC_VALUE + enter_cnt; + + return 0; +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + __sync_fetch_and_add(&exit_cnt, 1); + if (*ptr != MAGIC_VALUE + exit_cnt) + __sync_fetch_and_add(&mismatch_cnt, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h new file mode 100644 index 000000000000..ad34f3b602be --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#ifndef _CPUMASK_COMMON_H +#define _CPUMASK_COMMON_H + +#include "errno.h" +#include <stdbool.h> + +int err; + +struct __cpumask_map_value { + struct bpf_cpumask __kptr_ref * cpumask; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct __cpumask_map_value); + __uint(max_entries, 1); +} __cpumask_map SEC(".maps"); + +struct bpf_cpumask *bpf_cpumask_create(void) __ksym; +void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; +struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; +struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym; +u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; +u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; +void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; +bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym; +bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; +bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; +void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym; +void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym; +bool bpf_cpumask_and(struct bpf_cpumask *cpumask, + const struct cpumask *src1, + const struct cpumask *src2) __ksym; +void bpf_cpumask_or(struct bpf_cpumask *cpumask, + const struct cpumask *src1, + const struct cpumask *src2) __ksym; +void bpf_cpumask_xor(struct bpf_cpumask *cpumask, + const struct cpumask *src1, + const struct cpumask *src2) __ksym; +bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym; +bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym; +bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym; +bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym; +bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; +void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; +u32 bpf_cpumask_any(const struct cpumask *src) __ksym; +u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; + +static inline const struct cpumask *cast(struct bpf_cpumask *cpumask) +{ + return (const struct cpumask *)cpumask; +} + +static inline struct bpf_cpumask *create_cpumask(void) +{ + struct bpf_cpumask *cpumask; + + cpumask = bpf_cpumask_create(); + if (!cpumask) { + err = 1; + return NULL; + } + + if (!bpf_cpumask_empty(cast(cpumask))) { + err = 2; + bpf_cpumask_release(cpumask); + return NULL; + } + + return cpumask; +} + +static inline struct __cpumask_map_value *cpumask_map_value_lookup(void) +{ + u32 key = 0; + + return bpf_map_lookup_elem(&__cpumask_map, &key); +} + +static inline int cpumask_map_insert(struct bpf_cpumask *mask) +{ + struct __cpumask_map_value local, *v; + long status; + struct bpf_cpumask *old; + u32 key = 0; + + local.cpumask = NULL; + status = bpf_map_update_elem(&__cpumask_map, &key, &local, 0); + if (status) { + bpf_cpumask_release(mask); + return status; + } + + v = bpf_map_lookup_elem(&__cpumask_map, &key); + if (!v) { + bpf_cpumask_release(mask); + return -ENOENT; + } + + old = bpf_kptr_xchg(&v->cpumask, mask); + if (old) { + bpf_cpumask_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _CPUMASK_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c new file mode 100644 index 000000000000..33e8e86dd090 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#include "cpumask_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +SEC("tp_btf/task_newtask") +__failure __msg("Unreleased reference") +int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + cpumask = create_cpumask(); + + /* cpumask is never released. */ + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("NULL pointer passed to trusted arg0") +int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + cpumask = create_cpumask(); + + /* cpumask is released twice. */ + bpf_cpumask_release(cpumask); + bpf_cpumask_release(cpumask); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask") +int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + /* Can't acquire a non-struct bpf_cpumask. */ + cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask") +int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + /* Can't set the CPU of a non-struct bpf_cpumask. */ + bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("Unreleased reference") +int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + struct __cpumask_map_value *v; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (cpumask_map_insert(cpumask)) + return 0; + + v = cpumask_map_value_lookup(); + if (!v) + return 0; + + cpumask = bpf_kptr_xchg(&v->cpumask, NULL); + + /* cpumask is never released. */ + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("Unreleased reference") +int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + struct __cpumask_map_value *v; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (cpumask_map_insert(cpumask)) + return 0; + + v = cpumask_map_value_lookup(); + if (!v) + return 0; + + cpumask = bpf_cpumask_kptr_get(&v->cpumask); + + /* cpumask is never released. */ + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("NULL pointer passed to trusted arg0") +int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) +{ + /* NULL passed to KF_TRUSTED_ARGS kfunc. */ + bpf_cpumask_empty(NULL); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c new file mode 100644 index 000000000000..1d38bc65d4b0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "cpumask_common.h" + +char _license[] SEC("license") = "GPL"; + +int pid, nr_cpus; + +static bool is_test_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + return pid == cur_pid; +} + +static bool create_cpumask_set(struct bpf_cpumask **out1, + struct bpf_cpumask **out2, + struct bpf_cpumask **out3, + struct bpf_cpumask **out4) +{ + struct bpf_cpumask *mask1, *mask2, *mask3, *mask4; + + mask1 = create_cpumask(); + if (!mask1) + return false; + + mask2 = create_cpumask(); + if (!mask2) { + bpf_cpumask_release(mask1); + err = 3; + return false; + } + + mask3 = create_cpumask(); + if (!mask3) { + bpf_cpumask_release(mask1); + bpf_cpumask_release(mask2); + err = 4; + return false; + } + + mask4 = create_cpumask(); + if (!mask4) { + bpf_cpumask_release(mask1); + bpf_cpumask_release(mask2); + bpf_cpumask_release(mask3); + err = 5; + return false; + } + + *out1 = mask1; + *out2 = mask2; + *out3 = mask3; + *out4 = mask4; + + return true; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_alloc_free_cpumask, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + if (!is_test_task()) + return 0; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + bpf_cpumask_release(cpumask); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + if (!is_test_task()) + return 0; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + bpf_cpumask_set_cpu(0, cpumask); + if (!bpf_cpumask_test_cpu(0, cast(cpumask))) { + err = 3; + goto release_exit; + } + + bpf_cpumask_clear_cpu(0, cpumask); + if (bpf_cpumask_test_cpu(0, cast(cpumask))) { + err = 4; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(cpumask); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_setall_clear_cpu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + if (!is_test_task()) + return 0; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + bpf_cpumask_setall(cpumask); + if (!bpf_cpumask_full(cast(cpumask))) { + err = 3; + goto release_exit; + } + + bpf_cpumask_clear(cpumask); + if (!bpf_cpumask_empty(cast(cpumask))) { + err = 4; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(cpumask); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_first_firstzero_cpu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + if (!is_test_task()) + return 0; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (bpf_cpumask_first(cast(cpumask)) < nr_cpus) { + err = 3; + goto release_exit; + } + + if (bpf_cpumask_first_zero(cast(cpumask)) != 0) { + bpf_printk("first zero: %d", bpf_cpumask_first_zero(cast(cpumask))); + err = 4; + goto release_exit; + } + + bpf_cpumask_set_cpu(0, cpumask); + if (bpf_cpumask_first(cast(cpumask)) != 0) { + err = 5; + goto release_exit; + } + + if (bpf_cpumask_first_zero(cast(cpumask)) != 1) { + err = 6; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(cpumask); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + + if (!is_test_task()) + return 0; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (bpf_cpumask_test_and_set_cpu(0, cpumask)) { + err = 3; + goto release_exit; + } + + if (!bpf_cpumask_test_and_set_cpu(0, cpumask)) { + err = 4; + goto release_exit; + } + + if (!bpf_cpumask_test_and_clear_cpu(0, cpumask)) { + err = 5; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(cpumask); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2, *dst1, *dst2; + + if (!is_test_task()) + return 0; + + if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2)) + return 0; + + bpf_cpumask_set_cpu(0, mask1); + bpf_cpumask_set_cpu(1, mask2); + + if (bpf_cpumask_and(dst1, cast(mask1), cast(mask2))) { + err = 6; + goto release_exit; + } + if (!bpf_cpumask_empty(cast(dst1))) { + err = 7; + goto release_exit; + } + + bpf_cpumask_or(dst1, cast(mask1), cast(mask2)); + if (!bpf_cpumask_test_cpu(0, cast(dst1))) { + err = 8; + goto release_exit; + } + if (!bpf_cpumask_test_cpu(1, cast(dst1))) { + err = 9; + goto release_exit; + } + + bpf_cpumask_xor(dst2, cast(mask1), cast(mask2)); + if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) { + err = 10; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(mask1); + bpf_cpumask_release(mask2); + bpf_cpumask_release(dst1); + bpf_cpumask_release(dst2); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_intersects_subset, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2, *dst1, *dst2; + + if (!is_test_task()) + return 0; + + if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2)) + return 0; + + bpf_cpumask_set_cpu(0, mask1); + bpf_cpumask_set_cpu(1, mask2); + if (bpf_cpumask_intersects(cast(mask1), cast(mask2))) { + err = 6; + goto release_exit; + } + + bpf_cpumask_or(dst1, cast(mask1), cast(mask2)); + if (!bpf_cpumask_subset(cast(mask1), cast(dst1))) { + err = 7; + goto release_exit; + } + + if (!bpf_cpumask_subset(cast(mask2), cast(dst1))) { + err = 8; + goto release_exit; + } + + if (bpf_cpumask_subset(cast(dst1), cast(mask1))) { + err = 9; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(mask1); + bpf_cpumask_release(mask2); + bpf_cpumask_release(dst1); + bpf_cpumask_release(dst2); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2, *dst1, *dst2; + u32 cpu; + + if (!is_test_task()) + return 0; + + if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2)) + return 0; + + bpf_cpumask_set_cpu(0, mask1); + bpf_cpumask_set_cpu(1, mask2); + bpf_cpumask_or(dst1, cast(mask1), cast(mask2)); + + cpu = bpf_cpumask_any(cast(mask1)); + if (cpu != 0) { + err = 6; + goto release_exit; + } + + cpu = bpf_cpumask_any(cast(dst2)); + if (cpu < nr_cpus) { + err = 7; + goto release_exit; + } + + bpf_cpumask_copy(dst2, cast(dst1)); + if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) { + err = 8; + goto release_exit; + } + + cpu = bpf_cpumask_any(cast(dst2)); + if (cpu > 1) { + err = 9; + goto release_exit; + } + + cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2)); + if (cpu < nr_cpus) { + err = 10; + goto release_exit; + } + +release_exit: + bpf_cpumask_release(mask1); + bpf_cpumask_release(mask2); + bpf_cpumask_release(dst1); + bpf_cpumask_release(dst2); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + struct __cpumask_map_value *v; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (cpumask_map_insert(cpumask)) + err = 3; + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + struct __cpumask_map_value *v; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (cpumask_map_insert(cpumask)) { + err = 3; + return 0; + } + + v = cpumask_map_value_lookup(); + if (!v) { + err = 4; + return 0; + } + + cpumask = bpf_kptr_xchg(&v->cpumask, NULL); + if (cpumask) + bpf_cpumask_release(cpumask); + else + err = 5; + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *cpumask; + struct __cpumask_map_value *v; + + cpumask = create_cpumask(); + if (!cpumask) + return 0; + + if (cpumask_map_insert(cpumask)) { + err = 3; + return 0; + } + + v = cpumask_map_value_lookup(); + if (!v) { + err = 4; + return 0; + } + + cpumask = bpf_cpumask_kptr_get(&v->cpumask); + if (cpumask) + bpf_cpumask_release(cpumask); + else + err = 5; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/decap_sanity.c b/tools/testing/selftests/bpf/progs/decap_sanity.c new file mode 100644 index 000000000000..bd3c657c58a7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/decap_sanity.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define UDP_TEST_PORT 7777 + +void *bpf_cast_to_kern_ctx(void *) __ksym; +bool init_csum_partial = false; +bool final_csum_none = false; +bool broken_csum_start = false; + +static unsigned int skb_headlen(const struct sk_buff *skb) +{ + return skb->len - skb->data_len; +} + +static unsigned int skb_headroom(const struct sk_buff *skb) +{ + return skb->data - skb->head; +} + +static int skb_checksum_start_offset(const struct sk_buff *skb) +{ + return skb->csum_start - skb_headroom(skb); +} + +SEC("tc") +int decap_sanity(struct __sk_buff *skb) +{ + struct sk_buff *kskb; + struct ipv6hdr ip6h; + struct udphdr udph; + int err; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6)) + return TC_ACT_SHOT; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h))) + return TC_ACT_SHOT; + + if (ip6h.nexthdr != IPPROTO_UDP) + return TC_ACT_SHOT; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph))) + return TC_ACT_SHOT; + + if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT)) + return TC_ACT_SHOT; + + kskb = bpf_cast_to_kern_ctx(skb); + init_csum_partial = (kskb->ip_summed == CHECKSUM_PARTIAL); + err = bpf_skb_adjust_room(skb, -(s32)(ETH_HLEN + sizeof(ip6h) + sizeof(udph)), + 1, BPF_F_ADJ_ROOM_FIXED_GSO); + if (err) + return TC_ACT_SHOT; + final_csum_none = (kskb->ip_summed == CHECKSUM_NONE); + if (kskb->ip_summed == CHECKSUM_PARTIAL && + (unsigned int)skb_checksum_start_offset(kskb) >= skb_headlen(kskb)) + broken_csum_start = true; + + return TC_ACT_SHOT; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c new file mode 100644 index 000000000000..0bf969a0b5ed --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops.s/test_2") +__failure __msg("attach to unsupported member test_2 of struct bpf_dummy_ops") +int BPF_PROG(test_unsupported_field_sleepable, + struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4) +{ + /* Tries to mark an unsleepable field in struct bpf_dummy_ops as sleepable. */ + return 0; +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = NULL, + .test_2 = (void *)test_unsupported_field_sleepable, + .test_sleepable = (void *)NULL, +}; diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c index ead87edb75e2..1efa746c25dc 100644 --- a/tools/testing/selftests/bpf/progs/dummy_st_ops.c +++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c @@ -1,19 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2021. Huawei Technologies Co., Ltd */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct bpf_dummy_ops_state { - int val; -} __attribute__((preserve_access_index)); - -struct bpf_dummy_ops { - int (*test_1)(struct bpf_dummy_ops_state *state); - int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2, - char a3, unsigned long a4); -}; - char _license[] SEC("license") = "GPL"; SEC("struct_ops/test_1") @@ -43,8 +33,15 @@ int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a return 0; } +SEC("struct_ops.s/test_sleepable") +int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state) +{ + return 0; +} + SEC(".struct_ops") struct bpf_dummy_ops dummy_1 = { .test_1 = (void *)test_1, .test_2 = (void *)test_2, + .test_sleepable = (void *)test_sleepable, }; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index b0f08ff024fb..aa5b69354b91 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -35,6 +35,13 @@ struct { __type(value, __u32); } array_map3 SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array_map4 SEC(".maps"); + struct sample { int pid; long value; @@ -43,6 +50,7 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); int err, val; @@ -66,6 +74,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr) * bpf_ringbuf_submit/discard_dynptr call */ SEC("?raw_tp") +__failure __msg("Unreleased reference id=2") int ringbuf_missing_release1(void *ctx) { struct bpf_dynptr ptr; @@ -78,6 +87,7 @@ int ringbuf_missing_release1(void *ctx) } SEC("?raw_tp") +__failure __msg("Unreleased reference id=4") int ringbuf_missing_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -113,6 +123,7 @@ static int missing_release_callback_fn(__u32 index, void *data) /* Any dynptr initialized within a callback must have bpf_dynptr_put called */ SEC("?raw_tp") +__failure __msg("Unreleased reference id") int ringbuf_missing_release_callback(void *ctx) { bpf_loop(10, missing_release_callback_fn, NULL, 0); @@ -121,6 +132,7 @@ int ringbuf_missing_release_callback(void *ctx) /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int ringbuf_release_uninit_dynptr(void *ctx) { struct bpf_dynptr ptr; @@ -133,6 +145,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -152,6 +165,7 @@ int use_after_invalid(void *ctx) /* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ SEC("?raw_tp") +__failure __msg("type=mem expected=ringbuf_mem") int ringbuf_invalid_api(void *ctx) { struct bpf_dynptr ptr; @@ -174,6 +188,7 @@ done: /* Can't add a dynptr to a map */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int add_dynptr_to_map1(void *ctx) { struct bpf_dynptr ptr; @@ -191,6 +206,7 @@ int add_dynptr_to_map1(void *ctx) /* Can't add a struct with an embedded dynptr to a map */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int add_dynptr_to_map2(void *ctx) { struct test_info x; @@ -208,6 +224,7 @@ int add_dynptr_to_map2(void *ctx) /* A data slice can't be accessed out of bounds */ SEC("?raw_tp") +__failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -228,6 +245,7 @@ done: } SEC("?raw_tp") +__failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) { __u32 key = 0, map_val; @@ -248,6 +266,7 @@ int data_slice_out_of_bounds_map_value(void *ctx) /* A data slice can't be used after it has been released */ SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int data_slice_use_after_release1(void *ctx) { struct bpf_dynptr ptr; @@ -279,6 +298,7 @@ done: * ptr2 is at fp - 16). */ SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int data_slice_use_after_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -310,6 +330,7 @@ done: /* A data slice must be first checked for NULL */ SEC("?raw_tp") +__failure __msg("invalid mem access 'mem_or_null'") int data_slice_missing_null_check1(void *ctx) { struct bpf_dynptr ptr; @@ -330,6 +351,7 @@ int data_slice_missing_null_check1(void *ctx) /* A data slice can't be dereferenced if it wasn't checked for null */ SEC("?raw_tp") +__failure __msg("invalid mem access 'mem_or_null'") int data_slice_missing_null_check2(void *ctx) { struct bpf_dynptr ptr; @@ -352,6 +374,7 @@ done: * dynptr argument */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int invalid_helper1(void *ctx) { struct bpf_dynptr ptr; @@ -366,6 +389,7 @@ int invalid_helper1(void *ctx) /* A dynptr can't be passed into a helper function at a non-zero offset */ SEC("?raw_tp") +__failure __msg("cannot pass in dynptr at an offset=-8") int invalid_helper2(void *ctx) { struct bpf_dynptr ptr; @@ -381,6 +405,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -402,6 +427,7 @@ int invalid_write1(void *ctx) * offset */ SEC("?raw_tp") +__failure __msg("cannot overwrite referenced dynptr") int invalid_write2(void *ctx) { struct bpf_dynptr ptr; @@ -425,6 +451,7 @@ int invalid_write2(void *ctx) * non-const offset */ SEC("?raw_tp") +__failure __msg("cannot overwrite referenced dynptr") int invalid_write3(void *ctx) { struct bpf_dynptr ptr; @@ -456,6 +483,7 @@ static int invalid_write4_callback(__u32 index, void *data) * be invalidated as a dynptr */ SEC("?raw_tp") +__failure __msg("cannot overwrite referenced dynptr") int invalid_write4(void *ctx) { struct bpf_dynptr ptr; @@ -472,7 +500,9 @@ int invalid_write4(void *ctx) /* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ struct bpf_dynptr global_dynptr; + SEC("?raw_tp") +__failure __msg("type=map_value expected=fp") int global(void *ctx) { /* this should fail */ @@ -485,6 +515,7 @@ int global(void *ctx) /* A direct read should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read1(void *ctx) { struct bpf_dynptr ptr; @@ -501,6 +532,7 @@ int invalid_read1(void *ctx) /* A direct read at an offset should fail */ SEC("?raw_tp") +__failure __msg("cannot pass in dynptr at an offset") int invalid_read2(void *ctx) { struct bpf_dynptr ptr; @@ -516,6 +548,7 @@ int invalid_read2(void *ctx) /* A direct read at an offset into the lower stack slot should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read3(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -542,6 +575,7 @@ static int invalid_read4_callback(__u32 index, void *data) /* A direct read within a callback function should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read4(void *ctx) { struct bpf_dynptr ptr; @@ -557,6 +591,7 @@ int invalid_read4(void *ctx) /* Initializing a dynptr on an offset should fail */ SEC("?raw_tp") +__failure __msg("cannot pass in dynptr at an offset=0") int invalid_offset(void *ctx) { struct bpf_dynptr ptr; @@ -571,6 +606,7 @@ int invalid_offset(void *ctx) /* Can't release a dynptr twice */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int release_twice(void *ctx) { struct bpf_dynptr ptr; @@ -594,9 +630,10 @@ static int release_twice_callback_fn(__u32 index, void *data) } /* Test that releasing a dynptr twice, where one of the releases happens - * within a calback function, fails + * within a callback function, fails */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int release_twice_callback(void *ctx) { struct bpf_dynptr ptr; @@ -612,6 +649,7 @@ int release_twice_callback(void *ctx) /* Reject unsupported local mem types for dynptr_from_mem API */ SEC("?raw_tp") +__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data") int dynptr_from_mem_invalid_api(void *ctx) { struct bpf_dynptr ptr; @@ -622,3 +660,435 @@ int dynptr_from_mem_invalid_api(void *ctx) return 0; } + +SEC("?tc") +__failure __msg("cannot overwrite referenced dynptr") __log_level(2) +int dynptr_pruning_overwrite(struct __sk_buff *ctx) +{ + asm volatile ( + "r9 = 0xeB9F; \ + r6 = %[ringbuf] ll; \ + r1 = r6; \ + r2 = 8; \ + r3 = 0; \ + r4 = r10; \ + r4 += -16; \ + call %[bpf_ringbuf_reserve_dynptr]; \ + if r0 == 0 goto pjmp1; \ + goto pjmp2; \ + pjmp1: \ + *(u64 *)(r10 - 16) = r9; \ + pjmp2: \ + r1 = r10; \ + r1 += -16; \ + r2 = 0; \ + call %[bpf_ringbuf_discard_dynptr]; " + : + : __imm(bpf_ringbuf_reserve_dynptr), + __imm(bpf_ringbuf_discard_dynptr), + __imm_addr(ringbuf) + : __clobber_all + ); + return 0; +} + +SEC("?tc") +__success __msg("12: safe") __log_level(2) +int dynptr_pruning_stacksafe(struct __sk_buff *ctx) +{ + asm volatile ( + "r9 = 0xeB9F; \ + r6 = %[ringbuf] ll; \ + r1 = r6; \ + r2 = 8; \ + r3 = 0; \ + r4 = r10; \ + r4 += -16; \ + call %[bpf_ringbuf_reserve_dynptr]; \ + if r0 == 0 goto stjmp1; \ + goto stjmp2; \ + stjmp1: \ + r9 = r9; \ + stjmp2: \ + r1 = r10; \ + r1 += -16; \ + r2 = 0; \ + call %[bpf_ringbuf_discard_dynptr]; " + : + : __imm(bpf_ringbuf_reserve_dynptr), + __imm(bpf_ringbuf_discard_dynptr), + __imm_addr(ringbuf) + : __clobber_all + ); + return 0; +} + +SEC("?tc") +__failure __msg("cannot overwrite referenced dynptr") __log_level(2) +int dynptr_pruning_type_confusion(struct __sk_buff *ctx) +{ + asm volatile ( + "r6 = %[array_map4] ll; \ + r7 = %[ringbuf] ll; \ + r1 = r6; \ + r2 = r10; \ + r2 += -8; \ + r9 = 0; \ + *(u64 *)(r2 + 0) = r9; \ + r3 = r10; \ + r3 += -24; \ + r9 = 0xeB9FeB9F; \ + *(u64 *)(r10 - 16) = r9; \ + *(u64 *)(r10 - 24) = r9; \ + r9 = 0; \ + r4 = 0; \ + r8 = r2; \ + call %[bpf_map_update_elem]; \ + r1 = r6; \ + r2 = r8; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto tjmp1; \ + exit; \ + tjmp1: \ + r8 = r0; \ + r1 = r7; \ + r2 = 8; \ + r3 = 0; \ + r4 = r10; \ + r4 += -16; \ + r0 = *(u64 *)(r0 + 0); \ + call %[bpf_ringbuf_reserve_dynptr]; \ + if r0 == 0 goto tjmp2; \ + r8 = r8; \ + r8 = r8; \ + r8 = r8; \ + r8 = r8; \ + r8 = r8; \ + r8 = r8; \ + r8 = r8; \ + goto tjmp3; \ + tjmp2: \ + *(u64 *)(r10 - 8) = r9; \ + *(u64 *)(r10 - 16) = r9; \ + r1 = r8; \ + r1 += 8; \ + r2 = 0; \ + r3 = 0; \ + r4 = r10; \ + r4 += -16; \ + call %[bpf_dynptr_from_mem]; \ + tjmp3: \ + r1 = r10; \ + r1 += -16; \ + r2 = 0; \ + call %[bpf_ringbuf_discard_dynptr]; " + : + : __imm(bpf_map_update_elem), + __imm(bpf_map_lookup_elem), + __imm(bpf_ringbuf_reserve_dynptr), + __imm(bpf_dynptr_from_mem), + __imm(bpf_ringbuf_discard_dynptr), + __imm_addr(array_map4), + __imm_addr(ringbuf) + : __clobber_all + ); + return 0; +} + +SEC("?tc") +__failure __msg("dynptr has to be at a constant offset") __log_level(2) +int dynptr_var_off_overwrite(struct __sk_buff *ctx) +{ + asm volatile ( + "r9 = 16; \ + *(u32 *)(r10 - 4) = r9; \ + r8 = *(u32 *)(r10 - 4); \ + if r8 >= 0 goto vjmp1; \ + r0 = 1; \ + exit; \ + vjmp1: \ + if r8 <= 16 goto vjmp2; \ + r0 = 1; \ + exit; \ + vjmp2: \ + r8 &= 16; \ + r1 = %[ringbuf] ll; \ + r2 = 8; \ + r3 = 0; \ + r4 = r10; \ + r4 += -32; \ + r4 += r8; \ + call %[bpf_ringbuf_reserve_dynptr]; \ + r9 = 0xeB9F; \ + *(u64 *)(r10 - 16) = r9; \ + r1 = r10; \ + r1 += -32; \ + r1 += r8; \ + r2 = 0; \ + call %[bpf_ringbuf_discard_dynptr]; " + : + : __imm(bpf_ringbuf_reserve_dynptr), + __imm(bpf_ringbuf_discard_dynptr), + __imm_addr(ringbuf) + : __clobber_all + ); + return 0; +} + +SEC("?tc") +__failure __msg("cannot overwrite referenced dynptr") __log_level(2) +int dynptr_partial_slot_invalidate(struct __sk_buff *ctx) +{ + asm volatile ( + "r6 = %[ringbuf] ll; \ + r7 = %[array_map4] ll; \ + r1 = r7; \ + r2 = r10; \ + r2 += -8; \ + r9 = 0; \ + *(u64 *)(r2 + 0) = r9; \ + r3 = r2; \ + r4 = 0; \ + r8 = r2; \ + call %[bpf_map_update_elem]; \ + r1 = r7; \ + r2 = r8; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto sjmp1; \ + exit; \ + sjmp1: \ + r7 = r0; \ + r1 = r6; \ + r2 = 8; \ + r3 = 0; \ + r4 = r10; \ + r4 += -24; \ + call %[bpf_ringbuf_reserve_dynptr]; \ + *(u64 *)(r10 - 16) = r9; \ + r1 = r7; \ + r2 = 8; \ + r3 = 0; \ + r4 = r10; \ + r4 += -16; \ + call %[bpf_dynptr_from_mem]; \ + r1 = r10; \ + r1 += -512; \ + r2 = 488; \ + r3 = r10; \ + r3 += -24; \ + r4 = 0; \ + r5 = 0; \ + call %[bpf_dynptr_read]; \ + r8 = 1; \ + if r0 != 0 goto sjmp2; \ + r8 = 0; \ + sjmp2: \ + r1 = r10; \ + r1 += -24; \ + r2 = 0; \ + call %[bpf_ringbuf_discard_dynptr]; " + : + : __imm(bpf_map_update_elem), + __imm(bpf_map_lookup_elem), + __imm(bpf_ringbuf_reserve_dynptr), + __imm(bpf_ringbuf_discard_dynptr), + __imm(bpf_dynptr_from_mem), + __imm(bpf_dynptr_read), + __imm_addr(ringbuf), + __imm_addr(array_map4) + : __clobber_all + ); + return 0; +} + +/* Test that it is allowed to overwrite unreferenced dynptr. */ +SEC("?raw_tp") +__success +int dynptr_overwrite_unref(void *ctx) +{ + struct bpf_dynptr ptr; + + if (get_map_val_dynptr(&ptr)) + return 0; + if (get_map_val_dynptr(&ptr)) + return 0; + if (get_map_val_dynptr(&ptr)) + return 0; + + return 0; +} + +/* Test that slices are invalidated on reinitializing a dynptr. */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int dynptr_invalidate_slice_reinit(void *ctx) +{ + struct bpf_dynptr ptr; + __u8 *p; + + if (get_map_val_dynptr(&ptr)) + return 0; + p = bpf_dynptr_data(&ptr, 0, 1); + if (!p) + return 0; + if (get_map_val_dynptr(&ptr)) + return 0; + /* this should fail */ + return *p; +} + +/* Invalidation of dynptr slices on destruction of dynptr should not miss + * mem_or_null pointers. + */ +SEC("?raw_tp") +__failure __msg("R1 type=scalar expected=percpu_ptr_") +int dynptr_invalidate_slice_or_null(void *ctx) +{ + struct bpf_dynptr ptr; + __u8 *p; + + if (get_map_val_dynptr(&ptr)) + return 0; + + p = bpf_dynptr_data(&ptr, 0, 1); + *(__u8 *)&ptr = 0; + /* this should fail */ + bpf_this_cpu_ptr(p); + return 0; +} + +/* Destruction of dynptr should also any slices obtained from it */ +SEC("?raw_tp") +__failure __msg("R7 invalid mem access 'scalar'") +int dynptr_invalidate_slice_failure(void *ctx) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + __u8 *p1, *p2; + + if (get_map_val_dynptr(&ptr1)) + return 0; + if (get_map_val_dynptr(&ptr2)) + return 0; + + p1 = bpf_dynptr_data(&ptr1, 0, 1); + if (!p1) + return 0; + p2 = bpf_dynptr_data(&ptr2, 0, 1); + if (!p2) + return 0; + + *(__u8 *)&ptr1 = 0; + /* this should fail */ + return *p1; +} + +/* Invalidation of slices should be scoped and should not prevent dereferencing + * slices of another dynptr after destroying unrelated dynptr + */ +SEC("?raw_tp") +__success +int dynptr_invalidate_slice_success(void *ctx) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + __u8 *p1, *p2; + + if (get_map_val_dynptr(&ptr1)) + return 1; + if (get_map_val_dynptr(&ptr2)) + return 1; + + p1 = bpf_dynptr_data(&ptr1, 0, 1); + if (!p1) + return 1; + p2 = bpf_dynptr_data(&ptr2, 0, 1); + if (!p2) + return 1; + + *(__u8 *)&ptr1 = 0; + return *p2; +} + +/* Overwriting referenced dynptr should be rejected */ +SEC("?raw_tp") +__failure __msg("cannot overwrite referenced dynptr") +int dynptr_overwrite_ref(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + /* this should fail */ + if (get_map_val_dynptr(&ptr)) + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Reject writes to dynptr slot from bpf_dynptr_read */ +SEC("?raw_tp") +__failure __msg("potential write to dynptr at off=-16") +int dynptr_read_into_slot(void *ctx) +{ + union { + struct { + char _pad[48]; + struct bpf_dynptr ptr; + }; + char buf[64]; + } data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &data.ptr); + /* this should fail */ + bpf_dynptr_read(data.buf, sizeof(data.buf), &data.ptr, 0, 0); + + return 0; +} + +/* Reject writes to dynptr slot for uninit arg */ +SEC("?raw_tp") +__failure __msg("potential write to dynptr at off=-16") +int uninit_write_into_slot(void *ctx) +{ + struct { + char buf[64]; + struct bpf_dynptr ptr; + } data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 80, 0, &data.ptr); + /* this should fail */ + bpf_get_current_comm(data.buf, 80); + + return 0; +} + +static int callback(__u32 index, void *data) +{ + *(__u32 *)data = 123; + + return 0; +} + +/* If the dynptr is written into in a callback function, its data + * slices should be invalidated as well. + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int invalid_data_slices(void *ctx) +{ + struct bpf_dynptr ptr; + __u32 *slice; + + if (get_map_val_dynptr(&ptr)) + return 0; + + slice = bpf_dynptr_data(&ptr, 0, sizeof(__u32)); + if (!slice) + return 0; + + bpf_loop(10, callback, &ptr, 0); + + /* this should fail */ + *slice = 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a3a6103c8569..35db7c6c1fc7 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -20,6 +20,7 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c new file mode 100644 index 000000000000..4b0cd6753251 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/empty_skb.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +int ifindex; +int ret; + +SEC("lwt_xmit") +int redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("lwt_xmit") +int redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} + +SEC("tc") +int tc_redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("tc") +int tc_redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c new file mode 100644 index 000000000000..c4514dd58c62 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fib_lookup.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <linux/types.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tracing_net.h" + +struct bpf_fib_lookup fib_params = {}; +int fib_lookup_ret = 0; +int lookup_flags = 0; + +SEC("tc") +int fib_lookup(struct __sk_buff *skb) +{ + fib_lookup_ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), + lookup_flags); + + return TC_ACT_SHOT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_progmap.c b/tools/testing/selftests/bpf/progs/freplace_progmap.c new file mode 100644 index 000000000000..81b56b9aa7d6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_progmap.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __type(key, __u32); + __type(value, struct bpf_cpumap_val); + __uint(max_entries, 1); +} cpu_map SEC(".maps"); + +SEC("xdp/cpumap") +int xdp_drop_prog(struct xdp_md *ctx) +{ + return XDP_DROP; +} + +SEC("freplace") +int xdp_cpumap_prog(struct xdp_md *ctx) +{ + return bpf_redirect_map(&cpu_map, 0, XDP_PASS); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/htab_reuse.c b/tools/testing/selftests/bpf/progs/htab_reuse.c new file mode 100644 index 000000000000..7f7368cb3095 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/htab_reuse.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct htab_val { + struct bpf_spin_lock lock; + unsigned int data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 64); + __type(key, unsigned int); + __type(value, struct htab_val); + __uint(map_flags, BPF_F_NO_PREALLOC); +} htab SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c new file mode 100644 index 000000000000..f46965053acb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, u64); + __type(value, u64); +} m_hash SEC(".maps"); + +SEC("?raw_tp") +__failure __msg("R8 invalid mem access 'map_value_or_null") +int jeq_infer_not_null_ptr_to_btfid(void *ctx) +{ + struct bpf_map *map = (struct bpf_map *)&m_hash; + struct bpf_map *inner_map = map->inner_map_meta; + u64 key = 0, ret = 0, *val; + + val = bpf_map_lookup_elem(map, &key); + /* Do not mark ptr as non-null if one of them is + * PTR_TO_BTF_ID (R9), reject because of invalid + * access to map value (R8). + * + * Here, we need to inline those insns to access + * R8 directly, since compiler may use other reg + * once it figures out val==inner_map. + */ + asm volatile("r8 = %[val];\n" + "r9 = %[inner_map];\n" + "if r8 != r9 goto +1;\n" + "%[ret] = *(u64 *)(r8 +0);\n" + : [ret] "+r"(ret) + : [inner_map] "r"(inner_map), [val] "r"(val) + : "r8", "r9"); + + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c new file mode 100644 index 000000000000..2d2e61470794 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +static struct prog_test_ref_kfunc __kptr_ref *v; +long total_sum = -1; + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; + +SEC("tc") +int test_jit_probe_mem(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long zero = 0, sum; + + p = bpf_kfunc_call_test_acquire(&zero); + if (!p) + return 1; + + p = bpf_kptr_xchg(&v, p); + if (p) + goto release_out; + + /* Direct map value access of kptr, should be PTR_UNTRUSTED */ + p = v; + if (!p) + return 1; + + asm volatile ( + "r9 = %[p];" + "%[sum] = 0;" + + /* r8 = p->a */ + "r8 = *(u32 *)(r9 + 0);" + "%[sum] += r8;" + + /* r8 = p->b */ + "r8 = *(u32 *)(r9 + 4);" + "%[sum] += r8;" + + "r9 += 8;" + /* r9 = p->a */ + "r9 = *(u32 *)(r9 - 8);" + "%[sum] += r9;" + + : [sum] "=r"(sum) + : [p] "r"(p) + : "r8", "r9" + ); + + total_sum = sum; + return 0; +release_out: + bpf_kfunc_call_test_release(p); + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index f636e50be259..7daa8f5720b9 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> +extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, __u32 c, __u64 d) __ksym; @@ -16,6 +17,24 @@ extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; + +SEC("tc") +int kfunc_call_test4(struct __sk_buff *skb) +{ + struct bpf_sock *sk = skb->sk; + long tmp; + + if (!sk) + return -1; + + sk = bpf_sk_fullsock(sk); + if (!sk) + return -1; + + tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000); + return (tmp >> 32) + tmp; +} SEC("tc") int kfunc_call_test2(struct __sk_buff *skb) @@ -163,4 +182,14 @@ int kfunc_call_test_get_mem(struct __sk_buff *skb) return ret; } +SEC("tc") +int kfunc_call_test_static_unused_arg(struct __sk_buff *skb) +{ + + u32 expected = 5, actual; + + actual = bpf_kfunc_call_test_static_unused_arg(expected, 0xdeadbeef); + return actual != expected ? -1 : 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 98c3399e15c0..9e1ca8e34913 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -110,3 +110,53 @@ int test_kretprobe_manual(struct pt_regs *ctx) kprobe_multi_check(ctx, true); return 0; } + +extern const void bpf_testmod_fentry_test1 __ksym; +extern const void bpf_testmod_fentry_test2 __ksym; +extern const void bpf_testmod_fentry_test3 __ksym; + +__u64 kprobe_testmod_test1_result = 0; +__u64 kprobe_testmod_test2_result = 0; +__u64 kprobe_testmod_test3_result = 0; + +__u64 kretprobe_testmod_test1_result = 0; +__u64 kretprobe_testmod_test2_result = 0; +__u64 kretprobe_testmod_test3_result = 0; + +static void kprobe_multi_testmod_check(void *ctx, bool is_return) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + __u64 addr = bpf_get_func_ip(ctx); + + if (is_return) { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kretprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kretprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kretprobe_testmod_test3_result = 1; + } else { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kprobe_testmod_test3_result = 1; + } +} + +SEC("kprobe.multi") +int test_kprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, false); + return 0; +} + +SEC("kretprobe.multi") +int test_kretprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c new file mode 100644 index 000000000000..4fa4a9b01bde --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#include "linked_list.h" + +static __always_inline +int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 3; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 4; + } + + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + if (leave_in_map) + return 0; + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 6; + } + + bpf_spin_lock(lock); + f->data = 13; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 7; + f = container_of(n, struct foo, node); + if (f->data != 13) { + bpf_obj_drop(f); + return 8; + } + bpf_obj_drop(f); + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 9; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 10; + } + return 0; +} + + +static __always_inline +int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f[8], *pf; + int i; + + /* Loop following this check adds nodes 2-at-a-time in order to + * validate multiple release_on_unlock release logic + */ + if (ARRAY_SIZE(f) % 2) + return 10; + + for (i = 0; i < ARRAY_SIZE(f); i += 2) { + f[i] = bpf_obj_new(typeof(**f)); + if (!f[i]) + return 2; + f[i]->data = i; + + f[i + 1] = bpf_obj_new(typeof(**f)); + if (!f[i + 1]) { + bpf_obj_drop(f[i]); + return 9; + } + f[i + 1]->data = i + 1; + + bpf_spin_lock(lock); + bpf_list_push_front(head, &f[i]->node); + bpf_list_push_front(head, &f[i + 1]->node); + bpf_spin_unlock(lock); + } + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 3; + pf = container_of(n, struct foo, node); + if (pf->data != (ARRAY_SIZE(f) - i - 1)) { + bpf_obj_drop(pf); + return 4; + } + bpf_spin_lock(lock); + bpf_list_push_back(head, &pf->node); + bpf_spin_unlock(lock); + } + + if (leave_in_map) + return 0; + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + pf = container_of(n, struct foo, node); + if (pf->data != i) { + bpf_obj_drop(pf); + return 6; + } + bpf_obj_drop(pf); + } + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 7; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 8; + } + return 0; +} + +static __always_inline +int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct bar *ba[8], *b; + struct foo *f; + int i; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + for (i = 0; i < ARRAY_SIZE(ba); i++) { + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 3; + } + b->data = i; + bpf_spin_lock(&f->lock); + bpf_list_push_back(&f->head, &b->node); + bpf_spin_unlock(&f->lock); + } + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + + if (leave_in_map) + return 0; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 4; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 5; + } + + for (i = 0; i < ARRAY_SIZE(ba); i++) { + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (!n) { + bpf_obj_drop(f); + return 6; + } + b = container_of(n, struct bar, node); + if (b->data != i) { + bpf_obj_drop(f); + bpf_obj_drop(b); + return 7; + } + bpf_obj_drop(b); + } + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (n) { + bpf_obj_drop(f); + bpf_obj_drop(container_of(n, struct bar, node)); + return 8; + } + bpf_obj_drop(f); + return 0; +} + +static __always_inline +int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop(lock, head, false); + if (ret) + return ret; + return list_push_pop(lock, head, true); +} + +static __always_inline +int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop_multiple(lock, head, false); + if (ret) + return ret; + return list_push_pop_multiple(lock, head, true); +} + +static __always_inline +int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_in_list(lock, head, false); + if (ret) + return ret; + return list_in_list(lock, head, true); +} + +SEC("tc") +int map_list_push_pop(void *ctx) +{ + struct map_value *v; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop(void *ctx) +{ + struct map_value *v; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop(void *ctx) +{ + return test_list_push_pop(&glock, &ghead); +} + +SEC("tc") +int map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop_multiple(void *ctx) +{ + int ret; + + ret = list_push_pop_multiple(&glock, &ghead, false); + if (ret) + return ret; + return list_push_pop_multiple(&glock, &ghead, true); +} + +SEC("tc") +int map_list_in_list(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_in_list(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int global_list_in_list(void *ctx) +{ + return test_list_in_list(&glock, &ghead); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h new file mode 100644 index 000000000000..3fb2412552fc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef LINKED_LIST_H +#define LINKED_LIST_H + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +struct bar { + struct bpf_list_node node; + int data; +}; + +struct foo { + struct bpf_list_node node; + struct bpf_list_head head __contains(bar, node); + struct bpf_spin_lock lock; + int data; + struct bpf_list_node node2; +}; + +struct map_value { + struct bpf_spin_lock lock; + int data; + struct bpf_list_head head __contains(foo, node); +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +}; + +struct array_map array_map SEC(".maps"); +struct array_map inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) + +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_list_head ghead __contains(foo, node); +private(B) struct bpf_spin_lock glock2; + +#endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c new file mode 100644 index 000000000000..69cdc07cba13 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +#include "linked_list.h" + +#define INIT \ + struct map_value *v, *v2, *iv, *iv2; \ + struct foo *f, *f1, *f2; \ + struct bar *b; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \ + if (!map) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v) \ + return 0; \ + v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v2) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv) \ + return 0; \ + iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv2) \ + return 0; \ + f = bpf_obj_new(typeof(*f)); \ + if (!f) \ + return 0; \ + f1 = f; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + b = bpf_obj_new(typeof(*b)); \ + if (!b) { \ + bpf_obj_drop(f2); \ + bpf_obj_drop(f1); \ + return 0; \ + } + +#define CHECK(test, op, hexpr) \ + SEC("?tc") \ + int test##_missing_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + p(hexpr); \ + return 0; \ + } + +CHECK(kptr, pop_front, &f->head); +CHECK(kptr, pop_back, &f->head); + +CHECK(global, pop_front, &ghead); +CHECK(global, pop_back, &ghead); + +CHECK(map, pop_front, &v->head); +CHECK(map, pop_back, &v->head); + +CHECK(inner_map, pop_front, &iv->head); +CHECK(inner_map, pop_back, &iv->head); + +#undef CHECK + +#define CHECK(test, op, hexpr, nexpr) \ + SEC("?tc") \ + int test##_missing_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *, void *) = (void *)&bpf_list_##op; \ + p(hexpr, nexpr); \ + return 0; \ + } + +CHECK(kptr, push_front, &f->head, b); +CHECK(kptr, push_back, &f->head, b); + +CHECK(global, push_front, &ghead, f); +CHECK(global, push_back, &ghead, f); + +CHECK(map, push_front, &v->head, f); +CHECK(map, push_back, &v->head, f); + +CHECK(inner_map, push_front, &iv->head, f); +CHECK(inner_map, push_back, &iv->head, f); + +#undef CHECK + +#define CHECK(test, op, lexpr, hexpr) \ + SEC("?tc") \ + int test##_incorrect_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + bpf_spin_lock(lexpr); \ + p(hexpr); \ + return 0; \ + } + +#define CHECK_OP(op) \ + CHECK(kptr_kptr, op, &f1->lock, &f2->head); \ + CHECK(kptr_global, op, &f1->lock, &ghead); \ + CHECK(kptr_map, op, &f1->lock, &v->head); \ + CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \ + \ + CHECK(global_global, op, &glock2, &ghead); \ + CHECK(global_kptr, op, &glock, &f1->head); \ + CHECK(global_map, op, &glock, &v->head); \ + CHECK(global_inner_map, op, &glock, &iv->head); \ + \ + CHECK(map_map, op, &v->lock, &v2->head); \ + CHECK(map_kptr, op, &v->lock, &f2->head); \ + CHECK(map_global, op, &v->lock, &ghead); \ + CHECK(map_inner_map, op, &v->lock, &iv->head); \ + \ + CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \ + CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \ + CHECK(inner_map_global, op, &iv->lock, &ghead); \ + CHECK(inner_map_map, op, &iv->lock, &v->head); + +CHECK_OP(pop_front); +CHECK_OP(pop_back); + +#undef CHECK +#undef CHECK_OP + +#define CHECK(test, op, lexpr, hexpr, nexpr) \ + SEC("?tc") \ + int test##_incorrect_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *, void*) = (void *)&bpf_list_##op; \ + bpf_spin_lock(lexpr); \ + p(hexpr, nexpr); \ + return 0; \ + } + +#define CHECK_OP(op) \ + CHECK(kptr_kptr, op, &f1->lock, &f2->head, b); \ + CHECK(kptr_global, op, &f1->lock, &ghead, f); \ + CHECK(kptr_map, op, &f1->lock, &v->head, f); \ + CHECK(kptr_inner_map, op, &f1->lock, &iv->head, f); \ + \ + CHECK(global_global, op, &glock2, &ghead, f); \ + CHECK(global_kptr, op, &glock, &f1->head, b); \ + CHECK(global_map, op, &glock, &v->head, f); \ + CHECK(global_inner_map, op, &glock, &iv->head, f); \ + \ + CHECK(map_map, op, &v->lock, &v2->head, f); \ + CHECK(map_kptr, op, &v->lock, &f2->head, b); \ + CHECK(map_global, op, &v->lock, &ghead, f); \ + CHECK(map_inner_map, op, &v->lock, &iv->head, f); \ + \ + CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, f); \ + CHECK(inner_map_kptr, op, &iv->lock, &f2->head, b); \ + CHECK(inner_map_global, op, &iv->lock, &ghead, f); \ + CHECK(inner_map_map, op, &iv->lock, &v->head, f); + +CHECK_OP(push_front); +CHECK_OP(push_back); + +#undef CHECK +#undef CHECK_OP +#undef INIT + +SEC("?kprobe/xyz") +int map_compat_kprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?kretprobe/xyz") +int map_compat_kretprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tracepoint/xyz") +int map_compat_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?perf_event") +int map_compat_perf(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp/xyz") +int map_compat_raw_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp.w/xyz") +int map_compat_raw_tp_w(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tc") +int obj_type_id_oor(void *ctx) +{ + bpf_obj_new_impl(~0UL, NULL); + return 0; +} + +SEC("?tc") +int obj_new_no_composite(void *ctx) +{ + bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42); + return 0; +} + +SEC("?tc") +int obj_new_no_struct(void *ctx) +{ + + bpf_obj_new(union { int data; unsigned udata; }); + return 0; +} + +SEC("?tc") +int obj_drop_non_zero_off(void *ctx) +{ + void *f; + + f = bpf_obj_new(struct foo); + if (!f) + return 0; + bpf_obj_drop(f+1); + return 0; +} + +SEC("?tc") +int new_null_ret(void *ctx) +{ + return bpf_obj_new(struct foo)->data; +} + +SEC("?tc") +int obj_new_acq(void *ctx) +{ + bpf_obj_new(struct foo); + return 0; +} + +SEC("?tc") +int use_after_drop(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_obj_drop(f); + return f->data; +} + +SEC("?tc") +int ptr_walk_scalar(void *ctx) +{ + struct test1 { + struct test2 { + struct test2 *next; + } *ptr; + } *p; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_this_cpu_ptr(p->ptr); + return 0; +} + +SEC("?tc") +int direct_read_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->lock; +} + +SEC("?tc") +int direct_write_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->lock = 0; + return 0; +} + +SEC("?tc") +int direct_read_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->head; +} + +SEC("?tc") +int direct_write_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->head = 0; + return 0; +} + +SEC("?tc") +int direct_read_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->node; +} + +SEC("?tc") +int direct_write_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->node = 0; + return 0; +} + +static __always_inline +int use_after_unlock(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + f->data = 42; + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return f->data; +} + +SEC("?tc") +int use_after_unlock_push_front(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_front); +} + +SEC("?tc") +int use_after_unlock_push_back(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_back); +} + +static __always_inline +int list_double_add(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + op(&ghead, &f->node); + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int double_push_front(void *ctx) +{ + return list_double_add((void *)bpf_list_push_front); +} + +SEC("?tc") +int double_push_back(void *ctx) +{ + return list_double_add((void *)bpf_list_push_back); +} + +SEC("?tc") +int no_node_value_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(struct { int data; }); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, p); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_value_type(void *ctx) +{ + struct bar *b; + + b = bpf_obj_new(typeof(*b)); + if (!b) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &b->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_var_off(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + ctx->protocol); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off1(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + 1); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off2(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &f->node2); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int no_head_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(typeof(struct { int data; })); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(p, NULL); + bpf_spin_lock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off1(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off2(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off1(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 0; + } + + bpf_spin_lock(&f->lock); + bpf_list_push_front((void *)&f->head + 1, &b->node); + bpf_spin_unlock(&f->lock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off2(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + 1, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +static __always_inline +int pop_ptr_off(void *(*op)(void *head)) +{ + struct { + struct bpf_list_head head __contains(foo, node2); + struct bpf_spin_lock lock; + } *p; + struct bpf_list_node *n; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_spin_lock(&p->lock); + n = op(&p->head); + bpf_spin_unlock(&p->lock); + + bpf_this_cpu_ptr(n); + return 0; +} + +SEC("?tc") +int pop_front_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_front); +} + +SEC("?tc") +int pop_back_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_back); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index d8d8af623bc2..dc93887ed34c 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -6,9 +6,10 @@ #include "bpf_misc.h" #include "vmlinux.h" +#include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include <errno.h> +#include <errno.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -164,8 +165,8 @@ int copy_test = 0; SEC("fentry.s/" SYS_PREFIX "sys_setdomainname") int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) { - void *ptr = (void *)PT_REGS_PARM1(regs); - int len = PT_REGS_PARM2(regs); + void *ptr = (void *)PT_REGS_PARM1_SYSCALL(regs); + int len = PT_REGS_PARM2_SYSCALL(regs); int buf = 0; long ret; diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c index 4f2d60b87b75..02c11d16b692 100644 --- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -7,6 +7,10 @@ char _license[] SEC("license") = "GPL"; +extern bool CONFIG_SECURITY_SELINUX __kconfig __weak; +extern bool CONFIG_SECURITY_SMACK __kconfig __weak; +extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak; + #ifndef AF_PACKET #define AF_PACKET 17 #endif @@ -140,6 +144,10 @@ SEC("lsm_cgroup/sk_alloc_security") int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) { called_socket_alloc++; + /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */ + if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR) + return 1; + if (family == AF_UNIX) return 0; /* EPERM */ diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index eb8217803493..228ec45365a8 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -62,21 +62,23 @@ extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) + static void test_kptr_unref(struct map_value *v) { struct prog_test_ref_kfunc *p; p = v->unref_ptr; /* store untrusted_ptr_or_null_ */ - v->unref_ptr = p; + WRITE_ONCE(v->unref_ptr, p); if (!p) return; if (p->a + p->b > 100) return; /* store untrusted_ptr_ */ - v->unref_ptr = p; + WRITE_ONCE(v->unref_ptr, p); /* store NULL */ - v->unref_ptr = NULL; + WRITE_ONCE(v->unref_ptr, NULL); } static void test_kptr_ref(struct map_value *v) @@ -85,7 +87,7 @@ static void test_kptr_ref(struct map_value *v) p = v->ref_ptr; /* store ptr_or_null_ */ - v->unref_ptr = p; + WRITE_ONCE(v->unref_ptr, p); if (!p) return; if (p->a + p->b > 100) @@ -99,7 +101,7 @@ static void test_kptr_ref(struct map_value *v) return; } /* store ptr_ */ - v->unref_ptr = p; + WRITE_ONCE(v->unref_ptr, p); bpf_kfunc_call_test_release(p); p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 05e209b1b12a..760e41e1a632 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -3,6 +3,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> +#include "bpf_misc.h" struct map_value { char buf[8]; @@ -23,6 +24,7 @@ extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; SEC("?tc") +__failure __msg("kptr access size must be BPF_DW") int size_not_bpf_dw(struct __sk_buff *ctx) { struct map_value *v; @@ -37,6 +39,7 @@ int size_not_bpf_dw(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access cannot have variable offset") int non_const_var_off(struct __sk_buff *ctx) { struct map_value *v; @@ -55,6 +58,7 @@ int non_const_var_off(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 doesn't have constant offset. kptr has to be") int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) { struct map_value *v; @@ -73,6 +77,7 @@ int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access misaligned expected=8 off=7") int misaligned_access_write(struct __sk_buff *ctx) { struct map_value *v; @@ -88,6 +93,7 @@ int misaligned_access_write(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access misaligned expected=8 off=1") int misaligned_access_read(struct __sk_buff *ctx) { struct map_value *v; @@ -101,6 +107,7 @@ int misaligned_access_read(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)") int reject_var_off_store(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -124,6 +131,7 @@ int reject_var_off_store(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc") int reject_bad_type_match(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -144,6 +152,7 @@ int reject_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") int marked_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -158,6 +167,7 @@ int marked_as_untrusted_or_null(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4") int correct_btf_id_check_size(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -175,6 +185,7 @@ int correct_btf_id_check_size(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_") int inherit_untrusted_on_walk(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -194,6 +205,7 @@ int inherit_untrusted_on_walk(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("off=8 kptr isn't referenced kptr") int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) { struct map_value *v; @@ -208,6 +220,7 @@ int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 expected pointer to map value") int reject_kptr_get_no_map_val(struct __sk_buff *ctx) { bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0); @@ -215,6 +228,7 @@ int reject_kptr_get_no_map_val(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 expected pointer to map value") int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) { bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0); @@ -222,6 +236,7 @@ int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 no referenced kptr at map value offset=0") int reject_kptr_get_no_kptr(struct __sk_buff *ctx) { struct map_value *v; @@ -236,6 +251,7 @@ int reject_kptr_get_no_kptr(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 no referenced kptr at map value offset=8") int reject_kptr_get_on_unref(struct __sk_buff *ctx) { struct map_value *v; @@ -250,6 +266,7 @@ int reject_kptr_get_on_unref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kernel function bpf_kfunc_call_test_kptr_get args#0") int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) { struct map_value *v; @@ -264,6 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -278,6 +296,7 @@ int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("store to referenced kptr disallowed") int reject_untrusted_store_to_ref(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -297,6 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R2 type=untrusted_ptr_ expected=ptr_") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -315,6 +335,8 @@ int reject_untrusted_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure +__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member") int reject_bad_type_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *ref_ptr; @@ -333,6 +355,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc") int reject_member_of_ref_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *ref_ptr; @@ -351,6 +374,7 @@ int reject_member_of_ref_xchg(struct __sk_buff *ctx) } SEC("?syscall") +__failure __msg("kptr cannot be accessed indirectly by helper") int reject_indirect_helper_access(struct __sk_buff *ctx) { struct map_value *v; @@ -371,6 +395,7 @@ int write_func(int *p) } SEC("?tc") +__failure __msg("kptr cannot be accessed indirectly by helper") int reject_indirect_global_func_access(struct __sk_buff *ctx) { struct map_value *v; @@ -384,6 +409,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("Unreleased reference id=5 alloc_insn=") int kptr_xchg_ref_state(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -402,6 +428,7 @@ int kptr_xchg_ref_state(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("Unreleased reference id=3 alloc_insn=") int kptr_get_ref_state(struct __sk_buff *ctx) { struct map_value *v; diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h new file mode 100644 index 000000000000..83d33931136e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#ifndef _NESTED_TRUST_COMMON_H +#define _NESTED_TRUST_COMMON_H + +#include <stdbool.h> + +bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym; +bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; + +#endif /* _NESTED_TRUST_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c new file mode 100644 index 000000000000..14aff7676436 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#include "nested_trust_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +SEC("tp_btf/task_newtask") +__failure __msg("R2 must be referenced or trusted") +int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags) +{ + bpf_cpumask_test_cpu(0, task->user_cpus_ptr); + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("R1 must have zero offset when passed to release func or trusted arg to kfunc") +int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags) +{ + bpf_cpumask_first_zero(&task->cpus_mask); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c new file mode 100644 index 000000000000..886ade4aa99d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#include "nested_trust_common.h" + +char _license[] SEC("license") = "GPL"; + +SEC("tp_btf/task_newtask") +__success +int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags) +{ + bpf_cpumask_test_cpu(0, task->cpus_ptr); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 92331053dba3..875513866032 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -156,10 +156,10 @@ probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) { len = len < max ? len : max; if (len > 1) { - if (bpf_probe_read(dst, len, src)) + if (bpf_probe_read_kernel(dst, len, src)) return 0; } else if (len == 1) { - if (bpf_probe_read(dst, 1, src)) + if (bpf_probe_read_kernel(dst, 1, src)) return 0; } return len; @@ -216,7 +216,8 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, #endif for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { filepart_length = - bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name)); + bpf_probe_read_kernel_str(payload, MAX_PATH, + BPF_CORE_READ(cgroup_node, name)); if (!cgroup_node) return payload; if (cgroup_node == cgroup_root_node) @@ -303,7 +304,8 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, cgroup_data->cgroup_full_length = 0; size_t cgroup_root_length = - bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); + bpf_probe_read_kernel_str(payload, MAX_PATH, + BPF_CORE_READ(root_kernfs, name)); barrier_var(cgroup_root_length); if (cgroup_root_length <= MAX_PATH) { barrier_var(cgroup_root_length); @@ -312,7 +314,8 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, } size_t cgroup_proc_length = - bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); + bpf_probe_read_kernel_str(payload, MAX_PATH, + BPF_CORE_READ(proc_kernfs, name)); barrier_var(cgroup_proc_length); if (cgroup_proc_length <= MAX_PATH) { barrier_var(cgroup_proc_length); @@ -395,7 +398,8 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) arr_struct = bpf_map_lookup_elem(&data_heap, &zero); if (arr_struct == NULL) return 0; - bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data); + bpf_probe_read_kernel(&arr_struct->array[0], + sizeof(arr_struct->array[0]), kill_data); } else { int index = get_var_spid_index(arr_struct, spid); @@ -409,8 +413,9 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) #endif for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) if (arr_struct->array[i].meta.pid == 0) { - bpf_probe_read(&arr_struct->array[i], - sizeof(arr_struct->array[i]), kill_data); + bpf_probe_read_kernel(&arr_struct->array[i], + sizeof(arr_struct->array[i]), + kill_data); bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); @@ -427,17 +432,17 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) if (delta_sec < STALE_INFO) { kill_data->kill_count++; kill_data->last_kill_time = bpf_ktime_get_ns(); - bpf_probe_read(&arr_struct->array[index], - sizeof(arr_struct->array[index]), - kill_data); + bpf_probe_read_kernel(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); } else { struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); if (kill_data == NULL) return 0; - bpf_probe_read(&arr_struct->array[index], - sizeof(arr_struct->array[index]), - kill_data); + bpf_probe_read_kernel(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); } } bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); @@ -487,8 +492,9 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) #pragma unroll #endif for (int i = 0; i < MAX_PATH_DEPTH; i++) { - filepart_length = bpf_probe_read_str(payload, MAX_PATH, - BPF_CORE_READ(filp_dentry, d_name.name)); + filepart_length = + bpf_probe_read_kernel_str(payload, MAX_PATH, + BPF_CORE_READ(filp_dentry, d_name.name)); barrier_var(filepart_length); if (filepart_length > MAX_PATH) break; @@ -572,7 +578,8 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, sysctl_data->sysctl_val_length = 0; sysctl_data->sysctl_path_length = 0; - size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf); + size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, + CTL_MAXNAME, buf); barrier_var(sysctl_val_length); if (sysctl_val_length <= CTL_MAXNAME) { barrier_var(sysctl_val_length); @@ -580,8 +587,10 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, payload += sysctl_val_length; } - size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH, - BPF_CORE_READ(filp, f_path.dentry, d_name.name)); + size_t sysctl_path_length = + bpf_probe_read_kernel_str(payload, MAX_PATH, + BPF_CORE_READ(filp, f_path.dentry, + d_name.name)); barrier_var(sysctl_path_length); if (sysctl_path_length <= MAX_PATH) { barrier_var(sysctl_path_length); @@ -638,7 +647,8 @@ int raw_tracepoint__sched_process_exit(void* ctx) struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { - bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data); + bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), + past_kill_data); void* payload = kill_data->payload; size_t offset = kill_data->payload_length; if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) @@ -656,8 +666,10 @@ int raw_tracepoint__sched_process_exit(void* ctx) payload += comm_length; } - size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN, - BPF_CORE_READ(proc_kernfs, name)); + size_t cgroup_proc_length = + bpf_probe_read_kernel_str(payload, + KILL_TARGET_LEN, + BPF_CORE_READ(proc_kernfs, name)); barrier_var(cgroup_proc_length); if (cgroup_proc_length <= KILL_TARGET_LEN) { barrier_var(cgroup_proc_length); @@ -718,7 +730,8 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); const char* filename = BPF_CORE_READ(bprm, filename); - size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename); + size_t bin_path_length = + bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); barrier_var(bin_path_length); if (bin_path_length <= MAX_FILENAME_LEN) { barrier_var(bin_path_length); @@ -826,7 +839,7 @@ out: SEC("kprobe/vfs_link") int BPF_KPROBE(kprobe__vfs_link, - struct dentry* old_dentry, struct user_namespace *mnt_userns, + struct dentry* old_dentry, struct mnt_idmap *idmap, struct inode* dir, struct dentry* new_dentry, struct inode** delegated_inode) { @@ -922,7 +935,8 @@ int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, filemod_data->payload); payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); - size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname); + size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, + oldname); barrier_var(len); if (len <= MAX_FILEPATH_LENGTH) { barrier_var(len); diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c new file mode 100644 index 000000000000..e5db1a4287e5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +struct node_data { + long key; + long data; + struct bpf_rb_node node; +}; + +long less_callback_ran = -1; +long removed_key = -1; +long first_data[2] = {-1, -1}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_rb_root groot __contains(node_data, node); + +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + less_callback_ran = 1; + + return node_a->key < node_b->key; +} + +static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock) +{ + struct node_data *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + n->key = 5; + + m = bpf_obj_new(typeof(*m)); + if (!m) { + bpf_obj_drop(n); + return 2; + } + m->key = 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_rbtree_add(&groot, &m->node, less); + bpf_spin_unlock(&glock); + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 3; + n->key = 3; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_spin_unlock(&glock); + return 0; +} + +SEC("tc") +long rbtree_add_nodes(void *ctx) +{ + return __add_three(&groot, &glock); +} + +SEC("tc") +long rbtree_add_and_remove(void *ctx) +{ + struct bpf_rb_node *res = NULL; + struct node_data *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + goto err_out; + n->key = 5; + + m = bpf_obj_new(typeof(*m)); + if (!m) + goto err_out; + m->key = 3; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_rbtree_add(&groot, &m->node, less); + res = bpf_rbtree_remove(&groot, &n->node); + bpf_spin_unlock(&glock); + + n = container_of(res, struct node_data, node); + removed_key = n->key; + + bpf_obj_drop(n); + + return 0; +err_out: + if (n) + bpf_obj_drop(n); + if (m) + bpf_obj_drop(m); + return 1; +} + +SEC("tc") +long rbtree_first_and_remove(void *ctx) +{ + struct bpf_rb_node *res = NULL; + struct node_data *n, *m, *o; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + n->key = 3; + n->data = 4; + + m = bpf_obj_new(typeof(*m)); + if (!m) + goto err_out; + m->key = 5; + m->data = 6; + + o = bpf_obj_new(typeof(*o)); + if (!o) + goto err_out; + o->key = 1; + o->data = 2; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_rbtree_add(&groot, &m->node, less); + bpf_rbtree_add(&groot, &o->node, less); + + res = bpf_rbtree_first(&groot); + if (!res) { + bpf_spin_unlock(&glock); + return 2; + } + + o = container_of(res, struct node_data, node); + first_data[0] = o->data; + + res = bpf_rbtree_remove(&groot, &o->node); + bpf_spin_unlock(&glock); + + o = container_of(res, struct node_data, node); + removed_key = o->key; + + bpf_obj_drop(o); + + bpf_spin_lock(&glock); + res = bpf_rbtree_first(&groot); + if (!res) { + bpf_spin_unlock(&glock); + return 3; + } + + o = container_of(res, struct node_data, node); + first_data[1] = o->data; + bpf_spin_unlock(&glock); + + return 0; +err_out: + if (n) + bpf_obj_drop(n); + if (m) + bpf_obj_drop(m); + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c new file mode 100644 index 000000000000..60079b202c07 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +struct node_data { + int key; + int data; + struct bpf_rb_node node; +}; + +struct node_data2 { + int key; + struct bpf_rb_node node; + int data; +}; + +static bool less2(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data2 *node_a; + struct node_data2 *node_b; + + node_a = container_of(a, struct node_data2, node); + node_b = container_of(b, struct node_data2, node); + + return node_a->key < node_b->key; +} + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_rb_root groot __contains(node_data, node); + +SEC("tc") +long rbtree_api_add__add_wrong_type(void *ctx) +{ + struct node_data2 *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less2); + bpf_spin_unlock(&glock); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c new file mode 100644 index 000000000000..340f97da1084 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +/* BTF load should fail as bpf_rb_root __contains this type and points to + * 'node', but 'node' is not a bpf_rb_node + */ +struct node_data { + int key; + int data; + struct bpf_list_node node; +}; + +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + + return node_a->key < node_b->key; +} + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_rb_root groot __contains(node_data, node); + +SEC("tc") +long rbtree_api_add__wrong_node_type(void *ctx) +{ + struct node_data *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_first(&groot); + bpf_spin_unlock(&glock); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c new file mode 100644 index 000000000000..bf3cba115897 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" +#include "bpf_misc.h" + +struct node_data { + long key; + long data; + struct bpf_rb_node node; +}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_rb_root groot __contains(node_data, node); +private(A) struct bpf_rb_root groot2 __contains(node_data, node); + +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + + return node_a->key < node_b->key; +} + +SEC("?tc") +__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root") +long rbtree_api_nolock_add(void *ctx) +{ + struct node_data *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_rbtree_add(&groot, &n->node, less); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root") +long rbtree_api_nolock_remove(void *ctx) +{ + struct node_data *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_spin_unlock(&glock); + + bpf_rbtree_remove(&groot, &n->node); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root") +long rbtree_api_nolock_first(void *ctx) +{ + bpf_rbtree_first(&groot); + return 0; +} + +SEC("?tc") +__failure __msg("rbtree_remove node input must be non-owning ref") +long rbtree_api_remove_unadded_node(void *ctx) +{ + struct node_data *n, *m; + struct bpf_rb_node *res; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + m = bpf_obj_new(typeof(*m)); + if (!m) { + bpf_obj_drop(n); + return 1; + } + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + + /* This remove should pass verifier */ + res = bpf_rbtree_remove(&groot, &n->node); + n = container_of(res, struct node_data, node); + + /* This remove shouldn't, m isn't in an rbtree */ + res = bpf_rbtree_remove(&groot, &m->node); + m = container_of(res, struct node_data, node); + bpf_spin_unlock(&glock); + + if (n) + bpf_obj_drop(n); + if (m) + bpf_obj_drop(m); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference id=2 alloc_insn=11") +long rbtree_api_remove_no_drop(void *ctx) +{ + struct bpf_rb_node *res; + struct node_data *n; + + bpf_spin_lock(&glock); + res = bpf_rbtree_first(&groot); + if (!res) + goto unlock_err; + + res = bpf_rbtree_remove(&groot, res); + + n = container_of(res, struct node_data, node); + bpf_spin_unlock(&glock); + + /* bpf_obj_drop(n) is missing here */ + return 0; + +unlock_err: + bpf_spin_unlock(&glock); + return 1; +} + +SEC("?tc") +__failure __msg("arg#1 expected pointer to allocated object") +long rbtree_api_add_to_multiple_trees(void *ctx) +{ + struct node_data *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + + /* This add should fail since n already in groot's tree */ + bpf_rbtree_add(&groot2, &n->node, less); + bpf_spin_unlock(&glock); + return 0; +} + +SEC("?tc") +__failure __msg("rbtree_remove node input must be non-owning ref") +long rbtree_api_add_release_unlock_escape(void *ctx) +{ + struct node_data *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_spin_unlock(&glock); + + bpf_spin_lock(&glock); + /* After add() in previous critical section, n should be + * release_on_unlock and released after previous spin_unlock, + * so should not be possible to use it here + */ + bpf_rbtree_remove(&groot, &n->node); + bpf_spin_unlock(&glock); + return 0; +} + +SEC("?tc") +__failure __msg("rbtree_remove node input must be non-owning ref") +long rbtree_api_release_aliasing(void *ctx) +{ + struct node_data *n, *m, *o; + struct bpf_rb_node *res; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_spin_unlock(&glock); + + bpf_spin_lock(&glock); + + /* m and o point to the same node, + * but verifier doesn't know this + */ + res = bpf_rbtree_first(&groot); + if (!res) + return 1; + o = container_of(res, struct node_data, node); + + res = bpf_rbtree_first(&groot); + if (!res) + return 1; + m = container_of(res, struct node_data, node); + + bpf_rbtree_remove(&groot, &m->node); + /* This second remove shouldn't be possible. Retval of previous + * remove returns owning reference to m, which is the same + * node o's non-owning ref is pointing at + * + * In order to preserve property + * * owning ref must not be in rbtree + * * non-owning ref must be in rbtree + * + * o's ref must be invalidated after previous remove. Otherwise + * we'd have non-owning ref to node that isn't in rbtree, and + * verifier wouldn't be able to use type system to prevent remove + * of ref that already isn't in any tree. Would have to do runtime + * checks in that case. + */ + bpf_rbtree_remove(&groot, &o->node); + + bpf_spin_unlock(&glock); + return 0; +} + +SEC("?tc") +__failure __msg("rbtree_remove node input must be non-owning ref") +long rbtree_api_first_release_unlock_escape(void *ctx) +{ + struct bpf_rb_node *res; + struct node_data *n; + + bpf_spin_lock(&glock); + res = bpf_rbtree_first(&groot); + if (res) + n = container_of(res, struct node_data, node); + bpf_spin_unlock(&glock); + + bpf_spin_lock(&glock); + /* After first() in previous critical section, n should be + * release_on_unlock and released after previous spin_unlock, + * so should not be possible to use it here + */ + bpf_rbtree_remove(&groot, &n->node); + bpf_spin_unlock(&glock); + return 0; +} + +static bool less__bad_fn_call_add(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + bpf_rbtree_add(&groot, &node_a->node, less); + + return node_a->key < node_b->key; +} + +static bool less__bad_fn_call_remove(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + bpf_rbtree_remove(&groot, &node_a->node); + + return node_a->key < node_b->key; +} + +static bool less__bad_fn_call_first_unlock_after(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + bpf_rbtree_first(&groot); + bpf_spin_unlock(&glock); + + return node_a->key < node_b->key; +} + +static __always_inline +long add_with_cb(bool (cb)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) +{ + struct node_data *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, cb); + bpf_spin_unlock(&glock); + return 0; +} + +SEC("?tc") +__failure __msg("arg#1 expected pointer to allocated object") +long rbtree_api_add_bad_cb_bad_fn_call_add(void *ctx) +{ + return add_with_cb(less__bad_fn_call_add); +} + +SEC("?tc") +__failure __msg("rbtree_remove not allowed in rbtree cb") +long rbtree_api_add_bad_cb_bad_fn_call_remove(void *ctx) +{ + return add_with_cb(less__bad_fn_call_remove); +} + +SEC("?tc") +__failure __msg("can't spin_{lock,unlock} in rbtree cb") +long rbtree_api_add_bad_cb_bad_fn_call_first_unlock_after(void *ctx) +{ + return add_with_cb(less__bad_fn_call_first_unlock_after); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c new file mode 100644 index 000000000000..5cecbdbbb16e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 user_data, key_serial, target_pid; +__u64 flags, task_storage_val, cgroup_id; + +struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +void bpf_key_put(struct bpf_key *key) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; +struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int get_cgroup_id(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* simulate bpf_get_current_cgroup_id() helper */ + bpf_rcu_read_lock(); + cgroups = task->cgroups; + if (!cgroups) + goto unlock; + cgroup_id = cgroups->dfl_cgrp->kn->id; +unlock: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_succ(void *ctx) +{ + struct task_struct *task, *real_parent; + long init_val = 2; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + /* region including helper using rcu ptr real_parent */ + real_parent = task->real_parent; + if (!real_parent) + goto out; + ptr = bpf_task_storage_get(&map_a, real_parent, &init_val, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + goto out; + ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (!ptr) + goto out; + task_storage_val = *ptr; +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int no_lock(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* no bpf_rcu_read_lock(), old code still works */ + task = bpf_get_current_task_btf(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int two_regions(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* two regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_1(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_2(void *ctx) +{ + struct task_struct *task, *real_parent; + + bpf_rcu_read_lock(); + task = bpf_get_current_task_btf(); + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int task_acquire(void *ctx) +{ + struct task_struct *task, *real_parent, *gparent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + + /* rcu_ptr->rcu_field */ + gparent = real_parent->real_parent; + if (!gparent) + goto out; + + /* acquire a reference which can be used outside rcu read lock region */ + gparent = bpf_task_acquire_not_zero(gparent); + if (!gparent) + /* Until we resolve the issues with using task->rcu_users, we + * expect bpf_task_acquire_not_zero() to return a NULL task. + * See the comment at the definition of + * bpf_task_acquire_not_zero() for more details. + */ + goto out; + + (void)bpf_task_storage_get(&map_a, gparent, 0, 0); + bpf_task_release(gparent); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_lock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_lock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_unlock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_unlock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_rcu_mismatch(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + /* non-sleepable: missing bpf_rcu_read_unlock() in one path */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (real_parent) + bpf_rcu_read_unlock(); +out: + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int inproper_sleepable_helper(void *ctx) +{ + struct task_struct *task, *real_parent; + struct pt_regs *regs; + __u32 value = 0; + void *ptr; + + task = bpf_get_current_task_btf(); + /* sleepable helper in rcu read lock region */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + regs = (struct pt_regs *)bpf_task_pt_regs(real_parent); + if (!regs) + goto out; + + ptr = (void *)PT_REGS_IP(regs); + (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0); + user_data = value; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?lsm.s/bpf") +int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_key *bkey; + + /* sleepable kfunc in rcu read lock region */ + bpf_rcu_read_lock(); + bkey = bpf_lookup_user_key(key_serial, flags); + bpf_rcu_read_unlock(); + if (!bkey) + return -1; + bpf_key_put(bkey); + + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int nested_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* nested rcu read lock regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_untrusted_non_rcuptr(void *ctx) +{ + struct task_struct *task, *group_leader; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + /* the pointer group_leader marked as untrusted */ + group_leader = task->real_parent->group_leader; + (void)bpf_task_storage_get(&map_a, group_leader, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_untrusted_rcuptr(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + bpf_rcu_read_unlock(); + /* helper use of rcu ptr outside the rcu read lock region */ + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int cross_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* rcu ptr define/use in different regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 9523333b8905..7a438600ae98 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -22,6 +22,7 @@ int nr_active; int nr_connect; int nr_binddev; int nr_socket_post_create; +int nr_fin_wait1; struct sockopt_test { int opt; @@ -386,6 +387,13 @@ int skops_sockopt(struct bpf_sock_ops *skops) nr_passive += !(bpf_test_sockopt(skops, sk) || test_tcp_maxseg(skops, sk) || test_tcp_saved_syn(skops, sk)); + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] == BPF_TCP_CLOSE_WAIT) + nr_fin_wait1 += !bpf_test_sockopt(skops, sk); break; } diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 753718595c26..e562be6356f3 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -135,7 +135,7 @@ struct strobe_value_loc { * tpidr_el0 for aarch64). * TLS_IMM_EXEC: absolute address of GOT entry containing offset * from thread pointer; - * TLS_GENERAL_DYN: absolute addres of double GOT entry + * TLS_GENERAL_DYN: absolute address of double GOT entry * containing tls_index_t struct; */ int64_t offset; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h new file mode 100644 index 000000000000..c0ffd171743e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _TASK_KFUNC_COMMON_H +#define _TASK_KFUNC_COMMON_H + +#include <errno.h> +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct __tasks_kfunc_map_value { + struct task_struct __kptr_ref * task; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __tasks_kfunc_map_value); + __uint(max_entries, 1); +} __tasks_kfunc_map SEC(".maps"); + +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym; + +static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p) +{ + s32 pid; + long status; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); +} + +static inline int tasks_kfunc_map_insert(struct task_struct *p) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return status; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) { + bpf_map_delete_elem(&__tasks_kfunc_map, &pid); + return -ENOENT; + } + + acquired = bpf_task_acquire(p); + old = bpf_kptr_xchg(&v->task, acquired); + if (old) { + bpf_task_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _TASK_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c new file mode 100644 index 000000000000..f19d54eda4f1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task) +{ + int status; + + status = tasks_kfunc_map_insert(task); + if (status) + return NULL; + + return tasks_kfunc_map_value_lookup(task); +} + +SEC("tp_btf/task_newtask") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_acquire() on an untrusted pointer. */ + acquired = bpf_task_acquire(v->task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 pointer type STRUCT task_struct must point") +int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags; + + /* Can't invoke bpf_task_acquire() on a random frame pointer. */ + acquired = bpf_task_acquire((struct task_struct *)&stack_task); + bpf_task_release(acquired); + + return 0; +} + +SEC("kretprobe/free_task") +__failure __msg("reg type unsupported for arg#0 function") +int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + /* Can't release a bpf_task_acquire()'d task without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("R1 must be referenced or trusted") +int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */ + acquired = bpf_task_acquire(task->group_leader); + bpf_task_release(acquired); + + return 0; +} + + +SEC("tp_btf/task_newtask") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a NULL pointer. */ + acquired = bpf_task_acquire(NULL); + if (!acquired) + return 0; + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("Unreleased reference") +int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + + /* Acquired task is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 expected pointer to map value") +int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */ + kptr = bpf_task_kptr_get(&task); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 expected pointer to map value") +int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr, *acquired; + + acquired = bpf_task_acquire(task); + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */ + kptr = bpf_task_kptr_get(&acquired); + bpf_task_release(acquired); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 expected pointer to map value") +int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a NULL pointer. */ + kptr = bpf_task_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("Unreleased reference") +int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("Unreleased reference") +int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_task_kptr_get(&v->task); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket") +int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_release() on an untrusted pointer. */ + bpf_task_release(v->task); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 pointer type STRUCT task_struct must point") +int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired = (struct task_struct *)&clone_flags; + + /* Cannot release random frame pointer. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket") +int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid); + if (status) + return 0; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) + return -ENOENT; + + acquired = bpf_task_acquire(task); + + old = bpf_kptr_xchg(&v->task, acquired); + + /* old cannot be passed to bpf_task_release() without a NULL check. */ + bpf_task_release(old); + bpf_task_release(old); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("release kernel function bpf_task_release expects") +int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags) +{ + /* Cannot release trusted task pointer which was not acquired. */ + bpf_task_release(task); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket") +int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(task->pid); + + /* Releasing bpf_task_from_pid() lookup without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("lsm/task_free") +__failure __msg("reg type unsupported for arg#0 function") +int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) +{ + struct task_struct *acquired; + + /* the argument of lsm task_free hook is untrusted. */ + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c new file mode 100644 index 000000000000..9f359cfd29e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + return pid == cur_pid; +} + +static int test_acquire_release(struct task_struct *task) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(bpf_get_current_task_btf()); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_task_kptr_get(&v->task); + if (kptr) { + /* Until we resolve the issues with using task->rcu_users, we + * expect bpf_task_kptr_get() to return a NULL task. See the + * comment at the definition of bpf_task_acquire_not_zero() for + * more details. + */ + bpf_task_release(kptr); + err = 3; + return 0; + } + + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + current = bpf_get_current_task_btf(); + acquired = bpf_task_acquire(current); + bpf_task_release(acquired); + + return 0; +} + +static void lookup_compare_pid(const struct task_struct *p) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(p->pid); + if (!acquired) { + err = 1; + return; + } + + if (acquired->pid != p->pid) + err = 2; + bpf_task_release(acquired); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + if (!is_test_kfunc_task()) + return 0; + + lookup_compare_pid(task); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + lookup_compare_pid(bpf_get_current_task_btf()); + return 0; +} + +static int is_pid_lookup_valid(s32 pid) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(pid); + if (acquired) { + bpf_task_release(acquired); + return 1; + } + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + if (!is_test_kfunc_task()) + return 0; + + if (is_pid_lookup_valid(-1)) { + err = 1; + return 0; + } + + if (is_pid_lookup_valid(0xcafef00d)) { + err = 2; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c index 81758c0aef99..41d88ed222ff 100644 --- a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c +++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c @@ -14,6 +14,7 @@ struct { __type(value, __u64); } task_storage SEC(".maps"); +int run_count = 0; int valid_ptr_count = 0; int null_ptr_count = 0; @@ -28,5 +29,7 @@ int BPF_PROG(trace_exit_creds, struct task_struct *task) __sync_fetch_and_add(&valid_ptr_count, 1); else __sync_fetch_and_add(&null_ptr_count, 1); + + __sync_fetch_and_add(&run_count, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index 564583dca7c8..4542dc683b44 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -5,7 +5,13 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#ifndef EBUSY +#define EBUSY 16 +#endif + char _license[] SEC("license") = "GPL"; +int nr_del_errs = 0; +int test_pid = 0; struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); @@ -26,6 +32,13 @@ int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + /* The bpf_task_storage_delete will call + * bpf_local_storage_lookup. The prog->active will + * stop the recursion. + */ bpf_task_storage_delete(&map_a, task); bpf_task_storage_delete(&map_b, task); return 0; @@ -37,11 +50,32 @@ int BPF_PROG(on_update) struct task_struct *task = bpf_get_current_task_btf(); long *ptr; + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + /* ptr will not be NULL when it is called from + * the bpf_task_storage_get(&map_b,...F_CREATE) in + * the BPF_PROG(on_enter) below. It is because + * the value can be found in map_a and the kernel + * does not need to acquire any spin_lock. + */ + if (ptr) { + int err; + *ptr += 1; + err = bpf_task_storage_delete(&map_a, task); + if (err == -EBUSY) + nr_del_errs++; + } + /* This will still fail because map_b is empty and + * this BPF_PROG(on_update) has failed to acquire + * the percpu busy lock => meaning potential + * deadlock is detected and it will fail to create + * new storage. + */ ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) @@ -57,14 +91,17 @@ int BPF_PROG(on_enter, struct pt_regs *regs, long id) long *ptr; task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 200; ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 100; return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c new file mode 100644 index 000000000000..ea2dbb80f7b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#ifndef EBUSY +#define EBUSY 16 +#endif + +extern bool CONFIG_PREEMPT __kconfig __weak; +int nr_get_errs = 0; +int nr_del_errs = 0; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +SEC("lsm.s/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct task_struct *task; + int ret, zero = 0; + int *value; + + if (!CONFIG_PREEMPT) + return 0; + + task = bpf_get_current_task_btf(); + value = bpf_task_storage_get(&task_storage, task, &zero, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!value) + __sync_fetch_and_add(&nr_get_errs, 1); + + ret = bpf_task_storage_delete(&task_storage, + bpf_get_current_task_btf()); + if (ret == -EBUSY) + __sync_fetch_and_add(&nr_del_errs, 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index a1e45fec8938..3b5dc34d23e9 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -92,18 +92,19 @@ int handle_uretprobe_byname(struct pt_regs *ctx) } SEC("uprobe") -int handle_uprobe_byname2(struct pt_regs *ctx) +int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode) { - unsigned int size = PT_REGS_PARM1(ctx); + char mode_buf[2] = {}; - /* verify malloc size */ - if (size == 1) + /* verify fopen mode */ + bpf_probe_read_user(mode_buf, sizeof(mode_buf), mode); + if (mode_buf[0] == 'r' && mode_buf[1] == 0) uprobe_byname2_res = 7; return 0; } SEC("uretprobe") -int handle_uretprobe_byname2(struct pt_regs *ctx) +int BPF_URETPROBE(handle_uretprobe_byname2, void *ret) { uretprobe_byname2_res = 8; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 227e85e85dda..9fc603c9d673 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -34,6 +34,11 @@ __be16 dport = 0; int test_exist_lookup = -ENOENT; u32 test_exist_lookup_mark = 0; +enum nf_nat_manip_type___local { + NF_NAT_MANIP_SRC___local, + NF_NAT_MANIP_DST___local +}; + struct nf_conn; struct bpf_ct_opts___local { @@ -58,7 +63,7 @@ int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_set_status(struct nf_conn *, u32) __ksym; int bpf_ct_change_status(struct nf_conn *, u32) __ksym; int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *, - int port, enum nf_nat_manip_type) __ksym; + int port, enum nf_nat_manip_type___local) __ksym; static __always_inline void nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, @@ -157,10 +162,10 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, /* snat */ saddr.ip = bpf_get_prandom_u32(); - bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local); /* dnat */ daddr.ip = bpf_get_prandom_u32(); - bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local); ct_ins = bpf_ct_insert_entry(ct); if (ct_ins) { diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 2833ad722cb7..66b304982245 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -600,7 +600,7 @@ static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, return TC_ACT_SHOT; } - /* Skip the remainig next hops (may be zero). */ + /* Skip the remaining next hops (may be zero). */ return skip_next_hops(pkt, encap->unigue.hop_count - encap->unigue.next_hop - 1); } @@ -610,8 +610,8 @@ static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, * * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) * - * clang will substitue a costant for sizeof, which allows the verifier - * to track it's value. Based on this, it can figure out the constant + * clang will substitute a constant for sizeof, which allows the verifier + * to track its value. Based on this, it can figure out the constant * return value, and calling code works while still being "generic" to * IPv4 and IPv6. */ diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index 7b42dad187b8..23970a20b324 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -3,10 +3,9 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" -#ifndef MAX_STACK #define MAX_STACK (512 - 3 * 32 + 8) -#endif static __attribute__ ((noinline)) int f0(int var, struct __sk_buff *skb) @@ -39,7 +38,8 @@ int f3(int val, struct __sk_buff *skb, int var) } SEC("tc") -int test_cls(struct __sk_buff *skb) +__failure __msg("combined stack size of 4 calls is 544") +int global_func1(struct __sk_buff *skb) { return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); } diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 97b7031d0e22..98327bdbbfd2 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" struct Small { int x; @@ -21,7 +22,8 @@ __noinline int foo(const struct Big *big) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("invalid indirect read from stack") +int global_func10(struct __sk_buff *skb) { const struct Small small = {.x = skb->len }; diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c index ef5277d982d9..283e036dc401 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func11.c +++ b/tools/testing/selftests/bpf/progs/test_global_func11.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" struct S { int x; @@ -13,7 +14,8 @@ __noinline int foo(const struct S *s) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("Caller passes invalid args into func#1") +int global_func11(struct __sk_buff *skb) { return foo((const void *)skb); } diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c index 62343527cc59..7f159d83c6f6 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func12.c +++ b/tools/testing/selftests/bpf/progs/test_global_func12.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" struct S { int x; @@ -13,7 +14,8 @@ __noinline int foo(const struct S *s) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("invalid mem access 'mem_or_null'") +int global_func12(struct __sk_buff *skb) { const struct S s = {.x = skb->len }; diff --git a/tools/testing/selftests/bpf/progs/test_global_func13.c b/tools/testing/selftests/bpf/progs/test_global_func13.c index ff8897c1ac22..02ea80da75b5 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func13.c +++ b/tools/testing/selftests/bpf/progs/test_global_func13.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" struct S { int x; @@ -16,7 +17,8 @@ __noinline int foo(const struct S *s) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("Caller passes invalid args into func#1") +int global_func13(struct __sk_buff *skb) { const struct S *s = (const struct S *)(0xbedabeda); diff --git a/tools/testing/selftests/bpf/progs/test_global_func14.c b/tools/testing/selftests/bpf/progs/test_global_func14.c index 698c77199ebf..33b7d5efd7b2 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func14.c +++ b/tools/testing/selftests/bpf/progs/test_global_func14.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" struct S; @@ -14,7 +15,8 @@ __noinline int foo(const struct S *s) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("reference type('FWD S') size cannot be determined") +int global_func14(struct __sk_buff *skb) { return foo(NULL); diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index c19c435988d5..b512d6a6c75e 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __noinline int foo(unsigned int *v) { @@ -12,7 +13,8 @@ __noinline int foo(unsigned int *v) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("At program exit the register R0 has value") +int global_func15(struct __sk_buff *skb) { unsigned int v = 1; diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c index 0312d1e8d8c0..e7206304632e 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func16.c +++ b/tools/testing/selftests/bpf/progs/test_global_func16.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __noinline int foo(int (*arr)[10]) { @@ -12,7 +13,8 @@ __noinline int foo(int (*arr)[10]) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__failure __msg("invalid indirect read from stack") +int global_func16(struct __sk_buff *skb) { int array[10]; diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c index 2b8b9b8ba018..a32e11c7d933 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func17.c +++ b/tools/testing/selftests/bpf/progs/test_global_func17.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include <vmlinux.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __noinline int foo(int *p) { @@ -10,7 +11,8 @@ __noinline int foo(int *p) const volatile int i; SEC("tc") -int test_cls(struct __sk_buff *skb) +__failure __msg("Caller passes invalid args into func#1") +int global_func17(struct __sk_buff *skb) { return foo((int *)&i); } diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c index 2c18d82923a2..3dce97fb52a4 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func2.c +++ b/tools/testing/selftests/bpf/progs/test_global_func2.c @@ -1,4 +1,45 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + #define MAX_STACK (512 - 3 * 32) -#include "test_global_func1.c" + +static __attribute__ ((noinline)) +int f0(int var, struct __sk_buff *skb) +{ + return skb->len; +} + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + volatile char buf[MAX_STACK] = {}; + + return f0(0, skb) + skb->len; +} + +int f3(int, struct __sk_buff *skb, int); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, skb, 1); +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb, int var) +{ + volatile char buf[MAX_STACK] = {}; + + return skb->ifindex * val * var; +} + +SEC("tc") +__success +int global_func2(struct __sk_buff *skb) +{ + return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c index 01bf8275dfd6..142b682d3c2f 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func3.c +++ b/tools/testing/selftests/bpf/progs/test_global_func3.c @@ -3,6 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __attribute__ ((noinline)) int f1(struct __sk_buff *skb) @@ -46,20 +47,15 @@ int f7(struct __sk_buff *skb) return f6(skb); } -#ifndef NO_FN8 __attribute__ ((noinline)) int f8(struct __sk_buff *skb) { return f7(skb); } -#endif SEC("tc") -int test_cls(struct __sk_buff *skb) +__failure __msg("the call stack of 8 frames") +int global_func3(struct __sk_buff *skb) { -#ifndef NO_FN8 return f8(skb); -#else - return f7(skb); -#endif } diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c index 610f75edf276..1733d87ad3f3 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func4.c +++ b/tools/testing/selftests/bpf/progs/test_global_func4.c @@ -1,4 +1,55 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2020 Facebook */ -#define NO_FN8 -#include "test_global_func3.c" +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + val; +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb, int var) +{ + return f2(var, skb) + val; +} + +__attribute__ ((noinline)) +int f4(struct __sk_buff *skb) +{ + return f3(1, skb, 2); +} + +__attribute__ ((noinline)) +int f5(struct __sk_buff *skb) +{ + return f4(skb); +} + +__attribute__ ((noinline)) +int f6(struct __sk_buff *skb) +{ + return f5(skb); +} + +__attribute__ ((noinline)) +int f7(struct __sk_buff *skb) +{ + return f6(skb); +} + +SEC("tc") +__success +int global_func4(struct __sk_buff *skb) +{ + return f7(skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c index 9248d03e0d06..cc55aedaf82d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func5.c +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -3,6 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __attribute__ ((noinline)) int f1(struct __sk_buff *skb) @@ -25,7 +26,8 @@ int f3(int val, struct __sk_buff *skb) } SEC("tc") -int test_cls(struct __sk_buff *skb) +__failure __msg("expected pointer to ctx, but got PTR") +int global_func5(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); } diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c index af8c78bdfb25..46c38c8f2cf0 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func6.c +++ b/tools/testing/selftests/bpf/progs/test_global_func6.c @@ -3,6 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __attribute__ ((noinline)) int f1(struct __sk_buff *skb) @@ -25,7 +26,8 @@ int f3(int val, struct __sk_buff *skb) } SEC("tc") -int test_cls(struct __sk_buff *skb) +__failure __msg("modified ctx ptr R2") +int global_func6(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); } diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c index 6cb8e2f5254c..f182febfde3c 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func7.c +++ b/tools/testing/selftests/bpf/progs/test_global_func7.c @@ -3,6 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __attribute__ ((noinline)) void foo(struct __sk_buff *skb) @@ -11,7 +12,8 @@ void foo(struct __sk_buff *skb) } SEC("tc") -int test_cls(struct __sk_buff *skb) +__failure __msg("foo() doesn't return scalar") +int global_func7(struct __sk_buff *skb) { foo(skb); return 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c index d55a6544b1ab..9b9c57fa2dd3 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func8.c +++ b/tools/testing/selftests/bpf/progs/test_global_func8.c @@ -3,6 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" __noinline int foo(struct __sk_buff *skb) { @@ -10,7 +11,8 @@ __noinline int foo(struct __sk_buff *skb) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__success +int global_func8(struct __sk_buff *skb) { if (!foo(skb)) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func9.c b/tools/testing/selftests/bpf/progs/test_global_func9.c index bd233ddede98..1f2cb0159b8d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func9.c +++ b/tools/testing/selftests/bpf/progs/test_global_func9.c @@ -2,6 +2,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" struct S { int x; @@ -74,7 +75,8 @@ __noinline int quuz(int **p) } SEC("cgroup_skb/ingress") -int test_cls(struct __sk_buff *skb) +__success +int global_func9(struct __sk_buff *skb) { int result = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c new file mode 100644 index 000000000000..7faa8eef0598 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +static long stack[256]; + +/* + * KPROBE contexts + */ + +__weak int kprobe_typedef_ctx_subprog(bpf_user_pt_regs_t *ctx) +{ + return bpf_get_stack(ctx, &stack, sizeof(stack), 0); +} + +SEC("?kprobe") +__success +int kprobe_typedef_ctx(void *ctx) +{ + return kprobe_typedef_ctx_subprog(ctx); +} + +#define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL))) + +__weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx) +{ + return bpf_get_stack((void *)ctx, &stack, sizeof(stack), 0); +} + +SEC("?kprobe") +__success +int kprobe_resolved_ctx(void *ctx) +{ + return kprobe_struct_ctx_subprog(ctx); +} + +/* this is current hack to make this work on old kernels */ +struct bpf_user_pt_regs_t {}; + +__weak int kprobe_workaround_ctx_subprog(struct bpf_user_pt_regs_t *ctx) +{ + return bpf_get_stack(ctx, &stack, sizeof(stack), 0); +} + +SEC("?kprobe") +__success +int kprobe_workaround_ctx(void *ctx) +{ + return kprobe_workaround_ctx_subprog(ctx); +} + +/* + * RAW_TRACEPOINT contexts + */ + +__weak int raw_tp_ctx_subprog(struct bpf_raw_tracepoint_args *ctx) +{ + return bpf_get_stack(ctx, &stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success +int raw_tp_ctx(void *ctx) +{ + return raw_tp_ctx_subprog(ctx); +} + +/* + * RAW_TRACEPOINT_WRITABLE contexts + */ + +__weak int raw_tp_writable_ctx_subprog(struct bpf_raw_tracepoint_args *ctx) +{ + return bpf_get_stack(ctx, &stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success +int raw_tp_writable_ctx(void *ctx) +{ + return raw_tp_writable_ctx_subprog(ctx); +} + +/* + * PERF_EVENT contexts + */ + +__weak int perf_event_ctx_subprog(struct bpf_perf_event_data *ctx) +{ + return bpf_get_stack(ctx, &stack, sizeof(stack), 0); +} + +SEC("?perf_event") +__success +int perf_event_ctx(void *ctx) +{ + return perf_event_ctx_subprog(ctx); +} diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index ce39d096bba3..2fbef3cc7ad8 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -10,6 +10,7 @@ #include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; @@ -19,6 +20,7 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); struct { @@ -33,18 +35,7 @@ int err, pid; char _license[] SEC("license") = "GPL"; SEC("?lsm.s/bpf") -int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr, - unsigned int size) -{ - char write_data[64] = "hello there, world!!"; - struct bpf_dynptr ptr; - - bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); - - return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL); -} - -SEC("?lsm.s/bpf") +__failure __msg("cannot pass in dynptr at an offset=-8") int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val; @@ -54,6 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) } SEC("?lsm.s/bpf") +__failure __msg("arg#0 expected pointer to stack or dynptr_ptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val; diff --git a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c new file mode 100644 index 000000000000..f5ac5f3e8919 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* From include/linux/mm.h. */ +#define FMODE_WRITE 0x2 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} data_input SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("lsm/bpf_map") +int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) +{ + if (map != (struct bpf_map *)&data_input) + return 0; + + if (fmode & FMODE_WRITE) + return -EACCES; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 2c121c5d66a7..d487153a839d 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -27,6 +27,7 @@ unsigned int nr_pure_ack = 0; unsigned int nr_data = 0; unsigned int nr_syn = 0; unsigned int nr_fin = 0; +unsigned int nr_hwtstamp = 0; /* Check the header received from the active side */ static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) @@ -146,6 +147,9 @@ static int check_active_hdr_in(struct bpf_sock_ops *skops) if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) nr_pure_ack++; + if (skops->skb_hwtstamp) + nr_hwtstamp++; + return CG_OK; } diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 08628afedb77..8a1b50f3a002 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -110,4 +110,10 @@ int BPF_PROG(handle_fmod_ret, return 0; /* don't override the exit code */ } +SEC("kprobe.multi/bpf_testmod_test_read") +int BPF_PROG(kprobe_multi) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c new file mode 100644 index 000000000000..2760bf60d05a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, struct sample); + __type(value, int); +} hash_map SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* inner state */ +long seq = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_mem_map_key(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample, sample_copy; + int *lookup_val; + + if (cur_pid != pid) + return 0; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); + if (!sample) + return 0; + + sample->pid = pid; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + sample->seq = ++seq; + sample->value = 42; + + /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg + */ + lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); + + /* workaround - memcpy is necessary so that verifier doesn't + * complain with: + * verifier internal error: more than one arg with ref_obj_id R3 + * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY); + * + * Since bpf_map_lookup_elem above uses 'sample' as key, test using + * sample field as value below + */ + __builtin_memcpy(&sample_copy, sample, sizeof(struct sample)); + bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY); + + bpf_ringbuf_submit(sample, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 98c6493d9b91..21b19b758c4e 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -16,6 +16,16 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#if defined(IPROUTE2_HAVE_LIBBPF) +/* Use a new-style map definition. */ +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __type(key, int); + __type(value, __u64); + __uint(pinning, LIBBPF_PIN_BY_NAME); + __uint(max_entries, 1); +} server_map SEC(".maps"); +#else /* Pin map under /sys/fs/bpf/tc/globals/<map name> */ #define PIN_GLOBAL_NS 2 @@ -35,6 +45,7 @@ struct { .max_elem = 1, .pinning = PIN_GLOBAL_NS, }; +#endif char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c new file mode 100644 index 000000000000..dcf46adfda04 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define IPROUTE2_HAVE_LIBBPF +#include "test_sk_assign.c" diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1a4e93f6d9df..adece9f91f58 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -53,6 +53,20 @@ int out_mostly_var; char huge_arr[16 * 1024 * 1024]; +/* non-mmapable custom .data section */ + +struct my_value { int x, y, z; }; + +__hidden int zero_key SEC(".data.non_mmapable"); +static struct my_value zero_value SEC(".data.non_mmapable"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct my_value); + __uint(max_entries, 1); +} my_map SEC(".maps"); + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -75,6 +89,9 @@ int handler(const void *ctx) huge_arr[sizeof(huge_arr) - 1] = 123; + /* make sure zero_key and zero_value are not optimized out */ + bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 7e88309d3229..5bd10409285b 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -45,8 +45,8 @@ struct { #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) -SEC("tc") -int bpf_sping_lock_test(struct __sk_buff *skb) +SEC("cgroup_skb/ingress") +int bpf_spin_lock_test(struct __sk_buff *skb) { volatile int credit = 0, max_credit = 100, pkt_len = 64; struct hmap_elem zero = {}, *val; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c new file mode 100644 index 000000000000..86cd183ef6dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +struct foo { + struct bpf_spin_lock lock; + int data; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct foo); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &array_map, + }, +}; + +SEC(".data.A") struct bpf_spin_lock lockA; +SEC(".data.B") struct bpf_spin_lock lockB; + +SEC("?tc") +int lock_id_kptr_preserve(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_global_zero(void *ctx) +{ + bpf_this_cpu_ptr(&lockA); + return 0; +} + +SEC("?tc") +int lock_id_mapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + + f = bpf_map_lookup_elem(&array_map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_innermapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f = bpf_map_lookup_elem(map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +#define CHECK(test, A, B) \ + SEC("?tc") \ + int lock_id_mismatch_##test(void *ctx) \ + { \ + struct foo *f1, *f2, *v, *iv; \ + int key = 0; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &key); \ + if (!map) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &key); \ + if (!iv) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &key); \ + if (!v) \ + return 0; \ + f1 = bpf_obj_new(typeof(*f1)); \ + if (!f1) \ + return 0; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + bpf_spin_lock(A); \ + bpf_spin_unlock(B); \ + return 0; \ + } + +CHECK(kptr_kptr, &f1->lock, &f2->lock); +CHECK(kptr_global, &f1->lock, &lockA); +CHECK(kptr_mapval, &f1->lock, &v->lock); +CHECK(kptr_innermapval, &f1->lock, &iv->lock); + +CHECK(global_global, &lockA, &lockB); +CHECK(global_kptr, &lockA, &f1->lock); +CHECK(global_mapval, &lockA, &v->lock); +CHECK(global_innermapval, &lockA, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_mapval_mapval(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + + f1 = bpf_map_lookup_elem(&array_map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(&array_map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(mapval_kptr, &v->lock, &f1->lock); +CHECK(mapval_global, &v->lock, &lockB); +CHECK(mapval_innermapval, &v->lock, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval1(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval2(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(innermapval_kptr, &iv->lock, &f1->lock); +CHECK(innermapval_global, &iv->lock, &lockA); +CHECK(innermapval_mapval, &iv->lock, &v->lock); + +#undef CHECK + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c index f8e9256cf18d..a8d602d7c88a 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -47,7 +47,7 @@ static __noinline int sub5(int v) return sub1(v) - 1; /* compensates sub1()'s + 1 */ } -/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer +/* unfortunately verifier rejects `struct task_struct *t` as an unknown pointer * type, so we need to accept pointer as integer and then cast it inside the * function */ diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index a0e7762b1e5a..e6e678aa9874 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -38,6 +38,10 @@ static const int cfg_udp_src = 20000; #define VXLAN_FLAGS 0x8 #define VXLAN_VNI 1 +#ifndef NEXTHDR_DEST +#define NEXTHDR_DEST 60 +#endif + /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | MPLS_LS_S_MASK | 0xff); @@ -363,6 +367,61 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, return TC_ACT_OK; } +static int encap_ipv6_ipip6(struct __sk_buff *skb) +{ + struct iphdr iph_inner; + struct v6hdr h_outer; + struct tcphdr tcph; + struct ethhdr eth; + __u64 flags; + int olen; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, + sizeof(iph_inner)) < 0) + return TC_ACT_OK; + + /* filter only packets we want */ + if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2), + &tcph, sizeof(tcph)) < 0) + return TC_ACT_OK; + + if (tcph.dest != __bpf_constant_htons(cfg_port)) + return TC_ACT_OK; + + olen = sizeof(h_outer.ip); + + flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6; + + /* add room between mac and network header */ + if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) + return TC_ACT_SHOT; + + /* prepare new outer network header */ + memset(&h_outer.ip, 0, sizeof(h_outer.ip)); + h_outer.ip.version = 6; + h_outer.ip.hop_limit = iph_inner.ttl; + h_outer.ip.saddr.s6_addr[1] = 0xfd; + h_outer.ip.saddr.s6_addr[15] = 1; + h_outer.ip.daddr.s6_addr[1] = 0xfd; + h_outer.ip.daddr.s6_addr[15] = 2; + h_outer.ip.payload_len = iph_inner.tot_len; + h_outer.ip.nexthdr = IPPROTO_IPIP; + + /* store new outer network header */ + if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, + BPF_F_INVALIDATE_HASH) < 0) + return TC_ACT_SHOT; + + /* update eth->h_proto */ + if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0) + return TC_ACT_SHOT; + eth.h_proto = bpf_htons(ETH_P_IPV6); + if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto) { @@ -461,6 +520,15 @@ int __encap_ip6tnl_none(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("encap_ipip6_none") +int __encap_ipip6_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return encap_ipv6_ipip6(skb); + else + return TC_ACT_OK; +} + SEC("encap_ip6gre_none") int __encap_ip6gre_none(struct __sk_buff *skb) { @@ -528,13 +596,33 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb) static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) { + __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO; + struct ipv6_opt_hdr ip6_opt_hdr; struct gre_hdr greh; struct udphdr udph; int olen = len; switch (proto) { case IPPROTO_IPIP: + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4; + break; case IPPROTO_IPV6: + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6; + break; + case NEXTHDR_DEST: + if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr, + sizeof(ip6_opt_hdr)) < 0) + return TC_ACT_OK; + switch (ip6_opt_hdr.nexthdr) { + case IPPROTO_IPIP: + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4; + break; + case IPPROTO_IPV6: + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6; + break; + default: + return TC_ACT_OK; + } break; case IPPROTO_GRE: olen += sizeof(struct gre_hdr); @@ -569,8 +657,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) return TC_ACT_OK; } - if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, - BPF_F_ADJ_ROOM_FIXED_GSO)) + if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags)) return TC_ACT_SHOT; return TC_ACT_OK; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 98af55f0bcd3..508da4a23c4f 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -82,6 +82,27 @@ int gre_set_tunnel(struct __sk_buff *skb) } SEC("tc") +int gre_set_tunnel_no_key(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER | + BPF_F_NO_TUNNEL_KEY); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") int gre_get_tunnel(struct __sk_buff *skb) { int ret; diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c index ab75522e2eeb..da4bf89d004c 100644 --- a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c +++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c @@ -6,18 +6,22 @@ #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" int uprobe_byname_parm1 = 0; int uprobe_byname_ran = 0; int uretprobe_byname_rc = 0; +int uretprobe_byname_ret = 0; int uretprobe_byname_ran = 0; -size_t uprobe_byname2_parm1 = 0; +u64 uprobe_byname2_parm1 = 0; int uprobe_byname2_ran = 0; -char *uretprobe_byname2_rc = NULL; +u64 uretprobe_byname2_rc = 0; int uretprobe_byname2_ran = 0; int test_pid; +int a[8]; + /* This program cannot auto-attach, but that should not stop other * programs from attaching. */ @@ -28,44 +32,84 @@ int handle_uprobe_noautoattach(struct pt_regs *ctx) } SEC("uprobe//proc/self/exe:autoattach_trigger_func") -int handle_uprobe_byname(struct pt_regs *ctx) +int BPF_UPROBE(handle_uprobe_byname + , int arg1 + , int arg2 + , int arg3 +#if FUNC_REG_ARG_CNT > 3 + , int arg4 +#endif +#if FUNC_REG_ARG_CNT > 4 + , int arg5 +#endif +#if FUNC_REG_ARG_CNT > 5 + , int arg6 +#endif +#if FUNC_REG_ARG_CNT > 6 + , int arg7 +#endif +#if FUNC_REG_ARG_CNT > 7 + , int arg8 +#endif +) { uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx); uprobe_byname_ran = 1; + + a[0] = arg1; + a[1] = arg2; + a[2] = arg3; +#if FUNC_REG_ARG_CNT > 3 + a[3] = arg4; +#endif +#if FUNC_REG_ARG_CNT > 4 + a[4] = arg5; +#endif +#if FUNC_REG_ARG_CNT > 5 + a[5] = arg6; +#endif +#if FUNC_REG_ARG_CNT > 6 + a[6] = arg7; +#endif +#if FUNC_REG_ARG_CNT > 7 + a[7] = arg8; +#endif return 0; } SEC("uretprobe//proc/self/exe:autoattach_trigger_func") -int handle_uretprobe_byname(struct pt_regs *ctx) +int BPF_URETPROBE(handle_uretprobe_byname, int ret) { uretprobe_byname_rc = PT_REGS_RC_CORE(ctx); + uretprobe_byname_ret = ret; uretprobe_byname_ran = 2; + return 0; } -SEC("uprobe/libc.so.6:malloc") -int handle_uprobe_byname2(struct pt_regs *ctx) +SEC("uprobe/libc.so.6:fopen") +int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode) { int pid = bpf_get_current_pid_tgid() >> 32; /* ignore irrelevant invocations */ if (test_pid != pid) return 0; - uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx); + uprobe_byname2_parm1 = (u64)(long)pathname; uprobe_byname2_ran = 3; return 0; } -SEC("uretprobe/libc.so.6:malloc") -int handle_uretprobe_byname2(struct pt_regs *ctx) +SEC("uretprobe/libc.so.6:fopen") +int BPF_URETPROBE(handle_uretprobe_byname2, void *ret) { int pid = bpf_get_current_pid_tgid() >> 32; /* ignore irrelevant invocations */ if (test_pid != pid) return 0; - uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx); + uretprobe_byname2_rc = (u64)(long)ret; uretprobe_byname2_ran = 4; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index ce419304ff1f..7748cc23de8a 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -59,10 +59,14 @@ int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) if (!data_val) return 0; - bpf_probe_read(&value, sizeof(value), &attr->value); - - bpf_copy_from_user(data_val, sizeof(struct data), - (void *)(unsigned long)value); + ret = bpf_probe_read_kernel(&value, sizeof(value), &attr->value); + if (ret) + return ret; + + ret = bpf_copy_from_user(data_val, sizeof(struct data), + (void *)(unsigned long)value); + if (ret) + return ret; if (data_val->data_len > sizeof(data_val->data)) return -EINVAL; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index e9dfa0313d1b..4b8e37f7fd06 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -42,7 +42,7 @@ int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) if (id != __NR_nanosleep) return 0; - ts = (void *)PT_REGS_PARM1_CORE(regs); + ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs); if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || tv_nsec != MY_TV_NSEC) return 0; @@ -60,7 +60,7 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) if (id != __NR_nanosleep) return 0; - ts = (void *)PT_REGS_PARM1_CORE(regs); + ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs); if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || tv_nsec != MY_TV_NSEC) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 53b64c999450..297c260fc364 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -9,6 +9,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) void *data = (void *)(long)xdp->data; int data_len = bpf_xdp_get_buff_len(xdp); int offset = 0; + /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */ +#if defined(__TARGET_ARCH_s390) + int tailroom = 512; +#else + int tailroom = 320; +#endif /* Data length determine test case */ @@ -20,7 +26,7 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) offset = 128; } else if (data_len == 128) { /* Max tail grow 3520 */ - offset = 4096 - 256 - 320 - data_len; + offset = 4096 - 256 - tailroom - data_len; } else if (data_len == 9000) { offset = 10; } else if (data_len == 9001) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index 134768f6b788..4ddcb6dfe500 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -98,7 +98,7 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) return true; } -/* Hint, VLANs are choosen to hit network-byte-order issues */ +/* Hint, VLANs are chosen to hit network-byte-order issues */ #define TESTVLAN 4011 /* 0xFAB */ // #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ @@ -195,7 +195,7 @@ int xdp_prognum2(struct xdp_md *ctx) /* Moving Ethernet header, dest overlap with src, memmove handle this */ dest = data; - dest+= VLAN_HDR_SZ; + dest += VLAN_HDR_SZ; /* * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c new file mode 100644 index 000000000000..eb78e6f03129 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} enter_id SEC(".maps"); + +#define IFNAMSIZ 16 + +int ifindex, ingress_ifindex; +char name[IFNAMSIZ]; +unsigned int inum; +unsigned int meta_len, frag0_len, kskb_len, kskb2_len; + +void *bpf_cast_to_kern_ctx(void *) __ksym; +void *bpf_rdonly_cast(void *, __u32) __ksym; + +SEC("?xdp") +int md_xdp(struct xdp_md *ctx) +{ + struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx); + struct net_device *dev; + + dev = kctx->rxq->dev; + ifindex = dev->ifindex; + inum = dev->nd_net.net->ns.inum; + __builtin_memcpy(name, dev->name, IFNAMSIZ); + ingress_ifindex = ctx->ingress_ifindex; + return XDP_PASS; +} + +SEC("?tc") +int md_skb(struct __sk_buff *skb) +{ + struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb); + struct skb_shared_info *shared_info; + struct sk_buff *kskb2; + + kskb_len = kskb->len; + + /* Simulate the following kernel macro: + * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) + */ + shared_info = bpf_rdonly_cast(kskb->head + kskb->end, + bpf_core_type_id_kernel(struct skb_shared_info)); + meta_len = shared_info->meta_len; + frag0_len = shared_info->frag_list->len; + + /* kskb2 should be equal to kskb */ + kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff)); + kskb2_len = kskb2->len; + return 0; +} + +SEC("?tp_btf/sys_enter") +int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) +{ + struct task_struct *task, *task_dup; + long *ptr; + + task = bpf_get_current_task_btf(); + task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); + (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0); + return 0; +} + +SEC("?tracepoint/syscalls/sys_enter_nanosleep") +int kctx_u64(void *ctx) +{ + u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64)); + + (void)kctx; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 82aba4529aa9..03ee946c6bf7 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -16,8 +16,16 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_USER_RINGBUF); + __uint(max_entries, 4096); } user_ringbuf SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 2); +} ringbuf SEC(".maps"); + +static int map_value; + static long bad_access1(struct bpf_dynptr *dynptr, void *context) { @@ -32,7 +40,8 @@ bad_access1(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read before the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("negative offset dynptr_ptr ptr") int user_ringbuf_callback_bad_access1(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0); @@ -54,7 +63,8 @@ bad_access2(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("dereference of modified dynptr_ptr ptr") int user_ringbuf_callback_bad_access2(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0); @@ -73,7 +83,8 @@ write_forbidden(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("invalid mem access 'dynptr_ptr'") int user_ringbuf_callback_write_forbidden(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0); @@ -92,7 +103,8 @@ null_context_write(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int user_ringbuf_callback_null_context_write(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0); @@ -113,7 +125,8 @@ null_context_read(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int user_ringbuf_callback_null_context_read(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0); @@ -132,7 +145,8 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("cannot release unowned const bpf_dynptr") int user_ringbuf_callback_discard_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0); @@ -151,7 +165,8 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("cannot release unowned const bpf_dynptr") int user_ringbuf_callback_submit_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0); @@ -168,10 +183,41 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") +__failure __msg("At callback return the register R0 has value") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); return 0; } + +static long +try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context) +{ + bpf_dynptr_from_mem(&map_value, 4, 0, dynptr); + return 0; +} + +static long +try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context) +{ + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr); + return 0; +} + +SEC("?raw_tp") +__failure __msg("Dynptr has to be an uninitialized dynptr") +int user_ringbuf_callback_reinit_dynptr_mem(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0); + return 0; +} + +SEC("?raw_tp") +__failure __msg("Dynptr has to be an uninitialized dynptr") +int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c new file mode 100644 index 000000000000..87c247d56f72 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_features.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdbool.h> +#include <linux/bpf.h> +#include <linux/netdev.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/udp.h> +#include <asm-generic/errno-base.h> + +#include "xdp_features.h" + +#define ipv6_addr_equal(a, b) ((a).s6_addr32[0] == (b).s6_addr32[0] && \ + (a).s6_addr32[1] == (b).s6_addr32[1] && \ + (a).s6_addr32[2] == (b).s6_addr32[2] && \ + (a).s6_addr32[3] == (b).s6_addr32[3]) + +struct net_device; +struct bpf_prog; + +struct xdp_cpumap_stats { + unsigned int redirect; + unsigned int pass; + unsigned int drop; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} dut_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); + __uint(max_entries, 1); +} cpu_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} dev_map SEC(".maps"); + +const volatile struct in6_addr tester_addr; +const volatile struct in6_addr dut_addr; + +static __always_inline int +xdp_process_echo_packet(struct xdp_md *xdp, bool dut) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eh = data; + struct tlv_hdr *tlv; + struct udphdr *uh; + __be16 port; + __u8 *cmd; + + if (eh + 1 > (struct ethhdr *)data_end) + return -EINVAL; + + if (eh->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *ih = (struct iphdr *)(eh + 1); + __be32 saddr = dut ? tester_addr.s6_addr32[3] + : dut_addr.s6_addr32[3]; + __be32 daddr = dut ? dut_addr.s6_addr32[3] + : tester_addr.s6_addr32[3]; + + ih = (struct iphdr *)(eh + 1); + if (ih + 1 > (struct iphdr *)data_end) + return -EINVAL; + + if (saddr != ih->saddr) + return -EINVAL; + + if (daddr != ih->daddr) + return -EINVAL; + + if (ih->protocol != IPPROTO_UDP) + return -EINVAL; + + uh = (struct udphdr *)(ih + 1); + } else if (eh->h_proto == bpf_htons(ETH_P_IPV6)) { + struct in6_addr saddr = dut ? tester_addr : dut_addr; + struct in6_addr daddr = dut ? dut_addr : tester_addr; + struct ipv6hdr *ih6 = (struct ipv6hdr *)(eh + 1); + + if (ih6 + 1 > (struct ipv6hdr *)data_end) + return -EINVAL; + + if (!ipv6_addr_equal(saddr, ih6->saddr)) + return -EINVAL; + + if (!ipv6_addr_equal(daddr, ih6->daddr)) + return -EINVAL; + + if (ih6->nexthdr != IPPROTO_UDP) + return -EINVAL; + + uh = (struct udphdr *)(ih6 + 1); + } else { + return -EINVAL; + } + + if (uh + 1 > (struct udphdr *)data_end) + return -EINVAL; + + port = dut ? uh->dest : uh->source; + if (port != bpf_htons(DUT_ECHO_PORT)) + return -EINVAL; + + tlv = (struct tlv_hdr *)(uh + 1); + if (tlv + 1 > data_end) + return -EINVAL; + + return bpf_htons(tlv->type) == CMD_ECHO ? 0 : -EINVAL; +} + +static __always_inline int +xdp_update_stats(struct xdp_md *xdp, bool tx, bool dut) +{ + __u32 *val, key = 0; + + if (xdp_process_echo_packet(xdp, tx)) + return -EINVAL; + + if (dut) + val = bpf_map_lookup_elem(&dut_stats, &key); + else + val = bpf_map_lookup_elem(&stats, &key); + + if (val) + __sync_add_and_fetch(val, 1); + + return 0; +} + +/* Tester */ + +SEC("xdp") +int xdp_tester_check_tx(struct xdp_md *xdp) +{ + xdp_update_stats(xdp, true, false); + + return XDP_PASS; +} + +SEC("xdp") +int xdp_tester_check_rx(struct xdp_md *xdp) +{ + xdp_update_stats(xdp, false, false); + + return XDP_PASS; +} + +/* DUT */ + +SEC("xdp") +int xdp_do_pass(struct xdp_md *xdp) +{ + xdp_update_stats(xdp, true, true); + + return XDP_PASS; +} + +SEC("xdp") +int xdp_do_drop(struct xdp_md *xdp) +{ + if (xdp_update_stats(xdp, true, true)) + return XDP_PASS; + + return XDP_DROP; +} + +SEC("xdp") +int xdp_do_aborted(struct xdp_md *xdp) +{ + if (xdp_process_echo_packet(xdp, true)) + return XDP_PASS; + + return XDP_ABORTED; +} + +SEC("xdp") +int xdp_do_tx(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + struct ethhdr *eh = data; + __u8 tmp_mac[ETH_ALEN]; + + if (xdp_update_stats(xdp, true, true)) + return XDP_PASS; + + __builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN); + __builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN); + __builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN); + + return XDP_TX; +} + +SEC("xdp") +int xdp_do_redirect(struct xdp_md *xdp) +{ + if (xdp_process_echo_packet(xdp, true)) + return XDP_PASS; + + return bpf_redirect_map(&cpu_map, 0, 0); +} + +SEC("tp_btf/xdp_exception") +int BPF_PROG(xdp_exception, const struct net_device *dev, + const struct bpf_prog *xdp, __u32 act) +{ + __u32 *val, key = 0; + + val = bpf_map_lookup_elem(&dut_stats, &key); + if (val) + __sync_add_and_fetch(val, 1); + + return 0; +} + +SEC("tp_btf/xdp_cpumap_kthread") +int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed, + unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats) +{ + __u32 *val, key = 0; + + val = bpf_map_lookup_elem(&dut_stats, &key); + if (val) + __sync_add_and_fetch(val, 1); + + return 0; +} + +SEC("xdp/cpumap") +int xdp_do_redirect_cpumap(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + struct ethhdr *eh = data; + __u8 tmp_mac[ETH_ALEN]; + + if (xdp_process_echo_packet(xdp, true)) + return XDP_PASS; + + __builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN); + __builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN); + __builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN); + + return bpf_redirect_map(&dev_map, 0, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c new file mode 100644 index 000000000000..4c55b4d79d3d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "xdp_metadata.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, 256); + __type(key, __u32); + __type(value, __u32); +} xsk SEC(".maps"); + +extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, + __u64 *timestamp) __ksym; +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, + __u32 *hash) __ksym; + +SEC("xdp") +int rx(struct xdp_md *ctx) +{ + void *data, *data_meta, *data_end; + struct ipv6hdr *ip6h = NULL; + struct ethhdr *eth = NULL; + struct udphdr *udp = NULL; + struct iphdr *iph = NULL; + struct xdp_meta *meta; + int ret; + + data = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + eth = data; + if (eth + 1 < data_end) { + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP) + udp = (void *)(iph + 1); + } + if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP) + udp = (void *)(ip6h + 1); + } + if (udp && udp + 1 > data_end) + udp = NULL; + } + + if (!udp) + return XDP_PASS; + + if (udp->dest != bpf_htons(9091)) + return XDP_PASS; + + bpf_printk("forwarding UDP:9091 to AF_XDP"); + + ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); + if (ret != 0) { + bpf_printk("bpf_xdp_adjust_meta returned %d", ret); + return XDP_PASS; + } + + data = (void *)(long)ctx->data; + data_meta = (void *)(long)ctx->data_meta; + meta = data_meta; + + if (meta + 1 > data) { + bpf_printk("bpf_xdp_adjust_meta doesn't appear to work"); + return XDP_PASS; + } + + if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp)) + bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp); + else + meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ + + if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash)) + bpf_printk("populated rx_hash with %u", meta->rx_hash); + else + meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */ + + return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c new file mode 100644 index 000000000000..77678b034389 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "xdp_metadata.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, 4); + __type(key, __u32); + __type(value, __u32); +} xsk SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} prog_arr SEC(".maps"); + +extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, + __u64 *timestamp) __ksym; +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, + __u32 *hash) __ksym; + +SEC("xdp") +int rx(struct xdp_md *ctx) +{ + void *data, *data_meta; + struct xdp_meta *meta; + u64 timestamp = -1; + int ret; + + /* Reserve enough for all custom metadata. */ + + ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); + if (ret != 0) + return XDP_DROP; + + data = (void *)(long)ctx->data; + data_meta = (void *)(long)ctx->data_meta; + + if (data_meta + sizeof(struct xdp_meta) > data) + return XDP_DROP; + + meta = data_meta; + + /* Export metadata. */ + + /* We expect veth bpf_xdp_metadata_rx_timestamp to return 0 HW + * timestamp, so put some non-zero value into AF_XDP frame for + * the userspace. + */ + bpf_xdp_metadata_rx_timestamp(ctx, ×tamp); + if (timestamp == 0) + meta->rx_timestamp = 1; + + bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash); + + return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c new file mode 100644 index 000000000000..cf69d05451c3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "xdp_metadata.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, + __u32 *hash) __ksym; + +int called; + +SEC("freplace/rx") +int freplace_rx(struct xdp_md *ctx) +{ + u32 hash = 0; + /* Call _any_ metadata function to make sure we don't crash. */ + bpf_xdp_metadata_rx_hash(ctx, &hash); + called++; + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 736686e903f6..07d786329105 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -310,7 +310,7 @@ static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, static __always_inline void values_inc_synacks(void) { __u32 key = 1; - __u32 *value; + __u64 *value; value = bpf_map_lookup_elem(&values, &key); if (value) diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c new file mode 100644 index 000000000000..f6a501fbba2b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> + +struct bpf_xfrm_info___local { + u32 if_id; + int link; +} __attribute__((preserve_access_index)); + +__u32 req_if_id; +__u32 resp_if_id; + +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, + const struct bpf_xfrm_info___local *from) __ksym; +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, + struct bpf_xfrm_info___local *to) __ksym; + +SEC("tc") +int set_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info___local info = { .if_id = req_if_id }; + + return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC; +} + +SEC("tc") +int get_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info___local info = {}; + + if (bpf_skb_get_xfrm_info(skb, &info) < 0) + return TC_ACT_SHOT; + + resp_if_id = info.if_id; + + return TC_ACT_UNSPEC; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c new file mode 100644 index 000000000000..744a01d0e57d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Intel */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} xsk SEC(".maps"); + +static unsigned int idx; + +SEC("xdp") int xsk_def_prog(struct xdp_md *xdp) +{ + return bpf_redirect_map(&xsk, 0, XDP_DROP); +} + +SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp) +{ + /* Drop every other packet */ + if (idx++ % 2) + return XDP_DROP; + + return bpf_redirect_map(&xsk, 0, XDP_DROP); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h index 711d5abb7d51..281f86132766 100644 --- a/tools/testing/selftests/bpf/task_local_storage_helpers.h +++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h @@ -7,8 +7,12 @@ #include <sys/types.h> #ifndef __NR_pidfd_open +#ifdef __alpha__ +#define __NR_pidfd_open 544 +#else #define __NR_pidfd_open 434 #endif +#endif static inline int sys_pidfd_open(pid_t pid, unsigned int flags) { diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh index 1bf81b49457a..b5520692f41b 100755 --- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -4,6 +4,9 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE_USED="metadata_used.bpf.o" +BPF_FILE_UNUSED="metadata_unused.bpf.o" + TESTNAME=bpftool_metadata BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) BPF_DIR=$BPF_FS/test_$TESTNAME @@ -55,7 +58,7 @@ mkdir $BPF_DIR trap cleanup EXIT -bpftool prog load metadata_unused.o $BPF_DIR/unused +bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null @@ -67,7 +70,7 @@ bpftool map | grep 'metadata.rodata' > /dev/null rm $BPF_DIR/unused -bpftool prog load metadata_used.o $BPF_DIR/used +bpftool prog load $BPF_FILE_USED $BPF_DIR/used METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index a6410bebe603..0cfece7ff4f8 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -309,11 +309,11 @@ class MainHeaderFileExtractor(SourceFileExtractor): commands), which looks to the lists of options in other source files but has different start and end markers: - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile(f'"OPTIONS :=') pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') @@ -336,7 +336,7 @@ class ManSubstitutionsExtractor(SourceFileExtractor): Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') pattern = re.compile('\*\*([\w/-]+)\*\*') @@ -501,6 +501,14 @@ def main(): source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE + # share the same enum value and source_map_types picks + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED/cgroup_storage_deprecated. + # Replace 'cgroup_storage_deprecated' with 'cgroup_storage' + # so it aligns with what `bpftool map help` shows. + source_map_types.remove('cgroup_storage_deprecated') + source_map_types.add('cgroup_storage') + help_map_types = map_info.get_map_help() help_map_options = map_info.get_options() map_info.close() diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index 19ad172036da..f4936834f76f 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #include <iostream> -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/btf.h> #include <bpf/libbpf.h> -#pragma GCC diagnostic pop #include <bpf/bpf.h> #include <bpf/btf.h> #include "test_core_extern.skel.h" @@ -91,7 +91,7 @@ static void try_skeleton_template() skel.detach(); - /* destructor will destory underlying skeleton */ + /* destructor will destroy underlying skeleton */ } int main(int argc, char *argv[]) @@ -99,6 +99,7 @@ int main(int argc, char *argv[]) struct btf_dump_opts opts = { }; struct test_core_extern *skel; struct btf *btf; + int fd; try_skeleton_template(); @@ -117,6 +118,12 @@ int main(int argc, char *argv[]) skel = test_core_extern__open_and_load(); test_core_extern__destroy(skel); + fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (fd < 0) + std::cout << "FAILED to enable stats: " << fd << std::endl; + else + ::close(fd); + std::cout << "DONE!" << std::endl; return 0; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 5303ce0c977b..4b298863797a 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # # Load BPF flow dissector and verify it correctly dissects traffic + +BPF_FILE="bpf_flow.bpf.o" export TESTNAME=test_flow_dissector unmount=0 @@ -22,7 +24,7 @@ if [[ -z $(ip netns identify $$) ]]; then if bpftool="$(which bpftool)"; then echo "Testing global flow dissector..." - $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \ type flow_dissector if ! unshare --net $bpftool prog attach pinned \ @@ -95,7 +97,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s _dissect +./flow_dissector_load -p $BPF_FILE -s _dissect # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c new file mode 100644 index 000000000000..679efb3aa785 --- /dev/null +++ b/tools/testing/selftests/bpf/test_loader.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <stdlib.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#define str_has_pfx(str, pfx) \ + (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) + +#define TEST_LOADER_LOG_BUF_SZ 1048576 + +#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" +#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" +#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" + +struct test_spec { + const char *name; + bool expect_failure; + const char *expect_msg; + int log_level; +}; + +static int tester_init(struct test_loader *tester) +{ + if (!tester->log_buf) { + tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ; + tester->log_buf = malloc(tester->log_buf_sz); + if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf")) + return -ENOMEM; + } + + return 0; +} + +void test_loader_fini(struct test_loader *tester) +{ + if (!tester) + return; + + free(tester->log_buf); +} + +static int parse_test_spec(struct test_loader *tester, + struct bpf_object *obj, + struct bpf_program *prog, + struct test_spec *spec) +{ + struct btf *btf; + int func_id, i; + + memset(spec, 0, sizeof(*spec)); + + spec->name = bpf_program__name(prog); + + btf = bpf_object__btf(obj); + if (!btf) { + ASSERT_FAIL("BPF object has no BTF"); + return -EINVAL; + } + + func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC); + if (func_id < 0) { + ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name); + return -EINVAL; + } + + for (i = 1; i < btf__type_cnt(btf); i++) { + const struct btf_type *t; + const char *s; + + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + + if (t->type != func_id || btf_decl_tag(t)->component_idx != -1) + continue; + + s = btf__str_by_offset(btf, t->name_off); + if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) { + spec->expect_failure = true; + } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { + spec->expect_failure = false; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { + spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { + errno = 0; + spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0); + if (errno) { + ASSERT_FAIL("failed to parse test log level from '%s'", s); + return -EINVAL; + } + } + } + + return 0; +} + +static void prepare_case(struct test_loader *tester, + struct test_spec *spec, + struct bpf_object *obj, + struct bpf_program *prog) +{ + int min_log_level = 0; + + if (env.verbosity > VERBOSE_NONE) + min_log_level = 1; + if (env.verbosity > VERBOSE_VERY) + min_log_level = 2; + + bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz); + + /* Make sure we set at least minimal log level, unless test requirest + * even higher level already. Make sure to preserve independent log + * level 4 (verifier stats), though. + */ + if ((spec->log_level & 3) < min_log_level) + bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level); + else + bpf_program__set_log_level(prog, spec->log_level); + + tester->log_buf[0] = '\0'; +} + +static void emit_verifier_log(const char *log_buf, bool force) +{ + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf); +} + +static void validate_case(struct test_loader *tester, + struct test_spec *spec, + struct bpf_object *obj, + struct bpf_program *prog, + int load_err) +{ + if (spec->expect_msg) { + char *match; + + match = strstr(tester->log_buf, spec->expect_msg); + if (!ASSERT_OK_PTR(match, "expect_msg")) { + /* if we are in verbose mode, we've already emitted log */ + if (env.verbosity == VERBOSE_NONE) + emit_verifier_log(tester->log_buf, true /*force*/); + fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg); + return; + } + } +} + +/* this function is forced noinline and has short generic name to look better + * in test_progs output (in case of a failure) + */ +static noinline +void run_subtest(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) +{ + LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name); + struct bpf_object *obj = NULL, *tobj; + struct bpf_program *prog, *tprog; + const void *obj_bytes; + size_t obj_byte_cnt; + int err; + + if (tester_init(tester) < 0) + return; /* failed to initialize tester */ + + obj_bytes = elf_bytes_factory(&obj_byte_cnt); + obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); + if (!ASSERT_OK_PTR(obj, "obj_open_mem")) + return; + + bpf_object__for_each_program(prog, obj) { + const char *prog_name = bpf_program__name(prog); + struct test_spec spec; + + if (!test__start_subtest(prog_name)) + continue; + + /* if we can't derive test specification, go to the next test */ + err = parse_test_spec(tester, obj, prog, &spec); + if (!ASSERT_OK(err, "parse_test_spec")) + continue; + + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); + if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ + continue; + + bpf_object__for_each_program(tprog, tobj) + bpf_program__set_autoload(tprog, false); + + bpf_object__for_each_program(tprog, tobj) { + /* only load specified program */ + if (strcmp(bpf_program__name(tprog), prog_name) == 0) { + bpf_program__set_autoload(tprog, true); + break; + } + } + + prepare_case(tester, &spec, tobj, tprog); + + err = bpf_object__load(tobj); + if (spec.expect_failure) { + if (!ASSERT_ERR(err, "unexpected_load_success")) { + emit_verifier_log(tester->log_buf, false /*force*/); + goto tobj_cleanup; + } + } else { + if (!ASSERT_OK(err, "unexpected_load_failure")) { + emit_verifier_log(tester->log_buf, true /*force*/); + goto tobj_cleanup; + } + } + + emit_verifier_log(tester->log_buf, false /*force*/); + validate_case(tester, &spec, tobj, tprog, err); + +tobj_cleanup: + bpf_object__close(tobj); + } + + bpf_object__close(obj); +} + +void test_loader__run_subtests(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) +{ + /* see comment in run_subtest() for why we do this function nesting */ + run_subtest(tester, skel_name, elf_bytes_factory); +} diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 6c69c42b1d60..1e565f47aca9 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -38,6 +38,7 @@ # ping: SRC->[encap at veth2:ingress]->GRE:decap->DST # ping replies go DST->SRC directly +BPF_FILE="test_lwt_ip_encap.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -373,14 +374,14 @@ test_egress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} else echo " unknown encap ${ENCAP}" TEST_STATUS=1 @@ -431,14 +432,14 @@ test_ingress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} else echo "FAIL: unknown encap ${ENCAP}" TEST_STATUS=1 diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 826f4423ce02..0efea2292d6a 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -23,6 +23,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE="test_lwt_seg6local.bpf.o" readonly NS1="ns1-$(mktemp -u XXXXXX)" readonly NS2="ns2-$(mktemp -u XXXXXX)" readonly NS3="ns3-$(mktemp -u XXXXXX)" @@ -117,18 +118,18 @@ ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 -ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2 ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 +ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4 -ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6 ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 +ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8 ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index b73152822aa2..7fc00e423e4d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1275,7 +1275,7 @@ static void test_map_in_map(void) goto out_map_in_map; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_map_get_info_by_fd(fd, &info, &len); if (err) { printf("Failed to get map info by fd %d: %d", fd, errno); diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 7fc15e0d24a9..40cba8d368d9 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -769,12 +769,14 @@ skip(ret != 0, "bpftool not installed") base_progs = progs _, base_maps = bpftool("map") base_map_names = [ - 'pid_iter.rodata' # created on each bpftool invocation + 'pid_iter.rodata', # created on each bpftool invocation + 'libbpf_det_bind', # created on each bpftool invocation ] # Check netdevsim -ret, out = cmd("modprobe netdevsim", fail=False) -skip(ret != 0, "netdevsim module could not be loaded") +if not os.path.isdir("/sys/bus/netdevsim/"): + ret, out = cmd("modprobe netdevsim", fail=False) + skip(ret != 0, "netdevsim module could not be loaded") # Check debugfs _, out = cmd("mount") @@ -1037,7 +1039,7 @@ try: offload = bpf_pinned("/sys/fs/bpf/offload") ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True) fail(ret == 0, "attached offloaded XDP program to drv") - check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args) + check_extack(err, "Using offloaded program without HW_MODE flag is not supported.", args) rm("/sys/fs/bpf/offload") sim.wait_for_flush() @@ -1086,12 +1088,12 @@ try: ret, _, err = sim.set_xdp(pinned, "offload", fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a different device accepted") - check_extack_nsim(err, "program bound to different dev.", args) + check_extack(err, "Program bound to different device.", args) simdev2.remove() ret, _, err = sim.set_xdp(pinned, "offload", fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a removed device accepted") - check_extack_nsim(err, "xdpoffload of non-bound program.", args) + check_extack(err, "Program bound to different device.", args) rm(pin_file) bpftool_prog_list_wait(expected=0) @@ -1332,12 +1334,12 @@ try: ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False, include_stderr=True) fail(ret == 0, "cross-ASIC program allowed") - check_extack_nsim(err, "program bound to different dev.", args) + check_extack(err, "Program bound to different device.", args) for d in simdevB.nsims: ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False, include_stderr=True) fail(ret == 0, "cross-ASIC program allowed") - check_extack_nsim(err, "program bound to different dev.", args) + check_extack(err, "Program bound to different device.", args) start_test("Test multi-dev ASIC cross-dev map reuse...") diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 3fef451d8831..6d5e3022c75f 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -17,6 +17,7 @@ #include <sys/select.h> #include <sys/socket.h> #include <sys/un.h> +#include <bpf/btf.h> static bool verbose(void) { @@ -222,6 +223,26 @@ static char *test_result(bool failed, bool skipped) return failed ? "FAIL" : (skipped ? "SKIP" : "OK"); } +#define TEST_NUM_WIDTH 7 + +static void print_test_result(const struct prog_test_def *test, const struct test_state *test_state) +{ + int skipped_cnt = test_state->skip_cnt; + int subtests_cnt = test_state->subtest_num; + + fprintf(env.stdout, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name); + if (test_state->error_cnt) + fprintf(env.stdout, "FAIL"); + else if (!skipped_cnt) + fprintf(env.stdout, "OK"); + else if (skipped_cnt == subtests_cnt || !subtests_cnt) + fprintf(env.stdout, "SKIP"); + else + fprintf(env.stdout, "OK (SKIP: %d/%d)", skipped_cnt, subtests_cnt); + + fprintf(env.stdout, "\n"); +} + static void print_test_log(char *log_buf, size_t log_cnt) { log_buf[log_cnt] = '\0'; @@ -230,18 +251,6 @@ static void print_test_log(char *log_buf, size_t log_cnt) fprintf(env.stdout, "\n"); } -#define TEST_NUM_WIDTH 7 - -static void print_test_name(int test_num, const char *test_name, char *result) -{ - fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name); - - if (result) - fprintf(env.stdout, ":%s", result); - - fprintf(env.stdout, "\n"); -} - static void print_subtest_name(int test_num, int subtest_num, const char *test_name, char *subtest_name, char *result) @@ -307,8 +316,7 @@ static void dump_test_log(const struct prog_test_def *test, subtest_state->skipped)); } - print_test_name(test->test_num, test->test_name, - test_result(test_failed, test_state->skip_cnt)); + print_test_result(test, test_state); } static void stdio_restore(void); @@ -960,6 +968,43 @@ int write_sysctl(const char *sysctl, const char *value) return 0; } +int get_bpf_max_tramp_links_from(struct btf *btf) +{ + const struct btf_enum *e; + const struct btf_type *t; + __u32 i, type_cnt; + const char *name; + __u16 j, vlen; + + for (i = 1, type_cnt = btf__type_cnt(btf); i < type_cnt; i++) { + t = btf__type_by_id(btf, i); + if (!t || !btf_is_enum(t) || t->name_off) + continue; + e = btf_enum(t); + for (j = 0, vlen = btf_vlen(t); j < vlen; j++, e++) { + name = btf__str_by_offset(btf, e->name_off); + if (name && !strcmp(name, "BPF_MAX_TRAMP_LINKS")) + return e->val; + } + } + + return -1; +} + +int get_bpf_max_tramp_links(void) +{ + struct btf *vmlinux_btf; + int ret; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux btf")) + return -1; + ret = get_bpf_max_tramp_links_from(vmlinux_btf); + btf__free(vmlinux_btf); + + return ret; +} + #define MAX_BACKTRACE_SZ 128 void crash_handler(int signum) { @@ -968,12 +1013,12 @@ void crash_handler(int signum) sz = backtrace(bt, ARRAY_SIZE(bt)); + if (env.stdout) + stdio_restore(); if (env.test) { env.test_state->error_cnt++; dump_test_log(env.test, env.test_state, true, false); } - if (env.stdout) - stdio_restore(); if (env.worker_id != -1) fprintf(stderr, "[%d]: ", env.worker_id); fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); @@ -1070,8 +1115,7 @@ static void run_one_test(int test_num) state->tested = true; if (verbose() && env.worker_id == -1) - print_test_name(test_num + 1, test->test_name, - test_result(state->error_cnt, state->skip_cnt)); + print_test_result(test, state); reset_affinity(); restore_netns(); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index b090996daee5..d5d51ec97ec8 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TEST_PROGS_H +#define __TEST_PROGS_H + #include <stdio.h> #include <unistd.h> #include <errno.h> @@ -210,6 +213,12 @@ int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_FAIL(fmt, args...) ({ \ + static int duration = 0; \ + CHECK(false, "", fmt"\n", ##args); \ + false; \ +}) + #define ASSERT_TRUE(actual, name) ({ \ static int duration = 0; \ bool ___ok = (actual); \ @@ -385,6 +394,8 @@ int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz); int write_sysctl(const char *sysctl, const char *value); +int get_bpf_max_tramp_links_from(struct btf *btf); +int get_bpf_max_tramp_links(void); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" @@ -397,3 +408,27 @@ int write_sysctl(const char *sysctl, const char *value); #endif #define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" + +struct test_loader { + char *log_buf; + size_t log_buf_sz; + + struct bpf_object *obj; +}; + +typedef const void *(*skel_elf_bytes_fn)(size_t *sz); + +extern void test_loader__run_subtests(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory); + +extern void test_loader_fini(struct test_loader *tester); + +#define RUN_TESTS(skel) ({ \ + struct test_loader tester = {}; \ + \ + test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \ + test_loader_fini(&tester); \ +}) + +#endif /* __TEST_PROGS_H */ diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index 3256de30f563..ed518d075d1d 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -93,7 +93,7 @@ int get_map_fd_by_prog_id(int prog_id) info.nr_map_ids = 1; info.map_ids = (__u64) (unsigned long) map_ids; - if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) { log_err("Failed to get info by prog fd %d", prog_fd); goto err; } diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e768181a1bd7..024a0faafb3b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1690,24 +1690,42 @@ static void test_txmsg_apply(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; txmsg_redir = 0; + txmsg_ingress = 0; txmsg_apply = 1; txmsg_cork = 0; test_send_one(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; + txmsg_ingress = 0; + txmsg_apply = 1; + txmsg_cork = 0; + test_send_one(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_ingress = 1; txmsg_apply = 1; txmsg_cork = 0; test_send_one(opt, cgrp); txmsg_pass = 1; txmsg_redir = 0; + txmsg_ingress = 0; + txmsg_apply = 1024; + txmsg_cork = 0; + test_send_large(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_ingress = 0; txmsg_apply = 1024; txmsg_cork = 0; test_send_large(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; + txmsg_ingress = 1; txmsg_apply = 1024; txmsg_cork = 0; test_send_large(opt, cgrp); diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index daa7d1b8d309..76f0bd17061f 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -5,6 +5,7 @@ # with dst port = 9000 down to 5MBps. Then it measures actual # throughput of the flow. +BPF_FILE="test_tc_edt.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -54,7 +55,7 @@ ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ - bpf da obj test_tc_edt.o sec cls_test + bpf da obj ${BPF_FILE} sec cls_test # start the listener diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 088fcad138c9..910044f08908 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -3,6 +3,7 @@ # # In-place tunneling +BPF_FILE="test_tc_tunnel.bpf.o" # must match the port that the bpf program filters on readonly port=8000 @@ -99,6 +100,9 @@ if [[ "$#" -eq "0" ]]; then echo "ipip" $0 ipv4 ipip none 100 + echo "ipip6" + $0 ipv4 ipip6 none 100 + echo "ip6ip6" $0 ipv6 ip6tnl none 100 @@ -196,7 +200,7 @@ verify_data # client can no longer connect ip netns exec "${ns1}" tc qdisc add dev veth1 clsact ip netns exec "${ns1}" tc filter add dev veth1 egress \ - bpf direct-action object-file ./test_tc_tunnel.o \ + bpf direct-action object-file ${BPF_FILE} \ section "encap_${tuntype}_${mac}" echo "test bpf encap without decap (expect failure)" server_listen @@ -223,6 +227,9 @@ elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then ttype="vxlan" targs="id 1 dstport 8472 udp6zerocsumrx" +elif [[ "$tuntype" == "ipip6" ]]; then + ttype="ip6tnl" + targs="" else ttype=$tuntype targs="" @@ -232,6 +239,9 @@ fi if [[ "${tuntype}" == "sit" ]]; then link_addr1="${ns1_v4}" link_addr2="${ns2_v4}" +elif [[ "${tuntype}" == "ipip6" ]]; then + link_addr1="${ns1_v6}" + link_addr2="${ns2_v6}" else link_addr1="${addr1}" link_addr2="${addr2}" @@ -286,17 +296,11 @@ else server_listen fi -# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3. -if [[ "${tuntype}" == "sit" ]]; then - echo OK - exit 0 -fi - # serverside, use BPF for decap ip netns exec "${ns2}" ip link del dev testtun0 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact ip netns exec "${ns2}" tc filter add dev veth2 ingress \ - bpf direct-action object-file ./test_tc_tunnel.o section decap + bpf direct-action object-file ${BPF_FILE} section decap echo "test bpf encap with bpf decap" client_connect verify_data diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 5c8ef062f760..32df93747095 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -96,7 +96,7 @@ static int get_map_fd_by_prog_id(int prog_id, bool *xdp) info.nr_map_ids = 1; info.map_ids = (__u64)(unsigned long)map_ids; - if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) { log_err("Failed to get info by prog fd %d", prog_fd); goto err; } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index e9ebc67d73f7..06857b689c11 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -45,6 +45,7 @@ # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 6) Forward the packet to the overlay tnl dev +BPF_FILE="test_tunnel_kern.bpf.o" BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" PING_ARG="-c 3 -w 10 -q" ret=0 @@ -65,15 +66,20 @@ config_device() add_gre_tunnel() { + tun_key= + if [ -n "$1" ]; then + tun_key="key $1" + fi + # at_ns0 namespace ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ + ip link add dev $DEV_NS type $TYPE seq $tun_key \ local 172.16.1.100 remote 172.16.1.200 ip netns exec at_ns0 ip link set dev $DEV_NS up ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 # root namespace - ip link add dev $DEV type $TYPE key 2 external + ip link add dev $DEV type $TYPE $tun_key external ip link set dev $DEV up ip addr add dev $DEV 10.1.1.200/24 } @@ -237,7 +243,7 @@ test_gre() check $TYPE config_device - add_gre_tunnel + add_gre_tunnel 2 attach_bpf $DEV gre_set_tunnel gre_get_tunnel ping $PING_ARG 10.1.1.100 check_err $? @@ -252,6 +258,30 @@ test_gre() echo -e ${GREEN}"PASS: $TYPE"${NC} } +test_gre_no_tunnel_key() +{ + TYPE=gre + DEV_NS=gre00 + DEV=gre11 + ret=0 + + check $TYPE + config_device + add_gre_tunnel + attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + test_ip6gre() { TYPE=ip6gre @@ -545,7 +575,7 @@ test_xfrm_tunnel() > /sys/kernel/debug/tracing/trace setup_xfrm_tunnel mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} tc qdisc add dev veth1 clsact tc filter add dev veth1 proto ip ingress bpf da object-pinned \ ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state @@ -572,7 +602,7 @@ attach_bpf() SET=$2 GET=$3 mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/ + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ tc qdisc add dev $DEV clsact tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET @@ -588,6 +618,7 @@ cleanup() ip link del ipip6tnl11 2> /dev/null ip link del ip6ip6tnl11 2> /dev/null ip link del gretap11 2> /dev/null + ip link del gre11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null ip link del geneve11 2> /dev/null @@ -640,6 +671,10 @@ bpf_tunnel_test() test_gre errors=$(( $errors + $? )) + echo "Testing GRE tunnel (without tunnel keys)..." + test_gre_no_tunnel_key + errors=$(( $errors + $? )) + echo "Testing IP6GRE tunnel..." test_ip6gre errors=$(( $errors + $? )) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b605a70d4f6b..8b9949bb833d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -68,7 +68,6 @@ #define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) #define DEFAULT_LIBBPF_LOG_LEVEL 4 -#define VERBOSE_LIBBPF_LOG_LEVEL 1 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -81,6 +80,7 @@ static bool unpriv_disabled = false; static int skips; static bool verbose = false; +static int verif_log_level = 0; struct kfunc_btf_id_pair { const char *kfunc; @@ -209,7 +209,7 @@ loop: insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1); insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2); insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_vlan_push), + BPF_FUNC_skb_vlan_push); insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3); i++; } @@ -220,7 +220,7 @@ loop: i++; insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_vlan_pop), + BPF_FUNC_skb_vlan_pop); insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3); i++; } @@ -759,7 +759,7 @@ static int load_btf_spec(__u32 *types, int types_len, .log_buf = bpf_vlog, .log_size = sizeof(bpf_vlog), .log_level = (verbose - ? VERBOSE_LIBBPF_LOG_LEVEL + ? verif_log_level : DEFAULT_LIBBPF_LOG_LEVEL), ); @@ -1239,8 +1239,8 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) __u32 xlated_prog_len; __u32 buf_element_size = sizeof(struct bpf_insn); - if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("bpf_obj_get_info_by_fd failed"); + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_prog_get_info_by_fd failed"); return -1; } @@ -1261,8 +1261,8 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) bzero(&info, sizeof(info)); info.xlated_prog_len = xlated_prog_len; info.xlated_prog_insns = (__u64)(unsigned long)*buf; - if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("second bpf_obj_get_info_by_fd failed"); + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_prog_get_info_by_fd failed"); goto out_free_buf; } @@ -1491,7 +1491,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; + opts.log_level = verif_log_level | 4; /* force stats */ else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else @@ -1746,6 +1746,13 @@ int main(int argc, char **argv) if (argc > 1 && strcmp(argv[1], "-v") == 0) { arg++; verbose = true; + verif_log_level = 1; + argc--; + } + if (argc > 1 && strcmp(argv[1], "-vv") == 0) { + arg++; + verbose = true; + verif_log_level = 2; argc--; } diff --git a/tools/testing/selftests/bpf/test_xdp_features.sh b/tools/testing/selftests/bpf/test_xdp_features.sh new file mode 100755 index 000000000000..0aa71c4455c0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_features.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns1-$(mktemp -u XXXXXX)" +readonly V0_IP4=10.10.0.11 +readonly V1_IP4=10.10.0.1 +readonly V0_IP6=2001:db8::11 +readonly V1_IP6=2001:db8::1 + +ret=1 + +setup() { + { + ip netns add ${NS} + + ip link add v1 type veth peer name v0 netns ${NS} + + ip link set v1 up + ip addr add $V1_IP4/24 dev v1 + ip addr add $V1_IP6/64 nodad dev v1 + ip -n ${NS} link set dev v0 up + ip -n ${NS} addr add $V0_IP4/24 dev v0 + ip -n ${NS} addr add $V0_IP6/64 nodad dev v0 + + # Enable XDP mode and disable checksum offload + ethtool -K v1 gro on + ethtool -K v1 tx-checksumming off + ip netns exec ${NS} ethtool -K v0 gro on + ip netns exec ${NS} ethtool -K v0 tx-checksumming off + } > /dev/null 2>&1 +} + +cleanup() { + ip link del v1 2> /dev/null + ip netns del ${NS} 2> /dev/null + [ "$(pidof xdp_features)" = "" ] || kill $(pidof xdp_features) 2> /dev/null +} + +wait_for_dut_server() { + while sleep 1; do + ss -tlp | grep -q xdp_features + [ $? -eq 0 ] && break + done +} + +test_xdp_features() { + setup + + ## XDP_PASS + ./xdp_features -f XDP_PASS -D $V1_IP6 -T $V0_IP6 v1 & + wait_for_dut_server + ip netns exec ${NS} ./xdp_features -t -f XDP_PASS \ + -D $V1_IP6 -C $V1_IP6 \ + -T $V0_IP6 v0 + [ $? -ne 0 ] && exit + + ## XDP_DROP + ./xdp_features -f XDP_DROP -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 & + wait_for_dut_server + ip netns exec ${NS} ./xdp_features -t -f XDP_DROP \ + -D ::ffff:$V1_IP4 \ + -C ::ffff:$V1_IP4 \ + -T ::ffff:$V0_IP4 v0 + [ $? -ne 0 ] && exit + + ## XDP_ABORTED + ./xdp_features -f XDP_ABORTED -D $V1_IP6 -T $V0_IP6 v1 & + wait_for_dut_server + ip netns exec ${NS} ./xdp_features -t -f XDP_ABORTED \ + -D $V1_IP6 -C $V1_IP6 \ + -T $V0_IP6 v0 + [ $? -ne 0 ] && exit + + ## XDP_TX + ./xdp_features -f XDP_TX -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 & + wait_for_dut_server + ip netns exec ${NS} ./xdp_features -t -f XDP_TX \ + -D ::ffff:$V1_IP4 \ + -C ::ffff:$V1_IP4 \ + -T ::ffff:$V0_IP4 v0 + [ $? -ne 0 ] && exit + + ## XDP_REDIRECT + ./xdp_features -f XDP_REDIRECT -D $V1_IP6 -T $V0_IP6 v1 & + wait_for_dut_server + ip netns exec ${NS} ./xdp_features -t -f XDP_REDIRECT \ + -D $V1_IP6 -C $V1_IP6 \ + -T $V0_IP6 v0 + [ $? -ne 0 ] && exit + + ## XDP_NDO_XMIT + ./xdp_features -f XDP_NDO_XMIT -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 & + wait_for_dut_server + ip netns exec ${NS} ./xdp_features -t -f XDP_NDO_XMIT \ + -D ::ffff:$V1_IP4 \ + -C ::ffff:$V1_IP4 \ + -T ::ffff:$V0_IP4 v0 + ret=$? + cleanup +} + +set -e +trap cleanup 2 3 6 9 + +test_xdp_features + +exit $ret diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index ea69370caae3..2740322c1878 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -1,5 +1,6 @@ #!/bin/sh +BPF_FILE="test_xdp_meta.bpf.o" # Kselftest framework requirement - SKIP code is 4. readonly KSFT_SKIP=4 readonly NS1="ns1-$(mktemp -u XXXXXX)" @@ -42,11 +43,11 @@ ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 ip netns exec ${NS1} tc qdisc add dev veth1 clsact ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x +ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x ip netns exec ${NS1} ip link set dev veth1 up ip netns exec ${NS2} ip link set dev veth2 up diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 810c407e0286..fbcaa9f0120b 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -200,11 +200,11 @@ ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First p # ---------------------------------------------------------------------- # In ns1: ingress use XDP to remove VLAN tags export DEVNS1=veth1 -export FILE=test_xdp_vlan.o +export BPF_FILE=test_xdp_vlan.bpf.o # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" export XDP_PROG=xdp_vlan_change -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # In ns1: egress use TC to add back VLAN tag 4011 # (del cmd) @@ -212,7 +212,7 @@ ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PRO # ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ - prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push # Now the namespaces can reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 @@ -226,7 +226,7 @@ ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 # export XDP_PROG=xdp_vlan_remove_outer2 ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # Now the namespaces should still be able reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index d821fd098504..b077cf58f825 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -24,8 +24,6 @@ # ----------- | ---------- # | vethX | --------- | vethY | # ----------- peer ---------- -# | | | -# namespaceX | namespaceY # # AF_XDP is an address family optimized for high performance packet processing, # it is XDP’s user-space interface. @@ -39,10 +37,9 @@ # Prerequisites setup by script: # # Set up veth interfaces as per the topology shown ^^: -# * setup two veth interfaces and one namespace -# ** veth<xxxx> in root namespace -# ** veth<yyyy> in af_xdp<xxxx> namespace -# ** namespace af_xdp<xxxx> +# * setup two veth interfaces +# ** veth<xxxx> +# ** veth<yyyy> # *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid # conflict with any existing interface # * tests the veth and xsk layers of the topology @@ -74,6 +71,9 @@ # Run and dump packet contents: # sudo ./test_xsk.sh -D # +# Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger: +# sudo ./test_xsk.sh -d +# # Run test suite for physical device in loopback mode # sudo ./test_xsk.sh -i IFACE @@ -81,11 +81,12 @@ ETH="" -while getopts "vDi:" flag +while getopts "vDi:d" flag do case "${flag}" in v) verbose=1;; D) dump_pkts=1;; + d) debug=1;; i) ETH=${OPTARG};; esac done @@ -99,28 +100,25 @@ VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head - VETH0=ve${VETH0_POSTFIX} VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) VETH1=ve${VETH1_POSTFIX} -NS0=root -NS1=af_xdp${VETH1_POSTFIX} MTU=1500 trap ctrl_c INT function ctrl_c() { - cleanup_exit ${VETH0} ${VETH1} ${NS1} + cleanup_exit ${VETH0} ${VETH1} exit 1 } setup_vethPairs() { if [[ $verbose -eq 1 ]]; then - echo "setting up ${VETH0}: namespace: ${NS0}" + echo "setting up ${VETH0}" fi - ip netns add ${NS1} ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4 if [ -f /proc/net/if_inet6 ]; then echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6 fi if [[ $verbose -eq 1 ]]; then - echo "setting up ${VETH1}: namespace: ${NS1}" + echo "setting up ${VETH1}" fi if [[ $busy_poll -eq 1 ]]; then @@ -130,18 +128,15 @@ setup_vethPairs() { echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout fi - ip link set ${VETH1} netns ${NS1} - ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} + ip link set ${VETH1} mtu ${MTU} ip link set ${VETH0} mtu ${MTU} - ip netns exec ${NS1} ip link set ${VETH1} up - ip netns exec ${NS1} ip link set dev lo up + ip link set ${VETH1} up ip link set ${VETH0} up } if [ ! -z $ETH ]; then VETH0=${ETH} VETH1=${ETH} - NS1="" else validate_root_exec validate_veth_support ${VETH0} @@ -151,7 +146,7 @@ else retval=$? if [ $retval -ne 0 ]; then test_status $retval "${TEST_NAME}" - cleanup_exit ${VETH0} ${VETH1} ${NS1} + cleanup_exit ${VETH0} ${VETH1} exit $retval fi fi @@ -174,10 +169,15 @@ statusList=() TEST_NAME="XSK_SELFTESTS_${VETH0}_SOFTIRQ" +if [[ $debug -eq 1 ]]; then + echo "-i" ${VETH0} "-i" ${VETH1} + exit +fi + exec_xskxceiver if [ -z $ETH ]; then - cleanup_exit ${VETH0} ${VETH1} ${NS1} + cleanup_exit ${VETH0} ${VETH1} fi TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL" busy_poll=1 @@ -190,7 +190,7 @@ exec_xskxceiver ## END TESTS if [ -z $ETH ]; then - cleanup_exit ${VETH0} ${VETH1} ${NS1} + cleanup_exit ${VETH0} ${VETH1} fi failures=0 diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 9695318e8132..6c44153755e6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -164,7 +164,7 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) int err; memset(info, 0, sizeof(*info)); - err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len); + err = bpf_link_get_info_by_fd(bpf_link__fd(link), info, &info_len); if (err) { printf("failed to get link info: %d\n", -errno); return 0; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9c4be2cdb21a..09a16a77bae4 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -23,7 +23,7 @@ static int ksym_cmp(const void *p1, const void *p2) return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; } -int load_kallsyms(void) +int load_kallsyms_refresh(void) { FILE *f; char func[256], buf[256]; @@ -31,12 +31,7 @@ int load_kallsyms(void) void *addr; int i = 0; - /* - * This is called/used from multiplace places, - * load symbols just once. - */ - if (sym_cnt) - return 0; + sym_cnt = 0; f = fopen("/proc/kallsyms", "r"); if (!f) @@ -57,6 +52,17 @@ int load_kallsyms(void) return 0; } +int load_kallsyms(void) +{ + /* + * This is called/used from multiplace places, + * load symbols just once. + */ + if (sym_cnt) + return 0; + return load_kallsyms_refresh(); +} + struct ksym *ksym_search(long key) { int start = 0, end = sym_cnt; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 238a9c98cde2..53efde0e2998 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -10,6 +10,8 @@ struct ksym { }; int load_kallsyms(void); +int load_kallsyms_refresh(void); + struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c index c2aa6f26738b..bf82b923c5fe 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c +++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c @@ -1,13 +1,14 @@ { "bounds checks mixing signed and unsigned, positive bounds", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, 2), BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3), @@ -17,20 +18,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), @@ -40,20 +42,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 2", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), @@ -65,20 +68,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 3", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4), @@ -89,20 +93,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 4", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, 1), BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), @@ -112,19 +117,20 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .result = ACCEPT, }, { "bounds checks mixing signed and unsigned, variant 5", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), @@ -135,17 +141,20 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 6", .insns = { + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_6, -1), BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5), @@ -163,13 +172,14 @@ { "bounds checks mixing signed and unsigned, variant 7", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024), BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), @@ -179,19 +189,20 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .result = ACCEPT, }, { "bounds checks mixing signed and unsigned, variant 8", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), @@ -203,20 +214,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 9", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL), BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), @@ -228,19 +240,20 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .result = ACCEPT, }, { "bounds checks mixing signed and unsigned, variant 10", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), @@ -252,20 +265,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 11", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), @@ -278,20 +292,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 12", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -6), BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), @@ -303,20 +318,21 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 13", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, 2), BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), @@ -331,7 +347,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -340,13 +356,14 @@ .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, offsetof(struct __sk_buff, mark)), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -1), BPF_MOV64_IMM(BPF_REG_8, 2), @@ -360,20 +377,21 @@ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3), BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, - .fixup_map_hash_8b = { 4 }, + .fixup_map_hash_8b = { 6 }, .errstr = "unbounded min value", .result = REJECT, }, { "bounds checks mixing signed and unsigned, variant 15", .insns = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_MOV64_IMM(BPF_REG_2, -6), BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), @@ -387,7 +405,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_8b = { 5 }, .errstr = "unbounded min value", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c new file mode 100644 index 000000000000..3af2501082b2 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c @@ -0,0 +1,67 @@ +{ + "BPF_ST_MEM stack imm non-zero", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 42), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -42), + /* if value is tracked correctly R0 is zero */ + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + /* Use prog type that requires return value in range [0, 1] */ + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, + .runs = -1, +}, +{ + "BPF_ST_MEM stack imm zero", + .insns = { + /* mark stack 0000 0000 */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* read and sum a few bytes */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* if value is tracked correctly R0 is zero */ + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + /* Use prog type that requires return value in range [0, 1] */ + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, + .runs = -1, +}, +{ + "BPF_ST_MEM stack imm zero, variable offset", + .insns = { + /* set fp[-16], fp[-24] to zeros */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + /* r0 = random value in range [-32, -15] */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JLE, BPF_REG_0, 16, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 32), + /* fp[r0] = 0, make a variable offset write of zero, + * this should preserve zero marks on stack. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_10), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + /* r0 = fp[-20], if variable offset write was tracked correctly + * r0 would be a known zero. + */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_10, -20), + /* Would fail return code verification if r0 range is not tracked correctly. */ + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + /* Use prog type that requires return value in range [0, 1] */ + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, + .runs = -1, +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index e1a937277b54..9d993926bf0e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 expected pointer to ctx, but got PTR", + .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, @@ -109,7 +109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, @@ -2305,3 +2305,85 @@ .errstr = "!read_ok", .result = REJECT, }, +/* Make sure that verifier.c:states_equal() considers IDs from all + * frames when building 'idmap' for check_ids(). + */ +{ + "calls: check_ids() across call boundary", + .insns = { + /* Function main() */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24), + /* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32), + /* call foo(&fp[-24], &fp[-32]) ; both arguments have IDs in the current + * ; stack frame + */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32), + BPF_CALL_REL(2), + /* exit 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* Function foo() + * + * r9 = &frame[0].fp[-24] ; save arguments in the callee saved registers, + * r8 = &frame[0].fp[-32] ; arguments are pointers to pointers to map value + */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_2), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +1 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * r9 = r8 + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + /* r9 = *r9 ; verifier get's to this point via two paths: + * ; (I) one including r9 = r8, verified first; + * ; (II) one excluding r9 = r8, verified next. + * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id. + * ; Suppose that checkpoint is created here via path (I). + * ; When verifying via (II) the r9.id must be compared against + * ; frame[0].fp[-24].id, otherwise (I) and (II) would be + * ; incorrectly deemed equivalent. + * if r9 == 0 goto <exit> + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1), + /* r8 = *r8 ; read map value via r8, this is not safe + * r0 = *r8 ; because r8 might be not equal to r9. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), + /* exit 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .fixup_map_hash_8b = { 3, 9 }, + .result = REJECT, + .errstr = "R8 invalid mem access 'map_value_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index 11acd1855acf..dce2e28aeb43 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -654,3 +654,57 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "direct packet access: test30 (check_id() in regsafe(), bad access)", + .insns = { + /* r9 = ctx */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r2 = ctx->data + * r3 = ctx->data + * r4 = ctx->data_end + */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9, offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9, offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_9, offsetof(struct __sk_buff, data_end)), + /* if r6 > 100 goto exit + * if r7 > 100 goto exit + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 100, 9), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 100, 8), + /* r2 += r6 ; this forces assignment of ID to r2 + * r2 += 1 ; get some fixed off for r2 + * r3 += r7 ; this forces assignment of ID to r3 + * r3 += 1 ; get some fixed off for r3 + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), + /* if r6 > r7 goto +1 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * r2 = r3 ; optionally share ID between r2 and r3 + */ + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), + /* if r3 > ctx->data_end goto exit */ + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 1), + /* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2, + * ; this is not always safe + */ + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, -1), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .result = REJECT, + .errstr = "invalid access to packet, off=0 size=1, R2", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c new file mode 100644 index 000000000000..67a1c07ead34 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c @@ -0,0 +1,174 @@ +{ + /* This is equivalent to the following program: + * + * r6 = skb->sk; + * r7 = sk_fullsock(r6); + * r0 = sk_fullsock(r6); + * if (r0 == 0) return 0; (a) + * if (r0 != r7) return 0; (b) + * *r7->type; (c) + * return 0; + * + * It is safe to dereference r7 at point (c), because of (a) and (b). + * The test verifies that relation r0 == r7 is propagated from (b) to (c). + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 == r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch", + .insns = { + /* r6 = skb->sk */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + /* if (r0 == r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */ + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + /* r0 = *(r7->type); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "R7 invalid mem access 'sock_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as a first test, but not null should be inferred for JEQ branch */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + /* if (r0 != r7) return 0; */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */ + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + /* r0 = *(r7->type); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 != r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "R7 invalid mem access 'sock_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Maps are treated in a different branch of `mark_ptr_not_null_reg`, + * so separate test for maps case. + */ + "jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE", + .insns = { + /* r9 = &some stack to use as key */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -8), + /* r8 = process local map */ + BPF_LD_MAP_FD(BPF_REG_8, 0), + /* r6 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r7 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), + /* if (r6 != r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1), + /* read *r7; */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_xdp_sock, queue_id)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index 79021c30e51e..8bf37e5207f1 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -21,6 +21,30 @@ .retval = 2, }, { + "jit: lsh, rsh, arsh by reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_0), + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_1), + BPF_ALU32_REG(BPF_RSH, BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_4, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ "jit: mov32 for ldimm64, 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 2), diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index 1f82021429bf..17ee84dc7766 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -9,7 +9,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "R1 is bpf_array invalid negative access: off=-8", }, @@ -26,7 +26,7 @@ }, .fixup_map_array_48b = { 3 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "only read from bpf_array is supported", }, @@ -41,7 +41,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -57,7 +57,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = ACCEPT, .retval = 1, }, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index fd683a32a276..9540164712b7 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -142,7 +142,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_user_key", 2 }, { "bpf_key_put", 4 }, @@ -163,7 +163,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_system_key", 1 }, { "bpf_key_put", 3 }, diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c index b64d33e4833c..92e3f6a61a79 100644 --- a/tools/testing/selftests/bpf/verifier/ringbuf.c +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -28,7 +28,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "dereference of modified alloc_mem ptr R1", + .errstr = "R1 must have zero offset when passed to release func", }, { "ringbuf: invalid reservation offset 2", diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 68b14fdfebdb..d63fd8991b03 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -225,3 +225,39 @@ .result_unpriv = ACCEPT, .insn_processed = 15, }, +/* The test performs a conditional 64-bit write to a stack location + * fp[-8], this is followed by an unconditional 8-bit write to fp[-8], + * then data is read from fp[-8]. This sequence is unsafe. + * + * The test would be mistakenly marked as safe w/o dst register parent + * preservation in verifier.c:copy_register_state() function. + * + * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the + * checkpoint state after conditional 64-bit assignment. + */ +{ + "write tracking and register parent chain bug", + .insns = { + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r0 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + /* if r0 > r6 goto +1 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_6, 1), + /* *(u64 *)(r10 - 8) = 0xdeadbeef */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0xdeadbeef), + /* r1 = 42 */ + BPF_MOV64_IMM(BPF_REG_1, 42), + /* *(u8 *)(r10 - 8) = r1 */ + BPF_STX_MEM(BPF_B, BPF_REG_FP, BPF_REG_1, -8), + /* r2 = *(u64 *)(r10 - 8) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_FP, -8), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "invalid read from stack off -8+1 size 8", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c new file mode 100644 index 000000000000..1f0d2bdc673f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/sleepable.c @@ -0,0 +1,91 @@ +{ + "sleepable fentry accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "bpf_fentry_test1", + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable fexit accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "bpf_fentry_test1", + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable fmod_ret accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .kfunc = "bpf_fentry_test1", + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable iter accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_ITER, + .kfunc = "task", + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable lsm accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable uprobe accept", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_KPROBE, + .kfunc = "bpf_fentry_test1", + .result = ACCEPT, + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, +{ + "sleepable raw tracepoint reject", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .kfunc = "sched_switch", + .result = REJECT, + .errstr = "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable", + .flags = BPF_F_SLEEPABLE, + .runs = -1, +}, diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index e23f07175e1b..9bb302dade23 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -84,7 +84,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", + .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited", }, { "check corrupted spill/fill", diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c index 781621facae4..eaf114f07e2e 100644 --- a/tools/testing/selftests/bpf/verifier/spin_lock.c +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -331,3 +331,117 @@ .errstr = "inside bpf_spin_lock", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "spin_lock: regsafe compare reg->id for map value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_6, offsetof(struct __sk_buff, mark)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 2 }, + .result = REJECT, + .errstr = "bpf_spin_unlock of different lock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = BPF_F_TEST_STATE_FREQ, +}, +/* Make sure that regsafe() compares ids for spin lock records using + * check_ids(): + * 1: r9 = map_lookup_elem(...) ; r9.id == 1 + * 2: r8 = map_lookup_elem(...) ; r8.id == 2 + * 3: r7 = ktime_get_ns() + * 4: r6 = ktime_get_ns() + * 5: if r6 > r7 goto <9> + * 6: spin_lock(r8) + * 7: r9 = r8 + * 8: goto <10> + * 9: spin_lock(r9) + * 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active, + * ; second visit to (10) should be considered safe + * ; if check_ids() is used. + * 11: exit(0) + */ +{ + "spin_lock: regsafe() check_ids() similar id mappings", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + /* r9 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 24), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + /* r8 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 18), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +5 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * spin_lock(r8) + * r9 = r8 + * goto unlock + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + BPF_JMP_A(3), + /* spin_lock(r9) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + /* spin_unlock(r9) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_unlock), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3, 10 }, + .result = VERBOSE_ACCEPT, + .errstr = "28: safe", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = BPF_F_TEST_STATE_FREQ, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c index 3ecb70a3d939..52a8bca14f03 100644 --- a/tools/testing/selftests/bpf/verifier/value_or_null.c +++ b/tools/testing/selftests/bpf/verifier/value_or_null.c @@ -169,3 +169,52 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, +{ + "MAP_VALUE_OR_NULL check_ids() in regsafe()", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* r9 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + /* r8 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +1 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * r9 = r8 ; optionally share ID between r9 and r8 + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + /* if r9 == 0 goto <exit> */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1), + /* read map value via r8, this is not always + * safe because r8 might be not equal to r9. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), + /* exit 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .fixup_map_hash_8b = { 3, 9 }, + .result = REJECT, + .errstr = "R8 invalid mem access 'map_value_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b0d83a28e348..83231456d3c5 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -17,6 +17,7 @@ #include <bpf/libbpf.h> #include <libelf.h> #include <gelf.h> +#include <float.h> enum stat_id { VERDICT, @@ -34,6 +35,45 @@ enum stat_id { NUM_STATS_CNT = FILE_NAME - VERDICT, }; +/* In comparison mode each stat can specify up to four different values: + * - A side value; + * - B side value; + * - absolute diff value; + * - relative (percentage) diff value. + * + * When specifying stat specs in comparison mode, user can use one of the + * following variant suffixes to specify which exact variant should be used for + * ordering or filtering: + * - `_a` for A side value; + * - `_b` for B side value; + * - `_diff` for absolute diff value; + * - `_pct` for relative (percentage) diff value. + * + * If no variant suffix is provided, then `_b` (control data) is assumed. + * + * As an example, let's say instructions stat has the following output: + * + * Insns (A) Insns (B) Insns (DIFF) + * --------- --------- -------------- + * 21547 20920 -627 (-2.91%) + * + * Then: + * - 21547 is A side value (insns_a); + * - 20920 is B side value (insns_b); + * - -627 is absolute diff value (insns_diff); + * - -2.91% is relative diff value (insns_pct). + * + * For verdict there is no verdict_pct variant. + * For file and program name, _a and _b variants are equivalent and there are + * no _diff or _pct variants. + */ +enum stat_variant { + VARIANT_A, + VARIANT_B, + VARIANT_DIFF, + VARIANT_PCT, +}; + struct verif_stats { char *file_name; char *prog_name; @@ -41,9 +81,19 @@ struct verif_stats { long stats[NUM_STATS_CNT]; }; +/* joined comparison mode stats */ +struct verif_stats_join { + char *file_name; + char *prog_name; + + const struct verif_stats *stats_a; + const struct verif_stats *stats_b; +}; + struct stat_specs { int spec_cnt; enum stat_id ids[ALL_STATS_CNT]; + enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -54,9 +104,31 @@ enum resfmt { RESFMT_CSV, }; +enum filter_kind { + FILTER_NAME, + FILTER_STAT, +}; + +enum operator_kind { + OP_EQ, /* == or = */ + OP_NEQ, /* != or <> */ + OP_LT, /* < */ + OP_LE, /* <= */ + OP_GT, /* > */ + OP_GE, /* >= */ +}; + struct filter { + enum filter_kind kind; + /* FILTER_NAME */ + char *any_glob; char *file_glob; char *prog_glob; + /* FILTER_STAT */ + enum operator_kind op; + int stat_id; + enum stat_variant stat_var; + long value; }; static struct env { @@ -67,14 +139,18 @@ static struct env { int log_level; enum resfmt out_fmt; bool comparison_mode; + bool replay_mode; struct verif_stats *prog_stats; int prog_stat_cnt; - /* baseline_stats is allocated and used only in comparsion mode */ + /* baseline_stats is allocated and used only in comparison mode */ struct verif_stats *baseline_stats; int baseline_stat_cnt; + struct verif_stats_join *join_stats; + int join_stat_cnt; + struct stat_specs output_spec; struct stat_specs sort_spec; @@ -115,6 +191,7 @@ static const struct argp_option opts[] = { { "sort", 's', "SPEC", 0, "Specify sort order" }, { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." }, { "compare", 'C', NULL, 0, "Comparison mode" }, + { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, {}, }; @@ -169,6 +246,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'C': env.comparison_mode = true; break; + case 'R': + env.replay_mode = true; + break; case 'f': if (arg[0] == '@') err = append_filter_file(arg + 1); @@ -226,28 +306,6 @@ static bool glob_matches(const char *str, const char *pat) return !*str && !*pat; } -static bool should_process_file(const char *filename) -{ - int i; - - if (env.deny_filter_cnt > 0) { - for (i = 0; i < env.deny_filter_cnt; i++) { - if (glob_matches(filename, env.deny_filters[i].file_glob)) - return false; - } - } - - if (env.allow_filter_cnt == 0) - return true; - - for (i = 0; i < env.allow_filter_cnt; i++) { - if (glob_matches(filename, env.allow_filters[i].file_glob)) - return true; - } - - return false; -} - static bool is_bpf_obj_file(const char *path) { Elf64_Ehdr *ehdr; int fd, err = -EINVAL; @@ -280,45 +338,84 @@ cleanup: return err == 0; } -static bool should_process_prog(const char *path, const char *prog_name) +static bool should_process_file_prog(const char *filename, const char *prog_name) { - const char *filename = basename(path); - int i; + struct filter *f; + int i, allow_cnt = 0; - if (env.deny_filter_cnt > 0) { - for (i = 0; i < env.deny_filter_cnt; i++) { - if (glob_matches(filename, env.deny_filters[i].file_glob)) - return false; - if (!env.deny_filters[i].prog_glob) - continue; - if (glob_matches(prog_name, env.deny_filters[i].prog_glob)) - return false; - } - } + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_NAME) + continue; - if (env.allow_filter_cnt == 0) - return true; + if (f->any_glob && glob_matches(filename, f->any_glob)) + return false; + if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob)) + return false; + if (f->file_glob && glob_matches(filename, f->file_glob)) + return false; + if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob)) + return false; + } for (i = 0; i < env.allow_filter_cnt; i++) { - if (!glob_matches(filename, env.allow_filters[i].file_glob)) + f = &env.allow_filters[i]; + if (f->kind != FILTER_NAME) continue; - /* if filter specifies only filename glob part, it implicitly - * allows all progs within that file - */ - if (!env.allow_filters[i].prog_glob) - return true; - if (glob_matches(prog_name, env.allow_filters[i].prog_glob)) + + allow_cnt++; + if (f->any_glob) { + if (glob_matches(filename, f->any_glob)) + return true; + /* If we don't know program name yet, any_glob filter + * has to assume that current BPF object file might be + * relevant; we'll check again later on after opening + * BPF object file, at which point program name will + * be known finally. + */ + if (!prog_name || glob_matches(prog_name, f->any_glob)) + return true; + } else { + if (f->file_glob && !glob_matches(filename, f->file_glob)) + continue; + if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob)) + continue; return true; + } } - return false; + /* if there are no file/prog name allow filters, allow all progs, + * unless they are denied earlier explicitly + */ + return allow_cnt == 0; } +static struct { + enum operator_kind op_kind; + const char *op_str; +} operators[] = { + /* Order of these definitions matter to avoid situations like '<' + * matching part of what is actually a '<>' operator. That is, + * substrings should go last. + */ + { OP_EQ, "==" }, + { OP_NEQ, "!=" }, + { OP_NEQ, "<>" }, + { OP_LE, "<=" }, + { OP_LT, "<" }, + { OP_GE, ">=" }, + { OP_GT, ">" }, + { OP_EQ, "=" }, +}; + +static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); + static int append_filter(struct filter **filters, int *cnt, const char *str) { struct filter *f; void *tmp; const char *p; + int i; tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters)); if (!tmp) @@ -326,26 +423,108 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) *filters = tmp; f = &(*filters)[*cnt]; - f->file_glob = f->prog_glob = NULL; + memset(f, 0, sizeof(*f)); + + /* First, let's check if it's a stats filter of the following form: + * <stat><op><value, where: + * - <stat> is one of supported numerical stats (verdict is also + * considered numerical, failure == 0, success == 1); + * - <op> is comparison operator (see `operators` definitions); + * - <value> is an integer (or failure/success, or false/true as + * special aliases for 0 and 1, respectively). + * If the form doesn't match what user provided, we assume file/prog + * glob filter. + */ + for (i = 0; i < ARRAY_SIZE(operators); i++) { + enum stat_variant var; + int id; + long val; + const char *end = str; + const char *op_str; + + op_str = operators[i].op_str; + p = strstr(str, op_str); + if (!p) + continue; + + if (!parse_stat_id_var(str, p - str, &id, &var)) { + fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); + return -EINVAL; + } + if (id >= FILE_NAME) { + fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str); + return -EINVAL; + } + + p += strlen(op_str); + + if (strcasecmp(p, "true") == 0 || + strcasecmp(p, "t") == 0 || + strcasecmp(p, "success") == 0 || + strcasecmp(p, "succ") == 0 || + strcasecmp(p, "s") == 0 || + strcasecmp(p, "match") == 0 || + strcasecmp(p, "m") == 0) { + val = 1; + } else if (strcasecmp(p, "false") == 0 || + strcasecmp(p, "f") == 0 || + strcasecmp(p, "failure") == 0 || + strcasecmp(p, "fail") == 0 || + strcasecmp(p, "mismatch") == 0 || + strcasecmp(p, "mis") == 0) { + val = 0; + } else { + errno = 0; + val = strtol(p, (char **)&end, 10); + if (errno || end == p || *end != '\0' ) { + fprintf(stderr, "Invalid integer value in '%s'!\n", str); + return -EINVAL; + } + } + + f->kind = FILTER_STAT; + f->stat_id = id; + f->stat_var = var; + f->op = operators[i].op_kind; + f->value = val; + + *cnt += 1; + return 0; + } - /* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */ + /* File/prog filter can be specified either as '<glob>' or + * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to + * both file and program names. This seems to be way more useful in + * practice. If user needs full control, they can use '/<prog-glob>' + * form to glob just program name, or '<file-glob>/' to glob only file + * name. But usually common <glob> seems to be the most useful and + * ergonomic way. + */ + f->kind = FILTER_NAME; p = strchr(str, '/'); if (!p) { - f->file_glob = strdup(str); - if (!f->file_glob) + f->any_glob = strdup(str); + if (!f->any_glob) return -ENOMEM; } else { - f->file_glob = strndup(str, p - str); - f->prog_glob = strdup(p + 1); - if (!f->file_glob || !f->prog_glob) { - free(f->file_glob); - free(f->prog_glob); - f->file_glob = f->prog_glob = NULL; - return -ENOMEM; + if (str != p) { + /* non-empty file glob */ + f->file_glob = strndup(str, p - str); + if (!f->file_glob) + return -ENOMEM; + } + if (strlen(p + 1) > 0) { + /* non-empty prog glob */ + f->prog_glob = strdup(p + 1); + if (!f->prog_glob) { + free(f->file_glob); + f->file_glob = NULL; + return -ENOMEM; + } } } - *cnt = *cnt + 1; + *cnt += 1; return 0; } @@ -388,6 +567,15 @@ static const struct stat_specs default_output_spec = { }, }; +static const struct stat_specs default_csv_output_spec = { + .spec_cnt = 9, + .ids = { + FILE_NAME, PROG_NAME, VERDICT, DURATION, + TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, + MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + }, +}; + static const struct stat_specs default_sort_spec = { .spec_cnt = 2, .ids = { @@ -396,48 +584,123 @@ static const struct stat_specs default_sort_spec = { .asc = { true, true, }, }; +/* sorting for comparison mode to join two data sets */ +static const struct stat_specs join_sort_spec = { + .spec_cnt = 2, + .ids = { + FILE_NAME, PROG_NAME, + }, + .asc = { true, true, }, +}; + static struct stat_def { const char *header; const char *names[4]; bool asc_by_default; + bool left_aligned; } stat_defs[] = { - [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ }, - [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ }, - [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ }, + [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ }, + [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ }, + [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ }, [DURATION] = { "Duration (us)", {"duration", "dur"}, }, - [TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, }, - [TOTAL_STATES] = { "Total states", {"total_states", "states"}, }, + [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, }, + [TOTAL_STATES] = { "States", {"total_states", "states"}, }, [PEAK_STATES] = { "Peak states", {"peak_states"}, }, [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; +static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +{ + static const char *var_sfxs[] = { + [VARIANT_A] = "_a", + [VARIANT_B] = "_b", + [VARIANT_DIFF] = "_diff", + [VARIANT_PCT] = "_pct", + }; + int i, j, k; + + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { + struct stat_def *def = &stat_defs[i]; + size_t alias_len, sfx_len; + const char *alias; + + for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) { + alias = def->names[j]; + if (!alias) + continue; + + alias_len = strlen(alias); + if (strncmp(name, alias, alias_len) != 0) + continue; + + if (alias_len == len) { + /* If no variant suffix is specified, we + * assume control group (just in case we are + * in comparison mode. Variant is ignored in + * non-comparison mode. + */ + *var = VARIANT_B; + *id = i; + return true; + } + + for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) { + sfx_len = strlen(var_sfxs[k]); + if (alias_len + sfx_len != len) + continue; + + if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) { + *var = (enum stat_variant)k; + *id = i; + return true; + } + } + } + } + + return false; +} + +static bool is_asc_sym(char c) +{ + return c == '^'; +} + +static bool is_desc_sym(char c) +{ + return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_'; +} + static int parse_stat(const char *stat_name, struct stat_specs *specs) { - int id, i; + int id; + bool has_order = false, is_asc = false; + size_t len = strlen(stat_name); + enum stat_variant var; if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids)); return -E2BIG; } - for (id = 0; id < ARRAY_SIZE(stat_defs); id++) { - struct stat_def *def = &stat_defs[id]; - - for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) { - if (!def->names[i] || strcmp(def->names[i], stat_name) != 0) - continue; - - specs->ids[specs->spec_cnt] = id; - specs->asc[specs->spec_cnt] = def->asc_by_default; - specs->spec_cnt++; + if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) { + has_order = true; + is_asc = is_asc_sym(stat_name[len - 1]); + len -= 1; + } - return 0; - } + if (!parse_stat_id_var(stat_name, len, &id, &var)) { + fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); + return -ESRCH; } - fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); - return -ESRCH; + specs->ids[specs->spec_cnt] = id; + specs->variants[specs->spec_cnt] = var; + specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->spec_cnt++; + + return 0; } static int parse_stats(const char *stats_str, struct stat_specs *specs) @@ -509,6 +772,28 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +static void fixup_obj(struct bpf_object *obj) +{ + struct bpf_map *map; + + bpf_object__for_each_map(map, obj) { + /* disable pinning */ + bpf_map__set_pin_path(map, NULL); + + /* fix up map size, if necessary */ + switch (bpf_map__type(map)) { + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_CGROUP_STORAGE: + break; + default: + if (bpf_map__max_entries(map) == 0) + bpf_map__set_max_entries(map, 1); + } + } +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *prog_name = bpf_program__name(prog); @@ -518,7 +803,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf int err = 0; void *tmp; - if (!should_process_prog(filename, bpf_program__name(prog))) { + if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) { env.progs_skipped++; return 0; } @@ -543,6 +828,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf } verif_log_buf[0] = '\0'; + /* increase chances of successful BPF object loading */ + fixup_obj(obj); + err = bpf_object__load(obj); env.progs_processed++; @@ -571,7 +859,7 @@ static int process_obj(const char *filename) LIBBPF_OPTS(bpf_object_open_opts, opts); int err = 0, prog_cnt = 0; - if (!should_process_file(basename(filename))) { + if (!should_process_file_prog(basename(filename), NULL)) { if (env.verbose) printf("Skipping '%s' due to filters...\n", filename); env.files_skipped++; @@ -594,7 +882,7 @@ static int process_obj(const char *filename) * that BPF object file is incomplete and has to be statically * linked into a final BPF object file; instead of bailing * out, report it into stderr, mark it as skipped, and - * proceeed + * proceed */ fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno); env.files_skipped++; @@ -691,7 +979,106 @@ static int cmp_prog_stats(const void *v1, const void *v2) return cmp; } - return 0; + /* always disambiguate with file+prog, which are unique */ + cmp = strcmp(s1->file_name, s2->file_name); + if (cmp != 0) + return cmp; + return strcmp(s1->prog_name, s2->prog_name); +} + +static void fetch_join_stat_value(const struct verif_stats_join *s, + enum stat_id id, enum stat_variant var, + const char **str_val, + double *num_val) +{ + long v1, v2; + + if (id == FILE_NAME) { + *str_val = s->file_name; + return; + } + if (id == PROG_NAME) { + *str_val = s->prog_name; + return; + } + + v1 = s->stats_a ? s->stats_a->stats[id] : 0; + v2 = s->stats_b ? s->stats_b->stats[id] : 0; + + switch (var) { + case VARIANT_A: + if (!s->stats_a) + *num_val = -DBL_MAX; + else + *num_val = s->stats_a->stats[id]; + return; + case VARIANT_B: + if (!s->stats_b) + *num_val = -DBL_MAX; + else + *num_val = s->stats_b->stats[id]; + return; + case VARIANT_DIFF: + if (!s->stats_a || !s->stats_b) + *num_val = -DBL_MAX; + else if (id == VERDICT) + *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */; + else + *num_val = (double)(v2 - v1); + return; + case VARIANT_PCT: + if (!s->stats_a || !s->stats_b) { + *num_val = -DBL_MAX; + } else if (v1 == 0) { + if (v1 == v2) + *num_val = 0.0; + else + *num_val = v2 < v1 ? -100.0 : 100.0; + } else { + *num_val = (v2 - v1) * 100.0 / v1; + } + return; + } +} + +static int cmp_join_stat(const struct verif_stats_join *s1, + const struct verif_stats_join *s2, + enum stat_id id, enum stat_variant var, bool asc) +{ + const char *str1 = NULL, *str2 = NULL; + double v1, v2; + int cmp = 0; + + fetch_join_stat_value(s1, id, var, &str1, &v1); + fetch_join_stat_value(s2, id, var, &str2, &v2); + + if (str1) + cmp = strcmp(str1, str2); + else if (v1 != v2) + cmp = v1 < v2 ? -1 : 1; + + return asc ? cmp : -cmp; +} + +static int cmp_join_stats(const void *v1, const void *v2) +{ + const struct verif_stats_join *s1 = v1, *s2 = v2; + int i, cmp; + + for (i = 0; i < env.sort_spec.spec_cnt; i++) { + cmp = cmp_join_stat(s1, s2, + env.sort_spec.ids[i], + env.sort_spec.variants[i], + env.sort_spec.asc[i]); + if (cmp != 0) + return cmp; + } + + /* always disambiguate with file+prog, which are unique */ + cmp = strcmp(s1->file_name, s2->file_name); + if (cmp != 0) + return cmp; + return strcmp(s1->prog_name, s2->prog_name); } #define HEADER_CHAR '-' @@ -713,6 +1100,7 @@ static void output_header_underlines(void) static void output_headers(enum resfmt fmt) { + const char *fmt_str; int i, len; for (i = 0; i < env.output_spec.spec_cnt; i++) { @@ -726,7 +1114,8 @@ static void output_headers(enum resfmt fmt) *max_len = len; break; case RESFMT_TABLE: - printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); + fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s"; + printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); if (i == env.output_spec.spec_cnt - 1) printf("\n"); break; @@ -747,13 +1136,16 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, { switch (id) { case FILE_NAME: - *str = s->file_name; + *str = s ? s->file_name : "N/A"; break; case PROG_NAME: - *str = s->prog_name; + *str = s ? s->prog_name : "N/A"; break; case VERDICT: - *str = s->stats[VERDICT] ? "success" : "failure"; + if (!s) + *str = "N/A"; + else + *str = s->stats[VERDICT] ? "success" : "failure"; break; case DURATION: case TOTAL_INSNS: @@ -761,7 +1153,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case PEAK_STATES: case MAX_STATES_PER_INSN: case MARK_READ_MAX_LEN: - *val = s->stats[id]; + *val = s ? s->stats[id] : 0; break; default: fprintf(stderr, "Unrecognized stat #%d\n", id); @@ -816,42 +1208,6 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last } } -static int handle_verif_mode(void) -{ - int i, err; - - if (env.filename_cnt == 0) { - fprintf(stderr, "Please provide path to BPF object file!\n"); - argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); - return -EINVAL; - } - - for (i = 0; i < env.filename_cnt; i++) { - err = process_obj(env.filenames[i]); - if (err) { - fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; - } - } - - qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); - - if (env.out_fmt == RESFMT_TABLE) { - /* calculate column widths */ - output_headers(RESFMT_TABLE_CALCLEN); - for (i = 0; i < env.prog_stat_cnt; i++) - output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false); - } - - /* actually output the table */ - output_headers(env.out_fmt); - for (i = 0; i < env.prog_stat_cnt; i++) { - output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1); - } - - return 0; -} - static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st) { switch (id) { @@ -983,7 +1339,7 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs, * parsed entire line; if row should be ignored we pretend we * never parsed it */ - if (!should_process_prog(st->file_name, st->prog_name)) { + if (!should_process_file_prog(st->file_name, st->prog_name)) { free(st->file_name); free(st->prog_name); *stat_cntp -= 1; @@ -1072,9 +1428,11 @@ static void output_comp_headers(enum resfmt fmt) output_comp_header_underlines(); } -static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp, +static void output_comp_stats(const struct verif_stats_join *join_stats, enum resfmt fmt, bool last) { + const struct verif_stats *base = join_stats->stats_a; + const struct verif_stats *comp = join_stats->stats_b; char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {}; int i; @@ -1092,28 +1450,44 @@ static void output_comp_stats(const struct verif_stats *base, const struct verif /* normalize all the outputs to be in string buffers for simplicity */ if (is_key_stat(id)) { /* key stats (file and program name) are always strings */ - if (base != &fallback_stats) + if (base) snprintf(base_buf, sizeof(base_buf), "%s", base_str); else snprintf(base_buf, sizeof(base_buf), "%s", comp_str); } else if (base_str) { snprintf(base_buf, sizeof(base_buf), "%s", base_str); snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str); - if (strcmp(base_str, comp_str) == 0) + if (!base || !comp) + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); + else if (strcmp(base_str, comp_str) == 0) snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH"); else snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH"); } else { - snprintf(base_buf, sizeof(base_buf), "%ld", base_val); - snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + double p = 0.0; + + if (base) + snprintf(base_buf, sizeof(base_buf), "%ld", base_val); + else + snprintf(base_buf, sizeof(base_buf), "%s", "N/A"); + if (comp) + snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + else + snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A"); diff_val = comp_val - base_val; - if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) { - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", - diff_val, comp_val < base_val ? -100.0 : 100.0); + if (!base || !comp) { + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); } else { - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", - diff_val, diff_val * 100.0 / base_val); + if (base_val == 0) { + if (comp_val == base_val) + p = 0.0; /* avoid +0 (+100%) case */ + else + p = comp_val < base_val ? -100.0 : 100.0; + } else { + p = diff_val * 100.0 / base_val; + } + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); } } @@ -1170,14 +1544,64 @@ static int cmp_stats_key(const struct verif_stats *base, const struct verif_stat return strcmp(base->prog_name, comp->prog_name); } +static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats) +{ + static const double eps = 1e-9; + const char *str = NULL; + double value = 0.0; + + fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + + switch (f->op) { + case OP_EQ: return value > f->value - eps && value < f->value + eps; + case OP_NEQ: return value < f->value - eps || value > f->value + eps; + case OP_LT: return value < f->value - eps; + case OP_LE: return value <= f->value + eps; + case OP_GT: return value > f->value + eps; + case OP_GE: return value >= f->value - eps; + } + + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); + return false; +} + +static bool should_output_join_stats(const struct verif_stats_join *stats) +{ + struct filter *f; + int i, allow_cnt = 0; + + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_STAT) + continue; + + if (is_join_stat_filter_matched(f, stats)) + return false; + } + + for (i = 0; i < env.allow_filter_cnt; i++) { + f = &env.allow_filters[i]; + if (f->kind != FILTER_STAT) + continue; + allow_cnt++; + + if (is_join_stat_filter_matched(f, stats)) + return true; + } + + /* if there are no stat allowed filters, pass everything through */ + return allow_cnt == 0; +} + static int handle_comparison_mode(void) { struct stat_specs base_specs = {}, comp_specs = {}; + struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j; + int err, i, j, last_idx; if (env.filename_cnt != 2) { - fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n"); + fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); return -EINVAL; } @@ -1215,31 +1639,26 @@ static int handle_comparison_mode(void) } } + /* Replace user-specified sorting spec with file+prog sorting rule to + * be able to join two datasets correctly. Once we are done, we will + * restore the original sort spec. + */ + tmp_sort_spec = env.sort_spec; + env.sort_spec = join_sort_spec; qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats); + env.sort_spec = tmp_sort_spec; - /* for human-readable table output we need to do extra pass to - * calculate column widths, so we substitute current output format - * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE - * and do everything again. - */ - if (env.out_fmt == RESFMT_TABLE) - cur_fmt = RESFMT_TABLE_CALCLEN; - else - cur_fmt = env.out_fmt; - -one_more_time: - output_comp_headers(cur_fmt); - - /* If baseline and comparison datasets have different subset of rows - * (we match by 'object + prog' as a unique key) then assume - * empty/missing/zero value for rows that are missing in the opposite - * data set + /* Join two datasets together. If baseline and comparison datasets + * have different subset of rows (we match by 'object + prog' as + * a unique key) then assume empty/missing/zero value for rows that + * are missing in the opposite data set. */ i = j = 0; while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) { - bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1); const struct verif_stats *base, *comp; + struct verif_stats_join *join; + void *tmp; int r; base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats; @@ -1256,18 +1675,64 @@ one_more_time: return -EINVAL; } + tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats)); + if (!tmp) + return -ENOMEM; + env.join_stats = tmp; + + join = &env.join_stats[env.join_stat_cnt]; + memset(join, 0, sizeof(*join)); + r = cmp_stats_key(base, comp); if (r == 0) { - output_comp_stats(base, comp, cur_fmt, last); + join->file_name = base->file_name; + join->prog_name = base->prog_name; + join->stats_a = base; + join->stats_b = comp; i++; j++; } else if (comp == &fallback_stats || r < 0) { - output_comp_stats(base, &fallback_stats, cur_fmt, last); + join->file_name = base->file_name; + join->prog_name = base->prog_name; + join->stats_a = base; + join->stats_b = NULL; i++; } else { - output_comp_stats(&fallback_stats, comp, cur_fmt, last); + join->file_name = comp->file_name; + join->prog_name = comp->prog_name; + join->stats_a = NULL; + join->stats_b = comp; j++; } + env.join_stat_cnt += 1; + } + + /* now sort joined results accorsing to sort spec */ + qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); + + /* for human-readable table output we need to do extra pass to + * calculate column widths, so we substitute current output format + * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE + * and do everything again. + */ + if (env.out_fmt == RESFMT_TABLE) + cur_fmt = RESFMT_TABLE_CALCLEN; + else + cur_fmt = env.out_fmt; + +one_more_time: + output_comp_headers(cur_fmt); + + for (i = 0; i < env.join_stat_cnt; i++) { + const struct verif_stats_join *join = &env.join_stats[i]; + + if (!should_output_join_stats(join)) + continue; + + if (cur_fmt == RESFMT_TABLE_CALCLEN) + last_idx = i; + + output_comp_stats(join, cur_fmt, i == last_idx); } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1278,6 +1743,128 @@ one_more_time: return 0; } +static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats) +{ + long value = stats->stats[f->stat_id]; + + switch (f->op) { + case OP_EQ: return value == f->value; + case OP_NEQ: return value != f->value; + case OP_LT: return value < f->value; + case OP_LE: return value <= f->value; + case OP_GT: return value > f->value; + case OP_GE: return value >= f->value; + } + + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); + return false; +} + +static bool should_output_stats(const struct verif_stats *stats) +{ + struct filter *f; + int i, allow_cnt = 0; + + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_STAT) + continue; + + if (is_stat_filter_matched(f, stats)) + return false; + } + + for (i = 0; i < env.allow_filter_cnt; i++) { + f = &env.allow_filters[i]; + if (f->kind != FILTER_STAT) + continue; + allow_cnt++; + + if (is_stat_filter_matched(f, stats)) + return true; + } + + /* if there are no stat allowed filters, pass everything through */ + return allow_cnt == 0; +} + +static void output_prog_stats(void) +{ + const struct verif_stats *stats; + int i, last_stat_idx = 0; + + if (env.out_fmt == RESFMT_TABLE) { + /* calculate column widths */ + output_headers(RESFMT_TABLE_CALCLEN); + for (i = 0; i < env.prog_stat_cnt; i++) { + stats = &env.prog_stats[i]; + if (!should_output_stats(stats)) + continue; + output_stats(stats, RESFMT_TABLE_CALCLEN, false); + last_stat_idx = i; + } + } + + /* actually output the table */ + output_headers(env.out_fmt); + for (i = 0; i < env.prog_stat_cnt; i++) { + stats = &env.prog_stats[i]; + if (!should_output_stats(stats)) + continue; + output_stats(stats, env.out_fmt, i == last_stat_idx); + } +} + +static int handle_verif_mode(void) +{ + int i, err; + + if (env.filename_cnt == 0) { + fprintf(stderr, "Please provide path to BPF object file!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + for (i = 0; i < env.filename_cnt; i++) { + err = process_obj(env.filenames[i]); + if (err) { + fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); + return err; + } + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + output_prog_stats(); + + return 0; +} + +static int handle_replay_mode(void) +{ + struct stat_specs specs = {}; + int err; + + if (env.filename_cnt != 1) { + fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + err = parse_stats_csv(env.filenames[0], &specs, + &env.prog_stats, &env.prog_stat_cnt); + if (err) { + fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err); + return err; + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + output_prog_stats(); + + return 0; +} + int main(int argc, char **argv) { int err = 0, i; @@ -1286,34 +1873,49 @@ int main(int argc, char **argv) return 1; if (env.verbose && env.quiet) { - fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n"); + fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); return 1; } if (env.verbose && env.log_level == 0) env.log_level = 1; - if (env.output_spec.spec_cnt == 0) - env.output_spec = default_output_spec; + if (env.output_spec.spec_cnt == 0) { + if (env.out_fmt == RESFMT_CSV) + env.output_spec = default_csv_output_spec; + else + env.output_spec = default_output_spec; + } if (env.sort_spec.spec_cnt == 0) env.sort_spec = default_sort_spec; + if (env.comparison_mode && env.replay_mode) { + fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return 1; + } + if (env.comparison_mode) err = handle_comparison_mode(); + else if (env.replay_mode) + err = handle_replay_mode(); else err = handle_verif_mode(); free_verif_stats(env.prog_stats, env.prog_stat_cnt); free_verif_stats(env.baseline_stats, env.baseline_stat_cnt); + free(env.join_stats); for (i = 0; i < env.filename_cnt; i++) free(env.filenames[i]); free(env.filenames); for (i = 0; i < env.allow_filter_cnt; i++) { + free(env.allow_filters[i].any_glob); free(env.allow_filters[i].file_glob); free(env.allow_filters[i].prog_glob); } free(env.allow_filters); for (i = 0; i < env.deny_filter_cnt; i++) { + free(env.deny_filters[i].any_glob); free(env.deny_filters[i].file_glob); free(env.deny_filters[i].prog_glob); } diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index a29aa05ebb3e..685034528018 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -13,7 +13,7 @@ s390x) QEMU_BINARY=qemu-system-s390x QEMU_CONSOLE="ttyS1" QEMU_FLAGS=(-smp 2) - BZIMAGE="arch/s390/boot/compressed/vmlinux" + BZIMAGE="arch/s390/boot/vmlinux" ;; x86_64) QEMU_BINARY=qemu-system-x86_64 @@ -21,6 +21,12 @@ x86_64) QEMU_FLAGS=(-cpu host -smp 8) BZIMAGE="arch/x86/boot/bzImage" ;; +aarch64) + QEMU_BINARY=qemu-system-aarch64 + QEMU_CONSOLE="ttyAMA0,115200" + QEMU_FLAGS=(-M virt,gic-version=3 -cpu host -smp 8) + BZIMAGE="arch/arm64/boot/Image" + ;; *) echo "Unsupported architecture" exit 1 diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c new file mode 100644 index 000000000000..fce12165213b --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -0,0 +1,699 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <uapi/linux/bpf.h> +#include <uapi/linux/netdev.h> +#include <linux/if_link.h> +#include <signal.h> +#include <argp.h> +#include <net/if.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <pthread.h> + +#include <network_helpers.h> + +#include "xdp_features.skel.h" +#include "xdp_features.h" + +#define RED(str) "\033[0;31m" str "\033[0m" +#define GREEN(str) "\033[0;32m" str "\033[0m" +#define YELLOW(str) "\033[0;33m" str "\033[0m" + +static struct env { + bool verbosity; + int ifindex; + bool is_tester; + struct { + enum netdev_xdp_act drv_feature; + enum xdp_action action; + } feature; + struct sockaddr_storage dut_ctrl_addr; + struct sockaddr_storage dut_addr; + struct sockaddr_storage tester_addr; +} env; + +#define BUFSIZE 128 + +void test__fail(void) { /* for network_helpers.c */ } + +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbosity) + return 0; + return vfprintf(stderr, format, args); +} + +static volatile bool exiting; + +static void sig_handler(int sig) +{ + exiting = true; +} + +const char *argp_program_version = "xdp-features 0.0"; +const char argp_program_doc[] = +"XDP features detection application.\n" +"\n" +"XDP features application checks the XDP advertised features match detected ones.\n" +"\n" +"USAGE: ./xdp-features [-vt] [-f <xdp-feature>] [-D <dut-data-ip>] [-T <tester-data-ip>] [-C <dut-ctrl-ip>] <iface-name>\n" +"\n" +"dut-data-ip, tester-data-ip, dut-ctrl-ip: IPv6 or IPv4-mapped-IPv6 addresses;\n" +"\n" +"XDP features\n:" +"- XDP_PASS\n" +"- XDP_DROP\n" +"- XDP_ABORTED\n" +"- XDP_REDIRECT\n" +"- XDP_NDO_XMIT\n" +"- XDP_TX\n"; + +static const struct argp_option opts[] = { + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + { "tester", 't', NULL, 0, "Tester mode" }, + { "feature", 'f', "XDP-FEATURE", 0, "XDP feature to test" }, + { "dut_data_ip", 'D', "DUT-DATA-IP", 0, "DUT IP data channel" }, + { "dut_ctrl_ip", 'C', "DUT-CTRL-IP", 0, "DUT IP control channel" }, + { "tester_data_ip", 'T', "TESTER-DATA-IP", 0, "Tester IP data channel" }, + {}, +}; + +static int get_xdp_feature(const char *arg) +{ + if (!strcmp(arg, "XDP_PASS")) { + env.feature.action = XDP_PASS; + env.feature.drv_feature = NETDEV_XDP_ACT_BASIC; + } else if (!strcmp(arg, "XDP_DROP")) { + env.feature.drv_feature = NETDEV_XDP_ACT_BASIC; + env.feature.action = XDP_DROP; + } else if (!strcmp(arg, "XDP_ABORTED")) { + env.feature.drv_feature = NETDEV_XDP_ACT_BASIC; + env.feature.action = XDP_ABORTED; + } else if (!strcmp(arg, "XDP_TX")) { + env.feature.drv_feature = NETDEV_XDP_ACT_BASIC; + env.feature.action = XDP_TX; + } else if (!strcmp(arg, "XDP_REDIRECT")) { + env.feature.drv_feature = NETDEV_XDP_ACT_REDIRECT; + env.feature.action = XDP_REDIRECT; + } else if (!strcmp(arg, "XDP_NDO_XMIT")) { + env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT; + } else { + return -EINVAL; + } + + return 0; +} + +static char *get_xdp_feature_str(void) +{ + switch (env.feature.action) { + case XDP_PASS: + return YELLOW("XDP_PASS"); + case XDP_DROP: + return YELLOW("XDP_DROP"); + case XDP_ABORTED: + return YELLOW("XDP_ABORTED"); + case XDP_TX: + return YELLOW("XDP_TX"); + case XDP_REDIRECT: + return YELLOW("XDP_REDIRECT"); + default: + break; + } + + if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT) + return YELLOW("XDP_NDO_XMIT"); + + return ""; +} + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case 'v': + env.verbosity = true; + break; + case 't': + env.is_tester = true; + break; + case 'f': + if (get_xdp_feature(arg) < 0) { + fprintf(stderr, "Invalid xdp feature: %s\n", arg); + argp_usage(state); + return ARGP_ERR_UNKNOWN; + } + break; + case 'D': + if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT, + &env.dut_addr, NULL)) { + fprintf(stderr, "Invalid DUT address: %s\n", arg); + return ARGP_ERR_UNKNOWN; + } + break; + case 'C': + if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT, + &env.dut_ctrl_addr, NULL)) { + fprintf(stderr, "Invalid DUT CTRL address: %s\n", arg); + return ARGP_ERR_UNKNOWN; + } + break; + case 'T': + if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) { + fprintf(stderr, "Invalid Tester address: %s\n", arg); + return ARGP_ERR_UNKNOWN; + } + break; + case ARGP_KEY_ARG: + errno = 0; + if (strlen(arg) >= IF_NAMESIZE) { + fprintf(stderr, "Invalid device name: %s\n", arg); + argp_usage(state); + return ARGP_ERR_UNKNOWN; + } + + env.ifindex = if_nametoindex(arg); + if (!env.ifindex) + env.ifindex = strtoul(arg, NULL, 0); + if (!env.ifindex) { + fprintf(stderr, + "Bad interface index or name (%d): %s\n", + errno, strerror(errno)); + argp_usage(state); + return ARGP_ERR_UNKNOWN; + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, +}; + +static void set_env_default(void) +{ + env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT; + env.feature.action = -EINVAL; + env.ifindex = -ENODEV; + make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT, + &env.dut_ctrl_addr, NULL); + make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT, + &env.dut_addr, NULL); + make_sockaddr(AF_INET6, "::ffff:127.0.0.1", 0, &env.tester_addr, NULL); +} + +static void *dut_echo_thread(void *arg) +{ + unsigned char buf[sizeof(struct tlv_hdr)]; + int sockfd = *(int *)arg; + + while (!exiting) { + struct tlv_hdr *tlv = (struct tlv_hdr *)buf; + struct sockaddr_storage addr; + socklen_t addrlen; + size_t n; + + n = recvfrom(sockfd, buf, sizeof(buf), MSG_WAITALL, + (struct sockaddr *)&addr, &addrlen); + if (n != ntohs(tlv->len)) + continue; + + if (ntohs(tlv->type) != CMD_ECHO) + continue; + + sendto(sockfd, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM, + (struct sockaddr *)&addr, addrlen); + } + + pthread_exit((void *)0); + close(sockfd); + + return NULL; +} + +static int dut_run_echo_thread(pthread_t *t, int *sockfd) +{ + int err; + + sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL, + DUT_ECHO_PORT, 0, 1); + if (!sockfd) { + fprintf(stderr, "Failed to create echo socket\n"); + return -errno; + } + + /* start echo channel */ + err = pthread_create(t, NULL, dut_echo_thread, sockfd); + if (err) { + fprintf(stderr, "Failed creating dut_echo thread: %s\n", + strerror(-err)); + free_fds(sockfd, 1); + return -EINVAL; + } + + return 0; +} + +static int dut_attach_xdp_prog(struct xdp_features *skel, int flags) +{ + enum xdp_action action = env.feature.action; + struct bpf_program *prog; + unsigned int key = 0; + int err, fd = 0; + + if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT) { + struct bpf_devmap_val entry = { + .ifindex = env.ifindex, + }; + + err = bpf_map__update_elem(skel->maps.dev_map, + &key, sizeof(key), + &entry, sizeof(entry), 0); + if (err < 0) + return err; + + fd = bpf_program__fd(skel->progs.xdp_do_redirect_cpumap); + action = XDP_REDIRECT; + } + + switch (action) { + case XDP_TX: + prog = skel->progs.xdp_do_tx; + break; + case XDP_DROP: + prog = skel->progs.xdp_do_drop; + break; + case XDP_ABORTED: + prog = skel->progs.xdp_do_aborted; + break; + case XDP_PASS: + prog = skel->progs.xdp_do_pass; + break; + case XDP_REDIRECT: { + struct bpf_cpumap_val entry = { + .qsize = 2048, + .bpf_prog.fd = fd, + }; + + err = bpf_map__update_elem(skel->maps.cpu_map, + &key, sizeof(key), + &entry, sizeof(entry), 0); + if (err < 0) + return err; + + prog = skel->progs.xdp_do_redirect; + break; + } + default: + return -EINVAL; + } + + err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL); + if (err) + fprintf(stderr, + "Failed to attach XDP program to ifindex %d\n", + env.ifindex); + return err; +} + +static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val, + size_t val_size) +{ + struct tlv_hdr *tlv = (struct tlv_hdr *)buf; + size_t len; + + len = recv(sockfd, buf, bufsize, 0); + if (len != ntohs(tlv->len) || len < sizeof(*tlv)) + return -EINVAL; + + if (val) { + len -= sizeof(*tlv); + if (len > val_size) + return -ENOMEM; + + memcpy(val, tlv->data, len); + } + + return 0; +} + +static int dut_run(struct xdp_features *skel) +{ + int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE; + int state, err, *sockfd, ctrl_sockfd, echo_sockfd; + struct sockaddr_storage ctrl_addr; + pthread_t dut_thread; + socklen_t addrlen; + + sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL, + DUT_CTRL_PORT, 0, 1); + if (!sockfd) { + fprintf(stderr, "Failed to create DUT socket\n"); + return -errno; + } + + ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen); + if (ctrl_sockfd < 0) { + fprintf(stderr, "Failed to accept connection on DUT socket\n"); + free_fds(sockfd, 1); + return -errno; + } + + /* CTRL loop */ + while (!exiting) { + unsigned char buf[BUFSIZE] = {}; + struct tlv_hdr *tlv = (struct tlv_hdr *)buf; + + err = recv_msg(ctrl_sockfd, buf, BUFSIZE, NULL, 0); + if (err) + continue; + + switch (ntohs(tlv->type)) { + case CMD_START: { + if (state == CMD_START) + continue; + + state = CMD_START; + /* Load the XDP program on the DUT */ + err = dut_attach_xdp_prog(skel, flags); + if (err) + goto out; + + err = dut_run_echo_thread(&dut_thread, &echo_sockfd); + if (err < 0) + goto out; + + tlv->type = htons(CMD_ACK); + tlv->len = htons(sizeof(*tlv)); + err = send(ctrl_sockfd, buf, sizeof(*tlv), 0); + if (err < 0) + goto end_thread; + break; + } + case CMD_STOP: + if (state != CMD_START) + break; + + state = CMD_STOP; + + exiting = true; + bpf_xdp_detach(env.ifindex, flags, NULL); + + tlv->type = htons(CMD_ACK); + tlv->len = htons(sizeof(*tlv)); + err = send(ctrl_sockfd, buf, sizeof(*tlv), 0); + goto end_thread; + case CMD_GET_XDP_CAP: { + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + unsigned long long val; + size_t n; + + err = bpf_xdp_query(env.ifindex, XDP_FLAGS_DRV_MODE, + &opts); + if (err) { + fprintf(stderr, + "Failed to query XDP cap for ifindex %d\n", + env.ifindex); + goto end_thread; + } + + tlv->type = htons(CMD_ACK); + n = sizeof(*tlv) + sizeof(opts.feature_flags); + tlv->len = htons(n); + + val = htobe64(opts.feature_flags); + memcpy(tlv->data, &val, sizeof(val)); + + err = send(ctrl_sockfd, buf, n, 0); + if (err < 0) + goto end_thread; + break; + } + case CMD_GET_STATS: { + unsigned int key = 0, val; + size_t n; + + err = bpf_map__lookup_elem(skel->maps.dut_stats, + &key, sizeof(key), + &val, sizeof(val), 0); + if (err) { + fprintf(stderr, "bpf_map_lookup_elem failed\n"); + goto end_thread; + } + + tlv->type = htons(CMD_ACK); + n = sizeof(*tlv) + sizeof(val); + tlv->len = htons(n); + + val = htonl(val); + memcpy(tlv->data, &val, sizeof(val)); + + err = send(ctrl_sockfd, buf, n, 0); + if (err < 0) + goto end_thread; + break; + } + default: + break; + } + } + +end_thread: + pthread_join(dut_thread, NULL); +out: + bpf_xdp_detach(env.ifindex, flags, NULL); + close(ctrl_sockfd); + free_fds(sockfd, 1); + + return err; +} + +static bool tester_collect_detected_cap(struct xdp_features *skel, + unsigned int dut_stats) +{ + unsigned int err, key = 0, val; + + if (!dut_stats) + return false; + + err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key), + &val, sizeof(val), 0); + if (err) { + fprintf(stderr, "bpf_map_lookup_elem failed\n"); + return false; + } + + switch (env.feature.action) { + case XDP_PASS: + case XDP_TX: + case XDP_REDIRECT: + return val > 0; + case XDP_DROP: + case XDP_ABORTED: + return val == 0; + default: + break; + } + + if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT) + return val > 0; + + return false; +} + +static int send_and_recv_msg(int sockfd, enum test_commands cmd, void *val, + size_t val_size) +{ + unsigned char buf[BUFSIZE] = {}; + struct tlv_hdr *tlv = (struct tlv_hdr *)buf; + int err; + + tlv->type = htons(cmd); + tlv->len = htons(sizeof(*tlv)); + + err = send(sockfd, buf, sizeof(*tlv), 0); + if (err < 0) + return err; + + err = recv_msg(sockfd, buf, BUFSIZE, val, val_size); + if (err < 0) + return err; + + return ntohs(tlv->type) == CMD_ACK ? 0 : -EINVAL; +} + +static int send_echo_msg(void) +{ + unsigned char buf[sizeof(struct tlv_hdr)]; + struct tlv_hdr *tlv = (struct tlv_hdr *)buf; + int sockfd, n; + + sockfd = socket(AF_INET6, SOCK_DGRAM, 0); + if (sockfd < 0) { + fprintf(stderr, "Failed to create echo socket\n"); + return -errno; + } + + tlv->type = htons(CMD_ECHO); + tlv->len = htons(sizeof(*tlv)); + + n = sendto(sockfd, buf, sizeof(*tlv), MSG_NOSIGNAL | MSG_CONFIRM, + (struct sockaddr *)&env.dut_addr, sizeof(env.dut_addr)); + close(sockfd); + + return n == ntohs(tlv->len) ? 0 : -EINVAL; +} + +static int tester_run(struct xdp_features *skel) +{ + int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE; + unsigned long long advertised_feature; + struct bpf_program *prog; + unsigned int stats; + int i, err, sockfd; + bool detected_cap; + + sockfd = socket(AF_INET6, SOCK_STREAM, 0); + if (sockfd < 0) { + fprintf(stderr, "Failed to create tester socket\n"); + return -errno; + } + + if (settimeo(sockfd, 1000) < 0) + return -EINVAL; + + err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr, + sizeof(env.dut_ctrl_addr)); + if (err) { + fprintf(stderr, "Failed to connect to the DUT\n"); + return -errno; + } + + err = send_and_recv_msg(sockfd, CMD_GET_XDP_CAP, &advertised_feature, + sizeof(advertised_feature)); + if (err < 0) { + close(sockfd); + return err; + } + + advertised_feature = be64toh(advertised_feature); + + if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT || + env.feature.action == XDP_TX) + prog = skel->progs.xdp_tester_check_tx; + else + prog = skel->progs.xdp_tester_check_rx; + + err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL); + if (err) { + fprintf(stderr, "Failed to attach XDP program to ifindex %d\n", + env.ifindex); + goto out; + } + + err = send_and_recv_msg(sockfd, CMD_START, NULL, 0); + if (err) + goto out; + + for (i = 0; i < 10 && !exiting; i++) { + err = send_echo_msg(); + if (err < 0) + goto out; + + sleep(1); + } + + err = send_and_recv_msg(sockfd, CMD_GET_STATS, &stats, sizeof(stats)); + if (err) + goto out; + + /* stop the test */ + err = send_and_recv_msg(sockfd, CMD_STOP, NULL, 0); + /* send a new echo message to wake echo thread of the dut */ + send_echo_msg(); + + detected_cap = tester_collect_detected_cap(skel, ntohl(stats)); + + fprintf(stdout, "Feature %s: [%s][%s]\n", get_xdp_feature_str(), + detected_cap ? GREEN("DETECTED") : RED("NOT DETECTED"), + env.feature.drv_feature & advertised_feature ? GREEN("ADVERTISED") + : RED("NOT ADVERTISED")); +out: + bpf_xdp_detach(env.ifindex, flags, NULL); + close(sockfd); + return err < 0 ? err : 0; +} + +int main(int argc, char **argv) +{ + struct xdp_features *skel; + int err; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + set_env_default(); + + /* Parse command line arguments */ + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + if (env.ifindex < 0) { + fprintf(stderr, "Invalid ifindex\n"); + return -ENODEV; + } + + /* Load and verify BPF application */ + skel = xdp_features__open(); + if (!skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return -EINVAL; + } + + skel->rodata->tester_addr = + ((struct sockaddr_in6 *)&env.tester_addr)->sin6_addr; + skel->rodata->dut_addr = + ((struct sockaddr_in6 *)&env.dut_addr)->sin6_addr; + + /* Load & verify BPF programs */ + err = xdp_features__load(skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto cleanup; + } + + err = xdp_features__attach(skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto cleanup; + } + + if (env.is_tester) { + /* Tester */ + fprintf(stdout, "Starting tester on device %d\n", env.ifindex); + err = tester_run(skel); + } else { + /* DUT */ + fprintf(stdout, "Starting DUT on device %d\n", env.ifindex); + err = dut_run(skel); + } + +cleanup: + xdp_features__destroy(skel); + + return err < 0 ? -err : 0; +} diff --git a/tools/testing/selftests/bpf/xdp_features.h b/tools/testing/selftests/bpf/xdp_features.h new file mode 100644 index 000000000000..2670c541713b --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_features.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* test commands */ +enum test_commands { + CMD_STOP, /* CMD */ + CMD_START, /* CMD */ + CMD_ECHO, /* CMD */ + CMD_ACK, /* CMD + data */ + CMD_GET_XDP_CAP, /* CMD */ + CMD_GET_STATS, /* CMD */ +}; + +#define DUT_CTRL_PORT 12345 +#define DUT_ECHO_PORT 12346 + +struct tlv_hdr { + __be16 type; + __be16 len; + __u8 data[]; +}; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c new file mode 100644 index 000000000000..1c8acb68b977 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Reference program for verifying XDP metadata on real HW. Functional test + * only, doesn't test the performance. + * + * RX: + * - UDP 9091 packets are diverted into AF_XDP + * - Metadata verified: + * - rx_timestamp + * - rx_hash + * + * TX: + * - TBD + */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "xdp_hw_metadata.skel.h" +#include "xsk.h" + +#include <error.h> +#include <linux/errqueue.h> +#include <linux/if_link.h> +#include <linux/net_tstamp.h> +#include <linux/udp.h> +#include <linux/sockios.h> +#include <sys/mman.h> +#include <net/if.h> +#include <poll.h> + +#include "xdp_metadata.h" + +#define UMEM_NUM 16 +#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE +#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) +#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) + +struct xsk { + void *umem_area; + struct xsk_umem *umem; + struct xsk_ring_prod fill; + struct xsk_ring_cons comp; + struct xsk_ring_prod tx; + struct xsk_ring_cons rx; + struct xsk_socket *socket; +}; + +struct xdp_hw_metadata *bpf_obj; +struct xsk *rx_xsk; +const char *ifname; +int ifindex; +int rxq; + +void test__fail(void) { /* for network_helpers.c */ } + +static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) +{ + int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + const struct xsk_socket_config socket_config = { + .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .bind_flags = XDP_COPY, + }; + const struct xsk_umem_config umem_config = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + }; + __u32 idx; + u64 addr; + int ret; + int i; + + xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (xsk->umem_area == MAP_FAILED) + return -ENOMEM; + + ret = xsk_umem__create(&xsk->umem, + xsk->umem_area, UMEM_SIZE, + &xsk->fill, + &xsk->comp, + &umem_config); + if (ret) + return ret; + + ret = xsk_socket__create(&xsk->socket, ifindex, queue_id, + xsk->umem, + &xsk->rx, + &xsk->tx, + &socket_config); + if (ret) + return ret; + + /* First half of umem is for TX. This way address matches 1-to-1 + * to the completion queue index. + */ + + for (i = 0; i < UMEM_NUM / 2; i++) { + addr = i * UMEM_FRAME_SIZE; + printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); + } + + /* Second half of umem is for RX. */ + + ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); + for (i = 0; i < UMEM_NUM / 2; i++) { + addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; + printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); + *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; + } + xsk_ring_prod__submit(&xsk->fill, ret); + + return 0; +} + +static void close_xsk(struct xsk *xsk) +{ + if (xsk->umem) + xsk_umem__delete(xsk->umem); + if (xsk->socket) + xsk_socket__delete(xsk->socket); + munmap(xsk->umem_area, UMEM_SIZE); +} + +static void refill_rx(struct xsk *xsk, __u64 addr) +{ + __u32 idx; + + if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { + printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); + *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; + xsk_ring_prod__submit(&xsk->fill, 1); + } +} + +static void verify_xdp_metadata(void *data) +{ + struct xdp_meta *meta; + + meta = data - sizeof(*meta); + + printf("rx_timestamp: %llu\n", meta->rx_timestamp); + printf("rx_hash: %u\n", meta->rx_hash); +} + +static void verify_skb_metadata(int fd) +{ + char cmsg_buf[1024]; + char packet_buf[128]; + + struct scm_timestamping *ts; + struct iovec packet_iov; + struct cmsghdr *cmsg; + struct msghdr hdr; + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_iov = &packet_iov; + hdr.msg_iovlen = 1; + packet_iov.iov_base = packet_buf; + packet_iov.iov_len = sizeof(packet_buf); + + hdr.msg_control = cmsg_buf; + hdr.msg_controllen = sizeof(cmsg_buf); + + if (recvmsg(fd, &hdr, 0) < 0) + error(1, errno, "recvmsg"); + + for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; + cmsg = CMSG_NXTHDR(&hdr, cmsg)) { + + if (cmsg->cmsg_level != SOL_SOCKET) + continue; + + switch (cmsg->cmsg_type) { + case SCM_TIMESTAMPING: + ts = (struct scm_timestamping *)CMSG_DATA(cmsg); + if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) { + printf("found skb hwtstamp = %lu.%lu\n", + ts->ts[2].tv_sec, ts->ts[2].tv_nsec); + return; + } + break; + default: + break; + } + } + + printf("skb hwtstamp is not found!\n"); +} + +static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) +{ + const struct xdp_desc *rx_desc; + struct pollfd fds[rxq + 1]; + __u64 comp_addr; + __u64 addr; + __u32 idx; + int ret; + int i; + + for (i = 0; i < rxq; i++) { + fds[i].fd = xsk_socket__fd(rx_xsk[i].socket); + fds[i].events = POLLIN; + fds[i].revents = 0; + } + + fds[rxq].fd = server_fd; + fds[rxq].events = POLLIN; + fds[rxq].revents = 0; + + while (true) { + errno = 0; + ret = poll(fds, rxq + 1, 1000); + printf("poll: %d (%d)\n", ret, errno); + if (ret < 0) + break; + if (ret == 0) + continue; + + if (fds[rxq].revents) + verify_skb_metadata(server_fd); + + for (i = 0; i < rxq; i++) { + if (fds[i].revents == 0) + continue; + + struct xsk *xsk = &rx_xsk[i]; + + ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); + printf("xsk_ring_cons__peek: %d\n", ret); + if (ret != 1) + continue; + + rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); + comp_addr = xsk_umem__extract_addr(rx_desc->addr); + addr = xsk_umem__add_offset_to_addr(rx_desc->addr); + printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", + xsk, idx, rx_desc->addr, addr, comp_addr); + verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr)); + xsk_ring_cons__release(&xsk->rx, 1); + refill_rx(xsk, comp_addr); + } + } + + return 0; +} + +struct ethtool_channels { + __u32 cmd; + __u32 max_rx; + __u32 max_tx; + __u32 max_other; + __u32 max_combined; + __u32 rx_count; + __u32 tx_count; + __u32 other_count; + __u32 combined_count; +}; + +#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ + +static int rxq_num(const char *ifname) +{ + struct ethtool_channels ch = { + .cmd = ETHTOOL_GCHANNELS, + }; + + struct ifreq ifr = { + .ifr_data = (void *)&ch, + }; + strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); + int fd, ret; + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) + error(1, errno, "socket"); + + ret = ioctl(fd, SIOCETHTOOL, &ifr); + if (ret < 0) + error(1, errno, "ioctl(SIOCETHTOOL)"); + + close(fd); + + return ch.rx_count + ch.combined_count; +} + +static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg) +{ + struct ifreq ifr = { + .ifr_data = (void *)cfg, + }; + strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); + int fd, ret; + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) + error(1, errno, "socket"); + + ret = ioctl(fd, op, &ifr); + if (ret < 0) + error(1, errno, "ioctl(%d)", op); + + close(fd); +} + +static struct hwtstamp_config saved_hwtstamp_cfg; +static const char *saved_hwtstamp_ifname; + +static void hwtstamp_restore(void) +{ + hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg); +} + +static void hwtstamp_enable(const char *ifname) +{ + struct hwtstamp_config cfg = { + .rx_filter = HWTSTAMP_FILTER_ALL, + }; + + hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); + saved_hwtstamp_ifname = strdup(ifname); + atexit(hwtstamp_restore); + + hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg); +} + +static void cleanup(void) +{ + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + int ret; + int i; + + if (bpf_obj) { + opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx); + if (opts.old_prog_fd >= 0) { + printf("detaching bpf program....\n"); + ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts); + if (ret) + printf("failed to detach XDP program: %d\n", ret); + } + } + + for (i = 0; i < rxq; i++) + close_xsk(&rx_xsk[i]); + + if (bpf_obj) + xdp_hw_metadata__destroy(bpf_obj); +} + +static void handle_signal(int sig) +{ + /* interrupting poll() is all we need */ +} + +static void timestamping_enable(int fd, int val) +{ + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)); + if (ret < 0) + error(1, errno, "setsockopt(SO_TIMESTAMPING)"); +} + +int main(int argc, char *argv[]) +{ + int server_fd = -1; + int ret; + int i; + + struct bpf_program *prog; + + if (argc != 2) { + fprintf(stderr, "pass device name\n"); + return -1; + } + + ifname = argv[1]; + ifindex = if_nametoindex(ifname); + rxq = rxq_num(ifname); + + printf("rxq: %d\n", rxq); + + hwtstamp_enable(ifname); + + rx_xsk = malloc(sizeof(struct xsk) * rxq); + if (!rx_xsk) + error(1, ENOMEM, "malloc"); + + for (i = 0; i < rxq; i++) { + printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i); + ret = open_xsk(ifindex, &rx_xsk[i], i); + if (ret) + error(1, -ret, "open_xsk"); + + printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket)); + } + + printf("open bpf program...\n"); + bpf_obj = xdp_hw_metadata__open(); + if (libbpf_get_error(bpf_obj)) + error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open"); + + prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); + bpf_program__set_ifindex(prog, ifindex); + bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); + + printf("load bpf program...\n"); + ret = xdp_hw_metadata__load(bpf_obj); + if (ret) + error(1, -ret, "xdp_hw_metadata__load"); + + printf("prepare skb endpoint...\n"); + server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000); + if (server_fd < 0) + error(1, errno, "start_server"); + timestamping_enable(server_fd, + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RAW_HARDWARE); + + printf("prepare xsk map...\n"); + for (i = 0; i < rxq; i++) { + int sock_fd = xsk_socket__fd(rx_xsk[i].socket); + __u32 queue_id = i; + + printf("map[%d] = %d\n", queue_id, sock_fd); + ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); + if (ret) + error(1, -ret, "bpf_map_update_elem"); + } + + printf("attach bpf program...\n"); + ret = bpf_xdp_attach(ifindex, + bpf_program__fd(bpf_obj->progs.rx), + XDP_FLAGS, NULL); + if (ret) + error(1, -ret, "bpf_xdp_attach"); + + signal(SIGINT, handle_signal); + ret = verify_metadata(rx_xsk, rxq, server_fd); + close(server_fd); + cleanup(); + if (ret) + error(1, -ret, "verify_metadata"); +} diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h new file mode 100644 index 000000000000..f6780fbb0a21 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma once + +#ifndef ETH_P_IP +#define ETH_P_IP 0x0800 +#endif + +#ifndef ETH_P_IPV6 +#define ETH_P_IPV6 0x86DD +#endif + +struct xdp_meta { + __u64 rx_timestamp; + __u32 rx_hash; +}; diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c index ff35320d2be9..ce68c342b56f 100644 --- a/tools/testing/selftests/bpf/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -104,7 +104,8 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * { "tc", no_argument, NULL, 'c' }, { NULL, 0, NULL, 0 }, }; - unsigned long mss4, mss6, wscale, ttl; + unsigned long mss4, wscale, ttl; + unsigned long long mss6; unsigned int tcpipopts_mask = 0; if (argc < 2) @@ -115,6 +116,7 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * *tcpipopts = 0; *ports = NULL; *single = false; + *tc = false; while (true) { int opt; @@ -215,9 +217,10 @@ static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc) prog_fd = bpf_program__fd(prog); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (err < 0) { - fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n", + strerror(-err)); goto out; } attached_tc = tc; @@ -286,13 +289,14 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports prog_info = (struct bpf_prog_info) { .nr_map_ids = 8, - .map_ids = (__u64)map_ids, + .map_ids = (__u64)(unsigned long)map_ids, }; info_len = sizeof(prog_info); - err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); if (err != 0) { - fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n", + strerror(-err)); goto out; } @@ -315,9 +319,10 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports map_fd = err; info_len = sizeof(map_info); - err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); if (err != 0) { - fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + fprintf(stderr, "Error: bpf_map_get_info_by_fd: %s\n", + strerror(-err)); close(map_fd); goto err_close_map_fds; } diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index 0b3ff49c740d..687d83e707f8 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -33,6 +33,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> #include "xsk.h" +#include "bpf_util.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -48,10 +49,7 @@ #define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) -enum xsk_prog { - XSK_PROG_FALLBACK, - XSK_PROG_REDIRECT_FLAGS, -}; +#define XSKMAP_SIZE 1 struct xsk_umem { struct xsk_ring_prod *fill_save; @@ -73,43 +71,16 @@ struct xsk_ctx { int refcount; int ifindex; struct list_head list; - int prog_fd; - int link_fd; - int xsks_map_fd; - char ifname[IFNAMSIZ]; - bool has_bpf_link; }; struct xsk_socket { struct xsk_ring_cons *rx; struct xsk_ring_prod *tx; - __u64 outstanding_tx; struct xsk_ctx *ctx; struct xsk_socket_config config; int fd; }; -struct xsk_nl_info { - bool xdp_prog_attached; - int ifindex; - int fd; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_ring_offset_v1 { - __u64 producer; - __u64 consumer; - __u64 desc; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_mmap_offsets_v1 { - struct xdp_ring_offset_v1 rx; - struct xdp_ring_offset_v1 tx; - struct xdp_ring_offset_v1 fr; - struct xdp_ring_offset_v1 cr; -}; - int xsk_umem__fd(const struct xsk_umem *umem) { return umem ? umem->fd : -EINVAL; @@ -152,55 +123,17 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, if (!usr_cfg) { cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->libbpf_flags = 0; - cfg->xdp_flags = 0; cfg->bind_flags = 0; return 0; } - if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) - return -EINVAL; - cfg->rx_size = usr_cfg->rx_size; cfg->tx_size = usr_cfg->tx_size; - cfg->libbpf_flags = usr_cfg->libbpf_flags; - cfg->xdp_flags = usr_cfg->xdp_flags; cfg->bind_flags = usr_cfg->bind_flags; return 0; } -static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) -{ - struct xdp_mmap_offsets_v1 off_v1; - - /* getsockopt on a kernel <= 5.3 has no flags fields. - * Copy over the offsets to the correct places in the >=5.4 format - * and put the flags where they would have been on that kernel. - */ - memcpy(&off_v1, off, sizeof(off_v1)); - - off->rx.producer = off_v1.rx.producer; - off->rx.consumer = off_v1.rx.consumer; - off->rx.desc = off_v1.rx.desc; - off->rx.flags = off_v1.rx.consumer + sizeof(__u32); - - off->tx.producer = off_v1.tx.producer; - off->tx.consumer = off_v1.tx.consumer; - off->tx.desc = off_v1.tx.desc; - off->tx.flags = off_v1.tx.consumer + sizeof(__u32); - - off->fr.producer = off_v1.fr.producer; - off->fr.consumer = off_v1.fr.consumer; - off->fr.desc = off_v1.fr.desc; - off->fr.flags = off_v1.fr.consumer + sizeof(__u32); - - off->cr.producer = off_v1.cr.producer; - off->cr.consumer = off_v1.cr.consumer; - off->cr.desc = off_v1.cr.desc; - off->cr.flags = off_v1.cr.consumer + sizeof(__u32); -} - static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) { socklen_t optlen; @@ -214,11 +147,6 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) if (optlen == sizeof(*off)) return 0; - if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { - xsk_mmap_offsets_v1(off); - return 0; - } - return -EINVAL; } @@ -339,550 +267,56 @@ out_umem_alloc: return err; } -struct xsk_umem_config_v1 { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; -}; - -static enum xsk_prog get_xsk_prog(void) -{ - enum xsk_prog detected = XSK_PROG_FALLBACK; - char data_in = 0, data_out; - struct bpf_insn insns[] = { - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_test_run_opts, opts, - .data_in = &data_in, - .data_size_in = 1, - .data_out = &data_out, - ); - - int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); - - map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); - if (map_fd < 0) - return detected; - - insns[0].imm = map_fd; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); - if (prog_fd < 0) { - close(map_fd); - return detected; - } - - ret = bpf_prog_test_run_opts(prog_fd, &opts); - if (!ret && opts.retval == XDP_PASS) - detected = XSK_PROG_REDIRECT_FLAGS; - close(prog_fd); - close(map_fd); - return detected; -} - -static int xsk_load_xdp_prog(struct xsk_socket *xsk) -{ - static const int log_buf_size = 16 * 1024; - struct xsk_ctx *ctx = xsk->ctx; - char log_buf[log_buf_size]; - int prog_fd; - - /* This is the fallback C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * int ret, index = ctx->rx_queue_index; - * - * // A set entry here means that the correspnding queue_id - * // has an active AF_XDP socket bound to it. - * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); - * if (ret > 0) - * return ret; - * - * // Fallback for pre-5.3 kernels, not supporting default - * // action in the flags parameter. - * if (bpf_map_lookup_elem(&xsks_map, &index)) - * return bpf_redirect_map(&xsks_map, index, 0); - * return XDP_PASS; - * } - */ - struct bpf_insn prog[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r2 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* if w0 != 0 goto pc+13 */ - BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), - /* r2 = r10 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - /* r2 += -4 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* call bpf_map_lookup_elem */ - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - /* r1 = r0 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - /* r0 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto pc+5 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), - /* r2 = *(u32 *)(r10 - 4) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = 0 */ - BPF_MOV64_IMM(BPF_REG_3, 0), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* The jumps are to this instruction */ - BPF_EXIT_INSN(), - }; - - /* This is the post-5.3 kernel C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); - * } - */ - struct bpf_insn prog_redirect_flags[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - BPF_EXIT_INSN(), - }; - size_t insns_cnt[] = {ARRAY_SIZE(prog), - ARRAY_SIZE(prog_redirect_flags), - }; - struct bpf_insn *progs[] = {prog, prog_redirect_flags}; - enum xsk_prog option = get_xsk_prog(); - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = log_buf, - .log_size = log_buf_size, - ); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", - progs[option], insns_cnt[option], &opts); - if (prog_fd < 0) { - pr_warn("BPF log buffer:\n%s", log_buf); - return prog_fd; - } - - ctx->prog_fd = prog_fd; - return 0; -} - -static int xsk_create_bpf_link(struct xsk_socket *xsk) -{ - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); - struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; - int link_fd; - int err; - - err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); - if (err) { - pr_warn("getting XDP prog id failed\n"); - return err; - } - - /* if there's a netlink-based XDP prog loaded on interface, bail out - * and ask user to do the removal by himself - */ - if (prog_id) { - pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); - return -EINVAL; - } - - opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); - - link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); - if (link_fd < 0) { - pr_warn("bpf_link_create failed: %s\n", strerror(errno)); - return link_fd; - } - - ctx->link_fd = link_fd; - return 0; -} - -/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst - * is zero-terminated string no matter what (unless sz == 0, in which case - * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs - * in what is returned. Given this is internal helper, it's trivial to extend - * this, when necessary. Use this instead of strncpy inside libbpf source code. - */ -static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) -{ - size_t i; - - if (sz == 0) - return; - - sz--; - for (i = 0; i < sz && src[i]; i++) - dst[i] = src[i]; - dst[i] = '\0'; -} - -static int xsk_get_max_queues(struct xsk_socket *xsk) -{ - struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; - struct xsk_ctx *ctx = xsk->ctx; - struct ifreq ifr = {}; - int fd, err, ret; - - fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); - if (fd < 0) - return -errno; - - ifr.ifr_data = (void *)&channels; - libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); - err = ioctl(fd, SIOCETHTOOL, &ifr); - if (err && errno != EOPNOTSUPP) { - ret = -errno; - goto out; - } - - if (err) { - /* If the device says it has no channels, then all traffic - * is sent to a single stream, so max queues = 1. - */ - ret = 1; - } else { - /* Take the max of rx, tx, combined. Drivers return - * the number of channels in different ways. - */ - ret = max(channels.max_rx, channels.max_tx); - ret = max(ret, (int)channels.max_combined); - } - -out: - close(fd); - return ret; -} - -static int xsk_create_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - int max_queues; - int fd; - - max_queues = xsk_get_max_queues(xsk); - if (max_queues < 0) - return max_queues; - - fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, NULL); - if (fd < 0) - return fd; - - ctx->xsks_map_fd = fd; - - return 0; -} - -static void xsk_delete_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - - bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); - close(ctx->xsks_map_fd); -} - -static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) -{ - __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); - __u32 map_len = sizeof(struct bpf_map_info); - struct bpf_prog_info prog_info = {}; - struct xsk_ctx *ctx = xsk->ctx; - struct bpf_map_info map_info; - int fd, err; - - err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); - if (err) - return err; - - num_maps = prog_info.nr_map_ids; - - map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); - if (!map_ids) - return -ENOMEM; - - memset(&prog_info, 0, prog_len); - prog_info.nr_map_ids = num_maps; - prog_info.map_ids = (__u64)(unsigned long)map_ids; - - err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); - if (err) - goto out_map_ids; - - ctx->xsks_map_fd = -1; - - for (i = 0; i < prog_info.nr_map_ids; i++) { - fd = bpf_map_get_fd_by_id(map_ids[i]); - if (fd < 0) - continue; - - memset(&map_info, 0, map_len); - err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); - if (err) { - close(fd); - continue; - } - - if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { - ctx->xsks_map_fd = fd; - break; - } - - close(fd); - } - - if (ctx->xsks_map_fd == -1) - err = -ENOENT; - -out_map_ids: - free(map_ids); - return err; -} - -static int xsk_set_bpf_maps(struct xsk_socket *xsk) +bool xsk_is_in_mode(u32 ifindex, int mode) { - struct xsk_ctx *ctx = xsk->ctx; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + int ret; - return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, - &xsk->fd, 0); -} - -static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) -{ - struct bpf_link_info link_info; - __u32 link_len; - __u32 id = 0; - int err; - int fd; - - while (true) { - err = bpf_link_get_next_id(id, &id); - if (err) { - if (errno == ENOENT) { - err = 0; - break; - } - pr_warn("can't get next link: %s\n", strerror(errno)); - break; - } - - fd = bpf_link_get_fd_by_id(id); - if (fd < 0) { - if (errno == ENOENT) - continue; - pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); - err = -errno; - break; - } - - link_len = sizeof(struct bpf_link_info); - memset(&link_info, 0, link_len); - err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); - if (err) { - pr_warn("can't get link info: %s\n", strerror(errno)); - close(fd); - break; - } - if (link_info.type == BPF_LINK_TYPE_XDP) { - if (link_info.xdp.ifindex == ifindex) { - *link_fd = fd; - if (prog_id) - *prog_id = link_info.prog_id; - break; - } - } - close(fd); + ret = bpf_xdp_query(ifindex, mode, &opts); + if (ret) { + printf("XDP mode query returned error %s\n", strerror(errno)); + return false; } - return err; -} - -static bool xsk_probe_bpf_link(void) -{ - LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); - struct bpf_insn insns[2] = { - BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), - BPF_EXIT_INSN() - }; - int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); - int ifindex_lo = 1; - bool ret = false; - int err; - - err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); - if (err) - return ret; - - if (link_fd >= 0) - return true; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); - if (prog_fd < 0) - return ret; - - link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); - close(prog_fd); + if (mode == XDP_FLAGS_DRV_MODE) + return opts.attach_mode == XDP_ATTACHED_DRV; + else if (mode == XDP_FLAGS_SKB_MODE) + return opts.attach_mode == XDP_ATTACHED_SKB; - if (link_fd >= 0) { - ret = true; - close(link_fd); - } - - return ret; + return false; } -static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) +int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags) { - char ifname[IFNAMSIZ]; - struct xsk_ctx *ctx; - char *interface; - - ctx = calloc(1, sizeof(*ctx)); - if (!ctx) - return -ENOMEM; - - interface = if_indextoname(ifindex, &ifname[0]); - if (!interface) { - free(ctx); - return -errno; - } - - ctx->ifindex = ifindex; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); - - xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); + int prog_fd; - return 0; + prog_fd = bpf_program__fd(prog); + return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL); } -static int xsk_init_xdp_res(struct xsk_socket *xsk, - int *xsks_map_fd) +void xsk_detach_xdp_program(int ifindex, u32 xdp_flags) { - struct xsk_ctx *ctx = xsk->ctx; - int err; - - err = xsk_create_bpf_maps(xsk); - if (err) - return err; - - err = xsk_load_xdp_prog(xsk); - if (err) - goto err_load_xdp_prog; - - if (ctx->has_bpf_link) - err = xsk_create_bpf_link(xsk); - else - err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd, - xsk->config.xdp_flags, NULL); - - if (err) - goto err_attach_xdp_prog; - - if (!xsk->rx) - return err; - - err = xsk_set_bpf_maps(xsk); - if (err) - goto err_set_bpf_maps; - - return err; - -err_set_bpf_maps: - if (ctx->has_bpf_link) - close(ctx->link_fd); - else - bpf_xdp_detach(ctx->ifindex, 0, NULL); -err_attach_xdp_prog: - close(ctx->prog_fd); -err_load_xdp_prog: - xsk_delete_bpf_maps(xsk); - return err; + bpf_xdp_detach(ifindex, xdp_flags, NULL); } -static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) +void xsk_clear_xskmap(struct bpf_map *map) { - struct xsk_ctx *ctx = xsk->ctx; - int err; - - ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (ctx->prog_fd < 0) { - err = -errno; - goto err_prog_fd; - } - err = xsk_lookup_bpf_maps(xsk); - if (err) - goto err_lookup_maps; - - if (!xsk->rx) - return err; - - err = xsk_set_bpf_maps(xsk); - if (err) - goto err_set_maps; - - return err; + u32 index = 0; + int map_fd; -err_set_maps: - close(ctx->xsks_map_fd); -err_lookup_maps: - close(ctx->prog_fd); -err_prog_fd: - if (ctx->has_bpf_link) - close(ctx->link_fd); - return err; + map_fd = bpf_map__fd(map); + bpf_map_delete_elem(map_fd, &index); } -static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk) { - struct xsk_socket *xsk = _xdp; - struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; - int err; - - if (ctx->has_bpf_link) - err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); - else - err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); - - if (err) - return err; - - err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) : - xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); + int map_fd, sock_fd; + u32 index = 0; - if (!err && xsks_map_fd) - *xsks_map_fd = ctx->xsks_map_fd; + map_fd = bpf_map__fd(map); + sock_fd = xsk_socket__fd(xsk); - return err; -} - -int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd) -{ - return __xsk_setup_xdp_prog(xsk, xsks_map_fd); + return bpf_map_update_elem(map_fd, &index, &sock_fd, 0); } static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, @@ -931,7 +365,7 @@ out_free: static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, struct xsk_umem *umem, int ifindex, - const char *ifname, __u32 queue_id, + __u32 queue_id, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp) { @@ -958,51 +392,15 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->refcount = 1; ctx->umem = umem; ctx->queue_id = queue_id; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); ctx->fill = fill; ctx->comp = comp; list_add(&ctx->list, &umem->ctx_list); - ctx->has_bpf_link = xsk_probe_bpf_link(); return ctx; } -static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) -{ - free(xsk->ctx); - free(xsk); -} - -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) -{ - xsk->ctx->xsks_map_fd = fd; - return xsk_set_bpf_maps(xsk); -} - -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) -{ - struct xsk_socket *xsk; - int res; - - xsk = calloc(1, sizeof(*xsk)); - if (!xsk) - return -ENOMEM; - - res = xsk_create_xsk_struct(ifindex, xsk); - if (res) { - free(xsk); - return -EINVAL; - } - - res = __xsk_setup_xdp_prog(xsk, xsks_map_fd); - - xsk_destroy_xsk_struct(xsk); - - return res; -} - int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, + int ifindex, __u32 queue_id, struct xsk_umem *umem, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, @@ -1016,7 +414,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, struct xdp_mmap_offsets off; struct xsk_socket *xsk; struct xsk_ctx *ctx; - int err, ifindex; + int err; if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; @@ -1031,13 +429,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, if (err) goto out_xsk_alloc; - xsk->outstanding_tx = 0; - ifindex = if_nametoindex(ifname); - if (!ifindex) { - err = -errno; - goto out_xsk_alloc; - } - if (umem->refcount++ > 0) { xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (xsk->fd < 0) { @@ -1057,8 +448,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_socket; } - ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, - fill, comp); + ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp); if (!ctx) { err = -ENOMEM; goto out_socket; @@ -1156,12 +546,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_mmap_tx; } - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - err = __xsk_setup_xdp_prog(xsk, NULL); - if (err) - goto out_mmap_tx; - } - *xsk_ptr = xsk; umem->fill_save = NULL; umem->comp_save = NULL; @@ -1185,7 +569,7 @@ out_xsk_alloc: return err; } -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, +int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex, __u32 queue_id, struct xsk_umem *umem, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *usr_config) @@ -1193,7 +577,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, if (!umem) return -EFAULT; - return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, + return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem, rx, tx, umem->fill_save, umem->comp_save, usr_config); } @@ -1237,13 +621,6 @@ void xsk_socket__delete(struct xsk_socket *xsk) ctx = xsk->ctx; umem = ctx->umem; - if (ctx->refcount == 1) { - xsk_delete_bpf_maps(xsk); - close(ctx->prog_fd); - if (ctx->has_bpf_link) - close(ctx->link_fd); - } - xsk_put_ctx(ctx, true); err = xsk_get_mmap_offsets(xsk->fd, &off); diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 997723b0bfb2..04ed8b544712 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -23,77 +23,6 @@ extern "C" { #endif -/* This whole API has been deprecated and moved to libxdp that can be found at - * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so - * it should just be linking with libxdp instead of libbpf for this set of - * functionality. If not, please submit a bug report on the aforementioned page. - */ - -/* Load-Acquire Store-Release barriers used by the XDP socket - * library. The following macros should *NOT* be considered part of - * the xsk.h API, and is subject to change anytime. - * - * LIBRARY INTERNAL - */ - -#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x) -#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) - -#if defined(__i386__) || defined(__x86_64__) -# define libbpf_smp_store_release(p, v) \ - do { \ - asm volatile("" : : : "memory"); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - asm volatile("" : : : "memory"); \ - ___p1; \ - }) -#elif defined(__aarch64__) -# define libbpf_smp_store_release(p, v) \ - asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory") -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1; \ - asm volatile ("ldar %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ - ___p1; \ - }) -#elif defined(__riscv) -# define libbpf_smp_store_release(p, v) \ - do { \ - asm volatile ("fence rw,w" : : : "memory"); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - asm volatile ("fence r,rw" : : : "memory"); \ - ___p1; \ - }) -#endif - -#ifndef libbpf_smp_store_release -#define libbpf_smp_store_release(p, v) \ - do { \ - __sync_synchronize(); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -#endif - -#ifndef libbpf_smp_load_acquire -#define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - __sync_synchronize(); \ - ___p1; \ - }) -#endif - -/* LIBRARY INTERNAL -- END */ - /* Do not access these members directly. Use the functions below. */ #define DEFINE_XSK_RING(name) \ struct name { \ @@ -168,7 +97,7 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) * this function. Without this optimization it whould have been * free_entries = r->cached_prod - r->cached_cons + r->size. */ - r->cached_cons = libbpf_smp_load_acquire(r->consumer); + r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE); r->cached_cons += r->size; return r->cached_cons - r->cached_prod; @@ -179,7 +108,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) __u32 entries = r->cached_prod - r->cached_cons; if (entries == 0) { - r->cached_prod = libbpf_smp_load_acquire(r->producer); + r->cached_prod = __atomic_load_n(r->producer, __ATOMIC_ACQUIRE); entries = r->cached_prod - r->cached_cons; } @@ -202,7 +131,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) /* Make sure everything has been written to the ring before indicating * this to the kernel by writing the producer pointer. */ - libbpf_smp_store_release(prod->producer, *prod->producer + nb); + __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE); } static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) @@ -227,8 +156,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) /* Make sure data has been read before indicating we are done * with the entries by updating the consumer pointer. */ - libbpf_smp_store_release(cons->consumer, *cons->consumer + nb); - + __atomic_store_n(cons->consumer, *cons->consumer + nb, __ATOMIC_RELEASE); } static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) @@ -269,18 +197,15 @@ struct xsk_umem_config { __u32 flags; }; -int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd); -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); - -/* Flags for the libbpf_flags field. */ -#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) +int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); +void xsk_detach_xdp_program(int ifindex, u32 xdp_flags); +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk); +void xsk_clear_xskmap(struct bpf_map *map); +bool xsk_is_in_mode(u32 ifindex, int mode); struct xsk_socket_config { __u32 rx_size; __u32 tx_size; - __u32 libbpf_flags; - __u32 xdp_flags; __u16 bind_flags; }; @@ -291,13 +216,13 @@ int xsk_umem__create(struct xsk_umem **umem, struct xsk_ring_cons *comp, const struct xsk_umem_config *config); int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, + int ifindex, __u32 queue_id, struct xsk_umem *umem, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *config); int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, + int ifindex, __u32 queue_id, struct xsk_umem *umem, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index a0b71723a818..ae697a10a056 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -55,21 +55,13 @@ test_exit() clear_configs() { - if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then - [ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] && - { ip netns exec $3 ip link del $2; } - ip netns del $3 - fi - #Once we delete a veth pair node, the entire veth pair is removed, - #this is just to be cautious just incase the NS does not exist then - #veth node inside NS won't get removed so we explicitly remove it [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && { ip link del $1; } } cleanup_exit() { - clear_configs $1 $2 $3 + clear_configs $1 $2 } validate_ip_utility() @@ -83,7 +75,7 @@ exec_xskxceiver() ARGS+="-b " fi - ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS} + ./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS} retval=$? test_status $retval "${TEST_NAME}" diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 681a5db80dae..a17655107a94 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -55,12 +55,11 @@ * Flow: * ----- * - Single process spawns two threads: Tx and Rx - * - Each of these two threads attach to a veth interface within their assigned - * namespaces - * - Each thread Creates one AF_XDP socket connected to a unique umem for each + * - Each of these two threads attach to a veth interface + * - Each thread creates one AF_XDP socket connected to a unique umem for each * veth interface - * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy> - * - Rx thread verifies if all 10k packets were received and delivered in-order, + * - Tx thread Transmits a number of packets from veth<xxxx> to veth<yyyy> + * - Rx thread verifies if all packets were received and delivered in-order, * and have the right content * * Enable/disable packet dump mode: @@ -97,18 +96,14 @@ #include <time.h> #include <unistd.h> #include <stdatomic.h> + +#include "xsk_xdp_progs.skel.h" #include "xsk.h" #include "xskxceiver.h" #include <bpf/bpf.h> #include <linux/filter.h> #include "../kselftest.h" -/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. - * Until xskxceiver is either moved or re-writed into libxdp, suppress - * deprecation warnings in this file - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; static const char *IP1 = "192.168.100.162"; @@ -269,6 +264,11 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); } +static u32 mode_to_xdp_flags(enum test_mode mode) +{ + return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; +} + static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) { struct xsk_umem_config cfg = { @@ -322,15 +322,13 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i xsk->umem = umem; cfg.rx_size = xsk->rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; - cfg.xdp_flags = ifobject->xdp_flags; cfg.bind_flags = ifobject->bind_flags; if (shared) cfg.bind_flags |= XDP_SHARED_UMEM; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg); + return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg); } static bool ifobj_zc_avail(struct ifobject *ifobject) @@ -350,7 +348,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject) umem = calloc(1, sizeof(struct xsk_umem_info)); if (!umem) { munmap(bufs, umem_sz); - exit_with_error(-ENOMEM); + exit_with_error(ENOMEM); } umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; ret = xsk_configure_umem(umem, bufs, umem_sz); @@ -360,8 +358,6 @@ static bool ifobj_zc_avail(struct ifobject *ifobject) xsk = calloc(1, sizeof(struct xsk_socket_info)); if (!xsk) goto out; - ifobject->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - ifobject->xdp_flags |= XDP_FLAGS_DRV_MODE; ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY; ifobject->rx_on = true; xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; @@ -399,28 +395,6 @@ static void usage(const char *prog) ksft_print_msg(str, prog); } -static int switch_namespace(const char *nsname) -{ - char fqns[26] = "/var/run/netns/"; - int nsfd; - - if (!nsname || strlen(nsname) == 0) - return -1; - - strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1); - nsfd = open(fqns, O_RDONLY); - - if (nsfd == -1) - exit_with_error(errno); - - if (setns(nsfd, 0) == -1) - exit_with_error(errno); - - print_verbose("NS switched: %s\n", nsname); - - return nsfd; -} - static bool validate_interface(struct ifobject *ifobj) { if (!strcmp(ifobj->ifname, "")) @@ -438,8 +412,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj opterr = 0; for (;;) { - char *sptr, *token; - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); if (c == -1) break; @@ -453,11 +425,13 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj else break; - sptr = strndupa(optarg, strlen(optarg)); - memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); - token = strsep(&sptr, ","); - if (token) - memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS); + memcpy(ifobj->ifname, optarg, + min_t(size_t, MAX_INTERFACE_NAME_CHARS, strlen(optarg))); + + ifobj->ifindex = if_nametoindex(ifobj->ifname); + if (!ifobj->ifindex) + exit_with_error(errno); + interface_nb++; break; case 'D': @@ -520,6 +494,10 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->total_steps = 1; test->nb_sockets = 1; test->fail = false; + test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; + test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; + test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog; + test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk; } static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, @@ -538,12 +516,6 @@ static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (i = 0; i < MAX_INTERFACES; i++) { struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; - ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - if (mode == TEST_MODE_SKB) - ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE; - else - ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE; - ifobj->bind_flags = XDP_USE_NEED_WAKEUP; if (mode == TEST_MODE_ZC) ifobj->bind_flags |= XDP_ZEROCOPY; @@ -565,6 +537,16 @@ static void test_spec_set_name(struct test_spec *test, const char *name) strncpy(test->name, name, MAX_TEST_NAME_SIZE); } +static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx, + struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx, + struct bpf_map *xskmap_tx) +{ + test->xdp_prog_rx = xdp_prog_rx; + test->xdp_prog_tx = xdp_prog_tx; + test->xskmap_rx = xskmap_rx; + test->xskmap_tx = xskmap_tx; +} + static void pkt_stream_reset(struct pkt_stream *pkt_stream) { if (pkt_stream) @@ -767,7 +749,7 @@ static void pkt_dump(void *pkt, u32 len) struct ethhdr *ethhdr; struct udphdr *udphdr; struct iphdr *iphdr; - int payload, i; + u32 payload, i; ethhdr = pkt; iphdr = pkt + sizeof(*ethhdr); @@ -792,7 +774,7 @@ static void pkt_dump(void *pkt, u32 len) fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); /*extract L5 frame */ - payload = *((uint32_t *)(pkt + PKT_HDR_SIZE)); + payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE))); fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); fprintf(stdout, "---------------------------------------\n"); @@ -936,7 +918,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) if (ifobj->use_poll) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) - exit_with_error(-ret); + exit_with_error(errno); if (!ret) { if (!is_umem_valid(test->ifobj_tx)) @@ -963,7 +945,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) if (xsk_ring_prod__needs_wakeup(&umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) - exit_with_error(-ret); + exit_with_error(errno); } ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } @@ -1006,7 +988,8 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd { struct xsk_socket_info *xsk = ifobject->xsk; bool use_poll = ifobject->use_poll; - u32 i, idx = 0, ret, valid_pkts = 0; + u32 i, idx = 0, valid_pkts = 0; + int ret; while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { @@ -1014,7 +997,7 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd if (timeout) { if (ret < 0) { ksft_print_msg("ERROR: [%s] Poll error %d\n", - __func__, ret); + __func__, errno); return TEST_FAILURE; } if (ret == 0) @@ -1023,7 +1006,7 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd } if (ret <= 0) { ksft_print_msg("ERROR: [%s] Poll error %d\n", - __func__, ret); + __func__, errno); return TEST_FAILURE; } } @@ -1239,7 +1222,7 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje { xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true); ifobject->xsk = &ifobject->xsk_arr[0]; - ifobject->xsk_map_fd = test->ifobj_rx->xsk_map_fd; + ifobject->xskmap = test->ifobj_rx->xskmap; memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); } @@ -1271,7 +1254,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; } - xsk_ring_prod__submit(&umem->fq, buffers_to_fill); + xsk_ring_prod__submit(&umem->fq, i); } static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) @@ -1279,10 +1262,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; LIBBPF_OPTS(bpf_xdp_query_opts, opts); - int ret, ifindex; void *bufs; - - ifobject->ns_fd = switch_namespace(ifobject->nsname); + int ret; if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB; @@ -1307,33 +1288,9 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; - ifindex = if_nametoindex(ifobject->ifname); - if (!ifindex) - exit_with_error(errno); - - ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd); + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); if (ret) - exit_with_error(-ret); - - ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts); - if (ret) - exit_with_error(-ret); - - if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) { - if (opts.attach_mode != XDP_ATTACHED_SKB) { - ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n"); - exit_with_error(-EINVAL); - } - } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) { - if (opts.attach_mode != XDP_ATTACHED_DRV) { - ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n"); - exit_with_error(-EINVAL); - } - } - - ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); - if (ret) - exit_with_error(-ret); + exit_with_error(errno); } static void *worker_testapp_validate_tx(void *arg) @@ -1366,14 +1323,17 @@ static void *worker_testapp_validate_rx(void *arg) struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_rx; struct pollfd fds = { }; - int id = 0; int err; if (test->current_step == 1) { thread_common_ops(test, ifobject); } else { - bpf_map_delete_elem(ifobject->xsk_map_fd, &id); - xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); + xsk_clear_xskmap(ifobject->xskmap); + err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); + if (err) { + printf("Error: Failed to update xskmap, error %s\n", strerror(-err)); + exit_with_error(-err); + } } fds.fd = xsk_socket__fd(ifobject->xsk->xsk); @@ -1411,84 +1371,106 @@ static void handler(int signum) pthread_exit(NULL); } -static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj, - enum test_type type) +static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj) { - bool old_shared_umem = ifobj->shared_umem; - pthread_t t0; - - if (pthread_barrier_init(&barr, NULL, 2)) - exit_with_error(errno); - - test->current_step++; - if (type == TEST_TYPE_POLL_RXQ_TMOUT) - pkt_stream_reset(ifobj->pkt_stream); - pkts_in_flight = 0; - - test->ifobj_rx->shared_umem = false; - test->ifobj_tx->shared_umem = false; + return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; +} - signal(SIGUSR1, handler); - /* Spawn thread */ - pthread_create(&t0, NULL, ifobj->func_ptr, test); +static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, + struct bpf_map *xskmap, enum test_mode mode) +{ + int err; - if (type != TEST_TYPE_POLL_TXQ_TMOUT) - pthread_barrier_wait(&barr); + xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode)); + err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode)); + if (err) { + printf("Error attaching XDP program\n"); + exit_with_error(-err); + } - if (pthread_barrier_destroy(&barr)) - exit_with_error(errno); + if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC)) + if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) { + ksft_print_msg("ERROR: XDP prog not in DRV mode\n"); + exit_with_error(EINVAL); + } - pthread_kill(t0, SIGUSR1); - pthread_join(t0, NULL); + ifobj->xdp_prog = xdp_prog; + ifobj->xskmap = xskmap; + ifobj->mode = mode; +} - if (test->total_steps == test->current_step || test->fail) { - xsk_socket__delete(ifobj->xsk->xsk); - testapp_clean_xsk_umem(ifobj); - } +static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, + struct ifobject *ifobj_tx) +{ + if (xdp_prog_changed(test, ifobj_rx)) + xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); - test->ifobj_rx->shared_umem = old_shared_umem; - test->ifobj_tx->shared_umem = old_shared_umem; + if (!ifobj_tx || ifobj_tx->shared_umem) + return; - return !!test->fail; + if (xdp_prog_changed(test, ifobj_tx)) + xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); } -static int testapp_validate_traffic(struct test_spec *test) +static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1, + struct ifobject *ifobj2) { - struct ifobject *ifobj_tx = test->ifobj_tx; - struct ifobject *ifobj_rx = test->ifobj_rx; pthread_t t0, t1; - if (pthread_barrier_init(&barr, NULL, 2)) - exit_with_error(errno); + if (ifobj2) + if (pthread_barrier_init(&barr, NULL, 2)) + exit_with_error(errno); test->current_step++; - pkt_stream_reset(ifobj_rx->pkt_stream); + pkt_stream_reset(ifobj1->pkt_stream); pkts_in_flight = 0; + signal(SIGUSR1, handler); /*Spawn RX thread */ - pthread_create(&t0, NULL, ifobj_rx->func_ptr, test); + pthread_create(&t0, NULL, ifobj1->func_ptr, test); - pthread_barrier_wait(&barr); - if (pthread_barrier_destroy(&barr)) - exit_with_error(errno); + if (ifobj2) { + pthread_barrier_wait(&barr); + if (pthread_barrier_destroy(&barr)) + exit_with_error(errno); - /*Spawn TX thread */ - pthread_create(&t1, NULL, ifobj_tx->func_ptr, test); + /*Spawn TX thread */ + pthread_create(&t1, NULL, ifobj2->func_ptr, test); - pthread_join(t1, NULL); - pthread_join(t0, NULL); + pthread_join(t1, NULL); + } + + if (!ifobj2) + pthread_kill(t0, SIGUSR1); + else + pthread_join(t0, NULL); if (test->total_steps == test->current_step || test->fail) { - xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_socket__delete(ifobj_rx->xsk->xsk); - testapp_clean_xsk_umem(ifobj_rx); - if (!ifobj_tx->shared_umem) - testapp_clean_xsk_umem(ifobj_tx); + if (ifobj2) + xsk_socket__delete(ifobj2->xsk->xsk); + xsk_socket__delete(ifobj1->xsk->xsk); + testapp_clean_xsk_umem(ifobj1); + if (ifobj2 && !ifobj2->shared_umem) + testapp_clean_xsk_umem(ifobj2); } return !!test->fail; } +static int testapp_validate_traffic(struct test_spec *test) +{ + struct ifobject *ifobj_rx = test->ifobj_rx; + struct ifobject *ifobj_tx = test->ifobj_tx; + + xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); + return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); +} + +static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj) +{ + return __testapp_validate_traffic(test, ifobj, NULL); +} + static void testapp_teardown(struct test_spec *test) { int i; @@ -1524,7 +1506,7 @@ static void testapp_bidi(struct test_spec *test) print_verbose("Switching Tx/Rx vectors\n"); swap_directions(&test->ifobj_rx, &test->ifobj_tx); - testapp_validate_traffic(test); + __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); swap_directions(&test->ifobj_rx, &test->ifobj_tx); } @@ -1538,9 +1520,9 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; - ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd); + ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk); if (ret) - exit_with_error(-ret); + exit_with_error(errno); } static void testapp_bpf_res(struct test_spec *test) @@ -1579,8 +1561,6 @@ static void testapp_stats_tx_invalid_descs(struct test_spec *test) pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); test->ifobj_tx->validation_func = validate_tx_invalid_descs; testapp_validate_traffic(test); - - pkt_stream_restore_default(test); } static void testapp_stats_rx_full(struct test_spec *test) @@ -1596,8 +1576,6 @@ static void testapp_stats_rx_full(struct test_spec *test) test->ifobj_rx->release_rx = false; test->ifobj_rx->validation_func = validate_rx_full; testapp_validate_traffic(test); - - pkt_stream_restore_default(test); } static void testapp_stats_fill_empty(struct test_spec *test) @@ -1612,8 +1590,6 @@ static void testapp_stats_fill_empty(struct test_spec *test) test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; testapp_validate_traffic(test); - - pkt_stream_restore_default(test); } /* Simple test */ @@ -1646,7 +1622,6 @@ static bool testapp_unaligned(struct test_spec *test) test->ifobj_rx->pkt_stream->use_addr_for_fill = true; testapp_validate_traffic(test); - pkt_stream_restore_default(test); return true; } @@ -1656,7 +1631,6 @@ static void testapp_single_pkt(struct test_spec *test) pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); testapp_validate_traffic(test); - pkt_stream_restore_default(test); } static void testapp_invalid_desc(struct test_spec *test) @@ -1697,7 +1671,51 @@ static void testapp_invalid_desc(struct test_spec *test) pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); testapp_validate_traffic(test); - pkt_stream_restore_default(test); +} + +static void testapp_xdp_drop(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_name(test, "XDP_DROP_HALF"); + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + pkt_stream_receive_half(test); + testapp_validate_traffic(test); +} + +static void testapp_poll_txq_tmout(struct test_spec *test) +{ + test_spec_set_name(test, "POLL_TXQ_FULL"); + + test->ifobj_tx->use_poll = true; + /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ + test->ifobj_tx->umem->frame_size = 2048; + pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); + testapp_validate_traffic_single_thread(test, test->ifobj_tx); +} + +static void testapp_poll_rxq_tmout(struct test_spec *test) +{ + test_spec_set_name(test, "POLL_RXQ_EMPTY"); + test->ifobj_rx->use_poll = true; + testapp_validate_traffic_single_thread(test, test->ifobj_rx); +} + +static int xsk_load_xdp_programs(struct ifobject *ifobj) +{ + ifobj->xdp_progs = xsk_xdp_progs__open_and_load(); + if (libbpf_get_error(ifobj->xdp_progs)) + return libbpf_get_error(ifobj->xdp_progs); + + return 0; +} + +static void xsk_unload_xdp_programs(struct ifobject *ifobj) +{ + xsk_xdp_progs__destroy(ifobj->xdp_progs); } static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, @@ -1705,6 +1723,7 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * const u16 src_port, thread_func_t func_ptr) { struct in_addr ip; + int err; memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); memcpy(ifobj->src_mac, src_mac, ETH_ALEN); @@ -1719,6 +1738,12 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * ifobj->src_port = src_port; ifobj->func_ptr = func_ptr; + + err = xsk_load_xdp_programs(ifobj); + if (err) { + printf("Error loading XDP program\n"); + exit_with_error(err); + } } static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) @@ -1763,8 +1788,6 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ test->ifobj_rx->umem->frame_size = 2048; pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); testapp_validate_traffic(test); - - pkt_stream_restore_default(test); break; case TEST_TYPE_RX_POLL: test->ifobj_rx->use_poll = true; @@ -1777,18 +1800,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ testapp_validate_traffic(test); break; case TEST_TYPE_POLL_TXQ_TMOUT: - test_spec_set_name(test, "POLL_TXQ_FULL"); - test->ifobj_tx->use_poll = true; - /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ - test->ifobj_tx->umem->frame_size = 2048; - pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); - testapp_validate_traffic_single_thread(test, test->ifobj_tx, type); - pkt_stream_restore_default(test); + testapp_poll_txq_tmout(test); break; case TEST_TYPE_POLL_RXQ_TMOUT: - test_spec_set_name(test, "POLL_RXQ_EMPTY"); - test->ifobj_rx->use_poll = true; - testapp_validate_traffic_single_thread(test, test->ifobj_rx, type); + testapp_poll_rxq_tmout(test); break; case TEST_TYPE_ALIGNED_INV_DESC: test_spec_set_name(test, "ALIGNED_INV_DESC"); @@ -1817,6 +1832,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ case TEST_TYPE_HEADROOM: testapp_headroom(test); break; + case TEST_TYPE_XDP_DROP_HALF: + testapp_xdp_drop(test); + break; default: break; } @@ -1824,6 +1842,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ if (!test->fail) ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), test->name); + pkt_stream_restore_default(test); } static struct ifobject *ifobject_create(void) @@ -1842,8 +1861,6 @@ static struct ifobject *ifobject_create(void) if (!ifobj->umem) goto out_umem; - ifobj->ns_fd = -1; - return ifobj; out_umem: @@ -1855,14 +1872,12 @@ out_xsk_arr: static void ifobject_delete(struct ifobject *ifobj) { - if (ifobj->ns_fd != -1) - close(ifobj->ns_fd); free(ifobj->umem); free(ifobj->xsk_arr); free(ifobj); } -static bool is_xdp_supported(struct ifobject *ifobject) +static bool is_xdp_supported(int ifindex) { int flags = XDP_FLAGS_DRV_MODE; @@ -1871,7 +1886,6 @@ static bool is_xdp_supported(struct ifobject *ifobject) BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), BPF_EXIT_INSN() }; - int ifindex = if_nametoindex(ifobject->ifname); int prog_fd, insn_cnt = ARRAY_SIZE(insns); int err; @@ -1899,7 +1913,7 @@ int main(int argc, char **argv) int modes = TEST_MODE_SKB + 1; u32 i, j, failed_tests = 0; struct test_spec test; - bool shared_umem; + bool shared_netdev; /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); @@ -1914,27 +1928,27 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); parse_command_line(ifobj_tx, ifobj_rx, argc, argv); - shared_umem = !strcmp(ifobj_tx->ifname, ifobj_rx->ifname); - ifobj_tx->shared_umem = shared_umem; - ifobj_rx->shared_umem = shared_umem; + shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex); + ifobj_tx->shared_umem = shared_netdev; + ifobj_rx->shared_umem = shared_netdev; if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) { usage(basename(argv[0])); ksft_exit_xfail(); } - init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, - worker_testapp_validate_tx); - init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, - worker_testapp_validate_rx); - - if (is_xdp_supported(ifobj_tx)) { + if (is_xdp_supported(ifobj_tx->ifindex)) { modes++; if (ifobj_zc_avail(ifobj_tx)) modes++; } + init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, + worker_testapp_validate_rx); + init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, + worker_testapp_validate_tx); + test_spec_init(&test, ifobj_tx, ifobj_rx, 0); tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE); @@ -1945,7 +1959,7 @@ int main(int argc, char **argv) ksft_set_plan(modes * TEST_TYPE_MAX); - for (i = 0; i < modes; i++) + for (i = 0; i < modes; i++) { for (j = 0; j < TEST_TYPE_MAX; j++) { test_spec_init(&test, ifobj_tx, ifobj_rx, i); run_pkt_test(&test, i, j); @@ -1954,9 +1968,12 @@ int main(int argc, char **argv) if (test.fail) failed_tests++; } + } pkt_stream_delete(tx_pkt_stream_default); pkt_stream_delete(rx_pkt_stream_default); + xsk_unload_xdp_programs(ifobj_tx); + xsk_unload_xdp_programs(ifobj_rx); ifobject_delete(ifobj_tx); ifobject_delete(ifobj_rx); diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index edb76d2def9f..3e8ec7d8ec32 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -5,6 +5,8 @@ #ifndef XSKXCEIVER_H_ #define XSKXCEIVER_H_ +#include "xsk_xdp_progs.skel.h" + #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -30,7 +32,6 @@ #define TEST_CONTINUE 1 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 -#define MAX_INTERFACES_NAMESPACE_CHARS 16 #define MAX_SOCKETS 2 #define MAX_TEST_NAME_SIZE 32 #define MAX_TEARDOWN_ITER 10 @@ -86,6 +87,7 @@ enum test_type { TEST_TYPE_STATS_RX_FULL, TEST_TYPE_STATS_FILL_EMPTY, TEST_TYPE_BPF_RES, + TEST_TYPE_XDP_DROP_HALF, TEST_TYPE_MAX }; @@ -133,18 +135,19 @@ typedef void *(*thread_func_t)(void *arg); struct ifobject { char ifname[MAX_INTERFACE_NAME_CHARS]; - char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; struct xsk_socket_info *xsk; struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; thread_func_t func_ptr; validation_func_t validation_func; struct pkt_stream *pkt_stream; - int ns_fd; - int xsk_map_fd; + struct xsk_xdp_progs *xdp_progs; + struct bpf_map *xskmap; + struct bpf_program *xdp_prog; + enum test_mode mode; + int ifindex; u32 dst_ip; u32 src_ip; - u32 xdp_flags; u32 bind_flags; u16 src_port; u16 dst_port; @@ -164,6 +167,10 @@ struct test_spec { struct ifobject *ifobj_rx; struct pkt_stream *tx_pkt_stream_default; struct pkt_stream *rx_pkt_stream_default; + struct bpf_program *xdp_prog_rx; + struct bpf_program *xdp_prog_tx; + struct bpf_map *xskmap_rx; + struct bpf_map *xskmap_tx; u16 total_steps; u16 current_step; u16 nb_sockets; diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 4c52cc6f2f9c..e8bbbdb77e0d 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -555,6 +555,7 @@ int proc_mount_contains(const char *option) ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; + ssize_t ret; if (!pid) snprintf(path, sizeof(path), "/proc/%s/%s", @@ -562,8 +563,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t else snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); - size = read_text(path, buf, size); - return size < 0 ? -1 : size; + ret = read_text(path, buf, size); + return ret < 0 ? -1 : ret; } int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 526d2c42d870..75c100de90ff 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -16,7 +16,12 @@ skip_test() { [[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" # Set sched verbose flag, if available -[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose +if [[ -d /sys/kernel/debug/sched ]] +then + # Used to restore the original setting during cleanup + SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose) + echo Y > /sys/kernel/debug/sched/verbose +fi # Get wait_inotify location WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify @@ -25,7 +30,7 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') [[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!" -CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//") +CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//") [[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" # Set verbose flag and delay factor @@ -54,6 +59,15 @@ echo +cpuset > cgroup.subtree_control [[ -d test ]] || mkdir test cd test +cleanup() +{ + online_cpus + rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 + cd .. + rmdir test > /dev/null 2>&1 + echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose +} + # Pause in ms pause() { @@ -254,6 +268,7 @@ TEST_MATRIX=( # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. " S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" + " S+ C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" " S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" " S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" @@ -666,6 +681,7 @@ test_inotify() fi } +trap cleanup 0 2 3 6 run_state_test TEST_MATRIX test_isolated test_inotify diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 22b31ebb3513..258ddc565deb 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -19,12 +19,12 @@ /* - * Memory cgroup charging is performed using percpu batches 32 pages + * Memory cgroup charging is performed using percpu batches 64 pages * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So * the maximum discrepancy between charge and vmstat entries is number - * of cpus multiplied by 32 pages. + * of cpus multiplied by 64 pages. */ -#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs()) +#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs()) static int alloc_dcache(const char *cgroup, void *arg) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 8833359556f3..1e616a8c6a9c 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -238,6 +238,8 @@ static int cg_test_proc_killed(const char *cgroup) return -1; } +static bool reclaim_until(const char *memcg, long goal); + /* * First, this test creates the following hierarchy: * A memory.min = 0, memory.max = 200M @@ -266,6 +268,12 @@ static int cg_test_proc_killed(const char *cgroup) * unprotected memory in A available, and checks that: * a) memory.min protects pagecache even in this case, * b) memory.low allows reclaiming page cache with low events. + * + * Then we try to reclaim from A/B/C using memory.reclaim until its + * usage reaches 10M. + * This makes sure that: + * (a) We ignore the protection of the reclaim target memcg. + * (b) The previously calculated emin value (~29M) should be dismissed. */ static int test_memcg_protection(const char *root, bool min) { @@ -385,6 +393,9 @@ static int test_memcg_protection(const char *root, bool min) if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) goto cleanup; + if (!reclaim_until(children[0], MB(10))) + goto cleanup; + if (min) { ret = KSFT_PASS; goto cleanup; @@ -646,6 +657,48 @@ cleanup: } /* + * Reclaim from @memcg until usage reaches @goal by writing to + * memory.reclaim. + * + * This function will return false if the usage is already below the + * goal. + * + * This function assumes that writing to memory.reclaim is the only + * source of change in memory.current (no concurrent allocations or + * reclaim). + * + * This function makes sure memory.reclaim is sane. It will return + * false if memory.reclaim's error codes do not make sense, even if + * the usage goal was satisfied. + */ +static bool reclaim_until(const char *memcg, long goal) +{ + char buf[64]; + int retries, err; + long current, to_reclaim; + bool reclaimed = false; + + for (retries = 5; retries > 0; retries--) { + current = cg_read_long(memcg, "memory.current"); + + if (current < goal || values_close(current, goal, 3)) + break; + /* Did memory.reclaim return 0 incorrectly? */ + else if (reclaimed) + return false; + + to_reclaim = current - goal; + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) + reclaimed = true; + else if (err != -EAGAIN) + return false; + } + return reclaimed; +} + +/* * This test checks that memory.reclaim reclaims the given * amount of memory (from both anon and file, if possible). */ @@ -653,8 +706,7 @@ static int test_memcg_reclaim(const char *root) { int ret = KSFT_FAIL, fd, retries; char *memcg; - long current, expected_usage, to_reclaim; - char buf[64]; + long current, expected_usage; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -705,41 +757,8 @@ static int test_memcg_reclaim(const char *root) * Reclaim until current reaches 30M, this makes sure we hit both anon * and file if swap is enabled. */ - retries = 5; - while (true) { - int err; - - current = cg_read_long(memcg, "memory.current"); - to_reclaim = current - MB(30); - - /* - * We only keep looping if we get EAGAIN, which means we could - * not reclaim the full amount. - */ - if (to_reclaim <= 0) - goto cleanup; - - - snprintf(buf, sizeof(buf), "%ld", to_reclaim); - err = cg_write(memcg, "memory.reclaim", buf); - if (!err) { - /* - * If writing succeeds, then the written amount should have been - * fully reclaimed (and maybe more). - */ - current = cg_read_long(memcg, "memory.current"); - if (!values_close(current, MB(30), 3) && current > MB(30)) - goto cleanup; - break; - } - - /* The kernel could not reclaim the full amount, try again. */ - if (err == -EAGAIN && retries--) - continue; - - /* We got an unexpected error or ran out of retries. */ + if (!reclaim_until(memcg, MB(30))) goto cleanup; - } ret = KSFT_PASS; cleanup: diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile index 79b19a2863a0..84832c369a2e 100644 --- a/tools/testing/selftests/clone3/Makefile +++ b/tools/testing/selftests/clone3/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -g -std=gnu99 -I../../../../usr/include/ +CFLAGS += -g -std=gnu99 $(KHDR_INCLUDES) LDLIBS += -lcap TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \ diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile index f6f2d6f473c6..ce262d097269 100644 --- a/tools/testing/selftests/core/Makefile +++ b/tools/testing/selftests/core/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ +CFLAGS += -g $(KHDR_INCLUDES) TEST_GEN_PROGS := close_range_test diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index a1fa2eff8192..b71247ba7196 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,6 +7,8 @@ TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh -TEST_PROGS += sysfs.sh +TEST_PROGS += debugfs_rm_non_contexts.sh +TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh new file mode 100644 index 000000000000..48b7af6b022c --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test putting non-ctx files/dirs to rm_contexts file +# =================================================== + +dmesg -C + +for file in "$DBGFS/"* +do + echo "$(basename "$f")" > "$DBGFS/rm_contexts" + if dmesg | grep -q BUG + then + dmesg + exit 1 + fi +done diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c index ad7a6b4cf338..a6fe0689f88d 100644 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -8,6 +8,13 @@ #include <unistd.h> #include <stdio.h> +#pragma GCC diagnostic push +#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1 +/* Ignore read(2) overflow and write(2) overread compile warnings */ +#pragma GCC diagnostic ignored "-Wstringop-overread" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif + void write_read_with_huge_count(char *file) { int filedesc = open(file, O_RDWR); @@ -27,6 +34,8 @@ void write_read_with_huge_count(char *file) close(filedesc); } +#pragma GCC diagnostic pop + int main(int argc, char *argv[]) { if (argc != 2) { diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh new file mode 100644 index 000000000000..61b80197c896 --- /dev/null +++ b/tools/testing/selftests/damon/lru_sort.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled" +if [ ! -f "$damon_lru_sort_enabled" ] +then + echo "No 'enabled' file. Maybe DAMON_LRU_SORT not built" + exit $ksft_skip +fi + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "Another kdamond is running" + exit $ksft_skip +fi + +echo Y > "$damon_lru_sort_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 1 ] +then + echo "kdamond is not turned on" + exit 1 +fi + +echo N > "$damon_lru_sort_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "kdamond is not turned off" + exit 1 +fi diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh new file mode 100644 index 000000000000..78dbc2334cbe --- /dev/null +++ b/tools/testing/selftests/damon/reclaim.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled" +if [ ! -f "$damon_reclaim_enabled" ] +then + echo "No 'enabled' file. Maybe DAMON_RECLAIM not built" + exit $ksft_skip +fi + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "Another kdamond is running" + exit $ksft_skip +fi + +echo Y > "$damon_reclaim_enabled" + +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 1 ] +then + echo "kdamond is not turned on" + exit 1 +fi + +echo N > "$damon_reclaim_enabled" +nr_kdamonds=$(pgrep kdamond | wc -l) +if [ "$nr_kdamonds" -ne 0 ] +then + echo "kdamond is not turned off" + exit 1 +fi diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 89592c64462f..db4942383a50 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -80,6 +80,12 @@ test_range() ensure_file "$range_dir/max" "exist" 600 } +test_tried_regions() +{ + tried_regions_dir=$1 + ensure_dir "$tried_regions_dir" "exist" +} + test_stats() { stats_dir=$1 @@ -138,6 +144,7 @@ test_scheme() test_quotas "$scheme_dir/quotas" test_watermarks "$scheme_dir/watermarks" test_stats "$scheme_dir/stats" + test_tried_regions "$scheme_dir/tried_regions" } test_schemes() diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh new file mode 100644 index 000000000000..ade35576e748 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_sysfs="/sys/kernel/mm/damon/admin" +if [ ! -d "$damon_sysfs" ] +then + echo "damon sysfs not found" + exit $ksft_skip +fi + +# clear log +dmesg -C + +# start DAMON with a scheme +echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts" +echo "vaddr" > "$damon_sysfs/kdamonds/0/contexts/0/operations" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/targets/nr_targets" +echo $$ > "$damon_sysfs/kdamonds/0/contexts/0/targets/0/pid_target" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" +scheme_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0" +echo 4096000 > "$scheme_dir/access_pattern/sz/max" +echo 20 > "$scheme_dir/access_pattern/nr_accesses/max" +echo 1024 > "$scheme_dir/access_pattern/age/max" +echo "on" > "$damon_sysfs/kdamonds/0/state" +sleep 0.3 + +# remove scheme sysfs dir +echo 0 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" + +# try to update stat of already removed scheme sysfs dir +echo "update_schemes_stats" > "$damon_sysfs/kdamonds/0/state" +if dmesg | grep -q BUG +then + echo "update_schemes_stats triggers a kernel bug" + dmesg + exit 1 +fi + +# try to update tried regions of already removed scheme sysfs dir +echo "update_schemes_tried_regions" > "$damon_sysfs/kdamonds/0/state" +if dmesg | grep -q BUG +then + echo "update_schemes_tried_regions triggers a kernel bug" + dmesg + exit 1 +fi + +echo "off" > "$damon_sysfs/kdamonds/0/state" diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile index 604b43ece15f..9e7e158d5fa3 100644 --- a/tools/testing/selftests/dmabuf-heaps/Makefile +++ b/tools/testing/selftests/dmabuf-heaps/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -static -O3 -Wl,-no-as-needed -Wall +CFLAGS += -static -O3 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) TEST_GEN_PROGS = dmabuf-heap diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 29af27acd40e..890a8236a8ba 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -13,10 +13,9 @@ #include <sys/types.h> #include <linux/dma-buf.h> +#include <linux/dma-heap.h> #include <drm/drm.h> -#include "../../../../include/uapi/linux/dma-heap.h" - #define DEVPATH "/dev/dma_heap" static int check_vgem(int fd) diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile index 79cb16b4e01a..441407bb0e80 100644 --- a/tools/testing/selftests/drivers/dma-buf/Makefile +++ b/tools/testing/selftests/drivers/dma-buf/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -I../../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := udmabuf diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 6b8d2e2f23c2..8e3b786a748f 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -5,7 +5,10 @@ TEST_PROGS := \ bond-arp-interval-causes-panic.sh \ bond-break-lacpdu-tx.sh \ bond-lladdr-target.sh \ - dev_addr_lists.sh + dev_addr_lists.sh \ + mode-1-recovery-updelay.sh \ + mode-2-recovery-updelay.sh \ + option_prio.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 16c7fb858ac1..2a268b17b61f 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +NAMESPACES="" + # Test that a link aggregation device (bonding, team) removes the hardware # addresses that it adds on its underlying devices. test_LAG_cleanup() @@ -59,3 +61,107 @@ test_LAG_cleanup() log_test "$driver cleanup mode $mode" } + +# Build a generic 2 node net namespace with 2 connections +# between the namespaces +# +# +-----------+ +-----------+ +# | node1 | | node2 | +# | | | | +# | | | | +# | eth0 +-------+ eth0 | +# | | | | +# | eth1 +-------+ eth1 | +# | | | | +# +-----------+ +-----------+ +lag_setup2x2() +{ + local state=${1:-down} + local namespaces="lag_node1 lag_node2" + + # create namespaces + for n in ${namespaces}; do + ip netns add ${n} + done + + # wire up namespaces + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth0 + ip link set dev lag1-end netns lag_node2 $state name eth0 + + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth1 + ip link set dev lag1-end netns lag_node2 $state name eth1 + + NAMESPACES="${namespaces}" +} + +# cleanup all lag related namespaces and remove the bonding module +lag_cleanup() +{ + for n in ${NAMESPACES}; do + ip netns delete ${n} >/dev/null 2>&1 || true + done + modprobe -r bonding +} + +SWITCH="lag_node1" +CLIENT="lag_node2" +CLIENTIP="172.20.2.1" +SWITCHIP="172.20.2.2" + +lag_setup_network() +{ + lag_setup2x2 "down" + + # create switch + ip netns exec ${SWITCH} ip link add br0 up type bridge + ip netns exec ${SWITCH} ip link set eth0 master br0 up + ip netns exec ${SWITCH} ip link set eth1 master br0 up + ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0 +} + +lag_reset_network() +{ + ip netns exec ${CLIENT} ip link del bond0 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 up +} + +create_bond() +{ + # create client + ip netns exec ${CLIENT} ip link set eth0 down + ip netns exec ${CLIENT} ip link set eth1 down + + ip netns exec ${CLIENT} ip link add bond0 type bond $@ + ip netns exec ${CLIENT} ip link set eth0 master bond0 + ip netns exec ${CLIENT} ip link set eth1 master bond0 + ip netns exec ${CLIENT} ip link set bond0 up + ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0 +} + +test_bond_recovery() +{ + RET=0 + + create_bond $@ + + # verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + check_err $? "No connectivity" + + # force the links of the bond down + ip netns exec ${SWITCH} ip link set eth0 down + sleep 2 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 down + + # re-verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + + local rc=$? + check_err $rc "Bond failed to recover" + log_test "$1 ($2) bond recovery" + lag_reset_network +} diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh new file mode 100755 index 000000000000..ad4c845a4ac7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 1 miimon 100 updelay 0 +test_bond_recovery mode 1 miimon 100 updelay 200 +test_bond_recovery mode 1 miimon 100 updelay 500 +test_bond_recovery mode 1 miimon 100 updelay 1000 +test_bond_recovery mode 1 miimon 100 updelay 2000 +test_bond_recovery mode 1 miimon 100 updelay 5000 +test_bond_recovery mode 1 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh new file mode 100755 index 000000000000..2330d37453f9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 2 miimon 100 updelay 0 +test_bond_recovery mode 2 miimon 100 updelay 200 +test_bond_recovery mode 2 miimon 100 updelay 500 +test_bond_recovery mode 2 miimon 100 updelay 1000 +test_bond_recovery mode 2 miimon 100 updelay 2000 +test_bond_recovery mode 2 miimon 100 updelay 5000 +test_bond_recovery mode 2 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/option_prio.sh b/tools/testing/selftests/drivers/net/bonding/option_prio.sh new file mode 100755 index 000000000000..c32eebff5005 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/option_prio.sh @@ -0,0 +1,245 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bonding option prio +# + +ALL_TESTS=" + prio_arp_ip_target_test + prio_miimon_test +" + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh + +destroy() +{ + ip link del bond0 &>/dev/null + ip link del br0 &>/dev/null + ip link del veth0 &>/dev/null + ip link del veth1 &>/dev/null + ip link del veth2 &>/dev/null + ip netns del ns1 &>/dev/null + ip link del veth3 &>/dev/null +} + +cleanup() +{ + pre_cleanup + + destroy +} + +skip() +{ + local skip=1 + ip link add name bond0 type bond mode 1 miimon 100 &>/dev/null + ip link add name veth0 type veth peer name veth0_p + ip link set veth0 master bond0 + + # check if iproute support prio option + ip link set dev veth0 type bond_slave prio 10 + [[ $? -ne 0 ]] && skip=0 + + # check if bonding support prio option + ip -d link show veth0 | grep -q "prio 10" + [[ $? -ne 0 ]] && skip=0 + + ip link del bond0 &>/dev/null + ip link del veth0 + + return $skip +} + +active_slave="" +check_active_slave() +{ + local target_active_slave=$1 + active_slave="$(cat /sys/class/net/bond0/bonding/active_slave)" + test "$active_slave" = "$target_active_slave" + check_err $? "Current active slave is $active_slave but not $target_active_slave" +} + + +# Test bonding prio option with mode=$mode monitor=$monitor +# and primary_reselect=$primary_reselect +prio_test() +{ + RET=0 + + local monitor=$1 + local mode=$2 + local primary_reselect=$3 + + local bond_ip4="192.169.1.2" + local peer_ip4="192.169.1.1" + local bond_ip6="2009:0a:0b::02" + local peer_ip6="2009:0a:0b::01" + + + # create veths + ip link add name veth0 type veth peer name veth0_p + ip link add name veth1 type veth peer name veth1_p + ip link add name veth2 type veth peer name veth2_p + + # create bond + if [[ "$monitor" == "miimon" ]];then + ip link add name bond0 type bond mode $mode miimon 100 primary veth1 primary_reselect $primary_reselect + elif [[ "$monitor" == "arp_ip_target" ]];then + ip link add name bond0 type bond mode $mode arp_interval 1000 arp_ip_target $peer_ip4 primary veth1 primary_reselect $primary_reselect + elif [[ "$monitor" == "ns_ip6_target" ]];then + ip link add name bond0 type bond mode $mode arp_interval 1000 ns_ip6_target $peer_ip6 primary veth1 primary_reselect $primary_reselect + fi + ip link set bond0 up + ip link set veth0 master bond0 + ip link set veth1 master bond0 + ip link set veth2 master bond0 + # check bonding member prio value + ip link set dev veth0 type bond_slave prio 0 + ip link set dev veth1 type bond_slave prio 10 + ip link set dev veth2 type bond_slave prio 11 + ip -d link show veth0 | grep -q 'prio 0' + check_err $? "veth0 prio is not 0" + ip -d link show veth1 | grep -q 'prio 10' + check_err $? "veth0 prio is not 10" + ip -d link show veth2 | grep -q 'prio 11' + check_err $? "veth0 prio is not 11" + + ip link set veth0 up + ip link set veth1 up + ip link set veth2 up + ip link set veth0_p up + ip link set veth1_p up + ip link set veth2_p up + + # prepare ping target + ip link add name br0 type bridge + ip link set br0 up + ip link set veth0_p master br0 + ip link set veth1_p master br0 + ip link set veth2_p master br0 + ip link add name veth3 type veth peer name veth3_p + ip netns add ns1 + ip link set veth3_p master br0 up + ip link set veth3 netns ns1 up + ip netns exec ns1 ip addr add $peer_ip4/24 dev veth3 + ip netns exec ns1 ip addr add $peer_ip6/64 dev veth3 + ip addr add $bond_ip4/24 dev bond0 + ip addr add $bond_ip6/64 dev bond0 + sleep 5 + + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 1." + ping6 $peer_ip6 -c5 -I bond0 &>/dev/null + check_err $? "ping6 failed 1." + + # active salve should be the primary slave + check_active_slave veth1 + + # active slave should be the higher prio slave + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 2." + check_active_slave veth2 + + # when only 1 slave is up + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 3." + check_active_slave veth0 + + # when a higher prio slave change to up + ip link set veth2 up + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 4." + case $primary_reselect in + "0") + check_active_slave "veth2" + ;; + "1") + check_active_slave "veth0" + ;; + "2") + check_active_slave "veth0" + ;; + esac + local pre_active_slave=$active_slave + + # when the primary slave change to up + ip link set veth1 up + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 5." + case $primary_reselect in + "0") + check_active_slave "veth1" + ;; + "1") + check_active_slave "$pre_active_slave" + ;; + "2") + check_active_slave "$pre_active_slave" + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 6." + check_active_slave "veth1" + ;; + esac + + # Test changing bond salve prio + if [[ "$primary_reselect" == "0" ]];then + ip link set dev veth0 type bond_slave prio 1000000 + ip link set dev veth1 type bond_slave prio 0 + ip link set dev veth2 type bond_slave prio -50 + ip -d link show veth0 | grep -q 'prio 1000000' + check_err $? "veth0 prio is not 1000000" + ip -d link show veth1 | grep -q 'prio 0' + check_err $? "veth1 prio is not 0" + ip -d link show veth2 | grep -q 'prio -50' + check_err $? "veth3 prio is not -50" + check_active_slave "veth1" + + ip link set $active_slave down + ping $peer_ip4 -c5 -I bond0 &>/dev/null + check_err $? "ping failed 7." + check_active_slave "veth0" + fi + + cleanup + + log_test "prio_test" "Test bonding option 'prio' with mode=$mode monitor=$monitor and primary_reselect=$primary_reselect" +} + +prio_miimon_test() +{ + local mode + local primary_reselect + + for mode in 1 5 6; do + for primary_reselect in 0 1 2; do + prio_test "miimon" $mode $primary_reselect + done + done +} + +prio_arp_ip_target_test() +{ + local primary_reselect + + for primary_reselect in 0 1 2; do + prio_test "arp_ip_target" 1 $primary_reselect + done +} + +if skip;then + log_test_skip "option_prio.sh" "Current iproute doesn't support 'prio'." + exit 0 +fi + +trap cleanup EXIT + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings index 867e118223cd..6091b45d226b 100644 --- a/tools/testing/selftests/drivers/net/bonding/settings +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -1 +1 @@ -timeout=60 +timeout=120 diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh index d3a891d421ab..64153bbf95df 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -83,6 +83,7 @@ ALL_TESTS=" ptp_general_test flow_action_sample_test flow_action_trap_test + eapol_test " NUM_NETIFS=4 source $lib_dir/lib.sh @@ -677,6 +678,27 @@ flow_action_trap_test() tc qdisc del dev $rp1 clsact } +eapol_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:03:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:8E:"$( : ETH type + ) + echo $p +} + +eapol_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \ + $(eapol_payload_get $h1mac) -p 100 -q +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index a4c2812e9807..8d4b2c6265b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -14,6 +14,7 @@ ALL_TESTS=" ingress_stp_filter_test port_list_is_empty_test port_loopback_filter_test + locked_port_test " NUM_NETIFS=4 source $lib_dir/tc_common.sh @@ -420,6 +421,110 @@ port_loopback_filter_test() port_loopback_filter_uc_test } +locked_port_miss_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after adding an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + log_test "Locked port - FDB miss" + + devlink_trap_action_set $trap_name "drop" + bridge link set dev $swp1 learning on +} + +locked_port_mismatch_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + bridge fdb replace $smac dev $swp2 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + bridge link set dev $swp1 locked on + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after replacing an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + devlink_trap_action_set $trap_name "drop" + + log_test "Locked port - FDB mismatch" + + bridge link set dev $swp1 locked off + bridge link set dev $swp1 learning on +} + +locked_port_test() +{ + locked_port_miss_test + locked_port_mismatch_test +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh index f62ce479c266..878125041fc3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -31,7 +31,7 @@ # | 2001:db8:10::2/64 | # +-------------------------+ -lib_dir=$(dirname $0)/../../../../net/forwarding +lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" decap_error_test diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh index 71066bc4b886..5492fa5550d7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh @@ -5,18 +5,18 @@ # prioritized according to the default priority specified at the port. # rx_octets_prio_* counters are used to verify the prioritization. # -# +-----------------------+ -# | H1 | -# | + $h1 | -# | | 192.0.2.1/28 | -# +----|------------------+ +# +----------------------------------+ +# | H1 | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|-----------------------------+ # | -# +----|------------------+ -# | SW | | -# | + $swp1 | -# | 192.0.2.2/28 | -# | APP=<prio>,1,0 | -# +-----------------------+ +# +----|-----------------------------+ +# | SW | | +# | + $swp1 | +# | 192.0.2.2/28 | +# | dcb app default-prio <prio> | +# +----------------------------------+ ALL_TESTS=" ping_ipv4 @@ -29,42 +29,6 @@ NUM_NETIFS=2 : ${HIT_TIMEOUT:=1000} # ms source $lib_dir/lib.sh -declare -a APP - -defprio_install() -{ - local dev=$1; shift - local prio=$1; shift - local app="app=$prio,1,0" - - lldptool -T -i $dev -V APP $app >/dev/null - lldpad_app_wait_set $dev - APP[$prio]=$app -} - -defprio_uninstall() -{ - local dev=$1; shift - local prio=$1; shift - local app=${APP[$prio]} - - lldptool -T -i $dev -V APP -d $app >/dev/null - lldpad_app_wait_del - unset APP[$prio] -} - -defprio_flush() -{ - local dev=$1; shift - local prio - - if ((${#APP[@]})); then - lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null - fi - lldpad_app_wait_del - APP=() -} - h1_create() { simple_if_init $h1 192.0.2.1/28 @@ -83,7 +47,7 @@ switch_create() switch_destroy() { - defprio_flush $swp1 + dcb app flush dev $swp1 default-prio ip addr del dev $swp1 192.0.2.2/28 ip link set dev $swp1 down } @@ -124,7 +88,7 @@ __test_defprio() RET=0 - defprio_install $swp1 $prio_install + dcb app add dev $swp1 default-prio $prio_install local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) mausezahn -q $h1 -d 100m -c 10 -t arp reply @@ -134,7 +98,7 @@ __test_defprio() check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))." log_test "Default priority $prio_install/$prio_observe" - defprio_uninstall $swp1 $prio_install + dcb app del dev $swp1 default-prio $prio_install } test_defprio() @@ -145,7 +109,7 @@ test_defprio() __test_defprio $prio $prio done - defprio_install $swp1 3 + dcb app add dev $swp1 default-prio 3 __test_defprio 0 3 __test_defprio 1 3 __test_defprio 2 3 @@ -153,7 +117,7 @@ test_defprio() __test_defprio 5 5 __test_defprio 6 6 __test_defprio 7 7 - defprio_uninstall $swp1 3 + dcb app del dev $swp1 default-prio 3 } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh index 28a570006d4d..87c41f5727c9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -20,7 +20,7 @@ # | SW | | | # | +-|----------------------------------------------------------------|-+ | # | | + $swp1 BR $swp2 + | | -# | | APP=0,5,10 .. 7,5,17 APP=0,5,20 .. 7,5,27 | | +# | | dcb dscp-prio 10:0...17:7 dcb dscp-prio 20:0...27:7 | | # | +--------------------------------------------------------------------+ | # +---------------------------------------------------------------------------+ @@ -62,16 +62,6 @@ h2_destroy() simple_if_fini $h2 192.0.2.2/28 } -dscp_map() -{ - local base=$1; shift - local prio - - for prio in {0..7}; do - echo app=$prio,5,$((base + prio)) - done -} - switch_create() { ip link add name br1 type bridge vlan_filtering 1 @@ -81,17 +71,14 @@ switch_create() ip link set dev $swp2 master br1 ip link set dev $swp2 up - lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null - lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null - lldpad_app_wait_set $swp1 - lldpad_app_wait_set $swp2 + dcb app add dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 + dcb app add dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7 } switch_destroy() { - lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null - lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null - lldpad_app_wait_del + dcb app del dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7 + dcb app del dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 ip link set dev $swp2 down ip link set dev $swp2 nomaster diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh index 4cb2aa65278a..f6c23f84423e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh @@ -94,16 +94,6 @@ h2_destroy() simple_if_fini $h2 192.0.2.18/28 } -dscp_map() -{ - local base=$1; shift - local prio - - for prio in {0..7}; do - echo app=$prio,5,$((base + prio)) - done -} - switch_create() { simple_if_init $swp1 192.0.2.2/28 @@ -112,17 +102,14 @@ switch_create() tc qdisc add dev $swp1 clsact tc qdisc add dev $swp2 clsact - lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null - lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null - lldpad_app_wait_set $swp1 - lldpad_app_wait_set $swp2 + dcb app add dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 } switch_destroy() { - lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null - lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null - lldpad_app_wait_del + dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + dcb app del dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact @@ -265,13 +252,11 @@ test_dscp_leftover() { echo "Test that last removed DSCP rule is deconfigured correctly" - lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null - lldpad_app_wait_del + dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 __test_update 0 zero - lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null - lldpad_app_wait_set $swp2 + dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 04f03ae9d8fb..5e89657857c7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -34,6 +34,7 @@ ALL_TESTS=" nexthop_obj_bucket_offload_test nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test + bridge_locked_port_test devlink_reload_test " NUM_NETIFS=2 @@ -917,6 +918,36 @@ nexthop_obj_route_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +bridge_locked_port_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link set dev $swp1.10 master br1 + + bridge link set dev $swp1.10 locked on + check_fail $? "managed to set locked flag on a VLAN upper" + + ip link set dev $swp1.10 nomaster + ip link set dev $swp1 master br1 + + bridge link set dev $swp1 locked on + check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper" + + ip link del dev $swp1.10 + bridge link set dev $swp1 locked on + + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_fail $? "managed to configure a VLAN upper on a locked port" + + log_test "bridge locked port" + + ip link del dev $swp1.10 &> /dev/null + ip link del dev br1 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 9de1d123f4f5..7f7d20f22207 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -17,6 +17,18 @@ SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/ DL_HANDLE=netdevsim/$DEV_NAME +wait_for_devlink() +{ + "$@" | grep -q $DL_HANDLE +} + +devlink_wait() +{ + local timeout=$1 + + busywait "$timeout" wait_for_devlink devlink dev +} + fw_flash_test() { RET=0 @@ -256,6 +268,9 @@ netns_reload_test() ip netns del testns2 ip netns del testns1 + # Wait until netns async cleanup is done. + devlink_wait 2000 + log_test "netns reload test" } @@ -348,6 +363,9 @@ resource_test() ip netns del testns2 ip netns del testns1 + # Wait until netns async cleanup is done. + devlink_wait 2000 + log_test "resource test" } @@ -496,8 +514,8 @@ dummy_reporter_test() check_reporter_info dummy healthy 3 3 10 true - echo 8192> $DEBUGFS_DIR/health/binary_len - check_fail $? "Failed set dummy reporter binary len to 8192" + echo 8192 > $DEBUGFS_DIR/health/binary_len + check_err $? "Failed set dummy reporter binary len to 8192" local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) check_err $? "Failed show dump of dummy reporter" diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index 109900c817be..b64d98ca0df7 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -47,6 +47,17 @@ if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then exit 1 fi +check_netdev_down() +{ + state=$(cat /sys/class/net/${NETDEV}/flags) + + if [ $((state & 1)) -ne 0 ]; then + echo "WARNING: unexpected interface UP, disable NetworkManager?" + + ip link set dev $NETDEV down + fi +} + init_test() { RET=0 @@ -151,6 +162,7 @@ trap_stats_test() RET=0 + check_netdev_down for trap_name in $(devlink_traps_get); do devlink_trap_stats_idle_test $trap_name check_err $? "Stats of trap $trap_name not idle when netdev down" @@ -254,6 +266,7 @@ trap_group_stats_test() RET=0 + check_netdev_down for group_name in $(devlink_trap_groups_get); do devlink_trap_group_stats_idle_test $group_name check_err $? "Stats of trap group $group_name not idle when netdev down" diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index 9c79bbcce5a8..aff0a59f92d9 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -246,7 +246,7 @@ test_vlan_ingress_modify() bridge vlan add dev $swp2 vid 300 tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \ - protocol 802.1Q flower skip_sw vlan_id 200 \ + protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \ action vlan modify id 300 \ action goto chain $(IS2 0 0) diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile index 891215a7dc8a..755d164384c4 100644 --- a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile +++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile @@ -11,10 +11,9 @@ else TEST_GEN_PROGS := test_uvdevice top_srcdir ?= ../../../../../.. -khdr_dir = $(top_srcdir)/usr/include LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include -CFLAGS += -Wall -Werror -static -I$(khdr_dir) -I$(LINUX_TOOL_ARCH_INCLUDE) +CFLAGS += -Wall -Werror -static $(KHDR_INCLUDES) -I$(LINUX_TOOL_ARCH_INCLUDE) include ../../../lib.mk diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index a90f394f9aa9..d374878cc0ba 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -87,6 +87,11 @@ test_create_read() { local file=$efivarfs_mount/$FUNCNAME-$test_guid ./create-read $file + if [ $? -ne 0 ]; then + echo "create and read $file failed" + file_cleanup $file + exit 1 + fi file_cleanup $file } diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 129880fb42d3..c647fd6a0446 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := devpts_pts TEST_GEN_PROGS_EXTENDED := dnotify_test diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile index 8af25ae96049..c2f7cef919c0 100644 --- a/tools/testing/selftests/filesystems/binderfs/Makefile +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../../usr/include/ -pthread +CFLAGS += $(KHDR_INCLUDES) -pthread TEST_GEN_PROGS := binderfs_test binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile index 78ae4aaf7141..0788a7dc8004 100644 --- a/tools/testing/selftests/filesystems/epoll/Makefile +++ b/tools/testing/selftests/filesystems/epoll/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) LDLIBS += -lpthread TEST_GEN_PROGS := epoll_wakeup_test diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh index 7f35dc3d15df..7f35dc3d15df 100644..100755 --- a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh +++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index 8d26d5505808..2ad7d4b501cc 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -38,11 +38,18 @@ cnt_trace() { test_event_enabled() { val=$1 + check_times=10 # wait for 10 * SLEEP_TIME at most - e=`cat $EVENT_ENABLE` - if [ "$e" != $val ]; then - fail "Expected $val but found $e" - fi + while [ $check_times -ne 0 ]; do + e=`cat $EVENT_ENABLE` + if [ "$e" = $val ]; then + return 0 + fi + sleep $SLEEP_TIME + check_times=$((check_times - 1)) + done + + fail "Expected $val but found $e" } run_enable_disable() { diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5f6cbec847fc..779f3e62ec90 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -142,9 +142,15 @@ finish_ftrace() { check_requires() { # Check required files and tracers for i in "$@" ; do + p=${i%:program} r=${i%:README} t=${i%:tracer} - if [ $t != $i ]; then + if [ $p != $i ]; then + if ! which $p ; then + echo "Required program $p is not found." + exit_unresolved + fi + elif [ $t != $i ]; then if ! grep -wq $t available_tracers ; then echo "Required tracer $t is not configured." exit_unsupported diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc index f5e3f9e4a01f..c817158b99db 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -23,4 +23,9 @@ check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX fi +# symstr is not supported by uprobe +if grep -q ".*symstr.*" README; then +check_error 'p /bin/sh:10 $stack0:^symstr' # BAD_TYPE +fi + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc index 22bff122b933..ba1038953873 100644 --- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc @@ -46,10 +46,10 @@ cat trace grep -q "tracer: preemptoff" trace || fail # Check the end of the section -egrep -q "5.....us : <stack trace>" trace || fail +grep -E -q "5.....us : <stack trace>" trace || fail # Check for 500ms of latency -egrep -q "latency: 5..... us" trace || fail +grep -E -q "latency: 5..... us" trace || fail reset_tracer @@ -69,10 +69,10 @@ cat trace grep -q "tracer: irqsoff" trace || fail # Check the end of the section -egrep -q "5.....us : <stack trace>" trace || fail +grep -E -q "5.....us : <stack trace>" trace || fail # Check for 500ms of latency -egrep -q "latency: 5..... us" trace || fail +grep -E -q "latency: 5..... us" trace || fail reset_tracer exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc index 11be10e1bf96..e8f0fac9a110 100644 --- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc @@ -1,12 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test wakeup tracer -# requires: wakeup:tracer - -if ! which chrt ; then - echo "chrt is not found. This test requires nice command." - exit_unresolved -fi +# requires: wakeup:tracer chrt:program echo wakeup > current_tracer echo 1 > tracing_on diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc index 3a77198b3c69..79807656785b 100644 --- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc @@ -1,12 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test wakeup RT tracer -# requires: wakeup_rt:tracer - -if ! which chrt ; then - echo "chrt is not found. This test requires chrt command." - exit_unresolved -fi +# requires: wakeup_rt:tracer chrt:program echo wakeup_rt > current_tracer echo 1 > tracing_on diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc index 41119e0440e9..04c5dd7d0acc 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test field variable support -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index 9098f1e7433f..f7447d800899 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event combined histogram trigger -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc index adaabb873ed4..91339c130832 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onchange action -# requires: set_event "onchange(var)":README +# requires: set_event "onchange(var)":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc index 20e39471052e..d645abcf11c4 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmatch action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc index f4b03ab7c287..c369247efb35 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmatch-onmax action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc index 71c9b5911c70..e28dc5f11b2b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onmax action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist +# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc index 67fa328b830f..147967e86584 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger snapshot action -# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README +# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc index 6461c375694f..c2a8ab01e13b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc @@ -14,7 +14,7 @@ FIELD="filename" SYNTH="synth_open" EPROBE="eprobe_open" -echo "$SYNTH u64 filename; s64 ret;" > synthetic_events +echo "$SYNTH unsigned long filename; long ret;" > synthetic_events echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc index 3d65c856eca3..213d890ed188 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action with dynamic string param -# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc index c126d2350a6d..d7312047ce28 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action -# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README +# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 5a0e0df8de9b..a392d0917b4e 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -INCLUDES := -I../include -I../../ -I../../../../../usr/include/ +INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) $(KHDR_INCLUDES) LDLIBS := -lpthread -lrt diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 616ed4019655..e0884390447d 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -3,6 +3,6 @@ TEST_PROGS := gpio-mockup.sh gpio-sim.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name -CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES) +CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES) include ../lib.mk diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 341e3de00896..9f539d454ee4 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -27,7 +27,7 @@ remove_chip() { continue fi - LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | egrep ^line` + LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | grep -E ^line` if [ "$?" = 0 ]; then for LINE in $LINES; do if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore new file mode 100644 index 000000000000..995af0670f69 --- /dev/null +++ b/tools/testing/selftests/hid/.gitignore @@ -0,0 +1,5 @@ +bpftool +*.skel.h +/tools +hid_bpf +results diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile new file mode 100644 index 000000000000..83e8f87d643a --- /dev/null +++ b/tools/testing/selftests/hid/Makefile @@ -0,0 +1,231 @@ +# SPDX-License-Identifier: GPL-2.0 + +# based on tools/testing/selftest/bpf/Makefile +include ../../../build/Build.include +include ../../../scripts/Makefile.arch +include ../../../scripts/Makefile.include + +CXX ?= $(CROSS_COMPILE)g++ + +HOSTPKG_CONFIG := pkg-config + +CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(KHDR_INCLUDES) -I$(OUTPUT) +LDLIBS += -lelf -lz -lrt -lpthread + +# Silence some warnings when compiled with clang +ifneq ($(LLVM),) +CFLAGS += -Wno-unused-command-line-argument +endif + +# Order correspond to 'make run_tests' order +TEST_GEN_PROGS = hid_bpf + +# Emit succinct information message describing current building step +# $1 - generic step name (e.g., CC, LINK, etc); +# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor]; +# $3 - target (assumed to be file); only file name will be emitted; +# $4 - optional extra arg, emitted as-is, if provided. +ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; +MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 +endif + +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 +override define CLEAN + $(call msg,CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) + $(Q)$(RM) -r $(EXTRA_CLEAN) +endef + +include ../lib.mk + +TOOLSDIR := $(top_srcdir)/tools +LIBDIR := $(TOOLSDIR)/lib +BPFDIR := $(LIBDIR)/bpf +TOOLSINCDIR := $(TOOLSDIR)/include +BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +INCLUDE_DIR := $(SCRATCH_DIR)/include +KHDR_INCLUDES := $(SCRATCH_DIR)/uapi/include +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a +ifneq ($(CROSS_COMPILE),) +HOST_BUILD_DIR := $(BUILD_DIR)/host +HOST_SCRATCH_DIR := $(OUTPUT)/host-tools +HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include +else +HOST_BUILD_DIR := $(BUILD_DIR) +HOST_SCRATCH_DIR := $(SCRATCH_DIR) +HOST_INCLUDE_DIR := $(INCLUDE_DIR) +endif +HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a +RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids + +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif + +# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# to build individual tests. +# NOTE: Semicolon at the end is critical to override lib.mk's default static +# rule for binaries. +$(notdir $(TEST_GEN_PROGS)): %: $(OUTPUT)/% ; + +# sort removes libbpf duplicates when not cross-building +MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ + $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ + $(INCLUDE_DIR)) +$(MAKE_DIRS): + $(call msg,MKDIR,,$@) + $(Q)mkdir -p $@ + +# LLVM's ld.lld doesn't support all the architectures, so use it only on x86 +ifeq ($(SRCARCH),x86) +LLD := lld +else +LLD := ld +endif + +DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool + +TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) + +$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) + +BPFTOOL ?= $(DEFAULT_BPFTOOL) +$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ + $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ + EXTRA_CFLAGS='-g -O0' \ + OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ + LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ + LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + | $(BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) +$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + | $(HOST_BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ + EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ + DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers +endif + +$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) +ifeq ($(VMLINUX_H),) + $(call msg,GEN,,$@) + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ +else + $(call msg,CP,,$@) + $(Q)cp "$(VMLINUX_H)" $@ +endif + +$(KHDR_INCLUDES)/linux/hid.h: $(top_srcdir)/include/uapi/linux/hid.h + $(MAKE) -C $(top_srcdir) INSTALL_HDR_PATH=$(SCRATCH_DIR)/uapi headers_install + +$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ + $(TOOLSDIR)/bpf/resolve_btfids/main.c \ + $(TOOLSDIR)/lib/rbtree.c \ + $(TOOLSDIR)/lib/zalloc.c \ + $(TOOLSDIR)/lib/string.c \ + $(TOOLSDIR)/lib/ctype.c \ + $(TOOLSDIR)/lib/str_error_r.c + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ + CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ + OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) + +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +# Determine target endianness. +IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ + grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') +MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ + -I$(INCLUDE_DIR) + +CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ + -Wno-compare-distinct-pointer-types + +# Build BPF object using Clang +# $1 - input .c file +# $2 - output .o file +# $3 - CFLAGS +define CLANG_BPF_BUILD_RULE + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2 +endef +# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 +define CLANG_NOALU32_BPF_BUILD_RULE + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2 +endef +# Build BPF object using GCC +define GCC_BPF_BUILD_RULE + $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) + $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2 +endef + +BPF_PROGS_DIR := progs +BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE +BPF_SRCS := $(notdir $(wildcard $(BPF_PROGS_DIR)/*.c)) +BPF_OBJS := $(patsubst %.c,$(OUTPUT)/%.bpf.o, $(BPF_SRCS)) +BPF_SKELS := $(patsubst %.c,$(OUTPUT)/%.skel.h, $(BPF_SRCS)) +TEST_GEN_FILES += $(BPF_OBJS) + +$(BPF_PROGS_DIR)-bpfobjs := y +$(BPF_OBJS): $(OUTPUT)/%.bpf.o: \ + $(BPF_PROGS_DIR)/%.c \ + $(wildcard $(BPF_PROGS_DIR)/*.h) \ + $(INCLUDE_DIR)/vmlinux.h \ + $(wildcard $(BPFDIR)/hid_bpf_*.h) \ + $(wildcard $(BPFDIR)/*.bpf.h) \ + | $(OUTPUT) $(BPFOBJ) + $(call $(BPF_BUILD_RULE),$<,$@, $(BPF_CFLAGS)) + +$(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT) + $(call msg,GEN-SKEL,$(BINARY),$@) + $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $< + $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@ + +$(OUTPUT)/%.o: %.c $(BPF_SKELS) $(KHDR_INCLUDES)/linux/hid.h + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + +$(OUTPUT)/%: $(OUTPUT)/%.o + $(call msg,BINARY,,$@) + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ + +EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) feature bpftool \ + $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32) diff --git a/tools/testing/selftests/hid/config b/tools/testing/selftests/hid/config new file mode 100644 index 000000000000..9c5a55abca6b --- /dev/null +++ b/tools/testing/selftests/hid/config @@ -0,0 +1,21 @@ +CONFIG_BPF_EVENTS=y +CONFIG_BPFILTER=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT=y +CONFIG_BPF_KPROBE_OVERRIDE=y +CONFIG_BPF_LSM=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF=y +CONFIG_CGROUP_BPF=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y +CONFIG_FPROBE=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_TRACER=y +CONFIG_HIDRAW=y +CONFIG_HID=y +CONFIG_INPUT_EVDEV=y +CONFIG_UHID=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common new file mode 100644 index 000000000000..0617275d93cc --- /dev/null +++ b/tools/testing/selftests/hid/config.common @@ -0,0 +1,241 @@ +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_9P_FS=y +CONFIG_AUDIT=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTTIME_TRACING=y +CONFIG_BSD_DISKLABEL=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_WRITEBACK=y +CONFIG_CMA_AREAS=7 +CONFIG_CMA=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPUSETS=y +CONFIG_CRC_T10DIF=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_XXHASH=y +CONFIG_DCB=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEFAULT_FQ_CODEL=y +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_DMA_CMA=y +CONFIG_DNS_RESOLVER=y +CONFIG_EFI_STUB=y +CONFIG_EFI=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FAULT_INJECTION=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_VESA=y +CONFIG_FB=y +CONFIG_FONT_8x16=y +CONFIG_FONT_MINI_4x6=y +CONFIG_FONTS=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FUSE_FS=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_GART_IOMMU=y +CONFIG_GENERIC_PHY=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HPET=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_HWPOISON_INJECT=y +CONFIG_HZ_1000=y +CONFIG_INET=y +CONFIG_INTEL_POWERCLAMP=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_NAT=y +CONFIG_IP6_NF_TARGET_MASQUERADE=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IRQ_POLL=y +CONFIG_JUMP_LABEL=y +CONFIG_KARMA_PARTITION=y +CONFIG_KEXEC=y +CONFIG_KPROBES=y +CONFIG_KSM=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_LOG_BUF_SHIFT=21 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 +CONFIG_LOGO=y +CONFIG_LSM="selinux,bpf,integrity" +CONFIG_MAC_PARTITION=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MCORE2=y +CONFIG_MEMCG=y +CONFIG_MEMORY_FAILURE=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_MODULES=y +CONFIG_NAMESPACES=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NETDEVICES=y +CONFIG_NET_EMATCH=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NETLABEL=y +CONFIG_NET_SCH_DEFAULT=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NF_NAT=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NO_HZ=y +CONFIG_NR_CPUS=128 +CONFIG_NUMA_BALANCING=y +CONFIG_NUMA=y +CONFIG_NVMEM=y +CONFIG_OSF_PARTITION=y +CONFIG_OVERLAY_FS_INDEX=y +CONFIG_OVERLAY_FS_METACOPY=y +CONFIG_OVERLAY_FS_XINO_AUTO=y +CONFIG_OVERLAY_FS=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_IOV=y +CONFIG_PCI_MSI=y +CONFIG_PCI=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_SCHEDSTATS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIO_LIBPS2=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_SOCK_CGROUP_DATA=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_SYNC_FILE=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_MD5SIG=y +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_UNIX=y +CONFIG_USER_NS=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_VETH=y +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_USER=y +CONFIG_ZEROPLUS_FF=y diff --git a/tools/testing/selftests/hid/config.x86_64 b/tools/testing/selftests/hid/config.x86_64 new file mode 100644 index 000000000000..a8721f403c21 --- /dev/null +++ b/tools/testing/selftests/hid/config.x86_64 @@ -0,0 +1,4 @@ +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_CPUID=y +CONFIG_X86_MSR=y +CONFIG_X86_POWERNOW_K8=y diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c new file mode 100644 index 000000000000..2cf96f818f25 --- /dev/null +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Red Hat */ +#include "hid.skel.h" + +#include "../kselftest_harness.h" + +#include <bpf/bpf.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <dirent.h> +#include <poll.h> +#include <pthread.h> +#include <stdbool.h> +#include <linux/hidraw.h> +#include <linux/uhid.h> + +#define SHOW_UHID_DEBUG 0 + +static unsigned char rdesc[] = { + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x09, 0x21, /* Usage (Vendor Usage 0x21) */ + 0xa1, 0x01, /* COLLECTION (Application) */ + 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ + 0xa1, 0x00, /* COLLECTION (Physical) */ + 0x85, 0x02, /* REPORT_ID (2) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x08, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0xff, /* LOGICAL_MAXIMUM (255) */ + 0x95, 0x08, /* REPORT_COUNT (8) */ + 0x75, 0x08, /* REPORT_SIZE (8) */ + 0x81, 0x02, /* INPUT (Data,Var,Abs) */ + 0xc0, /* END_COLLECTION */ + 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ + 0xa1, 0x00, /* COLLECTION (Physical) */ + 0x85, 0x01, /* REPORT_ID (1) */ + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x81, 0x02, /* INPUT (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */ + 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ + 0x09, 0x30, /* USAGE (X) */ + 0x09, 0x31, /* USAGE (Y) */ + 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */ + 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */ + 0x75, 0x10, /* REPORT_SIZE (16) */ + 0x95, 0x02, /* REPORT_COUNT (2) */ + 0x81, 0x06, /* INPUT (Data,Var,Rel) */ + + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x91, 0x02, /* Output (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x06, /* USAGE_MINIMUM (6) */ + 0x29, 0x08, /* USAGE_MAXIMUM (8) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0xb1, 0x02, /* Feature (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + + 0xc0, /* END_COLLECTION */ + 0xc0, /* END_COLLECTION */ +}; + +static __u8 feature_data[] = { 1, 2 }; + +struct attach_prog_args { + int prog_fd; + unsigned int hid; + int retval; + int insert_head; +}; + +struct hid_hw_request_syscall_args { + __u8 data[10]; + unsigned int hid; + int retval; + size_t size; + enum hid_report_type type; + __u8 request_type; +}; + +#define ASSERT_OK(data) ASSERT_FALSE(data) +#define ASSERT_OK_PTR(ptr) ASSERT_NE(NULL, ptr) + +#define UHID_LOG(fmt, ...) do { \ + if (SHOW_UHID_DEBUG) \ + TH_LOG(fmt, ##__VA_ARGS__); \ +} while (0) + +static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER; + +/* no need to protect uhid_stopped, only one thread accesses it */ +static bool uhid_stopped; + +static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uhid_event *ev) +{ + ssize_t ret; + + ret = write(fd, ev, sizeof(*ev)); + if (ret < 0) { + TH_LOG("Cannot write to uhid: %m"); + return -errno; + } else if (ret != sizeof(*ev)) { + TH_LOG("Wrong size written to uhid: %zd != %zu", + ret, sizeof(ev)); + return -EFAULT; + } else { + return 0; + } +} + +static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) +{ + struct uhid_event ev; + char buf[25]; + + sprintf(buf, "test-uhid-device-%d", rand_nb); + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_CREATE; + strcpy((char *)ev.u.create.name, buf); + ev.u.create.rd_data = rdesc; + ev.u.create.rd_size = sizeof(rdesc); + ev.u.create.bus = BUS_USB; + ev.u.create.vendor = 0x0001; + ev.u.create.product = 0x0a37; + ev.u.create.version = 0; + ev.u.create.country = 0; + + sprintf(buf, "%d", rand_nb); + strcpy((char *)ev.u.create.phys, buf); + + return uhid_write(_metadata, fd, &ev); +} + +static void uhid_destroy(struct __test_metadata *_metadata, int fd) +{ + struct uhid_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_DESTROY; + + uhid_write(_metadata, fd, &ev); +} + +static int uhid_event(struct __test_metadata *_metadata, int fd) +{ + struct uhid_event ev, answer; + ssize_t ret; + + memset(&ev, 0, sizeof(ev)); + ret = read(fd, &ev, sizeof(ev)); + if (ret == 0) { + UHID_LOG("Read HUP on uhid-cdev"); + return -EFAULT; + } else if (ret < 0) { + UHID_LOG("Cannot read uhid-cdev: %m"); + return -errno; + } else if (ret != sizeof(ev)) { + UHID_LOG("Invalid size read from uhid-dev: %zd != %zu", + ret, sizeof(ev)); + return -EFAULT; + } + + switch (ev.type) { + case UHID_START: + pthread_mutex_lock(&uhid_started_mtx); + pthread_cond_signal(&uhid_started); + pthread_mutex_unlock(&uhid_started_mtx); + + UHID_LOG("UHID_START from uhid-dev"); + break; + case UHID_STOP: + uhid_stopped = true; + + UHID_LOG("UHID_STOP from uhid-dev"); + break; + case UHID_OPEN: + UHID_LOG("UHID_OPEN from uhid-dev"); + break; + case UHID_CLOSE: + UHID_LOG("UHID_CLOSE from uhid-dev"); + break; + case UHID_OUTPUT: + UHID_LOG("UHID_OUTPUT from uhid-dev"); + break; + case UHID_GET_REPORT: + UHID_LOG("UHID_GET_REPORT from uhid-dev"); + + answer.type = UHID_GET_REPORT_REPLY; + answer.u.get_report_reply.id = ev.u.get_report.id; + answer.u.get_report_reply.err = ev.u.get_report.rnum == 1 ? 0 : -EIO; + answer.u.get_report_reply.size = sizeof(feature_data); + memcpy(answer.u.get_report_reply.data, feature_data, sizeof(feature_data)); + + uhid_write(_metadata, fd, &answer); + + break; + case UHID_SET_REPORT: + UHID_LOG("UHID_SET_REPORT from uhid-dev"); + break; + default: + TH_LOG("Invalid event from uhid-dev: %u", ev.type); + } + + return 0; +} + +struct uhid_thread_args { + int fd; + struct __test_metadata *_metadata; +}; +static void *uhid_read_events_thread(void *arg) +{ + struct uhid_thread_args *args = (struct uhid_thread_args *)arg; + struct __test_metadata *_metadata = args->_metadata; + struct pollfd pfds[1]; + int fd = args->fd; + int ret = 0; + + pfds[0].fd = fd; + pfds[0].events = POLLIN; + + uhid_stopped = false; + + while (!uhid_stopped) { + ret = poll(pfds, 1, 100); + if (ret < 0) { + TH_LOG("Cannot poll for fds: %m"); + break; + } + if (pfds[0].revents & POLLIN) { + ret = uhid_event(_metadata, fd); + if (ret) + break; + } + } + + return (void *)(long)ret; +} + +static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid, int uhid_fd) +{ + struct uhid_thread_args args = { + .fd = uhid_fd, + ._metadata = _metadata, + }; + int err; + + pthread_mutex_lock(&uhid_started_mtx); + err = pthread_create(tid, NULL, uhid_read_events_thread, (void *)&args); + ASSERT_EQ(0, err) { + TH_LOG("Could not start the uhid thread: %d", err); + pthread_mutex_unlock(&uhid_started_mtx); + close(uhid_fd); + return -EIO; + } + pthread_cond_wait(&uhid_started, &uhid_started_mtx); + pthread_mutex_unlock(&uhid_started_mtx); + + return 0; +} + +static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, size_t size) +{ + struct uhid_event ev; + + if (size > sizeof(ev.u.input.data)) + return -E2BIG; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_INPUT2; + ev.u.input2.size = size; + + memcpy(ev.u.input2.data, buf, size); + + return uhid_write(_metadata, fd, &ev); +} + +static int setup_uhid(struct __test_metadata *_metadata, int rand_nb) +{ + int fd; + const char *path = "/dev/uhid"; + int ret; + + fd = open(path, O_RDWR | O_CLOEXEC); + ASSERT_GE(fd, 0) TH_LOG("open uhid-cdev failed; %d", fd); + + ret = uhid_create(_metadata, fd, rand_nb); + ASSERT_EQ(0, ret) { + TH_LOG("create uhid device failed: %d", ret); + close(fd); + } + + return fd; +} + +static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *dir) +{ + const char *target = "0003:0001:0A37.*"; + char phys[512]; + char uevent[1024]; + char temp[512]; + int fd, nread; + bool found = false; + + if (fnmatch(target, dir->d_name, 0)) + return false; + + /* we found the correct VID/PID, now check for phys */ + sprintf(uevent, "%s/%s/uevent", workdir, dir->d_name); + + fd = open(uevent, O_RDONLY | O_NONBLOCK); + if (fd < 0) + return false; + + sprintf(phys, "PHYS=%d", dev_id); + + nread = read(fd, temp, ARRAY_SIZE(temp)); + if (nread > 0 && (strstr(temp, phys)) != NULL) + found = true; + + close(fd); + + return found; +} + +static int get_hid_id(int dev_id) +{ + const char *workdir = "/sys/devices/virtual/misc/uhid"; + const char *str_id; + DIR *d; + struct dirent *dir; + int found = -1, attempts = 3; + + /* it would be nice to be able to use nftw, but the no_alu32 target doesn't support it */ + + while (found < 0 && attempts > 0) { + attempts--; + d = opendir(workdir); + if (d) { + while ((dir = readdir(d)) != NULL) { + if (!match_sysfs_device(dev_id, workdir, dir)) + continue; + + str_id = dir->d_name + sizeof("0003:0001:0A37."); + found = (int)strtol(str_id, NULL, 16); + + break; + } + closedir(d); + } + if (found < 0) + usleep(100000); + } + + return found; +} + +static int get_hidraw(int dev_id) +{ + const char *workdir = "/sys/devices/virtual/misc/uhid"; + char sysfs[1024]; + DIR *d, *subd; + struct dirent *dir, *subdir; + int i, found = -1; + + /* retry 5 times in case the system is loaded */ + for (i = 5; i > 0; i--) { + usleep(10); + d = opendir(workdir); + + if (!d) + continue; + + while ((dir = readdir(d)) != NULL) { + if (!match_sysfs_device(dev_id, workdir, dir)) + continue; + + sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name); + + subd = opendir(sysfs); + if (!subd) + continue; + + while ((subdir = readdir(subd)) != NULL) { + if (fnmatch("hidraw*", subdir->d_name, 0)) + continue; + + found = atoi(subdir->d_name + strlen("hidraw")); + } + + closedir(subd); + + if (found > 0) + break; + } + closedir(d); + } + + return found; +} + +static int open_hidraw(int dev_id) +{ + int hidraw_number; + char hidraw_path[64] = { 0 }; + + hidraw_number = get_hidraw(dev_id); + if (hidraw_number < 0) + return hidraw_number; + + /* open hidraw node to check the other side of the pipe */ + sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number); + return open(hidraw_path, O_RDWR | O_NONBLOCK); +} + +FIXTURE(hid_bpf) { + int dev_id; + int uhid_fd; + int hidraw_fd; + int hid_id; + pthread_t tid; + struct hid *skel; + int hid_links[3]; /* max number of programs loaded in a single test */ +}; +static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) +{ + int i; + + if (self->hidraw_fd) + close(self->hidraw_fd); + self->hidraw_fd = 0; + + for (i = 0; i < ARRAY_SIZE(self->hid_links); i++) { + if (self->hid_links[i]) + close(self->hid_links[i]); + } + + hid__destroy(self->skel); + self->skel = NULL; +} + +FIXTURE_TEARDOWN(hid_bpf) { + void *uhid_err; + + uhid_destroy(_metadata, self->uhid_fd); + + detach_bpf(self); + pthread_join(self->tid, &uhid_err); +} +#define TEARDOWN_LOG(fmt, ...) do { \ + TH_LOG(fmt, ##__VA_ARGS__); \ + hid_bpf_teardown(_metadata, self, variant); \ +} while (0) + +FIXTURE_SETUP(hid_bpf) +{ + time_t t; + int err; + + /* initialize random number generator */ + srand((unsigned int)time(&t)); + + self->dev_id = rand() % 1024; + + self->uhid_fd = setup_uhid(_metadata, self->dev_id); + + /* locate the uev, self, variant);ent file of the created device */ + self->hid_id = get_hid_id(self->dev_id); + ASSERT_GT(self->hid_id, 0) + TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id); + + err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd); + ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err); +} + +struct test_program { + const char *name; + int insert_head; +}; +#define LOAD_PROGRAMS(progs) \ + load_programs(progs, ARRAY_SIZE(progs), _metadata, self, variant) +#define LOAD_BPF \ + load_programs(NULL, 0, _metadata, self, variant) +static void load_programs(const struct test_program programs[], + const size_t progs_count, + struct __test_metadata *_metadata, + FIXTURE_DATA(hid_bpf) * self, + const FIXTURE_VARIANT(hid_bpf) * variant) +{ + int attach_fd, err = -EINVAL; + struct attach_prog_args args = { + .retval = -1, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr, + .ctx_in = &args, + .ctx_size_in = sizeof(args), + ); + + ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) + TH_LOG("too many programs are to be loaded"); + + /* open the bpf file */ + self->skel = hid__open(); + ASSERT_OK_PTR(self->skel) TEARDOWN_LOG("Error while calling hid__open"); + + for (int i = 0; i < progs_count; i++) { + struct bpf_program *prog; + + prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj, + programs[i].name); + ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name); + + bpf_program__set_autoload(prog, true); + } + + err = hid__load(self->skel); + ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); + + attach_fd = bpf_program__fd(self->skel->progs.attach_prog); + ASSERT_GE(attach_fd, 0) TH_LOG("locate attach_prog: %d", attach_fd); + + for (int i = 0; i < progs_count; i++) { + struct bpf_program *prog; + + prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj, + programs[i].name); + ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name); + + args.prog_fd = bpf_program__fd(prog); + args.hid = self->hid_id; + args.insert_head = programs[i].insert_head; + err = bpf_prog_test_run_opts(attach_fd, &tattr); + ASSERT_GE(args.retval, 0) + TH_LOG("attach_hid(%s): %d", programs[i].name, args.retval); + + self->hid_links[i] = args.retval; + } + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); +} + +/* + * A simple test to see if the fixture is working fine. + * If this fails, none of the other tests will pass. + */ +TEST_F(hid_bpf, test_create_uhid) +{ +} + +/* + * Attach hid_first_event to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can change the data + */ +TEST_F(hid_bpf, raw_event) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* check that the program is correctly loaded */ + ASSERT_EQ(self->skel->data->callback_check, 52) TH_LOG("callback_check1"); + ASSERT_EQ(self->skel->data->callback2_check, 52) TH_LOG("callback2_check1"); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* check that hid_first_event() was executed */ + ASSERT_EQ(self->skel->data->callback_check, 42) TH_LOG("callback_check1"); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[2], 47); + + /* inject another event */ + memset(buf, 0, sizeof(buf)); + buf[0] = 1; + buf[1] = 47; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* check that hid_first_event() was executed */ + ASSERT_EQ(self->skel->data->callback_check, 47) TH_LOG("callback_check1"); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[2], 52); +} + +/* + * Ensures that we can attach/detach programs + */ +TEST_F(hid_bpf, test_attach_detach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + { .name = "hid_second_event" }, + }; + __u8 buf[10] = {0}; + int err, link; + + LOAD_PROGRAMS(progs); + + link = self->hid_links[0]; + ASSERT_GT(link, 0) TH_LOG("HID-BPF link not created"); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[2], 47); + + /* make sure both programs are run */ + ASSERT_EQ(buf[3], 52); + + /* pin the first program and immediately unpin it */ +#define PIN_PATH "/sys/fs/bpf/hid_first_event" + err = bpf_obj_pin(link, PIN_PATH); + ASSERT_OK(err) TH_LOG("error while calling bpf_obj_pin"); + remove(PIN_PATH); +#undef PIN_PATH + usleep(100000); + + /* detach the program */ + detach_bpf(self); + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); + + /* inject another event */ + memset(buf, 0, sizeof(buf)); + buf[0] = 1; + buf[1] = 47; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw_no_bpf"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 47); + ASSERT_EQ(buf[2], 0); + ASSERT_EQ(buf[3], 0); + + /* re-attach our program */ + + LOAD_PROGRAMS(progs); + + /* inject one event */ + memset(buf, 0, sizeof(buf)); + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[2], 47); + ASSERT_EQ(buf[3], 52); +} + +/* + * Attach hid_change_report_id to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can change the data + */ +TEST_F(hid_bpf, test_hid_change_report) +{ + const struct test_program progs[] = { + { .name = "hid_change_report_id" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 42); + ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test"); +} + +/* + * Attach hid_user_raw_request to the given uhid device, + * call the bpf program from userspace + * check that the program is called and does the expected. + */ +TEST_F(hid_bpf, test_hid_user_raw_request_call) +{ + struct hid_hw_request_syscall_args args = { + .retval = -1, + .type = HID_FEATURE_REPORT, + .request_type = HID_REQ_GET_REPORT, + .size = 10, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs, + .ctx_in = &args, + .ctx_size_in = sizeof(args), + ); + int err, prog_fd; + + LOAD_BPF; + + args.hid = self->hid_id; + args.data[0] = 1; /* report ID */ + + prog_fd = bpf_program__fd(self->skel->progs.hid_user_raw_request); + + err = bpf_prog_test_run_opts(prog_fd, &tattrs); + ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts"); + + ASSERT_EQ(args.retval, 2); + + ASSERT_EQ(args.data[1], 2); +} + +/* + * Attach hid_insert{0,1,2} to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the programs have been inserted in the correct order. + */ +TEST_F(hid_bpf, test_hid_attach_flags) +{ + const struct test_program progs[] = { + { + .name = "hid_test_insert2", + .insert_head = 0, + }, + { + .name = "hid_test_insert1", + .insert_head = 1, + }, + { + .name = "hid_test_insert3", + .insert_head = 0, + }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[1], 1); + ASSERT_EQ(buf[2], 2); + ASSERT_EQ(buf[3], 3); +} + +/* + * Attach hid_rdesc_fixup to the given uhid device, + * retrieve and open the matching hidraw node, + * check that the hidraw report descriptor has been updated. + */ +TEST_F(hid_bpf, test_rdesc_fixup) +{ + struct hidraw_report_descriptor rpt_desc = {0}; + const struct test_program progs[] = { + { .name = "hid_rdesc_fixup" }, + }; + int err, desc_size; + + LOAD_PROGRAMS(progs); + + /* check that hid_rdesc_fixup() was executed */ + ASSERT_EQ(self->skel->data->callback2_check, 0x21); + + /* read the exposed report descriptor from hidraw */ + err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size); + ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGRDESCSIZE: %d", err); + + /* ensure the new size of the rdesc is bigger than the old one */ + ASSERT_GT(desc_size, sizeof(rdesc)); + + rpt_desc.size = desc_size; + err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &rpt_desc); + ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGRDESC: %d", err); + + ASSERT_EQ(rpt_desc.value[4], 0x42); +} + +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + char buf[1024]; + + if (level == LIBBPF_DEBUG) + return 0; + + snprintf(buf, sizeof(buf), "# %s", format); + + vfprintf(stdout, buf, args); + return 0; +} + +static void __attribute__((constructor)) __constructor_order_last(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; +} + +int main(int argc, char **argv) +{ + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c new file mode 100644 index 000000000000..88c593f753b5 --- /dev/null +++ b/tools/testing/selftests/hid/progs/hid.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Red hat */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "hid_bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct attach_prog_args { + int prog_fd; + unsigned int hid; + int retval; + int insert_head; +}; + +__u64 callback_check = 52; +__u64 callback2_check = 52; + +SEC("?fmod_ret/hid_bpf_device_event") +int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); + + if (!rw_data) + return 0; /* EPERM check */ + + callback_check = rw_data[1]; + + rw_data[2] = rw_data[1] + 5; + + return hid_ctx->size; +} + +SEC("?fmod_ret/hid_bpf_device_event") +int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); + + if (!rw_data) + return 0; /* EPERM check */ + + rw_data[3] = rw_data[2] + 5; + + return hid_ctx->size; +} + +SEC("?fmod_ret/hid_bpf_device_event") +int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); + + if (!rw_data) + return 0; /* EPERM check */ + + rw_data[0] = 2; + + return 9; +} + +SEC("syscall") +int attach_prog(struct attach_prog_args *ctx) +{ + ctx->retval = hid_bpf_attach_prog(ctx->hid, + ctx->prog_fd, + ctx->insert_head ? HID_BPF_FLAG_INSERT_HEAD : + HID_BPF_FLAG_NONE); + return 0; +} + +struct hid_hw_request_syscall_args { + /* data needs to come at offset 0 so we can use it in calls */ + __u8 data[10]; + unsigned int hid; + int retval; + size_t size; + enum hid_report_type type; + __u8 request_type; +}; + +SEC("syscall") +int hid_user_raw_request(struct hid_hw_request_syscall_args *args) +{ + struct hid_bpf_ctx *ctx; + const size_t size = args->size; + int i, ret = 0; + + if (size > sizeof(args->data)) + return -7; /* -E2BIG */ + + ctx = hid_bpf_allocate_context(args->hid); + if (!ctx) + return -1; /* EPERM check */ + + ret = hid_bpf_hw_request(ctx, + args->data, + size, + args->type, + args->request_type); + args->retval = ret; + + hid_bpf_release_context(ctx); + + return 0; +} + +static const __u8 rdesc[] = { + 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ + 0x09, 0x32, /* USAGE (Z) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x81, 0x06, /* INPUT (Data,Var,Rel) */ + + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x91, 0x02, /* Output (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x06, /* USAGE_MINIMUM (6) */ + 0x29, 0x08, /* USAGE_MAXIMUM (8) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0xb1, 0x02, /* Feature (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + + 0xc0, /* END_COLLECTION */ + 0xc0, /* END_COLLECTION */ +}; + +SEC("?fmod_ret/hid_bpf_rdesc_fixup") +int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4096 /* size */); + + if (!data) + return 0; /* EPERM check */ + + callback2_check = data[4]; + + /* insert rdesc at offset 73 */ + __builtin_memcpy(&data[73], rdesc, sizeof(rdesc)); + + /* Change Usage Vendor globally */ + data[4] = 0x42; + + return sizeof(rdesc) + 73; +} + +SEC("?fmod_ret/hid_bpf_device_event") +int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* we need to be run first */ + if (data[2] || data[3]) + return -1; + + data[1] = 1; + + return 0; +} + +SEC("?fmod_ret/hid_bpf_device_event") +int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* after insert0 and before insert2 */ + if (!data[1] || data[3]) + return -1; + + data[2] = 2; + + return 0; +} + +SEC("?fmod_ret/hid_bpf_device_event") +int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* at the end */ + if (!data[1] || !data[2]) + return -1; + + data[3] = 3; + + return 0; +} diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h new file mode 100644 index 000000000000..4fff31dbe0e7 --- /dev/null +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2022 Benjamin Tissoires + */ + +#ifndef __HID_BPF_HELPERS_H +#define __HID_BPF_HELPERS_H + +/* following are kfuncs exported by HID for HID-BPF */ +extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, + unsigned int offset, + const size_t __sz) __ksym; +extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym; +extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; +extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; +extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, + __u8 *data, + size_t buf__sz, + enum hid_report_type type, + enum hid_class_request reqtype) __ksym; + +#endif /* __HID_BPF_HELPERS_H */ diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh new file mode 100755 index 000000000000..90f34150f257 --- /dev/null +++ b/tools/testing/selftests/hid/vmtest.sh @@ -0,0 +1,284 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -u +set -e + +# This script currently only works for x86_64 +ARCH="$(uname -m)" +case "${ARCH}" in +x86_64) + QEMU_BINARY=qemu-system-x86_64 + BZIMAGE="arch/x86/boot/bzImage" + ;; +*) + echo "Unsupported architecture" + exit 1 + ;; +esac +DEFAULT_COMMAND="./hid_bpf" +SCRIPT_DIR="$(dirname $(realpath $0))" +OUTPUT_DIR="$SCRIPT_DIR/results" +KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}") +B2C_URL="https://gitlab.freedesktop.org/mupuf/boot2container/-/raw/master/vm2c.py" +NUM_COMPILE_JOBS="$(nproc)" +LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")" +LOG_FILE="${LOG_FILE_BASE}.log" +EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status" +CONTAINER_IMAGE="registry.fedoraproject.org/fedora:36" + +usage() +{ + cat <<EOF +Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>] + +<command> is the command you would normally run when you are in +tools/testing/selftests/bpf. e.g: + + $0 -- ./hid_bpf + +If no command is specified and a debug shell (-s) is not requested, +"${DEFAULT_COMMAND}" will be run by default. + +If you build your kernel using KBUILD_OUTPUT= or O= options, these +can be passed as environment variables to the script: + + O=<kernel_build_path> $0 -- ./hid_bpf + +or + + KBUILD_OUTPUT=<kernel_build_path> $0 -- ./hid_bpf + +Options: + + -u) Update the boot2container script to a newer version. + -d) Update the output directory (default: ${OUTPUT_DIR}) + -j) Number of jobs for compilation, similar to -j in make + (default: ${NUM_COMPILE_JOBS}) + -s) Instead of powering off the VM, start an interactive + shell. If <command> is specified, the shell runs after + the command finishes executing +EOF +} + +download() +{ + local file="$1" + + echo "Downloading $file..." >&2 + curl -Lsf "$file" -o "${@:2}" +} + +recompile_kernel() +{ + local kernel_checkout="$1" + local make_command="$2" + + cd "${kernel_checkout}" + + ${make_command} olddefconfig + ${make_command} +} + +update_selftests() +{ + local kernel_checkout="$1" + local selftests_dir="${kernel_checkout}/tools/testing/selftests/hid" + + cd "${selftests_dir}" + ${make_command} +} + +run_vm() +{ + local b2c="$1" + local kernel_bzimage="$2" + local command="$3" + local post_command="" + + if ! which "${QEMU_BINARY}" &> /dev/null; then + cat <<EOF +Could not find ${QEMU_BINARY} +Please install qemu or set the QEMU_BINARY environment variable. +EOF + exit 1 + fi + + # alpine (used in post-container requires the PATH to have /bin + export PATH=$PATH:/bin + + if [[ "${debug_shell}" != "yes" ]] + then + touch ${OUTPUT_DIR}/${LOG_FILE} + command="mount bpffs -t bpf /sys/fs/bpf/; set -o pipefail ; ${command} 2>&1 | tee ${OUTPUT_DIR}/${LOG_FILE}" + post_command="cat ${OUTPUT_DIR}/${LOG_FILE}" + else + command="mount bpffs -t bpf /sys/fs/bpf/; ${command}" + fi + + set +e + $b2c --command "${command}" \ + --kernel ${kernel_bzimage} \ + --workdir ${OUTPUT_DIR} \ + --image ${CONTAINER_IMAGE} + + echo $? > ${OUTPUT_DIR}/${EXIT_STATUS_FILE} + + set -e + + ${post_command} +} + +is_rel_path() +{ + local path="$1" + + [[ ${path:0:1} != "/" ]] +} + +do_update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + rm -f "$kconfig_file" 2> /dev/null + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${config}" + cat "$kconfig_src" >> "$kconfig_file" + done +} + +update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + if [[ -f "${kconfig_file}" ]]; then + local local_modified="$(stat -c %Y "${kconfig_file}")" + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${config}" + local src_modified="$(stat -c %Y "${kconfig_src}")" + # Only update the config if it has been updated after the + # previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ "${src_modified}" -gt "${local_modified}" ]]; then + do_update_kconfig "$kernel_checkout" "$kconfig_file" + # Once we have found one outdated configuration + # there is no need to check other ones. + break + fi + done + else + do_update_kconfig "$kernel_checkout" "$kconfig_file" + fi +} + +main() +{ + local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" + local kernel_checkout=$(realpath "${script_dir}"/../../../../) + # By default the script searches for the kernel in the checkout directory but + # it also obeys environment variables O= and KBUILD_OUTPUT= + local kernel_bzimage="${kernel_checkout}/${BZIMAGE}" + local command="${DEFAULT_COMMAND}" + local update_b2c="no" + local debug_shell="no" + + while getopts ':hsud:j:' opt; do + case ${opt} in + u) + update_b2c="yes" + ;; + d) + OUTPUT_DIR="$OPTARG" + ;; + j) + NUM_COMPILE_JOBS="$OPTARG" + ;; + s) + command="/bin/sh" + debug_shell="yes" + ;; + h) + usage + exit 0 + ;; + \? ) + echo "Invalid Option: -$OPTARG" + usage + exit 1 + ;; + : ) + echo "Invalid Option: -$OPTARG requires an argument" + usage + exit 1 + ;; + esac + done + shift $((OPTIND -1)) + + # trap 'catch "$?"' EXIT + + if [[ "${debug_shell}" == "no" ]]; then + if [[ $# -eq 0 ]]; then + echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" + else + command="$@" + + if [[ "${command}" == "/bin/bash" || "${command}" == "bash" ]] + then + debug_shell="yes" + fi + fi + fi + + local kconfig_file="${OUTPUT_DIR}/latest.config" + local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}" + + # Figure out where the kernel is being built. + # O takes precedence over KBUILD_OUTPUT. + if [[ "${O:=""}" != "" ]]; then + if is_rel_path "${O}"; then + O="$(realpath "${PWD}/${O}")" + fi + kernel_bzimage="${O}/${BZIMAGE}" + make_command="${make_command} O=${O}" + elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then + if is_rel_path "${KBUILD_OUTPUT}"; then + KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" + fi + kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}" + make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}" + fi + + local b2c="${OUTPUT_DIR}/vm2c.py" + + echo "Output directory: ${OUTPUT_DIR}" + + mkdir -p "${OUTPUT_DIR}" + update_kconfig "${kernel_checkout}" "${kconfig_file}" + + recompile_kernel "${kernel_checkout}" "${make_command}" + + if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then + echo "vm2c script not found in ${b2c}" + update_b2c="yes" + fi + + if [[ "${update_b2c}" == "yes" ]]; then + download $B2C_URL $b2c + chmod +x $b2c + fi + + update_selftests "${kernel_checkout}" "${make_command}" + run_vm $b2c "${kernel_bzimage}" "${command}" + if [[ "${debug_shell}" != "yes" ]]; then + echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" + fi + + exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE}) +} + +main "$@" diff --git a/tools/testing/selftests/iommu/.gitignore b/tools/testing/selftests/iommu/.gitignore new file mode 100644 index 000000000000..7d0703049eba --- /dev/null +++ b/tools/testing/selftests/iommu/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +/iommufd +/iommufd_fail_nth diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile new file mode 100644 index 000000000000..32c5fdfd0eef --- /dev/null +++ b/tools/testing/selftests/iommu/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -Wall -O2 -Wno-unused-function +CFLAGS += $(KHDR_INCLUDES) + +CFLAGS += -D_GNU_SOURCE + +TEST_GEN_PROGS := +TEST_GEN_PROGS += iommufd +TEST_GEN_PROGS += iommufd_fail_nth + +include ../lib.mk diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config new file mode 100644 index 000000000000..6c4f901d6fed --- /dev/null +++ b/tools/testing/selftests/iommu/config @@ -0,0 +1,2 @@ +CONFIG_IOMMUFD +CONFIG_IOMMUFD_TEST diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c new file mode 100644 index 000000000000..8aa8a346cf22 --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd.c @@ -0,0 +1,1654 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/eventfd.h> + +#define __EXPORTED_HEADERS__ +#include <linux/vfio.h> + +#include "iommufd_utils.h" + +static void *buffer; + +static unsigned long PAGE_SIZE; +static unsigned long HUGEPAGE_SIZE; + +#define MOCK_PAGE_SIZE (PAGE_SIZE / 2) + +static unsigned long get_huge_page_size(void) +{ + char buf[80]; + int ret; + int fd; + + fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", + O_RDONLY); + if (fd < 0) + return 2 * 1024 * 1024; + + ret = read(fd, buf, sizeof(buf)); + close(fd); + if (ret <= 0 || ret == sizeof(buf)) + return 2 * 1024 * 1024; + buf[ret] = 0; + return strtoul(buf, NULL, 10); +} + +static __attribute__((constructor)) void setup_sizes(void) +{ + void *vrc; + int rc; + + PAGE_SIZE = sysconf(_SC_PAGE_SIZE); + HUGEPAGE_SIZE = get_huge_page_size(); + + BUFFER_SIZE = PAGE_SIZE * 16; + rc = posix_memalign(&buffer, HUGEPAGE_SIZE, BUFFER_SIZE); + assert(!rc); + assert(buffer); + assert((uintptr_t)buffer % HUGEPAGE_SIZE == 0); + vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + assert(vrc == buffer); +} + +FIXTURE(iommufd) +{ + int fd; +}; + +FIXTURE_SETUP(iommufd) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); +} + +FIXTURE_TEARDOWN(iommufd) +{ + teardown_iommufd(self->fd, _metadata); +} + +TEST_F(iommufd, simple_close) +{ +} + +TEST_F(iommufd, cmd_fail) +{ + struct iommu_destroy cmd = { .size = sizeof(cmd), .id = 0 }; + + /* object id is invalid */ + EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, 0)); + /* Bad pointer */ + EXPECT_ERRNO(EFAULT, ioctl(self->fd, IOMMU_DESTROY, NULL)); + /* Unknown ioctl */ + EXPECT_ERRNO(ENOTTY, + ioctl(self->fd, _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE - 1), + &cmd)); +} + +TEST_F(iommufd, cmd_length) +{ +#define TEST_LENGTH(_struct, _ioctl) \ + { \ + struct { \ + struct _struct cmd; \ + uint8_t extra; \ + } cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \ + .extra = UINT8_MAX }; \ + int old_errno; \ + int rc; \ + \ + EXPECT_ERRNO(EINVAL, ioctl(self->fd, _ioctl, &cmd)); \ + cmd.cmd.size = sizeof(struct _struct) + 1; \ + EXPECT_ERRNO(E2BIG, ioctl(self->fd, _ioctl, &cmd)); \ + cmd.cmd.size = sizeof(struct _struct); \ + rc = ioctl(self->fd, _ioctl, &cmd); \ + old_errno = errno; \ + cmd.cmd.size = sizeof(struct _struct) + 1; \ + cmd.extra = 0; \ + if (rc) { \ + EXPECT_ERRNO(old_errno, \ + ioctl(self->fd, _ioctl, &cmd)); \ + } else { \ + ASSERT_EQ(0, ioctl(self->fd, _ioctl, &cmd)); \ + } \ + } + + TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); + TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); + TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); + TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); + TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP); + TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY); + TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP); + TEST_LENGTH(iommu_option, IOMMU_OPTION); + TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS); +#undef TEST_LENGTH +} + +TEST_F(iommufd, cmd_ex_fail) +{ + struct { + struct iommu_destroy cmd; + __u64 future; + } cmd = { .cmd = { .size = sizeof(cmd), .id = 0 } }; + + /* object id is invalid and command is longer */ + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* future area is non-zero */ + cmd.future = 1; + EXPECT_ERRNO(E2BIG, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* Original command "works" */ + cmd.cmd.size = sizeof(cmd.cmd); + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd)); + /* Short command fails */ + cmd.cmd.size = sizeof(cmd.cmd) - 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_DESTROY, &cmd)); +} + +TEST_F(iommufd, global_options) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_GET, + .val64 = 1, + }; + + cmd.option_id = IOMMU_OPTION_RLIMIT_MODE; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(0, cmd.val64); + + /* This requires root */ + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + cmd.val64 = 2; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(1, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + cmd.option_id = IOMMU_OPTION_HUGE_PAGES; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); + cmd.op = IOMMU_OPTION_OP_SET; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); +} + +FIXTURE(iommufd_ioas) +{ + int fd; + uint32_t ioas_id; + uint32_t domain_id; + uint64_t base_iova; +}; + +FIXTURE_VARIANT(iommufd_ioas) +{ + unsigned int mock_domains; + unsigned int memory_limit; +}; + +FIXTURE_SETUP(iommufd_ioas) +{ + unsigned int i; + + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + if (!variant->memory_limit) { + test_ioctl_set_default_memory_limit(); + } else { + test_ioctl_set_temp_memory_limit(variant->memory_limit); + } + + for (i = 0; i != variant->mock_domains; i++) { + test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_id); + self->base_iova = MOCK_APERTURE_START; + } +} + +FIXTURE_TEARDOWN(iommufd_ioas) +{ + test_ioctl_set_default_memory_limit(); + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain) +{ +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain) +{ + .mock_domains = 1, +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain) +{ + .mock_domains = 2, +}; + +FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain_limit) +{ + .mock_domains = 1, + .memory_limit = 16, +}; + +TEST_F(iommufd_ioas, ioas_auto_destroy) +{ +} + +TEST_F(iommufd_ioas, ioas_destroy) +{ + if (self->domain_id) { + /* IOAS cannot be freed while a domain is on it */ + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->ioas_id)); + } else { + /* Can allocate and manually free an IOAS table */ + test_ioctl_destroy(self->ioas_id); + } +} + +TEST_F(iommufd_ioas, ioas_area_destroy) +{ + /* Adding an area does not change ability to destroy */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova); + if (self->domain_id) + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->ioas_id)); + else + test_ioctl_destroy(self->ioas_id); +} + +TEST_F(iommufd_ioas, ioas_area_auto_destroy) +{ + int i; + + /* Can allocate and automatically free an IOAS table with many areas */ + for (i = 0; i != 10; i++) { + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, + self->base_iova + i * PAGE_SIZE); + } +} + +TEST_F(iommufd_ioas, area) +{ + int i; + + /* Unmap fails if nothing is mapped */ + for (i = 0; i != 10; i++) + test_err_ioctl_ioas_unmap(ENOENT, i * PAGE_SIZE, PAGE_SIZE); + + /* Unmap works */ + for (i = 0; i != 10; i++) + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, + self->base_iova + i * PAGE_SIZE); + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(self->base_iova + i * PAGE_SIZE, + PAGE_SIZE); + + /* Split fails */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE * 2, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 16 * PAGE_SIZE, + PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 17 * PAGE_SIZE, + PAGE_SIZE); + + /* Over map fails */ + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE, + self->base_iova + 16 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE, + self->base_iova + 17 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2, + self->base_iova + 15 * PAGE_SIZE); + test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 3, + self->base_iova + 15 * PAGE_SIZE); + + /* unmap all works */ + test_ioctl_ioas_unmap(0, UINT64_MAX); + + /* Unmap all succeeds on an empty IOAS */ + test_ioctl_ioas_unmap(0, UINT64_MAX); +} + +TEST_F(iommufd_ioas, unmap_fully_contained_areas) +{ + uint64_t unmap_len; + int i; + + /* Give no_domain some space to rewind base_iova */ + self->base_iova += 4 * PAGE_SIZE; + + for (i = 0; i != 4; i++) + test_ioctl_ioas_map_fixed(buffer, 8 * PAGE_SIZE, + self->base_iova + i * 16 * PAGE_SIZE); + + /* Unmap not fully contained area doesn't work */ + test_err_ioctl_ioas_unmap(ENOENT, self->base_iova - 4 * PAGE_SIZE, + 8 * PAGE_SIZE); + test_err_ioctl_ioas_unmap(ENOENT, + self->base_iova + 3 * 16 * PAGE_SIZE + + 8 * PAGE_SIZE - 4 * PAGE_SIZE, + 8 * PAGE_SIZE); + + /* Unmap fully contained areas works */ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, + self->base_iova - 4 * PAGE_SIZE, + 3 * 16 * PAGE_SIZE + 8 * PAGE_SIZE + + 4 * PAGE_SIZE, + &unmap_len)); + ASSERT_EQ(32 * PAGE_SIZE, unmap_len); +} + +TEST_F(iommufd_ioas, area_auto_iova) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE * 4, + .length = PAGE_SIZE * 100 }, + }; + struct iommu_iova_range ranges[1] = {}; + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = self->ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + __u64 iovas[10]; + int i; + + /* Simple 4k pages */ + for (i = 0; i != 10; i++) + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iovas[i]); + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE); + + /* Kernel automatically aligns IOVAs properly */ + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + if (self->domain_id) { + test_ioctl_ioas_map(buffer, length, &iovas[i]); + } else { + test_ioctl_ioas_map((void *)(1UL << 31), length, + &iovas[i]); + } + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); + + /* Avoids a reserved region */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + test_ioctl_ioas_map(buffer, length, &iovas[i]); + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + EXPECT_EQ(false, + iovas[i] > test_cmd.add_reserved.start && + iovas[i] < + test_cmd.add_reserved.start + + test_cmd.add_reserved.length); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); + + /* Allowed region intersects with a reserved region */ + ranges[0].start = PAGE_SIZE; + ranges[0].last = PAGE_SIZE * 600; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + + /* Allocate from an allowed region */ + if (self->domain_id) { + ranges[0].start = MOCK_APERTURE_START + PAGE_SIZE; + ranges[0].last = MOCK_APERTURE_START + PAGE_SIZE * 600 - 1; + } else { + ranges[0].start = PAGE_SIZE * 200; + ranges[0].last = PAGE_SIZE * 600 - 1; + } + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + for (i = 0; i != 10; i++) { + size_t length = PAGE_SIZE * (i + 1); + + test_ioctl_ioas_map(buffer, length, &iovas[i]); + EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1))); + EXPECT_EQ(true, iovas[i] >= ranges[0].start); + EXPECT_EQ(true, iovas[i] <= ranges[0].last); + EXPECT_EQ(true, iovas[i] + length > ranges[0].start); + EXPECT_EQ(true, iovas[i] + length <= ranges[0].last + 1); + } + for (i = 0; i != 10; i++) + test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); +} + +TEST_F(iommufd_ioas, area_allowed) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE * 4, + .length = PAGE_SIZE * 100 }, + }; + struct iommu_iova_range ranges[1] = {}; + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = self->ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + + /* Reserved intersects an allowed */ + allow_cmd.num_iovas = 1; + ranges[0].start = self->base_iova; + ranges[0].last = ranges[0].start + PAGE_SIZE * 600; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + test_cmd.add_reserved.start = ranges[0].start + PAGE_SIZE; + test_cmd.add_reserved.length = PAGE_SIZE; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + allow_cmd.num_iovas = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); + + /* Allowed intersects a reserved */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + allow_cmd.num_iovas = 1; + ranges[0].start = self->base_iova; + ranges[0].last = ranges[0].start + PAGE_SIZE * 600; + EXPECT_ERRNO(EADDRINUSE, + ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)); +} + +TEST_F(iommufd_ioas, copy_area) +{ + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = self->ioas_id, + .src_ioas_id = self->ioas_id, + .length = PAGE_SIZE, + }; + + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova); + + /* Copy inside a single IOAS */ + copy_cmd.src_iova = self->base_iova; + copy_cmd.dst_iova = self->base_iova + PAGE_SIZE; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + + /* Copy between IOAS's */ + copy_cmd.src_iova = self->base_iova; + copy_cmd.dst_iova = 0; + test_ioctl_ioas_alloc(©_cmd.dst_ioas_id); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); +} + +TEST_F(iommufd_ioas, iova_ranges) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved = { .start = PAGE_SIZE, .length = PAGE_SIZE }, + }; + struct iommu_iova_range *ranges = buffer; + struct iommu_ioas_iova_ranges ranges_cmd = { + .size = sizeof(ranges_cmd), + .ioas_id = self->ioas_id, + .num_iovas = BUFFER_SIZE / sizeof(*ranges), + .allowed_iovas = (uintptr_t)ranges, + }; + + /* Range can be read */ + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + if (!self->domain_id) { + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(SIZE_MAX, ranges[0].last); + EXPECT_EQ(1, ranges_cmd.out_iova_alignment); + } else { + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + EXPECT_EQ(MOCK_PAGE_SIZE, ranges_cmd.out_iova_alignment); + } + + /* Buffer too small */ + memset(ranges, 0, BUFFER_SIZE); + ranges_cmd.num_iovas = 0; + EXPECT_ERRNO(EMSGSIZE, + ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(0, ranges[0].last); + + /* 2 ranges */ + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + ranges_cmd.num_iovas = BUFFER_SIZE / sizeof(*ranges); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + if (!self->domain_id) { + EXPECT_EQ(2, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last); + EXPECT_EQ(PAGE_SIZE * 2, ranges[1].start); + EXPECT_EQ(SIZE_MAX, ranges[1].last); + } else { + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + } + + /* Buffer too small */ + memset(ranges, 0, BUFFER_SIZE); + ranges_cmd.num_iovas = 1; + if (!self->domain_id) { + EXPECT_ERRNO(EMSGSIZE, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, + &ranges_cmd)); + EXPECT_EQ(2, ranges_cmd.num_iovas); + EXPECT_EQ(0, ranges[0].start); + EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last); + } else { + ASSERT_EQ(0, + ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)); + EXPECT_EQ(1, ranges_cmd.num_iovas); + EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last); + } + EXPECT_EQ(0, ranges[1].start); + EXPECT_EQ(0, ranges[1].last); +} + +TEST_F(iommufd_ioas, access_pin) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + struct iommu_test_cmd check_map_cmd = { + .size = sizeof(check_map_cmd), + .op = IOMMU_TEST_OP_MD_CHECK_MAP, + .check_map = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + uint32_t access_pages_id; + unsigned int npages; + + test_cmd_create_access(self->ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + + for (npages = 1; npages < BUFFER_SIZE / PAGE_SIZE; npages++) { + uint32_t mock_device_id; + uint32_t mock_hwpt_id; + + access_cmd.access_pages.length = npages * PAGE_SIZE; + + /* Single map/unmap */ + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + + /* Double user */ + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + access_pages_id = access_cmd.access_pages.out_access_pages_id; + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access_pages(access_cmd.id, access_pages_id); + + /* Add/remove a domain with a user */ + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + test_cmd_mock_domain(self->ioas_id, &mock_device_id, + &mock_hwpt_id); + check_map_cmd.id = mock_hwpt_id; + ASSERT_EQ(0, ioctl(self->fd, + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), + &check_map_cmd)); + + test_ioctl_destroy(mock_device_id); + test_ioctl_destroy(mock_hwpt_id); + test_cmd_destroy_access_pages( + access_cmd.id, + access_cmd.access_pages.out_access_pages_id); + + test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE); + } + test_cmd_destroy_access(access_cmd.id); +} + +TEST_F(iommufd_ioas, access_pin_unmap) +{ + struct iommu_test_cmd access_pages_cmd = { + .size = sizeof(access_pages_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + test_cmd_create_access(self->ioas_id, &access_pages_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, MOCK_APERTURE_START); + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_pages_cmd)); + + /* Trigger the unmap op */ + test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE); + + /* kernel removed the item for us */ + test_err_destroy_access_pages( + ENOENT, access_pages_cmd.id, + access_pages_cmd.access_pages.out_access_pages_id); +} + +static void check_access_rw(struct __test_metadata *_metadata, int fd, + unsigned int access_id, uint64_t iova, + unsigned int def_flags) +{ + uint16_t tmp[32]; + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = access_id, + .access_rw = { .uptr = (uintptr_t)tmp }, + }; + uint16_t *buffer16 = buffer; + unsigned int i; + void *tmp2; + + for (i = 0; i != BUFFER_SIZE / sizeof(*buffer16); i++) + buffer16[i] = rand(); + + for (access_cmd.access_rw.iova = iova + PAGE_SIZE - 50; + access_cmd.access_rw.iova < iova + PAGE_SIZE + 50; + access_cmd.access_rw.iova++) { + for (access_cmd.access_rw.length = 1; + access_cmd.access_rw.length < sizeof(tmp); + access_cmd.access_rw.length++) { + access_cmd.access_rw.flags = def_flags; + ASSERT_EQ(0, ioctl(fd, + _IOMMU_TEST_CMD( + IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, + memcmp(buffer + (access_cmd.access_rw.iova - + iova), + tmp, access_cmd.access_rw.length)); + + for (i = 0; i != ARRAY_SIZE(tmp); i++) + tmp[i] = rand(); + access_cmd.access_rw.flags = def_flags | + MOCK_ACCESS_RW_WRITE; + ASSERT_EQ(0, ioctl(fd, + _IOMMU_TEST_CMD( + IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, + memcmp(buffer + (access_cmd.access_rw.iova - + iova), + tmp, access_cmd.access_rw.length)); + } + } + + /* Multi-page test */ + tmp2 = malloc(BUFFER_SIZE); + ASSERT_NE(NULL, tmp2); + access_cmd.access_rw.iova = iova; + access_cmd.access_rw.length = BUFFER_SIZE; + access_cmd.access_rw.flags = def_flags; + access_cmd.access_rw.uptr = (uintptr_t)tmp2; + ASSERT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)); + ASSERT_EQ(0, memcmp(buffer, tmp2, access_cmd.access_rw.length)); + free(tmp2); +} + +TEST_F(iommufd_ioas, access_rw) +{ + __u32 access_id; + __u64 iova; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + test_ioctl_ioas_map(buffer, BUFFER_SIZE, &iova); + check_access_rw(_metadata, self->fd, access_id, iova, 0); + check_access_rw(_metadata, self->fd, access_id, iova, + MOCK_ACCESS_RW_SLOW_PATH); + test_ioctl_ioas_unmap(iova, BUFFER_SIZE); + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, access_rw_unaligned) +{ + __u32 access_id; + __u64 iova; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + /* Unaligned pages */ + iova = self->base_iova + MOCK_PAGE_SIZE; + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, iova); + check_access_rw(_metadata, self->fd, access_id, iova, 0); + test_ioctl_ioas_unmap(iova, BUFFER_SIZE); + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, fork_gone) +{ + __u32 access_id; + pid_t child; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + /* Create a mapping with a different mm */ + child = fork(); + if (!child) { + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + exit(0); + } + ASSERT_NE(-1, child); + ASSERT_EQ(child, waitpid(child, NULL, 0)); + + if (self->domain_id) { + /* + * If a domain already existed then everything was pinned within + * the fork, so this copies from one domain to another. + */ + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + check_access_rw(_metadata, self->fd, access_id, + MOCK_APERTURE_START, 0); + + } else { + /* + * Otherwise we need to actually pin pages which can't happen + * since the fork is gone. + */ + test_err_mock_domain(EFAULT, self->ioas_id, NULL, NULL); + } + + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, fork_present) +{ + __u32 access_id; + int pipefds[2]; + uint64_t tmp; + pid_t child; + int efd; + + test_cmd_create_access(self->ioas_id, &access_id, 0); + + ASSERT_EQ(0, pipe2(pipefds, O_CLOEXEC)); + efd = eventfd(0, EFD_CLOEXEC); + ASSERT_NE(-1, efd); + + /* Create a mapping with a different mm */ + child = fork(); + if (!child) { + __u64 iova; + uint64_t one = 1; + + close(pipefds[1]); + test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, + MOCK_APERTURE_START); + if (write(efd, &one, sizeof(one)) != sizeof(one)) + exit(100); + if (read(pipefds[0], &iova, 1) != 1) + exit(100); + exit(0); + } + close(pipefds[0]); + ASSERT_NE(-1, child); + ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp))); + + /* Read pages from the remote process */ + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); + + ASSERT_EQ(0, close(pipefds[1])); + ASSERT_EQ(child, waitpid(child, NULL, 0)); + + test_cmd_destroy_access(access_id); +} + +TEST_F(iommufd_ioas, ioas_option_huge_pages) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_GET, + .val64 = 3, + .object_id = self->ioas_id, + }; + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(1, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 0; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_GET; + cmd.val64 = 3; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + ASSERT_EQ(0, cmd.val64); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 2; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + cmd.op = IOMMU_OPTION_OP_SET; + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); +} + +TEST_F(iommufd_ioas, ioas_iova_alloc) +{ + unsigned int length; + __u64 iova; + + for (length = 1; length != PAGE_SIZE * 2; length++) { + if (variant->mock_domains && (length % MOCK_PAGE_SIZE)) { + test_err_ioctl_ioas_map(EINVAL, buffer, length, &iova); + } else { + test_ioctl_ioas_map(buffer, length, &iova); + test_ioctl_ioas_unmap(iova, length); + } + } +} + +TEST_F(iommufd_ioas, ioas_align_change) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_SET, + .object_id = self->ioas_id, + /* 0 means everything must be aligned to PAGE_SIZE */ + .val64 = 0, + }; + + /* + * We cannot upgrade the alignment using OPTION_HUGE_PAGES when a domain + * and map are present. + */ + if (variant->mock_domains) + return; + + /* + * We can upgrade to PAGE_SIZE alignment when things are aligned right + */ + test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, MOCK_APERTURE_START); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Misalignment is rejected at map time */ + test_err_ioctl_ioas_map_fixed(EINVAL, buffer + MOCK_PAGE_SIZE, + PAGE_SIZE, + MOCK_APERTURE_START + PAGE_SIZE); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Reduce alignment */ + cmd.val64 = 1; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + /* Confirm misalignment is rejected during alignment upgrade */ + test_ioctl_ioas_map_fixed(buffer + MOCK_PAGE_SIZE, PAGE_SIZE, + MOCK_APERTURE_START + PAGE_SIZE); + cmd.val64 = 0; + EXPECT_ERRNO(EADDRINUSE, ioctl(self->fd, IOMMU_OPTION, &cmd)); + + test_ioctl_ioas_unmap(MOCK_APERTURE_START + PAGE_SIZE, PAGE_SIZE); + test_ioctl_ioas_unmap(MOCK_APERTURE_START, PAGE_SIZE); +} + +TEST_F(iommufd_ioas, copy_sweep) +{ + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .src_ioas_id = self->ioas_id, + .dst_iova = MOCK_APERTURE_START, + .length = MOCK_PAGE_SIZE, + }; + unsigned int dst_ioas_id; + uint64_t last_iova; + uint64_t iova; + + test_ioctl_ioas_alloc(&dst_ioas_id); + copy_cmd.dst_ioas_id = dst_ioas_id; + + if (variant->mock_domains) + last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 1; + else + last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 2; + + test_ioctl_ioas_map_fixed(buffer, last_iova - MOCK_APERTURE_START + 1, + MOCK_APERTURE_START); + + for (iova = MOCK_APERTURE_START - PAGE_SIZE; iova <= last_iova; + iova += 511) { + copy_cmd.src_iova = iova; + if (iova < MOCK_APERTURE_START || + iova + copy_cmd.length - 1 > last_iova) { + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_IOAS_COPY, + ©_cmd)); + } else { + ASSERT_EQ(0, + ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + test_ioctl_ioas_unmap_id(dst_ioas_id, copy_cmd.dst_iova, + copy_cmd.length); + } + } + + test_ioctl_destroy(dst_ioas_id); +} + +FIXTURE(iommufd_mock_domain) +{ + int fd; + uint32_t ioas_id; + uint32_t domain_id; + uint32_t domain_ids[2]; + int mmap_flags; + size_t mmap_buf_size; +}; + +FIXTURE_VARIANT(iommufd_mock_domain) +{ + unsigned int mock_domains; + bool hugepages; +}; + +FIXTURE_SETUP(iommufd_mock_domain) +{ + unsigned int i; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + ASSERT_GE(ARRAY_SIZE(self->domain_ids), variant->mock_domains); + + for (i = 0; i != variant->mock_domains; i++) + test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_ids[i]); + self->domain_id = self->domain_ids[0]; + + self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; + self->mmap_buf_size = PAGE_SIZE * 8; + if (variant->hugepages) { + /* + * MAP_POPULATE will cause the kernel to fail mmap if THPs are + * not available. + */ + self->mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + self->mmap_buf_size = HUGEPAGE_SIZE * 2; + } +} + +FIXTURE_TEARDOWN(iommufd_mock_domain) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain) +{ + .mock_domains = 1, + .hugepages = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains) +{ + .mock_domains = 2, + .hugepages = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage) +{ + .mock_domains = 1, + .hugepages = true, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) +{ + .mock_domains = 2, + .hugepages = true, +}; + +/* Have the kernel check that the user pages made it to the iommu_domain */ +#define check_mock_iova(_ptr, _iova, _length) \ + ({ \ + struct iommu_test_cmd check_map_cmd = { \ + .size = sizeof(check_map_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_MAP, \ + .id = self->domain_id, \ + .check_map = { .iova = _iova, \ + .length = _length, \ + .uptr = (uintptr_t)(_ptr) }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), \ + &check_map_cmd)); \ + if (self->domain_ids[1]) { \ + check_map_cmd.id = self->domain_ids[1]; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD( \ + IOMMU_TEST_OP_MD_CHECK_MAP), \ + &check_map_cmd)); \ + } \ + }) + +TEST_F(iommufd_mock_domain, basic) +{ + size_t buf_size = self->mmap_buf_size; + uint8_t *buf; + __u64 iova; + + /* Simple one page map */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + check_mock_iova(buffer, iova, PAGE_SIZE); + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + + /* EFAULT half way through mapping */ + ASSERT_EQ(0, munmap(buf + buf_size / 2, buf_size / 2)); + test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); + + /* EFAULT on first page */ + ASSERT_EQ(0, munmap(buf, buf_size / 2)); + test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); +} + +TEST_F(iommufd_mock_domain, ro_unshare) +{ + uint8_t *buf; + __u64 iova; + int fd; + + fd = open("/proc/self/exe", O_RDONLY); + ASSERT_NE(-1, fd); + + buf = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + ASSERT_NE(MAP_FAILED, buf); + close(fd); + + /* + * There have been lots of changes to the "unshare" mechanism in + * get_user_pages(), make sure it works right. The write to the page + * after we map it for reading should not change the assigned PFN. + */ + ASSERT_EQ(0, + _test_ioctl_ioas_map(self->fd, self->ioas_id, buf, PAGE_SIZE, + &iova, IOMMU_IOAS_MAP_READABLE)); + check_mock_iova(buf, iova, PAGE_SIZE); + memset(buf, 1, PAGE_SIZE); + check_mock_iova(buf, iova, PAGE_SIZE); + ASSERT_EQ(0, munmap(buf, PAGE_SIZE)); +} + +TEST_F(iommufd_mock_domain, all_aligns) +{ + size_t test_step = variant->hugepages ? (self->mmap_buf_size / 16) : + MOCK_PAGE_SIZE; + size_t buf_size = self->mmap_buf_size; + unsigned int start; + unsigned int end; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + check_refs(buf, buf_size, 0); + + /* + * Map every combination of page size and alignment within a big region, + * less for hugepage case as it takes so long to finish. + */ + for (start = 0; start < buf_size; start += test_step) { + if (variant->hugepages) + end = buf_size; + else + end = start + MOCK_PAGE_SIZE; + for (; end < buf_size; end += MOCK_PAGE_SIZE) { + size_t length = end - start; + __u64 iova; + + test_ioctl_ioas_map(buf + start, length, &iova); + check_mock_iova(buf + start, iova, length); + check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, + end / PAGE_SIZE * PAGE_SIZE - + start / PAGE_SIZE * PAGE_SIZE, + 1); + + test_ioctl_ioas_unmap(iova, length); + } + } + check_refs(buf, buf_size, 0); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + +TEST_F(iommufd_mock_domain, all_aligns_copy) +{ + size_t test_step = variant->hugepages ? self->mmap_buf_size / 16 : + MOCK_PAGE_SIZE; + size_t buf_size = self->mmap_buf_size; + unsigned int start; + unsigned int end; + uint8_t *buf; + + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + check_refs(buf, buf_size, 0); + + /* + * Map every combination of page size and alignment within a big region, + * less for hugepage case as it takes so long to finish. + */ + for (start = 0; start < buf_size; start += test_step) { + if (variant->hugepages) + end = buf_size; + else + end = start + MOCK_PAGE_SIZE; + for (; end < buf_size; end += MOCK_PAGE_SIZE) { + size_t length = end - start; + unsigned int old_id; + uint32_t mock_device_id; + __u64 iova; + + test_ioctl_ioas_map(buf + start, length, &iova); + + /* Add and destroy a domain while the area exists */ + old_id = self->domain_ids[1]; + test_cmd_mock_domain(self->ioas_id, &mock_device_id, + &self->domain_ids[1]); + + check_mock_iova(buf + start, iova, length); + check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, + end / PAGE_SIZE * PAGE_SIZE - + start / PAGE_SIZE * PAGE_SIZE, + 1); + + test_ioctl_destroy(mock_device_id); + test_ioctl_destroy(self->domain_ids[1]); + self->domain_ids[1] = old_id; + + test_ioctl_ioas_unmap(iova, length); + } + } + check_refs(buf, buf_size, 0); + ASSERT_EQ(0, munmap(buf, buf_size)); +} + +TEST_F(iommufd_mock_domain, user_copy) +{ + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .access_pages = { .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .dst_ioas_id = self->ioas_id, + .dst_iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + }; + unsigned int ioas_id; + + /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE, + ©_cmd.src_iova); + + test_cmd_create_access(ioas_id, &access_cmd.id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); + + access_cmd.access_pages.iova = copy_cmd.src_iova; + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + copy_cmd.src_ioas_id = ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + + test_cmd_destroy_access_pages( + access_cmd.id, access_cmd.access_pages.out_access_pages_id); + test_cmd_destroy_access(access_cmd.id) test_ioctl_destroy(ioas_id); + + test_ioctl_destroy(ioas_id); +} + +/* VFIO compatibility IOCTLs */ + +TEST_F(iommufd, simple_ioctls) +{ + ASSERT_EQ(VFIO_API_VERSION, ioctl(self->fd, VFIO_GET_API_VERSION)); + ASSERT_EQ(1, ioctl(self->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)); +} + +TEST_F(iommufd, unmap_cmd) +{ + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .iova = MOCK_APERTURE_START, + .size = PAGE_SIZE, + }; + + unmap_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.argsz = sizeof(unmap_cmd); + unmap_cmd.flags = 1 << 31; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.flags = 0; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); +} + +TEST_F(iommufd, map_cmd) +{ + struct vfio_iommu_type1_dma_map map_cmd = { + .iova = MOCK_APERTURE_START, + .size = PAGE_SIZE, + .vaddr = (__u64)buffer, + }; + + map_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + map_cmd.argsz = sizeof(map_cmd); + map_cmd.flags = 1 << 31; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + /* Requires a domain to be attached */ + map_cmd.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); +} + +TEST_F(iommufd, info_cmd) +{ + struct vfio_iommu_type1_info info_cmd = {}; + + /* Invalid argsz */ + info_cmd.argsz = 1; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd)); + + info_cmd.argsz = sizeof(info_cmd); + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd)); +} + +TEST_F(iommufd, set_iommu_cmd) +{ + /* Requires a domain to be attached */ + EXPECT_ERRNO(ENODEV, + ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU)); + EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)); +} + +TEST_F(iommufd, vfio_ioas) +{ + struct iommu_vfio_ioas vfio_ioas_cmd = { + .size = sizeof(vfio_ioas_cmd), + .op = IOMMU_VFIO_IOAS_GET, + }; + __u32 ioas_id; + + /* ENODEV if there is no compat ioas */ + EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Invalid id for set */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_SET; + EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Valid id for set*/ + test_ioctl_ioas_alloc(&ioas_id); + vfio_ioas_cmd.ioas_id = ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + + /* Same id comes back from get */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + ASSERT_EQ(ioas_id, vfio_ioas_cmd.ioas_id); + + /* Clear works */ + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_CLEAR; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET; + EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); +} + +FIXTURE(vfio_compat_mock_domain) +{ + int fd; + uint32_t ioas_id; +}; + +FIXTURE_VARIANT(vfio_compat_mock_domain) +{ + unsigned int version; +}; + +FIXTURE_SETUP(vfio_compat_mock_domain) +{ + struct iommu_vfio_ioas vfio_ioas_cmd = { + .size = sizeof(vfio_ioas_cmd), + .op = IOMMU_VFIO_IOAS_SET, + }; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + /* Create what VFIO would consider a group */ + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, NULL, NULL); + + /* Attach it to the vfio compat */ + vfio_ioas_cmd.ioas_id = self->ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd)); + ASSERT_EQ(0, ioctl(self->fd, VFIO_SET_IOMMU, variant->version)); +} + +FIXTURE_TEARDOWN(vfio_compat_mock_domain) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v2) +{ + .version = VFIO_TYPE1v2_IOMMU, +}; + +FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v0) +{ + .version = VFIO_TYPE1_IOMMU, +}; + +TEST_F(vfio_compat_mock_domain, simple_close) +{ +} + +TEST_F(vfio_compat_mock_domain, option_huge_pages) +{ + struct iommu_option cmd = { + .size = sizeof(cmd), + .option_id = IOMMU_OPTION_HUGE_PAGES, + .op = IOMMU_OPTION_OP_GET, + .val64 = 3, + .object_id = self->ioas_id, + }; + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd)); + if (variant->version == VFIO_TYPE1_IOMMU) { + ASSERT_EQ(0, cmd.val64); + } else { + ASSERT_EQ(1, cmd.val64); + } +} + +/* + * Execute an ioctl command stored in buffer and check that the result does not + * overflow memory. + */ +static bool is_filled(const void *buf, uint8_t c, size_t len) +{ + const uint8_t *cbuf = buf; + + for (; len; cbuf++, len--) + if (*cbuf != c) + return false; + return true; +} + +#define ioctl_check_buf(fd, cmd) \ + ({ \ + size_t _cmd_len = *(__u32 *)buffer; \ + \ + memset(buffer + _cmd_len, 0xAA, BUFFER_SIZE - _cmd_len); \ + ASSERT_EQ(0, ioctl(fd, cmd, buffer)); \ + ASSERT_EQ(true, is_filled(buffer + _cmd_len, 0xAA, \ + BUFFER_SIZE - _cmd_len)); \ + }) + +static void check_vfio_info_cap_chain(struct __test_metadata *_metadata, + struct vfio_iommu_type1_info *info_cmd) +{ + const struct vfio_info_cap_header *cap; + + ASSERT_GE(info_cmd->argsz, info_cmd->cap_offset + sizeof(*cap)); + cap = buffer + info_cmd->cap_offset; + while (true) { + size_t cap_size; + + if (cap->next) + cap_size = (buffer + cap->next) - (void *)cap; + else + cap_size = (buffer + info_cmd->argsz) - (void *)cap; + + switch (cap->id) { + case VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE: { + struct vfio_iommu_type1_info_cap_iova_range *data = + (void *)cap; + + ASSERT_EQ(1, data->header.version); + ASSERT_EQ(1, data->nr_iovas); + EXPECT_EQ(MOCK_APERTURE_START, + data->iova_ranges[0].start); + EXPECT_EQ(MOCK_APERTURE_LAST, data->iova_ranges[0].end); + break; + } + case VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL: { + struct vfio_iommu_type1_info_dma_avail *data = + (void *)cap; + + ASSERT_EQ(1, data->header.version); + ASSERT_EQ(sizeof(*data), cap_size); + break; + } + default: + ASSERT_EQ(false, true); + break; + } + if (!cap->next) + break; + + ASSERT_GE(info_cmd->argsz, cap->next + sizeof(*cap)); + ASSERT_GE(buffer + cap->next, (void *)cap); + cap = buffer + cap->next; + } +} + +TEST_F(vfio_compat_mock_domain, get_info) +{ + struct vfio_iommu_type1_info *info_cmd = buffer; + unsigned int i; + size_t caplen; + + /* Pre-cap ABI */ + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = offsetof(struct vfio_iommu_type1_info, cap_offset), + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_NE(0, info_cmd->iova_pgsizes); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + + /* Read the cap chain size */ + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = sizeof(*info_cmd), + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_NE(0, info_cmd->iova_pgsizes); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + ASSERT_EQ(0, info_cmd->cap_offset); + ASSERT_LT(sizeof(*info_cmd), info_cmd->argsz); + + /* Read the caps, kernel should never create a corrupted caps */ + caplen = info_cmd->argsz; + for (i = sizeof(*info_cmd); i < caplen; i++) { + *info_cmd = (struct vfio_iommu_type1_info){ + .argsz = i, + }; + ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO); + ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS, + info_cmd->flags); + if (!info_cmd->cap_offset) + continue; + check_vfio_info_cap_chain(_metadata, info_cmd); + } +} + +static void shuffle_array(unsigned long *array, size_t nelms) +{ + unsigned int i; + + /* Shuffle */ + for (i = 0; i != nelms; i++) { + unsigned long tmp = array[i]; + unsigned int other = rand() % (nelms - i); + + array[i] = array[other]; + array[other] = tmp; + } +} + +TEST_F(vfio_compat_mock_domain, map) +{ + struct vfio_iommu_type1_dma_map map_cmd = { + .argsz = sizeof(map_cmd), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = (uintptr_t)buffer, + .size = BUFFER_SIZE, + .iova = MOCK_APERTURE_START, + }; + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .argsz = sizeof(unmap_cmd), + .size = BUFFER_SIZE, + .iova = MOCK_APERTURE_START, + }; + unsigned long pages_iova[BUFFER_SIZE / PAGE_SIZE]; + unsigned int i; + + /* Simple map/unmap */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + + /* UNMAP_FLAG_ALL requres 0 iova/size */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL; + EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + + unmap_cmd.iova = 0; + unmap_cmd.size = 0; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + + /* Small pages */ + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + map_cmd.iova = pages_iova[i] = + MOCK_APERTURE_START + i * PAGE_SIZE; + map_cmd.vaddr = (uintptr_t)buffer + i * PAGE_SIZE; + map_cmd.size = PAGE_SIZE; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + } + shuffle_array(pages_iova, ARRAY_SIZE(pages_iova)); + + unmap_cmd.flags = 0; + unmap_cmd.size = PAGE_SIZE; + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + unmap_cmd.iova = pages_iova[i]; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); + } +} + +TEST_F(vfio_compat_mock_domain, huge_map) +{ + size_t buf_size = HUGEPAGE_SIZE * 2; + struct vfio_iommu_type1_dma_map map_cmd = { + .argsz = sizeof(map_cmd), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .size = buf_size, + .iova = MOCK_APERTURE_START, + }; + struct vfio_iommu_type1_dma_unmap unmap_cmd = { + .argsz = sizeof(unmap_cmd), + }; + unsigned long pages_iova[16]; + unsigned int i; + void *buf; + + /* Test huge pages and splitting */ + buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, + 0); + ASSERT_NE(MAP_FAILED, buf); + map_cmd.vaddr = (uintptr_t)buf; + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); + + unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova); + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) + pages_iova[i] = MOCK_APERTURE_START + (i * unmap_cmd.size); + shuffle_array(pages_iova, ARRAY_SIZE(pages_iova)); + + /* type1 mode can cut up larger mappings, type1v2 always fails */ + for (i = 0; i != ARRAY_SIZE(pages_iova); i++) { + unmap_cmd.iova = pages_iova[i]; + unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova); + if (variant->version == VFIO_TYPE1_IOMMU) { + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, + &unmap_cmd)); + } else { + EXPECT_ERRNO(ENOENT, + ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, + &unmap_cmd)); + } + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c new file mode 100644 index 000000000000..9713111b820d --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + * + * These tests are "kernel integrity" tests. They are looking for kernel + * WARN/OOPS/kasn/etc splats triggered by kernel sanitizers & debugging + * features. It does not attempt to verify that the system calls are doing what + * they are supposed to do. + * + * The basic philosophy is to run a sequence of calls that will succeed and then + * sweep every failure injection point on that call chain to look for + * interesting things in error handling. + * + * This test is best run with: + * echo 1 > /proc/sys/kernel/panic_on_warn + * If something is actually going wrong. + */ +#include <fcntl.h> +#include <dirent.h> + +#define __EXPORTED_HEADERS__ +#include <linux/vfio.h> + +#include "iommufd_utils.h" + +static bool have_fault_injection; + +static int writeat(int dfd, const char *fn, const char *val) +{ + size_t val_len = strlen(val); + ssize_t res; + int fd; + + fd = openat(dfd, fn, O_WRONLY); + if (fd == -1) + return -1; + res = write(fd, val, val_len); + assert(res == val_len); + close(fd); + return 0; +} + +static __attribute__((constructor)) void setup_buffer(void) +{ + BUFFER_SIZE = 2*1024*1024; + + buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); +} + +/* + * This sets up fail_injection in a way that is useful for this test. + * It does not attempt to restore things back to how they were. + */ +static __attribute__((constructor)) void setup_fault_injection(void) +{ + DIR *debugfs = opendir("/sys/kernel/debug/"); + struct dirent *dent; + + if (!debugfs) + return; + + /* Allow any allocation call to be fault injected */ + if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N")) + return; + writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N"); + writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N"); + + while ((dent = readdir(debugfs))) { + char fn[300]; + + if (strncmp(dent->d_name, "fail", 4) != 0) + continue; + + /* We are looking for kernel splats, quiet down the log */ + snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name); + writeat(dirfd(debugfs), fn, "0"); + } + closedir(debugfs); + have_fault_injection = true; +} + +struct fail_nth_state { + int proc_fd; + unsigned int iteration; +}; + +static void fail_nth_first(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state) +{ + char buf[300]; + + snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid()); + nth_state->proc_fd = open(buf, O_RDWR); + ASSERT_NE(-1, nth_state->proc_fd); +} + +static bool fail_nth_next(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state, + int test_result) +{ + static const char disable_nth[] = "0"; + char buf[300]; + + /* + * This is just an arbitrary limit based on the current kernel + * situation. Changes in the kernel can dramtically change the number of + * required fault injection sites, so if this hits it doesn't + * necessarily mean a test failure, just that the limit has to be made + * bigger. + */ + ASSERT_GT(400, nth_state->iteration); + if (nth_state->iteration != 0) { + ssize_t res; + ssize_t res2; + + buf[0] = 0; + /* + * Annoyingly disabling the nth can also fail. This means + * the test passed without triggering failure + */ + res = pread(nth_state->proc_fd, buf, sizeof(buf), 0); + if (res == -1 && errno == EFAULT) { + buf[0] = '1'; + buf[1] = '\n'; + res = 2; + } + + res2 = pwrite(nth_state->proc_fd, disable_nth, + ARRAY_SIZE(disable_nth) - 1, 0); + if (res2 == -1 && errno == EFAULT) { + res2 = pwrite(nth_state->proc_fd, disable_nth, + ARRAY_SIZE(disable_nth) - 1, 0); + buf[0] = '1'; + buf[1] = '\n'; + } + ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2); + + /* printf(" nth %u result=%d nth=%u\n", nth_state->iteration, + test_result, atoi(buf)); */ + fflush(stdout); + ASSERT_LT(1, res); + if (res != 2 || buf[0] != '0' || buf[1] != '\n') + return false; + } else { + /* printf(" nth %u result=%d\n", nth_state->iteration, + test_result); */ + } + nth_state->iteration++; + return true; +} + +/* + * This is called during the test to start failure injection. It allows the test + * to do some setup that has already been swept and thus reduce the required + * iterations. + */ +void __fail_nth_enable(struct __test_metadata *_metadata, + struct fail_nth_state *nth_state) +{ + char buf[300]; + size_t len; + + if (!nth_state->iteration) + return; + + len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration); + ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0)); +} +#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state) + +#define TEST_FAIL_NTH(fixture_name, name) \ + static int test_nth_##name(struct __test_metadata *_metadata, \ + FIXTURE_DATA(fixture_name) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + *variant, \ + struct fail_nth_state *_nth_state); \ + TEST_F(fixture_name, name) \ + { \ + struct fail_nth_state nth_state = {}; \ + int test_result = 0; \ + \ + if (!have_fault_injection) \ + SKIP(return, \ + "fault injection is not enabled in the kernel"); \ + fail_nth_first(_metadata, &nth_state); \ + ASSERT_EQ(0, test_nth_##name(_metadata, self, variant, \ + &nth_state)); \ + while (fail_nth_next(_metadata, &nth_state, test_result)) { \ + fixture_name##_teardown(_metadata, self, variant); \ + fixture_name##_setup(_metadata, self, variant); \ + test_result = test_nth_##name(_metadata, self, \ + variant, &nth_state); \ + }; \ + ASSERT_EQ(0, test_result); \ + } \ + static int test_nth_##name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) __attribute__((unused)) \ + *variant, \ + struct fail_nth_state *_nth_state) + +FIXTURE(basic_fail_nth) +{ + int fd; + uint32_t access_id; +}; + +FIXTURE_SETUP(basic_fail_nth) +{ + self->fd = -1; + self->access_id = 0; +} + +FIXTURE_TEARDOWN(basic_fail_nth) +{ + int rc; + + if (self->access_id) { + /* The access FD holds the iommufd open until it closes */ + rc = _test_cmd_destroy_access(self->access_id); + assert(rc == 0); + } + teardown_iommufd(self->fd, _metadata); +} + +/* Cover ioas.c */ +TEST_FAIL_NTH(basic_fail_nth, basic) +{ + struct iommu_iova_range ranges[10]; + uint32_t ioas_id; + __u64 iova; + + fail_nth_enable(); + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + { + struct iommu_ioas_iova_ranges ranges_cmd = { + .size = sizeof(ranges_cmd), + .num_iovas = ARRAY_SIZE(ranges), + .ioas_id = ioas_id, + .allowed_iovas = (uintptr_t)ranges, + }; + if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd)) + return -1; + } + + { + struct iommu_ioas_allow_iovas allow_cmd = { + .size = sizeof(allow_cmd), + .ioas_id = ioas_id, + .num_iovas = 1, + .allowed_iovas = (uintptr_t)ranges, + }; + + ranges[0].start = 16*1024; + ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1; + if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd)) + return -1; + } + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + { + struct iommu_ioas_copy copy_cmd = { + .size = sizeof(copy_cmd), + .flags = IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE, + .dst_ioas_id = ioas_id, + .src_ioas_id = ioas_id, + .src_iova = iova, + .length = sizeof(ranges), + }; + + if (ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)) + return -1; + } + + if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, + NULL)) + return -1; + /* Failure path of no IOVA to unmap */ + _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL); + return 0; +} + +/* iopt_area_fill_domains() and iopt_area_fill_domain() */ +TEST_FAIL_NTH(basic_fail_nth, map_domain) +{ + uint32_t ioas_id; + __u32 device_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + return 0; +} + +TEST_FAIL_NTH(basic_fail_nth, map_two_domains) +{ + uint32_t ioas_id; + __u32 device_id2; + __u32 device_id; + __u32 hwpt_id2; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + + if (_test_ioctl_destroy(self->fd, device_id2)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id2)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2)) + return -1; + return 0; +} + +TEST_FAIL_NTH(basic_fail_nth, access_rw) +{ + uint64_t tmp_big[4096]; + uint32_t ioas_id; + uint16_t tmp[32]; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0)) + return -1; + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = self->access_id, + .access_rw = { .iova = iova, + .length = sizeof(tmp), + .uptr = (uintptr_t)tmp }, + }; + + // READ + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + + access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + + access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH | + MOCK_ACCESS_RW_WRITE; + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + } + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_RW, + .id = self->access_id, + .access_rw = { .iova = iova, + .flags = MOCK_ACCESS_RW_SLOW_PATH, + .length = sizeof(tmp_big), + .uptr = (uintptr_t)tmp_big }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + } + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + return 0; +} + +/* pages.c access functions */ +TEST_FAIL_NTH(basic_fail_nth, access_pin) +{ + uint32_t access_pages_id; + uint32_t ioas_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)) + return -1; + + fail_nth_enable(); + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .id = self->access_id, + .access_pages = { .iova = iova, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_pages_id = access_cmd.access_pages.out_access_pages_id; + } + + if (_test_cmd_destroy_access_pages(self->fd, self->access_id, + access_pages_id)) + return -1; + + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + return 0; +} + +/* iopt_pages_fill_xarray() */ +TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) +{ + uint32_t access_pages_id; + uint32_t ioas_id; + __u32 device_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id)) + return -1; + + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)) + return -1; + + fail_nth_enable(); + + { + struct iommu_test_cmd access_cmd = { + .size = sizeof(access_cmd), + .op = IOMMU_TEST_OP_ACCESS_PAGES, + .id = self->access_id, + .access_pages = { .iova = iova, + .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW), + &access_cmd)) + return -1; + access_pages_id = access_cmd.access_pages.out_access_pages_id; + } + + if (_test_cmd_destroy_access_pages(self->fd, self->access_id, + access_pages_id)) + return -1; + + if (_test_cmd_destroy_access(self->access_id)) + return -1; + self->access_id = 0; + + if (_test_ioctl_destroy(self->fd, device_id)) + return -1; + if (_test_ioctl_destroy(self->fd, hwpt_id)) + return -1; + return 0; +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h new file mode 100644 index 000000000000..0d1f46369c2a --- /dev/null +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#ifndef __SELFTEST_IOMMUFD_UTILS +#define __SELFTEST_IOMMUFD_UTILS + +#include <unistd.h> +#include <stddef.h> +#include <sys/fcntl.h> +#include <sys/ioctl.h> +#include <stdint.h> +#include <assert.h> + +#include "../kselftest_harness.h" +#include "../../../../drivers/iommu/iommufd/iommufd_test.h" + +/* Hack to make assertions more readable */ +#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD + +static void *buffer; +static unsigned long BUFFER_SIZE; + +/* + * Have the kernel check the refcount on pages. I don't know why a freshly + * mmap'd anon non-compound page starts out with a ref of 3 + */ +#define check_refs(_ptr, _length, _refs) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_REFS, \ + .check_refs = { .length = _length, \ + .uptr = (uintptr_t)(_ptr), \ + .refs = _refs }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \ + &test_cmd)); \ + }) + +static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *device_id, + __u32 *hwpt_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN, + .id = ioas_id, + .mock_domain = {}, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (device_id) + *device_id = cmd.mock_domain.out_device_id; + assert(cmd.id != 0); + if (hwpt_id) + *hwpt_id = cmd.mock_domain.out_hwpt_id; + return 0; +} +#define test_cmd_mock_domain(ioas_id, device_id, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, device_id, \ + hwpt_id)) +#define test_err_mock_domain(_errno, ioas_id, device_id, hwpt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ + device_id, hwpt_id)) + +static int _test_cmd_create_access(int fd, unsigned int ioas_id, + __u32 *access_id, unsigned int flags) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_CREATE_ACCESS, + .id = ioas_id, + .create_access = { .flags = flags }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + *access_id = cmd.create_access.out_access_fd; + return 0; +} +#define test_cmd_create_access(ioas_id, access_id, flags) \ + ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \ + flags)) + +static int _test_cmd_destroy_access(unsigned int access_id) +{ + return close(access_id); +} +#define test_cmd_destroy_access(access_id) \ + ASSERT_EQ(0, _test_cmd_destroy_access(access_id)) + +static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id, + unsigned int access_pages_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES, + .id = access_id, + .destroy_access_pages = { .access_pages_id = access_pages_id }, + }; + return ioctl(fd, IOMMU_TEST_CMD, &cmd); +} +#define test_cmd_destroy_access_pages(access_id, access_pages_id) \ + ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \ + access_pages_id)) +#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \ + EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \ + self->fd, access_id, access_pages_id)) + +static int _test_ioctl_destroy(int fd, unsigned int id) +{ + struct iommu_destroy cmd = { + .size = sizeof(cmd), + .id = id, + }; + return ioctl(fd, IOMMU_DESTROY, &cmd); +} +#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id)) + +static int _test_ioctl_ioas_alloc(int fd, __u32 *id) +{ + struct iommu_ioas_alloc cmd = { + .size = sizeof(cmd), + }; + int ret; + + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &cmd); + if (ret) + return ret; + *id = cmd.out_ioas_id; + return 0; +} +#define test_ioctl_ioas_alloc(id) \ + ({ \ + ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \ + ASSERT_NE(0, *(id)); \ + }) + +static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer, + size_t length, __u64 *iova, unsigned int flags) +{ + struct iommu_ioas_map cmd = { + .size = sizeof(cmd), + .flags = flags, + .ioas_id = ioas_id, + .user_va = (uintptr_t)buffer, + .length = length, + }; + int ret; + + if (flags & IOMMU_IOAS_MAP_FIXED_IOVA) + cmd.iova = *iova; + + ret = ioctl(fd, IOMMU_IOAS_MAP, &cmd); + *iova = cmd.iova; + return ret; +} +#define test_ioctl_ioas_map(buffer, length, iova_p) \ + ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \ + length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p) \ + EXPECT_ERRNO(_errno, \ + _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \ + length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p) \ + ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \ + iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_fixed(buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + ASSERT_EQ(0, _test_ioctl_ioas_map( \ + self->fd, self->ioas_id, buffer, length, \ + &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + +#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + EXPECT_ERRNO(_errno, \ + _test_ioctl_ioas_map( \ + self->fd, self->ioas_id, buffer, length, \ + &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + +static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova, + size_t length, uint64_t *out_len) +{ + struct iommu_ioas_unmap cmd = { + .size = sizeof(cmd), + .ioas_id = ioas_id, + .iova = iova, + .length = length, + }; + int ret; + + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd); + if (out_len) + *out_len = cmd.length; + return ret; +} +#define test_ioctl_ioas_unmap(iova, length) \ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \ + length, NULL)) + +#define test_ioctl_ioas_unmap_id(ioas_id, iova, length) \ + ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \ + NULL)) + +#define test_err_ioctl_ioas_unmap(_errno, iova, length) \ + EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \ + iova, length, NULL)) + +static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) +{ + struct iommu_test_cmd memlimit_cmd = { + .size = sizeof(memlimit_cmd), + .op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, + .memory_limit = { .limit = limit }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT), + &memlimit_cmd); +} + +#define test_ioctl_set_temp_memory_limit(limit) \ + ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit)) + +#define test_ioctl_set_default_memory_limit() \ + test_ioctl_set_temp_memory_limit(65536) + +static void teardown_iommufd(int fd, struct __test_metadata *_metadata) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_MD_CHECK_REFS, + .check_refs = { .length = BUFFER_SIZE, + .uptr = (uintptr_t)buffer }, + }; + + if (fd == -1) + return; + + EXPECT_EQ(0, close(fd)); + + fd = open("/dev/iommu", O_RDWR); + EXPECT_NE(-1, fd); + EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), + &test_cmd)); + EXPECT_EQ(0, close(fd)); +} + +#define EXPECT_ERRNO(expected_errno, cmd) \ + ({ \ + ASSERT_EQ(-1, cmd); \ + EXPECT_EQ(expected_errno, errno); \ + }) + +#endif diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile index 1c4448a843a4..50e9c299fc4a 100644 --- a/tools/testing/selftests/ipc/Makefile +++ b/tools/testing/selftests/ipc/Makefile @@ -10,7 +10,7 @@ ifeq ($(ARCH),x86_64) CFLAGS := -DCONFIG_X86_64 -D__x86_64__ endif -CFLAGS += -I../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := msgque diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile index b4d39f6b5124..59a1e5379018 100644 --- a/tools/testing/selftests/kcmp/Makefile +++ b/tools/testing/selftests/kcmp/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -I../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := kcmp_test diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index afd42387e8b2..7189715d7960 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -1,18 +1,7 @@ #!/bin/bash -# +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org> # -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or at your option any -# later version; or, when distributed separately from the Linux kernel or -# when incorporated into other software packages, subject to the following -# license: -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of copyleft-next (version 0.3.1 or later) as published -# at http://copyleft-next.org/. - # This is a stress test script for kmod, the kernel module loader. It uses # test_kmod which exposes a series of knobs for the API for us so we can # tweak each test in userspace rather than in kernelspace. diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index 708cb5429633..7424a1f5babc 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -90,7 +90,7 @@ pass_libs=() pass_cnt=0 # Get all TARGETS from selftests Makefile -targets=$(egrep "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2) +targets=$(grep -E "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2) # Single test case if [ $# -eq 2 ] diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 25f4d54067c0..d8bff2005dfc 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -54,6 +54,7 @@ #define _GNU_SOURCE #endif #include <asm/types.h> +#include <ctype.h> #include <errno.h> #include <stdbool.h> #include <stdint.h> @@ -985,6 +986,127 @@ void __wait_for_test(struct __test_metadata *t) } } +static void test_harness_list_tests(void) +{ + struct __fixture_variant_metadata *v; + struct __fixture_metadata *f; + struct __test_metadata *t; + + for (f = __fixture_list; f; f = f->next) { + v = f->variant; + t = f->tests; + + if (f == __fixture_list) + fprintf(stderr, "%-20s %-25s %s\n", + "# FIXTURE", "VARIANT", "TEST"); + else + fprintf(stderr, "--------------------------------------------------------------------------------\n"); + + do { + fprintf(stderr, "%-20s %-25s %s\n", + t == f->tests ? f->name : "", + v ? v->name : "", + t ? t->name : ""); + + v = v ? v->next : NULL; + t = t ? t->next : NULL; + } while (v || t); + } +} + +static int test_harness_argv_check(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) { + switch (opt) { + case 'f': + case 'F': + case 'v': + case 'V': + case 't': + case 'T': + case 'r': + break; + case 'l': + test_harness_list_tests(); + return KSFT_SKIP; + case 'h': + default: + fprintf(stderr, + "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n" + "\t-h print help\n" + "\t-l list all tests\n" + "\n" + "\t-t name include test\n" + "\t-T name exclude test\n" + "\t-v name include variant\n" + "\t-V name exclude variant\n" + "\t-f name include fixture\n" + "\t-F name exclude fixture\n" + "\t-r name run specified test\n" + "\n" + "Test filter options can be specified " + "multiple times. The filtering stops\n" + "at the first match. For example to " + "include all tests from variant 'bla'\n" + "but not test 'foo' specify '-T foo -v bla'.\n" + "", argv[0]); + return opt == 'h' ? KSFT_SKIP : KSFT_FAIL; + } + } + + return KSFT_PASS; +} + +static bool test_enabled(int argc, char **argv, + struct __fixture_metadata *f, + struct __fixture_variant_metadata *v, + struct __test_metadata *t) +{ + unsigned int flen = 0, vlen = 0, tlen = 0; + bool has_positive = false; + int opt; + + optind = 1; + while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) { + has_positive |= islower(opt); + + switch (tolower(opt)) { + case 't': + if (!strcmp(t->name, optarg)) + return islower(opt); + break; + case 'f': + if (!strcmp(f->name, optarg)) + return islower(opt); + break; + case 'v': + if (!strcmp(v->name, optarg)) + return islower(opt); + break; + case 'r': + if (!tlen) { + flen = strlen(f->name); + vlen = strlen(v->name); + tlen = strlen(t->name); + } + if (strlen(optarg) == flen + 1 + vlen + !!vlen + tlen && + !strncmp(f->name, &optarg[0], flen) && + !strncmp(v->name, &optarg[flen + 1], vlen) && + !strncmp(t->name, &optarg[flen + 1 + vlen + !!vlen], tlen)) + return true; + break; + } + } + + /* + * If there are no positive tests then we assume user just wants + * exclusions and everything else is a pass. + */ + return !has_positive; +} + void __run_test(struct __fixture_metadata *f, struct __fixture_variant_metadata *variant, struct __test_metadata *t) @@ -1032,24 +1154,32 @@ void __run_test(struct __fixture_metadata *f, f->name, variant->name[0] ? "." : "", variant->name, t->name); } -static int test_harness_run(int __attribute__((unused)) argc, - char __attribute__((unused)) **argv) +static int test_harness_run(int argc, char **argv) { struct __fixture_variant_metadata no_variant = { .name = "", }; struct __fixture_variant_metadata *v; struct __fixture_metadata *f; struct __test_results *results; struct __test_metadata *t; - int ret = 0; + int ret; unsigned int case_count = 0, test_count = 0; unsigned int count = 0; unsigned int pass_count = 0; + ret = test_harness_argv_check(argc, argv); + if (ret != KSFT_PASS) + return ret; + for (f = __fixture_list; f; f = f->next) { for (v = f->variant ?: &no_variant; v; v = v->next) { - case_count++; + unsigned int old_tests = test_count; + for (t = f->tests; t; t = t->next) - test_count++; + if (test_enabled(argc, argv, f, v, t)) + test_count++; + + if (old_tests != test_count) + case_count++; } } @@ -1063,6 +1193,8 @@ static int test_harness_run(int __attribute__((unused)) argc, for (f = __fixture_list; f; f = f->next) { for (v = f->variant ?: &no_variant; v; v = v->next) { for (t = f->tests; t; t = t->next) { + if (!test_enabled(argc, argv, f, v, t)) + continue; count++; t->results = results; __run_test(f, v, t); diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 05d980fb083d..6d9381d60172 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,82 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -/aarch64/aarch32_id_regs -/aarch64/arch_timer -/aarch64/debug-exceptions -/aarch64/get-reg-list -/aarch64/hypercalls -/aarch64/psci_test -/aarch64/vcpu_width_config -/aarch64/vgic_init -/aarch64/vgic_irq -/s390x/memop -/s390x/resets -/s390x/sync_regs_test -/s390x/tprot -/x86_64/amx_test -/x86_64/cpuid_test -/x86_64/cr4_cpuid_sync_test -/x86_64/debug_regs -/x86_64/evmcs_test -/x86_64/emulator_error_test -/x86_64/fix_hypercall_test -/x86_64/get_msr_index_features -/x86_64/kvm_clock_test -/x86_64/kvm_pv_test -/x86_64/hyperv_clock -/x86_64/hyperv_cpuid -/x86_64/hyperv_features -/x86_64/hyperv_svm_test -/x86_64/max_vcpuid_cap_test -/x86_64/mmio_warning_test -/x86_64/monitor_mwait_test -/x86_64/nested_exceptions_test -/x86_64/nx_huge_pages_test -/x86_64/platform_info_test -/x86_64/pmu_event_filter_test -/x86_64/set_boot_cpu_id -/x86_64/set_sregs_test -/x86_64/sev_migrate_tests -/x86_64/smm_test -/x86_64/state_test -/x86_64/svm_vmcall_test -/x86_64/svm_int_ctl_test -/x86_64/svm_nested_soft_inject_test -/x86_64/svm_nested_shutdown_test -/x86_64/sync_regs_test -/x86_64/tsc_msrs_test -/x86_64/tsc_scaling_sync -/x86_64/ucna_injection_test -/x86_64/userspace_io_test -/x86_64/userspace_msr_exit_test -/x86_64/vmx_apic_access_test -/x86_64/vmx_close_while_nested_test -/x86_64/vmx_dirty_log_test -/x86_64/vmx_exception_with_invalid_guest_state -/x86_64/vmx_invalid_nested_guest_state -/x86_64/vmx_msrs_test -/x86_64/vmx_preemption_timer_test -/x86_64/vmx_set_nested_state_test -/x86_64/vmx_tsc_adjust_test -/x86_64/vmx_nested_tsc_scaling_test -/x86_64/xapic_ipi_test -/x86_64/xapic_state_test -/x86_64/xen_shinfo_test -/x86_64/xen_vmcall_test -/x86_64/xss_msr_test -/x86_64/vmx_pmu_caps_test -/x86_64/triple_fault_event_test -/access_tracking_perf_test -/demand_paging_test -/dirty_log_test -/dirty_log_perf_test -/hardware_disable_test -/kvm_create_max_vcpus -/kvm_page_table_test -/max_guest_memory_test -/memslot_modification_stress_test -/memslot_perf_test -/rseq_test -/set_memory_region_test -/steal_time -/kvm_binary_stats_test -/system_counter_offset_test +* +!/**/ +!*.c +!*.h +!*.S +!*.sh diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4a2caef2c939..1750f91dd936 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -7,35 +7,14 @@ top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -# For cross-builds to work, UNAME_M has to map to ARCH and arch specific -# directories and targets in this Makefile. "uname -m" doesn't map to -# arch specific sub-directory names. -# -# UNAME_M variable to used to run the compiles pointing to the right arch -# directories and build the right targets for these supported architectures. -# -# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable. -# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable. -# -# x86_64 targets are named to include x86_64 as a suffix and directories -# for includes are in x86_64 sub-directory. s390x and aarch64 follow the -# same convention. "uname -m" doesn't result in the correct mapping for -# s390x and aarch64. -# -# No change necessary for x86_64 -UNAME_M := $(shell uname -m) - -# Set UNAME_M for arm64 compile/install to work -ifeq ($(ARCH),arm64) - UNAME_M := aarch64 -endif -# Set UNAME_M s390x compile/install to work -ifeq ($(ARCH),s390) - UNAME_M := s390x -endif -# Set UNAME_M riscv compile/install to work -ifeq ($(ARCH),riscv) - UNAME_M := riscv +ifeq ($(ARCH),x86) + ARCH_DIR := x86_64 +else ifeq ($(ARCH),arm64) + ARCH_DIR := aarch64 +else ifeq ($(ARCH),s390) + ARCH_DIR := s390x +else + ARCH_DIR := $(ARCH) endif LIBKVM += lib/assert.c @@ -43,16 +22,19 @@ LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c -LIBKVM += lib/perf_test_util.c +LIBKVM += lib/memstress.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c LIBKVM_STRING += lib/string_override.c LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/perf_test_util.c +LIBKVM_x86_64 += lib/x86_64/hyperv.c +LIBKVM_x86_64 += lib/x86_64/memstress.c LIBKVM_x86_64 += lib/x86_64/processor.c LIBKVM_x86_64 += lib/x86_64/svm.c LIBKVM_x86_64 += lib/x86_64/ucall.c @@ -80,13 +62,15 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features -TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test -TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test +TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test @@ -96,6 +80,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test @@ -153,10 +138,12 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test @@ -188,10 +175,15 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_PROGS += $(TEST_PROGS_$(UNAME_M)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(UNAME_M)) -LIBKVM += $(LIBKVM_$(UNAME_M)) +TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) +LIBKVM += $(LIBKVM_$(ARCH_DIR)) + +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). +include ../lib.mk INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ @@ -202,25 +194,23 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end \ + -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(UNAME_M) -I ../rseq -I.. $(EXTRA_CFLAGS) \ + -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ $(KHDR_INCLUDES) -no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) +no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) # On s390, build the testcases KVM-enabled -pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ +pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) LDLIBS += -ldl LDFLAGS += -pthread $(no-pie-option) $(pgste-option) -# After inclusion, $(OUTPUT) is defined and -# $(TEST_GEN_PROGS) starts with $(OUTPUT)/ -include ../lib.mk - LIBKVM_C := $(filter %.c,$(LIBKVM)) LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 6f9c1f19c7f6..4951ac53d1f8 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "processor.h" #include "test_util.h" +#include <linux/bitfield.h> #define BAD_ID_REG_VAL 0x1badc0deul @@ -145,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); - el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT; + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val); return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; } @@ -158,12 +159,9 @@ int main(void) TEST_REQUIRE(vcpu_aarch64_only(vcpu)); - ucall_init(vm, NULL); - test_user_raz_wi(vcpu); test_user_raz_invariant(vcpu); test_guest_raz(vcpu); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 574eb73f0e90..26556a266021 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -222,7 +222,7 @@ static void *test_vcpu_run(void *arg) /* Currently, any exit from guest is an indication of completion */ pthread_mutex_lock(&vcpu_done_map_lock); - set_bit(vcpu_idx, vcpu_done_map); + __set_bit(vcpu_idx, vcpu_done_map); pthread_mutex_unlock(&vcpu_done_map_lock); switch (get_ucall(vcpu, &uc)) { @@ -375,7 +375,6 @@ static struct kvm_vm *test_vm_create(void) for (i = 0; i < nr_vcpus; i++) vcpu_init_descriptor_tables(vcpus[i]); - ucall_init(vm, NULL); test_init_timer_irq(vm); gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); @@ -414,36 +413,21 @@ static bool parse_args(int argc, char *argv[]) while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) { switch (opt) { case 'n': - test_args.nr_vcpus = atoi(optarg); - if (test_args.nr_vcpus <= 0) { - pr_info("Positive value needed for -n\n"); - goto err; - } else if (test_args.nr_vcpus > KVM_MAX_VCPUS) { + test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); + if (test_args.nr_vcpus > KVM_MAX_VCPUS) { pr_info("Max allowed vCPUs: %u\n", KVM_MAX_VCPUS); goto err; } break; case 'i': - test_args.nr_iter = atoi(optarg); - if (test_args.nr_iter <= 0) { - pr_info("Positive value needed for -i\n"); - goto err; - } + test_args.nr_iter = atoi_positive("Number of iterations", optarg); break; case 'p': - test_args.timer_period_ms = atoi(optarg); - if (test_args.timer_period_ms <= 0) { - pr_info("Positive value needed for -p\n"); - goto err; - } + test_args.timer_period_ms = atoi_positive("Periodicity", optarg); break; case 'm': - test_args.migration_freq_ms = atoi(optarg); - if (test_args.migration_freq_ms < 0) { - pr_info("0 or positive value needed for -m\n"); - goto err; - } + test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); break; case 'h': default: @@ -462,9 +446,6 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - if (!parse_args(argc, argv)) exit(KSFT_SKIP); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 947bd201435c..637be796086f 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -2,6 +2,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <linux/bitfield.h> #define MDSCR_KDE (1 << 13) #define MDSCR_MDE (1 << 15) @@ -11,17 +12,24 @@ #define DBGBCR_EXEC (0x0 << 3) #define DBGBCR_EL1 (0x1 << 1) #define DBGBCR_E (0x1 << 0) +#define DBGBCR_LBN_SHIFT 16 +#define DBGBCR_BT_SHIFT 20 +#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT) +#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT) #define DBGWCR_LEN8 (0xff << 5) #define DBGWCR_RD (0x1 << 3) #define DBGWCR_WR (0x2 << 3) #define DBGWCR_EL1 (0x1 << 1) #define DBGWCR_E (0x1 << 0) +#define DBGWCR_LBN_SHIFT 16 +#define DBGWCR_WT_SHIFT 20 +#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT) #define SPSR_D (1 << 9) #define SPSR_SS (1 << 21) -extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; +extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; extern unsigned char iter_ss_begin, iter_ss_end; static volatile uint64_t sw_bp_addr, hw_bp_addr; static volatile uint64_t wp_addr, wp_data_addr; @@ -29,8 +37,74 @@ static volatile uint64_t svc_addr; static volatile uint64_t ss_addr[4], ss_idx; #define PC(v) ((uint64_t)&(v)) +#define GEN_DEBUG_WRITE_REG(reg_name) \ +static void write_##reg_name(int num, uint64_t val) \ +{ \ + switch (num) { \ + case 0: \ + write_sysreg(val, reg_name##0_el1); \ + break; \ + case 1: \ + write_sysreg(val, reg_name##1_el1); \ + break; \ + case 2: \ + write_sysreg(val, reg_name##2_el1); \ + break; \ + case 3: \ + write_sysreg(val, reg_name##3_el1); \ + break; \ + case 4: \ + write_sysreg(val, reg_name##4_el1); \ + break; \ + case 5: \ + write_sysreg(val, reg_name##5_el1); \ + break; \ + case 6: \ + write_sysreg(val, reg_name##6_el1); \ + break; \ + case 7: \ + write_sysreg(val, reg_name##7_el1); \ + break; \ + case 8: \ + write_sysreg(val, reg_name##8_el1); \ + break; \ + case 9: \ + write_sysreg(val, reg_name##9_el1); \ + break; \ + case 10: \ + write_sysreg(val, reg_name##10_el1); \ + break; \ + case 11: \ + write_sysreg(val, reg_name##11_el1); \ + break; \ + case 12: \ + write_sysreg(val, reg_name##12_el1); \ + break; \ + case 13: \ + write_sysreg(val, reg_name##13_el1); \ + break; \ + case 14: \ + write_sysreg(val, reg_name##14_el1); \ + break; \ + case 15: \ + write_sysreg(val, reg_name##15_el1); \ + break; \ + default: \ + GUEST_ASSERT(0); \ + } \ +} + +/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */ +GEN_DEBUG_WRITE_REG(dbgbcr) +GEN_DEBUG_WRITE_REG(dbgbvr) +GEN_DEBUG_WRITE_REG(dbgwcr) +GEN_DEBUG_WRITE_REG(dbgwvr) + static void reset_debug_state(void) { + uint8_t brps, wrps, i; + uint64_t dfr0; + asm volatile("msr daifset, #8"); write_sysreg(0, osdlr_el1); @@ -38,11 +112,21 @@ static void reset_debug_state(void) isb(); write_sysreg(0, mdscr_el1); - /* This test only uses the first bp and wp slot. */ - write_sysreg(0, dbgbvr0_el1); - write_sysreg(0, dbgbcr0_el1); - write_sysreg(0, dbgwcr0_el1); - write_sysreg(0, dbgwvr0_el1); + write_sysreg(0, contextidr_el1); + + /* Reset all bcr/bvr/wcr/wvr registers */ + dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0); + for (i = 0; i <= brps; i++) { + write_dbgbcr(i, 0); + write_dbgbvr(i, 0); + } + wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0); + for (i = 0; i <= wrps; i++) { + write_dbgwcr(i, 0); + write_dbgwvr(i, 0); + } + isb(); } @@ -54,16 +138,10 @@ static void enable_os_lock(void) GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); } -static void install_wp(uint64_t addr) +static void enable_monitor_debug_exceptions(void) { - uint32_t wcr; uint32_t mdscr; - wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; - write_sysreg(wcr, dbgwcr0_el1); - write_sysreg(addr, dbgwvr0_el1); - isb(); - asm volatile("msr daifclr, #8"); mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; @@ -71,21 +149,76 @@ static void install_wp(uint64_t addr) isb(); } -static void install_hw_bp(uint64_t addr) +static void install_wp(uint8_t wpn, uint64_t addr) +{ + uint32_t wcr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_dbgwcr(wpn, wcr); + write_dbgwvr(wpn, addr); + + isb(); + + enable_monitor_debug_exceptions(); +} + +static void install_hw_bp(uint8_t bpn, uint64_t addr) { uint32_t bcr; - uint32_t mdscr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; - write_sysreg(bcr, dbgbcr0_el1); - write_sysreg(addr, dbgbvr0_el1); + write_dbgbcr(bpn, bcr); + write_dbgbvr(bpn, addr); isb(); - asm volatile("msr daifclr, #8"); + enable_monitor_debug_exceptions(); +} - mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; - write_sysreg(mdscr, mdscr_el1); +static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t wcr; + uint64_t ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | + DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); + write_dbgwcr(addr_wp, wcr); + write_dbgwvr(addr_wp, addr); + isb(); + + enable_monitor_debug_exceptions(); +} + +void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, + uint64_t ctx) +{ + uint32_t addr_bcr, ctx_bcr; + + /* Setup a context-aware breakpoint for Linked Context ID Match */ + ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_CTX_LINK; + write_dbgbcr(ctx_bp, ctx_bcr); + write_dbgbvr(ctx_bp, ctx); + + /* + * Setup a normal breakpoint for Linked Address Match, and link it + * to the context-aware breakpoint. + */ + addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | + DBGBCR_BT_ADDR_LINK_CTX | + ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); + write_dbgbcr(addr_bp, addr_bcr); + write_dbgbvr(addr_bp, addr); isb(); + + enable_monitor_debug_exceptions(); } static void install_ss(void) @@ -101,52 +234,42 @@ static void install_ss(void) static volatile char write_data; -static void guest_code(void) +static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { - GUEST_SYNC(0); + uint64_t ctx = 0xabcdef; /* a random context number */ /* Software-breakpoint */ reset_debug_state(); asm volatile("sw_bp: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); - GUEST_SYNC(1); - /* Hardware-breakpoint */ reset_debug_state(); - install_hw_bp(PC(hw_bp)); + install_hw_bp(bpn, PC(hw_bp)); asm volatile("hw_bp: nop"); GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); - GUEST_SYNC(2); - /* Hardware-breakpoint + svc */ reset_debug_state(); - install_hw_bp(PC(bp_svc)); + install_hw_bp(bpn, PC(bp_svc)); asm volatile("bp_svc: svc #0"); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); - GUEST_SYNC(3); - /* Hardware-breakpoint + software-breakpoint */ reset_debug_state(); - install_hw_bp(PC(bp_brk)); + install_hw_bp(bpn, PC(bp_brk)); asm volatile("bp_brk: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); - GUEST_SYNC(4); - /* Watchpoint */ reset_debug_state(); - install_wp(PC(write_data)); + install_wp(wpn, PC(write_data)); write_data = 'x'; GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); - GUEST_SYNC(5); - /* Single-step */ reset_debug_state(); install_ss(); @@ -160,8 +283,6 @@ static void guest_code(void) GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); - GUEST_SYNC(6); - /* OS Lock does not block software-breakpoint */ reset_debug_state(); enable_os_lock(); @@ -169,30 +290,24 @@ static void guest_code(void) asm volatile("sw_bp2: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); - GUEST_SYNC(7); - /* OS Lock blocking hardware-breakpoint */ reset_debug_state(); enable_os_lock(); - install_hw_bp(PC(hw_bp2)); + install_hw_bp(bpn, PC(hw_bp2)); hw_bp_addr = 0; asm volatile("hw_bp2: nop"); GUEST_ASSERT_EQ(hw_bp_addr, 0); - GUEST_SYNC(8); - /* OS Lock blocking watchpoint */ reset_debug_state(); enable_os_lock(); write_data = '\0'; wp_data_addr = 0; - install_wp(PC(write_data)); + install_wp(wpn, PC(write_data)); write_data = 'x'; GUEST_ASSERT_EQ(write_data, 'x'); GUEST_ASSERT_EQ(wp_data_addr, 0); - GUEST_SYNC(9); - /* OS Lock blocking single-step */ reset_debug_state(); enable_os_lock(); @@ -205,6 +320,27 @@ static void guest_code(void) : : : "x0"); GUEST_ASSERT_EQ(ss_addr[0], 0); + /* Linked hardware-breakpoint */ + hw_bp_addr = 0; + reset_debug_state(); + install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + asm volatile("hw_bp_ctx: nop"); + write_sysreg(0, contextidr_el1); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx)); + + /* Linked watchpoint */ + reset_debug_state(); + install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx); + /* Set context id */ + write_sysreg(ctx, contextidr_el1); + isb(); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + GUEST_DONE(); } @@ -239,11 +375,6 @@ static void guest_svc_handler(struct ex_regs *regs) svc_addr = regs->pc; } -enum single_step_op { - SINGLE_STEP_ENABLE = 0, - SINGLE_STEP_DISABLE = 1, -}; - static void guest_code_ss(int test_cnt) { uint64_t i; @@ -254,11 +385,19 @@ static void guest_code_ss(int test_cnt) w_bvr = i << 2; w_wvr = i << 2; - /* Enable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_ENABLE); + /* + * Enable Single Step execution. Note! This _must_ be a bare + * ucall as the ucall() path uses atomic operations to manage + * the ucall structures, and the built-in "atomics" are usually + * implemented via exclusive access instructions. The exlusive + * monitor is cleared on ERET, and so taking debug exceptions + * during a LDREX=>STREX sequence will prevent forward progress + * and hang the guest/test. + */ + GUEST_UCALL_NONE(); /* - * The userspace will veriry that the pc is as expected during + * The userspace will verify that the pc is as expected during * single step execution between iter_ss_begin and iter_ss_end. */ asm volatile("iter_ss_begin:nop\n"); @@ -268,34 +407,27 @@ static void guest_code_ss(int test_cnt) bvr = read_sysreg(dbgbvr0_el1); wvr = read_sysreg(dbgwvr0_el1); + /* Userspace disables Single Step when the end is nigh. */ asm volatile("iter_ss_end:\n"); - /* Disable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_DISABLE); - GUEST_ASSERT(bvr == w_bvr); GUEST_ASSERT(wvr == w_wvr); } GUEST_DONE(); } -static int debug_version(struct kvm_vcpu *vcpu) +static int debug_version(uint64_t id_aa64dfr0) { - uint64_t id_aa64dfr0; - - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0); - return id_aa64dfr0 & 0xf; + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); } -static void test_guest_debug_exceptions(void) +static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - int stage; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -311,23 +443,19 @@ static void test_guest_debug_exceptions(void) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); - for (stage = 0; stage < 11; stage++) { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Stage %d: Unexpected sync ucall, got %lx", - stage, (ulong)uc.args[1]); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } + /* Specify bpn/wpn/ctx_bpn to be tested */ + vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); + pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); } done: @@ -346,7 +474,6 @@ void test_single_step_from_userspace(int test_cnt) struct kvm_guest_debug debug = {}; vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); - ucall_init(vm, NULL); run = vcpu->run; vcpu_args_set(vcpu, 1, test_cnt); @@ -361,18 +488,12 @@ void test_single_step_from_userspace(int test_cnt) break; } - TEST_ASSERT(cmd == UCALL_SYNC, + TEST_ASSERT(cmd == UCALL_NONE, "Unexpected ucall cmd 0x%lx", cmd); - if (uc.args[1] == SINGLE_STEP_ENABLE) { - debug.control = KVM_GUESTDBG_ENABLE | - KVM_GUESTDBG_SINGLESTEP; - ss_enable = true; - } else { - debug.control = SINGLE_STEP_DISABLE; - ss_enable = false; - } - + debug.control = KVM_GUESTDBG_ENABLE | + KVM_GUESTDBG_SINGLESTEP; + ss_enable = true; vcpu_guest_debug_set(vcpu, &debug); continue; } @@ -385,6 +506,14 @@ void test_single_step_from_userspace(int test_cnt) "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); + if ((pc + 4) == (uint64_t)&iter_ss_end) { + test_pc = 0; + debug.control = KVM_GUESTDBG_ENABLE; + ss_enable = false; + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + /* * If the current pc is between iter_ss_bgin and * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should @@ -400,6 +529,43 @@ void test_single_step_from_userspace(int test_cnt) kvm_vm_free(vm); } +/* + * Run debug testing using the various breakpoint#, watchpoint# and + * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. + */ +void test_guest_debug_exceptions_all(uint64_t aa64dfr0) +{ + uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; + int b, w, c; + + /* Number of breakpoints */ + brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; + __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); + + /* Number of watchpoints */ + wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1; + + /* Number of context aware breakpoints */ + ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1; + + pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, + brp_num, wrp_num, ctx_brp_num); + + /* Number of normal (non-context aware) breakpoints */ + normal_brp_num = brp_num - ctx_brp_num; + + /* Lowest context aware breakpoint number */ + ctx_brp_base = normal_brp_num; + + /* Run tests with all supported breakpoints/watchpoints */ + for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) { + for (b = 0; b < normal_brp_num; b++) { + for (w = 0; w < wrp_num; w++) + test_guest_debug_exceptions(b, w, c); + } + } +} + static void help(char *name) { puts(""); @@ -414,16 +580,18 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; int opt; int ss_iteration = 10000; + uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - __TEST_REQUIRE(debug_version(vcpu) >= 6, + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); while ((opt = getopt(argc, argv, "i:")) != -1) { switch (opt) { case 'i': - ss_iteration = atoi(optarg); + ss_iteration = atoi_positive("Number of iterations", optarg); break; case 'h': default: @@ -432,7 +600,7 @@ int main(int argc, char *argv[]) } } - test_guest_debug_exceptions(); + test_guest_debug_exceptions_all(aa64dfr0); test_single_step_from_userspace(ss_iteration); return 0; diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index a39da3fe4952..bef1499fb465 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -236,7 +236,6 @@ static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) vm = vm_create_with_one_vcpu(vcpu, guest_code); - ucall_init(vm, NULL); steal_time_init(*vcpu); return vm; @@ -306,8 +305,6 @@ static void test_run(void) int main(void) { - setbuf(stdout, NULL); - test_run(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c new file mode 100644 index 000000000000..54680dc5887f --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -0,0 +1,1136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * page_fault_test.c - Test stage 2 faults. + * + * This test tries different combinations of guest accesses (e.g., write, + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on + * hugetlbfs with a hole). It checks that the expected handling method is + * called (e.g., uffd faults with the right address and write/read flag). + */ + +#define _GNU_SOURCE +#include <linux/bitmap.h> +#include <fcntl.h> +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> +#include <asm/sysreg.h> +#include <linux/bitfield.h> +#include "guest_modes.h" +#include "userfaultfd_util.h" + +/* Guest virtual addresses that point to the test page and its PTE. */ +#define TEST_GVA 0xc0000000 +#define TEST_EXEC_GVA (TEST_GVA + 0x8) +#define TEST_PTE_GVA 0xb0000000 +#define TEST_DATA 0x0123456789ABCDEF + +static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; + +#define CMD_NONE (0) +#define CMD_SKIP_TEST (1ULL << 1) +#define CMD_HOLE_PT (1ULL << 2) +#define CMD_HOLE_DATA (1ULL << 3) +#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) +#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) +#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) +#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) +#define CMD_SET_PTE_AF (1ULL << 8) + +#define PREPARE_FN_NR 10 +#define CHECK_FN_NR 10 + +static struct event_cnt { + int mmio_exits; + int fail_vcpu_runs; + int uffd_faults; + /* uffd_faults is incremented from multiple threads. */ + pthread_mutex_t uffd_faults_mutex; +} events; + +struct test_desc { + const char *name; + uint64_t mem_mark_cmd; + /* Skip the test if any prepare function returns false */ + bool (*guest_prepare[PREPARE_FN_NR])(void); + void (*guest_test)(void); + void (*guest_test_check[CHECK_FN_NR])(void); + uffd_handler_t uffd_pt_handler; + uffd_handler_t uffd_data_handler; + void (*dabt_handler)(struct ex_regs *regs); + void (*iabt_handler)(struct ex_regs *regs); + void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); + void (*fail_vcpu_run_handler)(int ret); + uint32_t pt_memslot_flags; + uint32_t data_memslot_flags; + bool skip; + struct event_cnt expected_events; +}; + +struct test_params { + enum vm_mem_backing_src_type src_type; + struct test_desc *test_desc; +}; + +static inline void flush_tlb_page(uint64_t vaddr) +{ + uint64_t page = vaddr >> 12; + + dsb(ishst); + asm volatile("tlbi vaae1is, %0" :: "r" (page)); + dsb(ish); + isb(); +} + +static void guest_write64(void) +{ + uint64_t val; + + WRITE_ONCE(*guest_test_memory, TEST_DATA); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +/* Check the system for atomic instructions. */ +static bool guest_check_lse(void) +{ + uint64_t isar0 = read_sysreg(id_aa64isar0_el1); + uint64_t atomic; + + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + return atomic >= 2; +} + +static bool guest_check_dc_zva(void) +{ + uint64_t dczid = read_sysreg(dczid_el0); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + + return dzp == 0; +} + +/* Compare and swap instruction. */ +static void guest_cas(void) +{ + uint64_t val; + + GUEST_ASSERT(guest_check_lse()); + asm volatile(".arch_extension lse\n" + "casal %0, %1, [%2]\n" + :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +static void guest_read64(void) +{ + uint64_t val; + + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* Address translation instruction */ +static void guest_at(void) +{ + uint64_t par; + + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); + par = read_sysreg(par_el1); + isb(); + + /* Bit 1 indicates whether the AT was successful */ + GUEST_ASSERT_EQ(par & 1, 0); +} + +/* + * The size of the block written by "dc zva" is guaranteed to be between (2 << + * 0) and (2 << 9), which is safe in our case as we need the write to happen + * for at least a word, and not more than a page. + */ +static void guest_dc_zva(void) +{ + uint16_t val; + + asm volatile("dc zva, %0" :: "r" (guest_test_memory)); + dsb(ish); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). + * And that's special because KVM must take special care with those: they + * should still count as accesses for dirty logging or user-faulting, but + * should be handled differently on mmio. + */ +static void guest_ld_preidx(void) +{ + uint64_t val; + uint64_t addr = TEST_GVA - 8; + + /* + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is + * in a gap between memslots not backing by anything. + */ + asm volatile("ldr %0, [%1, #8]!" + : "=r" (val), "+r" (addr)); + GUEST_ASSERT_EQ(val, 0); + GUEST_ASSERT_EQ(addr, TEST_GVA); +} + +static void guest_st_preidx(void) +{ + uint64_t val = TEST_DATA; + uint64_t addr = TEST_GVA - 8; + + asm volatile("str %0, [%1, #8]!" + : "+r" (val), "+r" (addr)); + + GUEST_ASSERT_EQ(addr, TEST_GVA); + val = READ_ONCE(*guest_test_memory); +} + +static bool guest_set_ha(void) +{ + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); + uint64_t hadbs, tcr; + + /* Skip if HA is not supported. */ + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + if (hadbs == 0) + return false; + + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + write_sysreg(tcr, tcr_el1); + isb(); + + return true; +} + +static bool guest_clear_pte_af(void) +{ + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + flush_tlb_page(TEST_GVA); + + return true; +} + +static void guest_check_pte_af(void) +{ + dsb(ish); + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); +} + +static void guest_check_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_no_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); +} + +static void guest_check_no_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG); +} + +static void guest_exec(void) +{ + int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; + int ret; + + ret = code(); + GUEST_ASSERT_EQ(ret, 0x77); +} + +static bool guest_prepare(struct test_desc *test) +{ + bool (*prepare_fn)(void); + int i; + + for (i = 0; i < PREPARE_FN_NR; i++) { + prepare_fn = test->guest_prepare[i]; + if (prepare_fn && !prepare_fn()) + return false; + } + + return true; +} + +static void guest_test_check(struct test_desc *test) +{ + void (*check_fn)(void); + int i; + + for (i = 0; i < CHECK_FN_NR; i++) { + check_fn = test->guest_test_check[i]; + if (check_fn) + check_fn(); + } +} + +static void guest_code(struct test_desc *test) +{ + if (!guest_prepare(test)) + GUEST_SYNC(CMD_SKIP_TEST); + + GUEST_SYNC(test->mem_mark_cmd); + + if (test->guest_test) + test->guest_test(); + + guest_test_check(test); + GUEST_DONE(); +} + +static void no_dabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, read_sysreg(far_el1)); +} + +static void no_iabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, regs->pc); +} + +static struct uffd_args { + char *copy; + void *hva; + uint64_t paging_size; +} pt_args, data_args; + +/* Returns true to continue the test, and false if it should be skipped. */ +static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, + struct uffd_args *args) +{ + uint64_t addr = msg->arg.pagefault.address; + uint64_t flags = msg->arg.pagefault.flags; + struct uffdio_copy copy; + int ret; + + TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, + "The only expected UFFD mode is MISSING"); + ASSERT_EQ(addr, (uint64_t)args->hva); + + pr_debug("uffd fault: addr=%p write=%d\n", + (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); + + copy.src = (uint64_t)args->copy; + copy.dst = addr; + copy.len = args->paging_size; + copy.mode = 0; + + ret = ioctl(uffd, UFFDIO_COPY, ©); + if (ret == -1) { + pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", + addr, errno); + return ret; + } + + pthread_mutex_lock(&events.uffd_faults_mutex); + events.uffd_faults += 1; + pthread_mutex_unlock(&events.uffd_faults_mutex); + return 0; +} + +static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &pt_args); +} + +static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args); +} + +static void setup_uffd_args(struct userspace_mem_region *region, + struct uffd_args *args) +{ + args->hva = (void *)region->region.userspace_addr; + args->paging_size = region->region.memory_size; + + args->copy = malloc(args->paging_size); + TEST_ASSERT(args->copy, "Failed to allocate data copy."); + memcpy(args->copy, args->hva, args->paging_size); +} + +static void setup_uffd(struct kvm_vm *vm, struct test_params *p, + struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) +{ + struct test_desc *test = p->test_desc; + int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; + + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); + + *pt_uffd = NULL; + if (test->uffd_pt_handler) + *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, + pt_args.hva, + pt_args.paging_size, + test->uffd_pt_handler); + + *data_uffd = NULL; + if (test->uffd_data_handler) + *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, + data_args.hva, + data_args.paging_size, + test->uffd_data_handler); +} + +static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, + struct uffd_desc *data_uffd) +{ + if (test->uffd_pt_handler) + uffd_stop_demand_paging(pt_uffd); + if (test->uffd_data_handler) + uffd_stop_demand_paging(data_uffd); + + free(pt_args.copy); + free(data_args.copy); +} + +static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) +{ + TEST_FAIL("There was no UFFD fault expected."); + return -1; +} + +/* Returns false if the test should be skipped. */ +static bool punch_hole_in_backing_store(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + void *hva = (void *)region->region.userspace_addr; + uint64_t paging_size = region->region.memory_size; + int ret, fd = region->fd; + + if (fd != -1) { + ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + 0, paging_size); + TEST_ASSERT(ret == 0, "fallocate failed\n"); + } else { + ret = madvise(hva, paging_size, MADV_DONTNEED); + TEST_ASSERT(ret == 0, "madvise failed\n"); + } + + return true; +} + +static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + + memcpy(hva, run->mmio.data, run->mmio.len); + events.mmio_exits += 1; +} + +static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + uint64_t data; + + memcpy(&data, run->mmio.data, sizeof(data)); + pr_debug("addr=%lld len=%d w=%d data=%lx\n", + run->mmio.phys_addr, run->mmio.len, + run->mmio.is_write, data); + TEST_FAIL("There was no MMIO exit expected."); +} + +static bool check_write_in_dirty_log(struct kvm_vm *vm, + struct userspace_mem_region *region, + uint64_t host_pg_nr) +{ + unsigned long *bmap; + bool first_page_dirty; + uint64_t size = region->region.memory_size; + + /* getpage_size() is not always equal to vm->page_size */ + bmap = bitmap_zalloc(size / getpagesize()); + kvm_vm_get_dirty_log(vm, region->region.slot, bmap); + first_page_dirty = test_bit(host_pg_nr, bmap); + free(bmap); + return first_page_dirty; +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool handle_cmd(struct kvm_vm *vm, int cmd) +{ + struct userspace_mem_region *data_region, *pt_region; + bool continue_test = true; + uint64_t pte_gpa, pte_pg; + + data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + pt_region = vm_get_mem_region(vm, MEM_REGION_PT); + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize(); + + if (cmd == CMD_SKIP_TEST) + continue_test = false; + + if (cmd & CMD_HOLE_PT) + continue_test = punch_hole_in_backing_store(vm, pt_region); + if (cmd & CMD_HOLE_DATA) + continue_test = punch_hole_in_backing_store(vm, data_region); + if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), + "Missing write in dirty log"); + if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg), + "Missing s1ptw write in dirty log"); + if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), + "Unexpected write in dirty log"); + if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg), + "Unexpected s1ptw write in dirty log"); + + return continue_test; +} + +void fail_vcpu_run_no_handler(int ret) +{ + TEST_FAIL("Unexpected vcpu run failure\n"); +} + +void fail_vcpu_run_mmio_no_syndrome_handler(int ret) +{ + TEST_ASSERT(errno == ENOSYS, + "The mmio handler should have returned not implemented."); + events.fail_vcpu_runs += 1; +} + +typedef uint32_t aarch64_insn_t; +extern aarch64_insn_t __exec_test[2]; + +noinline void __return_0x77(void) +{ + asm volatile("__exec_test: mov x0, #0x77\n" + "ret\n"); +} + +/* + * Note that this function runs on the host before the test VM starts: there's + * no need to sync the D$ and I$ caches. + */ +static void load_exec_code_for_test(struct kvm_vm *vm) +{ + uint64_t *code; + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + assert(TEST_EXEC_GVA > TEST_GVA); + code = hva + TEST_EXEC_GVA - TEST_GVA; + memcpy(code, __exec_test, sizeof(__exec_test)); +} + +static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_DABT, no_dabt_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_IABT, no_iabt_handler); +} + +static void setup_gva_maps(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + uint64_t pte_gpa; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + /* Map TEST_GVA first. This will install a new PTE. */ + virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); + /* Then map TEST_PTE_GVA to the above PTE. */ + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); +} + +enum pf_test_memslots { + CODE_AND_DATA_MEMSLOT, + PAGE_TABLE_MEMSLOT, + TEST_DATA_MEMSLOT, +}; + +/* + * Create a memslot for code and data at pfn=0, and test-data and PT ones + * at max_gfn. + */ +static void setup_memslots(struct kvm_vm *vm, struct test_params *p) +{ + uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); + uint64_t guest_page_size = vm->page_size; + uint64_t max_gfn = vm_compute_max_gfn(vm); + /* Enough for 2M of code when using 4K guest pages. */ + uint64_t code_npages = 512; + uint64_t pt_size, data_size, data_gpa; + + /* + * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using + * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13 + * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use + * twice that just in case. + */ + pt_size = 26 * guest_page_size; + + /* memslot sizes and gpa's must be aligned to the backing page size */ + pt_size = align_up(pt_size, backing_src_pagesz); + data_size = align_up(guest_page_size, backing_src_pagesz); + data_gpa = (max_gfn * guest_page_size) - data_size; + data_gpa = align_down(data_gpa, backing_src_pagesz); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, + CODE_AND_DATA_MEMSLOT, code_npages, 0); + vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; + vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, + PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, + p->test_desc->pt_memslot_flags); + vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, + data_size / guest_page_size, + p->test_desc->data_memslot_flags); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void setup_ucall(struct kvm_vm *vm) +{ + struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + + ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size); +} + +static void setup_default_handlers(struct test_desc *test) +{ + if (!test->mmio_handler) + test->mmio_handler = mmio_no_handler; + + if (!test->fail_vcpu_run_handler) + test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; +} + +static void check_event_counts(struct test_desc *test) +{ + ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); +} + +static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) +{ + struct test_desc *test = p->test_desc; + + pr_debug("Test: %s\n", test->name); + pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(p->src_type)->name); +} + +static void reset_event_counts(void) +{ + memset(&events, 0, sizeof(events)); +} + +/* + * This function either succeeds, skips the test (after setting test->skip), or + * fails with a TEST_FAIL that aborts all tests. + */ +static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + struct kvm_run *run; + struct ucall uc; + int ret; + + run = vcpu->run; + + for (;;) { + ret = _vcpu_run(vcpu); + if (ret) { + test->fail_vcpu_run_handler(ret); + goto done; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!handle_cmd(vm, uc.args[1])) { + test->skip = true; + goto done; + } + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + case UCALL_NONE: + if (run->exit_reason == KVM_EXIT_MMIO) + test->mmio_handler(vm, run); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + pr_debug(test->skip ? "Skipped.\n" : "Done.\n"); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = (struct test_params *)arg; + struct test_desc *test = p->test_desc; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct uffd_desc *pt_uffd, *data_uffd; + + print_test_banner(mode, p); + + vm = ____vm_create(mode); + setup_memslots(vm, p); + kvm_vm_elf_load(vm, program_invocation_name); + setup_ucall(vm); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + setup_gva_maps(vm); + + reset_event_counts(); + + /* + * Set some code in the data memslot for the guest to execute (only + * applicable to the EXEC tests). This has to be done before + * setup_uffd() as that function copies the memslot data for the uffd + * handler. + */ + load_exec_code_for_test(vm); + setup_uffd(vm, p, &pt_uffd, &data_uffd); + setup_abort_handlers(vm, vcpu, test); + setup_default_handlers(test); + vcpu_args_set(vcpu, 1, test); + + vcpu_run_loop(vm, vcpu, test); + + kvm_vm_free(vm); + free_uffd(test, pt_uffd, data_uffd); + + /* + * Make sure we check the events after the uffd threads have exited, + * which means they updated their respective event counters. + */ + if (!test->skip) + check_event_counts(test); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-s mem-type]\n", name); + puts(""); + guest_modes_help(); + backing_src_help("-s"); + puts(""); +} + +#define SNAME(s) #s +#define SCAT2(a, b) SNAME(a ## _ ## b) +#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) +#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) + +#define _CHECK(_test) _CHECK_##_test +#define _PREPARE(_test) _PREPARE_##_test +#define _PREPARE_guest_read64 NULL +#define _PREPARE_guest_ld_preidx NULL +#define _PREPARE_guest_write64 NULL +#define _PREPARE_guest_st_preidx NULL +#define _PREPARE_guest_exec NULL +#define _PREPARE_guest_at NULL +#define _PREPARE_guest_dc_zva guest_check_dc_zva +#define _PREPARE_guest_cas guest_check_lse + +/* With or without access flag checks */ +#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af +#define _PREPARE_no_af NULL +#define _CHECK_with_af guest_check_pte_af +#define _CHECK_no_af NULL + +/* Performs an access and checks that no faults were triggered. */ +#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ +{ \ + .name = SCAT3(_access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD(_access, _with_af, _mark_cmd, \ + _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \ +{ \ + .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = _uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \ +{ \ + .name = SCAT3(dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ + _uffd_faults, _test_check, _pt_check) \ +{ \ + .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ +{ \ + .name = SCAT2(ro_memslot, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_test = _access, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ + _test_check) \ +{ \ + .name = SCAT2(ro_memslot, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits}, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ +{ \ + .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ + _uffd_data_handler, _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_uffd, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits, \ + .uffd_faults = _uffd_faults }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ + _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1, \ + .uffd_faults = _uffd_faults }, \ +} + +static struct test_desc tests[] = { + + /* Check that HW is setting the Access Flag (AF) (sanity checks). */ + TEST_ACCESS(guest_read64, with_af, CMD_NONE), + TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_cas, with_af, CMD_NONE), + TEST_ACCESS(guest_write64, with_af, CMD_NONE), + TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), + TEST_ACCESS(guest_exec, with_af, CMD_NONE), + + /* + * Punch a hole in the data backing store, and then try multiple + * accesses: reads should rturn zeroes, and writes should + * re-populate the page. Moreover, the test also check that no + * exception was generated in the guest. Note that this + * reading/writing behavior is the same as reading/writing a + * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from + * userspace. + */ + TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), + + /* + * Punch holes in the data and PT backing stores and mark them for + * userfaultfd handling. This should result in 2 faults: the access + * on the data backing store, and its respective S1 page table walk + * (S1PTW). + */ + TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + /* + * Can't test guest_at with_af as it's IMPDEF whether the AF is set. + * The S1PTW fault should still be marked as a write. + */ + TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_no_handler, uffd_pt_handler, 1), + TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_handler, uffd_pt_handler, 2), + + /* + * Try accesses when the data and PT memory regions are both + * tracked for dirty logging. + */ + TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_ld_preidx, with_af, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + + /* + * Access when the data and PT memory regions are both marked for + * dirty logging and UFFD at the same time. The expected result is + * that writes should mark the dirty log and trigger a userfaultfd + * write fault. Reads/execs should result in a read userfaultfd + * fault, and nothing in the dirty log. Any S1PTW should result in + * a write in the dirty log and a userfaultfd write. + */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, + uffd_data_handler, + 2, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, + uffd_data_handler, + 2, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, + uffd_data_handler, 2, + guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, + uffd_data_handler, + 2, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, + uffd_data_handler, 2, + guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + /* + * Access when both the PT and data regions are marked read-only + * (with KVM_MEM_READONLY). Writes with a syndrome result in an + * MMIO exit, writes with no syndrome (e.g., CAS) result in a + * failed vcpu run, and reads/execs with and without syndroms do + * not fault. + */ + TEST_RO_MEMSLOT(guest_read64, 0, 0), + TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), + TEST_RO_MEMSLOT(guest_at, 0, 0), + TEST_RO_MEMSLOT(guest_exec, 0, 0), + TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), + + /* + * The PT and data regions are both read-only and marked + * for dirty logging at the same time. The expected result is that + * for writes there should be no write in the dirty log. The + * readonly handling is the same as if the memslot was not marked + * for dirty logging: writes with a syndrome result in an MMIO + * exit, and writes with no syndrome result in a failed vcpu run. + */ + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, + 1, guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, + guest_check_no_write_in_dirty_log), + + /* + * The PT and data regions are both read-only and punched with + * holes tracked with userfaultfd. The expected result is the + * union of both userfaultfd and read-only behaviors. For example, + * write accesses result in a userfaultfd write fault and an MMIO + * exit. Writes with no syndrome result in a failed vcpu run and + * no userfaultfd write fault. Reads result in userfaultfd getting + * triggered. + */ + TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1), + TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, + uffd_data_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1), + + { 0 } +}; + +static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) +{ + struct test_desc *t; + + for (t = &tests[0]; t->name; t++) { + if (t->skip) + continue; + + struct test_params p = { + .src_type = src_type, + .test_desc = t, + }; + + for_each_guest_mode(run_test, &p); + } +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type; + int opt; + + setbuf(stdout, NULL); + + src_type = DEFAULT_VM_MEM_SRC; + + while ((opt = getopt(argc, argv, "hm:s:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_test_and_guest_mode(src_type); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index e0b9e81a3e09..cfa36f387948 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -79,7 +79,6 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, struct kvm_vm *vm; vm = vm_create(2); - ucall_init(vm, NULL); vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index 9c131d977a1b..eef816b80993 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -68,8 +68,6 @@ static void guest_code(void) /* we don't want to assert on run execution, hence that helper */ static int run_vcpu(struct kvm_vcpu *vcpu) { - ucall_init(vcpu->vm, NULL); - return __vcpu_run(vcpu) ? -errno : 0; } diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 17417220a083..90d854e0fcff 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -756,7 +756,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) print_args(&args); vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -818,22 +817,19 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': - nr_irqs = atoi(optarg); + nr_irqs = atoi_non_negative("Number of IRQs", optarg); if (nr_irqs > 1024 || nr_irqs % 32) help(argv[0]); break; case 'e': - eoi_split = (bool)atoi(optarg); + eoi_split = (bool)atoi_paranoid(optarg); default_args = false; break; case 'l': - level_sensitive = (bool)atoi(optarg); + level_sensitive = (bool)atoi_paranoid(optarg); default_args = false; break; case 'h': diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 76c583a07ea2..3c7defd34f56 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -44,8 +44,9 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" +#include "processor.h" /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; @@ -58,9 +59,6 @@ static enum { ITERATION_MARK_IDLE, } iteration_work; -/* Set to true when vCPU threads should exit. */ -static bool done; - /* The iteration that was last completed by each vCPU. */ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; @@ -126,7 +124,7 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) } static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct perf_test_vcpu_args *vcpu_args) + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t base_gva = vcpu_args->gva; @@ -148,7 +146,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t gva = base_gva + page * memstress_args.guest_page_size; uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { @@ -180,16 +178,21 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * access tracking but low enough as to not make the test too brittle * over time and across architectures. * - * Note that when run in nested virtualization, this check will trigger - * much more frequently because TLB size is unlimited and since no flush - * happens, much more pages are cached there and guest won't see the - * "idle" bit cleared. + * When running the guest as a nested VM, "warn" instead of asserting + * as the TLB size is effectively unlimited and the KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. */ - if (still_idle < pages / 10) - printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64 - "out of %" PRIu64 "), this will affect performance results" - ".\n", + if (still_idle >= pages / 10) { +#ifdef __x86_64__ + TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), + "vCPU%d: Too many pages still idle (%lu out of %lu)", + vcpu_idx, still_idle, pages); +#endif + printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", vcpu_idx, still_idle, pages); + } close(page_idle_fd); close(pagemap_fd); @@ -211,7 +214,7 @@ static bool spin_wait_for_next_iteration(int *current_iteration) int last_iteration = *current_iteration; do { - if (READ_ONCE(done)) + if (READ_ONCE(memstress_args.stop_vcpus)) return false; *current_iteration = READ_ONCE(iteration); @@ -220,10 +223,10 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vm *vm = memstress_args.vm; int vcpu_idx = vcpu_args->vcpu_idx; int current_iteration = 0; @@ -279,7 +282,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti static void access_memory(struct kvm_vm *vm, int nr_vcpus, enum access_type access, const char *description) { - perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); + memstress_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, nr_vcpus, description); } @@ -303,10 +306,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; int nr_vcpus = params->nr_vcpus; - vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, + vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); @@ -321,11 +324,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) mark_memory_idle(vm, nr_vcpus); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory"); - /* Set done to signal the vCPU threads to exit */ - done = true; - - perf_test_join_vcpu_threads(nr_vcpus); - perf_test_destroy_vm(vm); + memstress_join_vcpu_threads(nr_vcpus); + memstress_destroy_vm(vm); } static void help(char *name) @@ -368,7 +368,7 @@ int main(int argc, char *argv[]) params.vcpu_memory_bytes = parse_size(optarg); break; case 'v': - params.nr_vcpus = atoi(optarg); + params.nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'o': overlap_memory_access = true; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 779ae54f89c4..b0e1fc4de9e2 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -20,29 +20,19 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" +#include "userfaultfd_util.h" #ifdef __NR_userfaultfd -#ifdef PRINT_PER_PAGE_UPDATES -#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - -#ifdef PRINT_PER_VCPU_UPDATES -#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + static size_t demand_paging_size; static char *guest_data_prototype; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) ts_diff.tv_sec, ts_diff.tv_nsec); } -static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) +static int handle_uffd_page_request(int uffd_mode, int uffd, + struct uffd_msg *msg) { pid_t tid = syscall(__NR_gettid); + uint64_t addr = msg->arg.pagefault.address; struct timespec start; struct timespec ts_diff; int r; @@ -116,176 +108,34 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) return 0; } -bool quit_uffd_thread; - -struct uffd_handler_args { +struct test_params { int uffd_mode; - int uffd; - int pipefd; - useconds_t delay; + useconds_t uffd_delay; + enum vm_mem_backing_src_type src_type; + bool partition_vcpu_memory_access; }; -static void *uffd_handler_thread_fn(void *arg) -{ - struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; - int uffd = uffd_args->uffd; - int pipefd = uffd_args->pipefd; - useconds_t delay = uffd_args->delay; - int64_t pages = 0; - struct timespec start; - struct timespec ts_diff; - - clock_gettime(CLOCK_MONOTONIC, &start); - while (!quit_uffd_thread) { - struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; - int r; - uint64_t addr; - - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; - - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } - - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); - TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); - return NULL; - } - - if (!(pollfd[0].revents & POLLIN)) - continue; - - r = read(uffd, &msg, sizeof(msg)); - if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; - } - - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } - - if (!(msg.event & UFFD_EVENT_PAGEFAULT)) - continue; - - if (delay) - usleep(delay); - addr = msg.arg.pagefault.address; - r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); - if (r < 0) - return NULL; - pages++; - } - - ts_diff = timespec_elapsed(start); - PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", - pages, ts_diff.tv_sec, ts_diff.tv_nsec, - pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - - return NULL; -} - -static void setup_demand_paging(struct kvm_vm *vm, - pthread_t *uffd_handler_thread, int pipefd, - int uffd_mode, useconds_t uffd_delay, - struct uffd_handler_args *uffd_args, - void *hva, void *alias, uint64_t len) +static void prefault_mem(void *alias, uint64_t len) { - bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); - int uffd; - struct uffdio_api uffdio_api; - struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; + size_t p; - PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", - is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); - - /* In order to get minor faults, prefault via the alias. */ - if (is_minor) { - size_t p; - - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; - - TEST_ASSERT(alias != NULL, "Alias required for minor faults"); - for (p = 0; p < (len / demand_paging_size); ++p) { - memcpy(alias + (p * demand_paging_size), - guest_data_prototype, demand_paging_size); - } + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); + for (p = 0; p < (len / demand_paging_size); ++p) { + memcpy(alias + (p * demand_paging_size), + guest_data_prototype, demand_paging_size); } - - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd)); - - uffdio_api.api = UFFD_API; - uffdio_api.features = 0; - ret = ioctl(uffd, UFFDIO_API, &uffdio_api); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret)); - - uffdio_register.range.start = (uint64_t)hva; - uffdio_register.range.len = len; - uffdio_register.mode = uffd_mode; - ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret)); - TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == - expected_ioctls, "missing userfaultfd ioctls"); - - uffd_args->uffd_mode = uffd_mode; - uffd_args->uffd = uffd; - uffd_args->pipefd = pipefd; - uffd_args->delay = uffd_delay; - pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, - uffd_args); - - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); } -struct test_params { - int uffd_mode; - useconds_t uffd_delay; - enum vm_mem_backing_src_type src_type; - bool partition_vcpu_memory_access; -}; - static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *uffd_handler_threads = NULL; - struct uffd_handler_args *uffd_args = NULL; + struct uffd_desc **uffd_descs = NULL; struct timespec start; struct timespec ts_diff; - int *pipefds = NULL; struct kvm_vm *vm; - int r, i; + int i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -296,79 +146,61 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode) { - uffd_handler_threads = - malloc(nr_vcpus * sizeof(*uffd_handler_threads)); - TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - - uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); - TEST_ASSERT(uffd_args, "Memory allocation failed"); - - pipefds = malloc(sizeof(int) * nr_vcpus * 2); - TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); + uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + TEST_ASSERT(uffd_descs, "Memory allocation failed"); for (i = 0; i < nr_vcpus; i++) { - struct perf_test_vcpu_args *vcpu_args; + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - vcpu_args = &perf_test_args.vcpu_args[i]; + vcpu_args = &memstress_args.vcpu_args[i]; /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); + prefault_mem(vcpu_alias, + vcpu_args->pages * memstress_args.guest_page_size); + /* * Set up user fault fd to handle demand paging * requests. */ - r = pipe2(&pipefds[i * 2], - O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!r, "Failed to set up pipefd"); - - setup_demand_paging(vm, &uffd_handler_threads[i], - pipefds[i * 2], p->uffd_mode, - p->uffd_delay, &uffd_args[i], - vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + uffd_descs[i] = uffd_setup_demand_paging( + p->uffd_mode, p->uffd_delay, vcpu_hva, + vcpu_args->pages * memstress_args.guest_page_size, + &handle_uffd_page_request); } } pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); ts_diff = timespec_elapsed(start); pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { - char c; - /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) { - r = write(pipefds[i * 2 + 1], &c, 1); - TEST_ASSERT(r == 1, "Unable to write to pipefd"); - - pthread_join(uffd_handler_threads[i], NULL); - } + for (i = 0; i < nr_vcpus; i++) + uffd_stop_demand_paging(uffd_descs[i]); } pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * nr_vcpus / + memstress_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); free(guest_data_prototype); - if (p->uffd_mode) { - free(uffd_handler_threads); - free(uffd_args); - free(pipefds); - } + if (p->uffd_mode) + free(uffd_descs); } static void help(char *name) @@ -427,8 +259,8 @@ int main(int argc, char *argv[]) p.src_type = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'o': diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index f99e39a672d3..e9d6d1aecf89 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -16,7 +16,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" #ifdef __aarch64__ @@ -67,7 +67,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -128,10 +128,12 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) struct test_params { unsigned long iterations; uint64_t phys_offset; - int wr_fract; bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; + uint32_t write_percent; + uint32_t random_seed; + bool random_access; }; static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) @@ -139,7 +141,7 @@ static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0; vm_mem_region_set_flags(vm, slot, flags); @@ -161,7 +163,7 @@ static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); } @@ -173,7 +175,7 @@ static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); } @@ -221,11 +223,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; int i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, p->slots, p->backing_src, p->partition_vcpu_memory_access); - perf_test_set_wr_fract(vm, p->wr_fract); + pr_info("Random seed: %u\n", p->random_seed); + memstress_set_random_seed(vm, p->random_seed); + memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -248,7 +252,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) for (i = 0; i < nr_vcpus; i++) vcpu_last_completed_iteration[i] = -1; - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + /* + * Use 100% writes during the population phase to ensure all + * memory is actually populated and not just mapped to the zero + * page. The prevents expensive copy-on-write faults from + * occurring during the dirty memory iterations below, which + * would pollute the performance results. + */ + memstress_set_write_percent(vm, 100); + memstress_set_random_access(vm, false); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -269,6 +282,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); + memstress_set_write_percent(vm, p->write_percent); + memstress_set_random_access(vm, p->random_access); + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs @@ -329,7 +345,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) * wait for them to exit. */ host_quit = true; - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", @@ -345,16 +361,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) free_bitmaps(bitmaps, p->slots); arch_cleanup_vm(vm); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " - "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" - "[-x memslots]\n", name); + printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] " + "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]" + "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name); puts(""); + printf(" -a: access memory randomly rather than in order.\n"); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" @@ -373,16 +390,29 @@ static void help(char *name) printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); - printf(" -f: specify the fraction of pages which should be written to\n" - " as opposed to simply read, in the form\n" - " 1/<fraction of pages to write>.\n" - " (default: 1 i.e. all pages are written to.)\n"); printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -r: specify the starting random seed.\n"); backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); + printf(" -w: specify the percentage of pages which should be written to\n" + " as an integer from 0-100 inclusive. This is probabilistic,\n" + " so -w X means each page has an X%% chance of writing\n" + " and a (100-X)%% chance of reading.\n" + " (default: 100 i.e. all pages are written to.)\n"); + printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" + " values (target pCPU), one for each vCPU, plus an optional\n" + " entry for the main application task (specified via entry\n" + " <nr_vcpus + 1>). If used, entries must be provided for all\n" + " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" + " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" + " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n" + " To leave the application task unpinned, drop the final entry:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24\n\n" + " (default: no pinning)\n"); puts(""); exit(0); } @@ -390,12 +420,14 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + const char *pcpu_list = NULL; struct test_params p = { .iterations = TEST_HOST_LOOP_N, - .wr_fract = 1, .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, + .random_seed = 1, + .write_percent = 100, }; int opt; @@ -406,55 +438,73 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) { switch (opt) { + case 'a': + p.random_access = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'c': + pcpu_list = optarg; + break; case 'e': /* 'e' is for evil. */ run_vcpus_while_disabling_dirty_logging = true; + break; case 'g': dirty_log_manual_caps = 0; break; - case 'i': - p.iterations = atoi(optarg); + case 'h': + help(argv[0]); break; - case 'p': - p.phys_offset = strtoull(optarg, NULL, 0); + case 'i': + p.iterations = atoi_positive("Number of iterations", optarg); break; case 'm': guest_modes_cmdline(optarg); break; case 'n': - perf_test_args.nested = true; - break; - case 'b': - guest_percpu_mem_size = parse_size(optarg); - break; - case 'f': - p.wr_fract = atoi(optarg); - TEST_ASSERT(p.wr_fract >= 1, - "Write fraction cannot be less than one"); - break; - case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, - "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + memstress_args.nested = true; break; case 'o': p.partition_vcpu_memory_access = false; break; + case 'p': + p.phys_offset = strtoull(optarg, NULL, 0); + break; + case 'r': + p.random_seed = atoi_positive("Random seed", optarg); + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; + case 'v': + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + break; + case 'w': + p.write_percent = atoi_non_negative("Write percentage", optarg); + TEST_ASSERT(p.write_percent <= 100, + "Write percentage must be between 0 and 100"); + break; case 'x': - p.slots = atoi(optarg); + p.slots = atoi_positive("Number of slots", optarg); break; - case 'h': default: help(argv[0]); break; } } + if (pcpu_list) { + kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu, + nr_vcpus); + memstress_args.pin_vcpus = true; + } + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", p.iterations); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe1..936f3a8d1b83 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -24,6 +24,9 @@ #include "guest_modes.h" #include "processor.h" +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 @@ -44,20 +47,20 @@ # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) # define test_bit_le(nr, addr) \ test_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define set_bit_le(nr, addr) \ - set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define clear_bit_le(nr, addr) \ - clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define test_and_set_bit_le(nr, addr) \ - test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) -# define test_and_clear_bit_le(nr, addr) \ - test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __set_bit_le(nr, addr) \ + __set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __clear_bit_le(nr, addr) \ + __clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __test_and_set_bit_le(nr, addr) \ + __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define __test_and_clear_bit_le(nr, addr) \ + __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) #else -# define test_bit_le test_bit -# define set_bit_le set_bit -# define clear_bit_le clear_bit -# define test_and_set_bit_le test_and_set_bit -# define test_and_clear_bit_le test_and_clear_bit +# define test_bit_le test_bit +# define __set_bit_le __set_bit +# define __clear_bit_le __clear_bit +# define __test_and_set_bit_le __test_and_set_bit +# define __test_and_clear_bit_le __test_and_clear_bit #endif #define TEST_DIRTY_RING_COUNT 65536 @@ -226,13 +229,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -271,6 +276,24 @@ static bool dirty_ring_supported(void) static void dirty_ring_create_vm_done(struct kvm_vm *vm) { + uint64_t pages; + uint32_t limit; + + /* + * We rely on vcpu exit due to full dirty ring state. Adjust + * the ring buffer size to ensure we're able to reach the + * full dirty ring state. + */ + pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3; + pages = vm_adjust_num_guest_pages(vm->mode, pages); + if (vm->page_size < getpagesize()) + pages = vm_num_host_pages(vm->mode, pages); + + limit = 1 << (31 - __builtin_clz(pages)); + test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count)); + test_dirty_ring_count = min(limit, test_dirty_ring_count); + pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count); + /* * Switch to dirty ring mode after VM creation but before any * of the vcpu creation. @@ -305,7 +328,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); - set_bit_le(cur->offset, bitmap); + __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); (*fetch_index)++; @@ -329,10 +352,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,7 +371,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); @@ -406,7 +429,8 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); void (*before_vcpu_join) (void); @@ -471,13 +495,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -560,7 +585,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... */ - if (test_and_clear_bit_le(page, host_bmap_track)) { + if (__test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++; TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " @@ -568,7 +593,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_and_clear_bit_le(page, bmap)) { + if (__test_and_clear_bit_le(page, bmap)) { bool matched; host_dirty_count++; @@ -661,7 +686,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) * should report its dirtyness in the * next run */ - set_bit_le(page, host_bmap_track); + __set_bit_le(page, host_bmap_track); } } } @@ -681,9 +706,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, return vm; } -#define DIRTY_MEM_BITS 30 /* 1G */ -#define PAGE_SHIFT_4K 12 - struct test_params { unsigned long iterations; unsigned long interval; @@ -696,6 +718,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -756,8 +779,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); - ucall_init(vm, NULL); - /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); @@ -771,6 +792,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +800,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why @@ -813,7 +836,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(host_bmap_track); - ucall_uninit(vm); kvm_vm_free(vm); } @@ -823,7 +845,7 @@ static void help(char *name) printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); puts(""); - printf(" -c: specify dirty ring size, in number of entries\n"); + printf(" -c: hint to dirty ring size, in number of entries\n"); printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", TEST_DIRTY_RING_COUNT); printf(" -i: specify iteration counts (default: %"PRIu64")\n", diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index a8124f9dd68a..5f977528e09c 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -38,12 +38,25 @@ * NORMAL 4 1111:1111 * NORMAL_WT 5 1011:1011 */ -#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \ - (0x04ul << (1 * 8)) | \ - (0x0cul << (2 * 8)) | \ - (0x44ul << (3 * 8)) | \ - (0xfful << (4 * 8)) | \ - (0xbbul << (5 * 8))) + +/* Linux doesn't use these memory types, so let's define them. */ +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 + +#define DEFAULT_MAIR_EL1 \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #define MPIDR_HWID_BITMASK (0xff00fffffful) @@ -92,11 +105,19 @@ enum { #define ESR_EC_MASK (ESR_EC_NUM - 1) #define ESR_EC_SVC64 0x15 +#define ESR_EC_IABT 0x21 +#define ESR_EC_DABT 0x25 #define ESR_EC_HW_BP_CURRENT 0x31 #define ESR_EC_SSTEP_CURRENT 0x33 #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c +/* Access flag */ +#define PTE_AF (1ULL << 10) + +/* Access flag update enable/disable */ +#define TCR_EL1_HA (1ULL << 39) + void aarch64_get_supported_page_sizes(uint32_t ipa, bool *ps4k, bool *ps16k, bool *ps64k); @@ -109,6 +130,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); + static inline void cpu_relax(void) { asm volatile("yield" ::: "memory"); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index e42a09cd24a0..fbc2a79369b8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -16,11 +16,24 @@ #include <linux/kvm.h> #include "linux/rbtree.h" +#include <asm/atomic.h> #include <sys/ioctl.h> #include "sparsebit.h" +/* + * Provide a version of static_assert() that is guaranteed to have an optional + * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h) + * #undefs and #defines static_assert() as a direct alias to _Static_assert(), + * i.e. effectively makes the message mandatory. Many KVM selftests #define + * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As + * a result, static_assert() behavior is non-deterministic and may or may not + * require a message depending on #include order. + */ +#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) + #define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 @@ -34,6 +47,7 @@ struct userspace_mem_region { struct sparsebit *unused_phy_pages; int fd; off_t offset; + enum vm_mem_backing_src_type backing_src_type; void *host_mem; void *host_alias; void *mmap_start; @@ -64,6 +78,14 @@ struct userspace_mem_regions { DECLARE_HASHTABLE(slot_hash, 9); }; +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + struct kvm_vm { int mode; unsigned long type; @@ -81,6 +103,7 @@ struct kvm_vm { struct sparsebit *vpages_mapped; bool has_irqchip; bool pgd_created; + vm_paddr_t ucall_mmio_addr; vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; @@ -92,6 +115,13 @@ struct kvm_vm { int stats_fd; struct kvm_stats_header stats_header; struct kvm_stats_desc *stats_desc; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; }; @@ -104,6 +134,13 @@ struct kvm_vm { struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot); +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 @@ -194,7 +231,7 @@ static inline bool kvm_has_cap(long cap) #define kvm_do_ioctl(fd, cmd, arg) \ ({ \ - static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \ + kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ ioctl(fd, cmd, arg); \ }) @@ -383,8 +420,14 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_populate_vaddr_bitmap(struct kvm_vm *vm); +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -646,13 +689,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); +struct kvm_vm *____vm_create(enum vm_guest_mode mode); struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { - return ____vm_create(VM_MODE_DEFAULT, 0); + return ____vm_create(VM_MODE_DEFAULT); } static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) @@ -688,6 +731,10 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + unsigned long vm_compute_max_gfn(struct kvm_vm *vm); unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); @@ -718,6 +765,19 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, memcpy(&(g), _p, sizeof(g)); \ }) +/* + * Write a global value, but only in the VM's (guest's) domain. Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. + */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, @@ -838,4 +898,13 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } +/* + * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * to allow for arch-specific setup that is common to all tests, e.g. computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + +void kvm_arch_vm_post_create(struct kvm_vm *vm); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h new file mode 100644 index 000000000000..72e3e358ef7b --- /dev/null +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/memstress.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_MEMSTRESS_H +#define SELFTEST_KVM_MEMSTRESS_H + +#include <pthread.h> + +#include "kvm_util.h" + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + +#define MEMSTRESS_MEM_SLOT_INDEX 1 + +struct memstress_vcpu_args { + uint64_t gpa; + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + struct kvm_vcpu *vcpu; + int vcpu_idx; +}; + +struct memstress_args { + struct kvm_vm *vm; + /* The starting address and size of the guest test region. */ + uint64_t gpa; + uint64_t size; + uint64_t guest_page_size; + uint32_t random_seed; + uint32_t write_percent; + + /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ + bool nested; + /* Randomize which pages are accessed by the guest. */ + bool random_access; + /* True if all vCPUs are pinned to pCPUs */ + bool pin_vcpus; + /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */ + uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; + + /* Test is done, stop running vCPUs. */ + bool stop_vcpus; + + struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS]; +}; + +extern struct memstress_args memstress_args; + +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, + uint64_t vcpu_memory_bytes, int slots, + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access); +void memstress_destroy_vm(struct kvm_vm *vm); + +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); +void memstress_set_random_access(struct kvm_vm *vm, bool random_access); + +void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); +void memstress_join_vcpu_threads(int vcpus); +void memstress_guest_code(uint32_t vcpu_id); + +uint64_t memstress_nested_pages(int nr_vcpus); +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); + +#endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h deleted file mode 100644 index eaa88df0555a..000000000000 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * tools/testing/selftests/kvm/include/perf_test_util.h - * - * Copyright (C) 2020, Google LLC. - */ - -#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H -#define SELFTEST_KVM_PERF_TEST_UTIL_H - -#include <pthread.h> - -#include "kvm_util.h" - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - -#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ - -#define PERF_TEST_MEM_SLOT_INDEX 1 - -struct perf_test_vcpu_args { - uint64_t gpa; - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - struct kvm_vcpu *vcpu; - int vcpu_idx; -}; - -struct perf_test_args { - struct kvm_vm *vm; - /* The starting address and size of the guest test region. */ - uint64_t gpa; - uint64_t size; - uint64_t guest_page_size; - int wr_fract; - - /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ - bool nested; - - struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; -}; - -extern struct perf_test_args perf_test_args; - -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src, - bool partition_vcpu_memory_access); -void perf_test_destroy_vm(struct kvm_vm *vm); - -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); - -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); -void perf_test_join_vcpu_threads(int vcpus); -void perf_test_guest_code(uint32_t vcpu_id); - -uint64_t perf_test_nested_pages(int nr_vcpus); -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); - -#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index befc754ce9b3..80d6416f3012 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -77,6 +77,13 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_elapsed(struct timespec start); struct timespec timespec_div(struct timespec ts, int divisor); +struct guest_random_state { + uint32_t seed; +}; + +struct guest_random_state new_guest_random_state(uint32_t seed); +uint32_t guest_random_u32(struct guest_random_state *state); + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, @@ -152,4 +159,22 @@ static inline void *align_ptr_up(void *x, size_t size) return (void *)align_up((unsigned long)x, size); } +int atoi_paranoid(const char *num_str); + +static inline uint32_t atoi_positive(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str); + return num; +} + +static inline uint32_t atoi_non_negative(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str); + return num; +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index ee79d180e07e..1a6aaef5ccae 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -22,12 +22,26 @@ enum { struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + + /* Host virtual address of this struct. */ + struct ucall *hva; }; -void ucall_init(struct kvm_vm *vm, void *arg); -void ucall_uninit(struct kvm_vm *vm); +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +void ucall_arch_do_ucall(vm_vaddr_t uc); +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); + void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); + +/* + * Perform userspace call without any associated data. This bare call avoids + * allocating a ucall struct, which can be useful if the atomic operations in + * the full ucall() are problematic and/or unwanted. Note, this will come out + * as UCALL_NONE on the backend. + */ +#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL) #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h new file mode 100644 index 000000000000..877449c34592 --- /dev/null +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM userfaultfd util + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include <inttypes.h> +#include <time.h> +#include <pthread.h> +#include <linux/userfaultfd.h> + +#include "test_util.h" + +typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); + +struct uffd_desc { + int uffd_mode; + int uffd; + int pipefds[2]; + useconds_t delay; + uffd_handler_t handler; + pthread_t thread; +}; + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler); + +void uffd_stop_demand_paging(struct uffd_desc *uffd); + +#ifdef PRINT_PER_PAGE_UPDATES +#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) +#endif + +#ifdef PRINT_PER_VCPU_UPDATES +#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) +#endif diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index 58db74f68af2..901caf0e0939 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -10,6 +10,7 @@ #define SELFTEST_KVM_EVMCS_H #include <stdint.h> +#include "hyperv.h" #include "vmx.h" #define u16 uint16_t @@ -20,15 +21,6 @@ extern bool enable_evmcs; -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u64 current_nested_vmcs; -}; - struct hv_enlightened_vmcs { u32 revision_id; u32 abort; @@ -41,6 +33,8 @@ struct hv_enlightened_vmcs { u16 host_gs_selector; u16 host_tr_selector; + u16 padding16_1; + u64 host_ia32_pat; u64 host_ia32_efer; @@ -159,7 +153,7 @@ struct hv_enlightened_vmcs { u64 ept_pointer; u16 virtual_processor_id; - u16 padding16[3]; + u16 padding16_2[3]; u64 padding64_2[5]; u64 guest_physical_address; @@ -195,13 +189,13 @@ struct hv_enlightened_vmcs { u64 guest_rip; u32 hv_clean_fields; - u32 hv_padding_32; + u32 padding32_1; u32 hv_synthetic_controls; struct { u32 nested_flush_hypercall:1; u32 msr_bitmap:1; u32 reserved:30; - } hv_enlightenments_control; + } __packed hv_enlightenments_control; u32 hv_vp_id; u32 padding32_2; u64 hv_vm_id; @@ -222,7 +216,7 @@ struct hv_enlightened_vmcs { u64 host_ssp; u64 host_ia32_int_ssp_table_addr; u64 padding64_6; -}; +} __packed; #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) @@ -243,29 +237,15 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 -#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 extern struct hv_enlightened_vmcs *current_evmcs; -extern struct hv_vp_assist_page *current_vp_assist; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); -static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +static inline void evmcs_enable(void) { - u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); - - current_vp_assist = vp_assist; - enable_evmcs = true; - - return 0; } static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) @@ -278,6 +258,16 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) return 0; } +static inline bool load_evmcs(struct hyperv_test_pages *hv) +{ + if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs)) + return false; + + current_evmcs->revision_id = EVMCS_VERSION; + + return true; +} + static inline int evmcs_vmptrst(uint64_t *value) { *value = current_vp_assist->current_nested_vmcs & diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index b66910702c0a..9218bb5f44bf 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -9,6 +9,8 @@ #ifndef SELFTEST_KVM_HYPERV_H #define SELFTEST_KVM_HYPERV_H +#include "processor.h" + #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HYPERV_CPUID_INTERFACE 0x40000001 #define HYPERV_CPUID_VERSION 0x40000002 @@ -184,5 +186,106 @@ /* hypercall options */ #define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_REP_COMP_OFFSET 32 + +/* + * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' + * is set to the hypercall status (if no exception occurred). + */ +static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address, + uint64_t *hv_status) +{ + uint64_t error_code; + uint8_t vector; + + /* Note both the hypercall and the "asm safe" clobber r9-r11. */ + asm volatile("mov %[output_address], %%r8\n\t" + KVM_ASM_SAFE("vmcall") + : "=a" (*hv_status), + "+c" (control), "+d" (input_address), + KVM_ASM_SAFE_OUTPUTS(vector, error_code) + : [output_address] "r"(output_address), + "a" (-EFAULT) + : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); + return vector; +} + +/* Issue a Hyper-V hypercall and assert that it succeeded. */ +static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address) +{ + uint64_t hv_status; + uint8_t vector; + + vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); + + GUEST_ASSERT(!vector); + GUEST_ASSERT((hv_status & 0xffff) == 0); +} + +/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */ +static inline void hyperv_write_xmm_input(void *data, int n_sse_regs) +{ + int i; + + for (i = 0; i < n_sse_regs; i++) + write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i)); +} + +/* Proper HV_X64_MSR_GUEST_OS_ID value */ +#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48) + +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + +/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; + __u64 current_nested_vmcs; +} __packed; + +extern struct hv_vp_assist_page *current_vp_assist; + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); + +struct hyperv_test_pages { + /* VP assist page */ + void *vp_assist_hva; + uint64_t vp_assist_gpa; + void *vp_assist; + + /* Partition assist page */ + void *partition_assist_hva; + uint64_t partition_assist_gpa; + void *partition_assist; + + /* Enlightened VMCS */ + void *enlightened_vmcs_hva; + uint64_t enlightened_vmcs_gpa; + void *enlightened_vmcs; +}; + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva); #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 5da0c5e2a7af..b1a31de7108a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -63,16 +63,21 @@ struct kvm_x86_cpu_feature { u8 reg; u8 bit; }; -#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ -({ \ - struct kvm_x86_cpu_feature feature = { \ - .function = fn, \ - .index = idx, \ - .reg = KVM_CPUID_##gpr, \ - .bit = __bit, \ - }; \ - \ - feature; \ +#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ +({ \ + struct kvm_x86_cpu_feature feature = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .bit = __bit, \ + }; \ + \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ + feature; \ }) /* @@ -89,6 +94,8 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) #define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) #define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31) +#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) #define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) #define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) #define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) @@ -96,6 +103,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26) #define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0) #define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1) +#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2) #define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4) #define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7) #define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10) @@ -109,6 +117,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) +#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) #define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7) #define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20) #define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24) @@ -162,6 +171,102 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) #define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) +/* + * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit + * value/property as opposed to a single-bit feature. Again, pack the info + * into a 64-bit value to pass by value with no overhead. + */ +struct kvm_x86_cpu_property { + u32 function; + u8 index; + u8 reg; + u8 lo_bit; + u8 hi_bit; +}; +#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \ +({ \ + struct kvm_x86_cpu_property property = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .lo_bit = low_bit, \ + .hi_bit = high_bit, \ + }; \ + \ + kvm_static_assert(low_bit < high_bit); \ + kvm_static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ + property; \ +}) + +#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) +#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) +#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) + +#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31) +#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15) +#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31) +#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15) +#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31) +#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15) + +#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) + +#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) +#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) +#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) + +#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) + +/* + * Intel's architectural PMU events are bizarre. They have a "feature" bit + * that indicates the feature is _not_ supported, and a property that states + * the length of the bit mask of unsupported features. A feature is supported + * if the size of the bit mask is larger than the "unavailable" bit, and said + * bit is not set. + * + * Wrap the "unavailable" feature to simplify checking whether or not a given + * architectural event is supported. + */ +struct kvm_x86_pmu_feature { + struct kvm_x86_cpu_feature anti_feature; +}; +#define KVM_X86_PMU_FEATURE(name, __bit) \ +({ \ + struct kvm_x86_pmu_feature feature = { \ + .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \ + }; \ + \ + feature; \ +}) + +#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5) + +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} + /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) #define PTE_WRITABLE_MASK BIT_ULL(1) @@ -172,12 +277,18 @@ struct kvm_x86_cpu_feature { #define PTE_GLOBAL_MASK BIT_ULL(8) #define PTE_NX_MASK BIT_ULL(63) +#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) + #define PAGE_SHIFT 12 #define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) -#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) -#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) +#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) +#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x)) +#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK) + +#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) +#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) /* General Registers in 64-Bit Mode */ struct gpr64_regs { @@ -425,82 +536,143 @@ static inline void cpuid(uint32_t function, return __cpuid(function, 0, eax, ebx, ecx, edx); } -static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +static inline uint32_t this_cpu_fms(void) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(1, &eax, &ebx, &ecx, &edx); + return eax; +} + +static inline uint32_t this_cpu_family(void) +{ + return x86_family(this_cpu_fms()); +} + +static inline uint32_t this_cpu_model(void) +{ + return x86_model(this_cpu_fms()); +} + +static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { uint32_t gprs[4]; - __cpuid(feature.function, feature.index, + __cpuid(function, index, &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); - return gprs[feature.reg] & BIT(feature.bit); + return (gprs[reg] & GENMASK(hi, lo)) >> lo; +} + +static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +{ + return __this_cpu_has(feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +{ + return __this_cpu_has(property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} + +static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; } -#define SET_XMM(__var, __xmm) \ - asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) +static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); -static inline void set_xmm(int n, unsigned long val) + return nr_bits > feature.anti_feature.bit && + !this_cpu_has(feature.anti_feature); +} + +typedef u32 __attribute__((vector_size(16))) sse128_t; +#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; } +#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; }) +#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; }) + +static inline void read_sse_reg(int reg, sse128_t *data) { - switch (n) { + switch (reg) { case 0: - SET_XMM(val, xmm0); + asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: - SET_XMM(val, xmm1); + asm("movdqa %%xmm1, %0" : "=m"(*data)); break; case 2: - SET_XMM(val, xmm2); + asm("movdqa %%xmm2, %0" : "=m"(*data)); break; case 3: - SET_XMM(val, xmm3); + asm("movdqa %%xmm3, %0" : "=m"(*data)); break; case 4: - SET_XMM(val, xmm4); + asm("movdqa %%xmm4, %0" : "=m"(*data)); break; case 5: - SET_XMM(val, xmm5); + asm("movdqa %%xmm5, %0" : "=m"(*data)); break; case 6: - SET_XMM(val, xmm6); + asm("movdqa %%xmm6, %0" : "=m"(*data)); break; case 7: - SET_XMM(val, xmm7); + asm("movdqa %%xmm7, %0" : "=m"(*data)); break; + default: + BUG(); } } -#define GET_XMM(__xmm) \ -({ \ - unsigned long __val; \ - asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \ - __val; \ -}) - -static inline unsigned long get_xmm(int n) +static inline void write_sse_reg(int reg, const sse128_t *data) { - assert(n >= 0 && n <= 7); - - switch (n) { + switch (reg) { case 0: - return GET_XMM(xmm0); + asm("movdqa %0, %%xmm0" : : "m"(*data)); + break; case 1: - return GET_XMM(xmm1); + asm("movdqa %0, %%xmm1" : : "m"(*data)); + break; case 2: - return GET_XMM(xmm2); + asm("movdqa %0, %%xmm2" : : "m"(*data)); + break; case 3: - return GET_XMM(xmm3); + asm("movdqa %0, %%xmm3" : : "m"(*data)); + break; case 4: - return GET_XMM(xmm4); + asm("movdqa %0, %%xmm4" : : "m"(*data)); + break; case 5: - return GET_XMM(xmm5); + asm("movdqa %0, %%xmm5" : : "m"(*data)); + break; case 6: - return GET_XMM(xmm6); + asm("movdqa %0, %%xmm6" : : "m"(*data)); + break; case 7: - return GET_XMM(xmm7); + asm("movdqa %0, %%xmm7" : : "m"(*data)); + break; + default: + BUG(); } - - /* never reached */ - return 0; } static inline void cpu_relax(void) @@ -508,11 +680,6 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } -#define vmmcall() \ - __asm__ __volatile__( \ - "vmmcall\n" \ - ) - #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ @@ -526,23 +693,6 @@ static inline void cpu_relax(void) bool is_intel_cpu(void); bool is_amd_cpu(void); -static inline unsigned int x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_model(unsigned int eax) -{ - return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); -} - struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); void kvm_x86_state_cleanup(struct kvm_x86_state *state); @@ -604,10 +754,27 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); } +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +static inline uint32_t kvm_cpu_fms(void) +{ + return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; +} + +static inline uint32_t kvm_cpu_family(void) +{ + return x86_family(kvm_cpu_fms()); +} + +static inline uint32_t kvm_cpu_model(void) +{ + return x86_model(kvm_cpu_fms()); +} + bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, struct kvm_x86_cpu_feature feature); @@ -616,6 +783,42 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); } +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); + +static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +{ + return kvm_cpuid_property(kvm_get_supported_cpuid(), property); +} + +static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + +static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !kvm_cpu_has(feature.anti_feature); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + @@ -639,8 +842,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) return cpuid; } -const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); @@ -701,17 +902,6 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); } -static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function, - uint32_t index) -{ - return get_cpuid_entry(kvm_get_supported_cpuid(), function, index); -} - -static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function) -{ - return __kvm_get_supported_cpuid_entry(function, 0); -} - uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); @@ -723,15 +913,6 @@ static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); } -static inline uint32_t kvm_get_cpuid_max_basic(void) -{ - return kvm_get_supported_cpuid_entry(0)->eax; -} - -static inline uint32_t kvm_get_cpuid_max_extended(void) -{ - return kvm_get_supported_cpuid_entry(0x80000000)->eax; -} void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); @@ -777,7 +958,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * for recursive faults when accessing memory in the handler. The downside to * using registers is that it restricts what registers can be used by the actual * instruction. But, selftests are 64-bit only, making register* pressure a - * minor concern. Use r9-r11 as they are volatile, i.e. don't need* to be saved + * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved * by the callee, and except for r11 are not implicit parameters to any * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V @@ -793,39 +974,52 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * * REGISTER OUTPUTS: * r9 = exception vector (non-zero) + * r10 = error code */ #define KVM_ASM_SAFE(insn) \ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ "1: " insn "\n\t" \ - "movb $0, %[vector]\n\t" \ - "jmp 3f\n\t" \ + "xor %%r9, %%r9\n\t" \ "2:\n\t" \ "mov %%r9b, %[vector]\n\t" \ - "3:\n\t" + "mov %%r10, %[error_code]\n\t" -#define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v) +#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) #define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" -#define kvm_asm_safe(insn, inputs...) \ -({ \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ +#define kvm_asm_safe(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_ec(insn, error_code, inputs...) \ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ }) static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val) { + uint64_t error_code; uint8_t vector; uint32_t a, d; asm volatile(KVM_ASM_SAFE("rdmsr") - : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector) + : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code) : "c"(msr) : KVM_ASM_SAFE_CLOBBERS); @@ -840,10 +1034,9 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) bool kvm_is_tdp_enabled(void); -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr); -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte); +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level); +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); @@ -895,4 +1088,27 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) #define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ XSTATE_XTILE_DATA_MASK) + +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 +#define PFERR_GUEST_FINAL_BIT 32 +#define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 + +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index c8343ff84f7f..4803e1056055 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -58,6 +58,27 @@ enum { INTERCEPT_RDPRU, }; +struct hv_vmcb_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31) + +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_cr; @@ -106,7 +127,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * Offset 0x3e0, 32 bytes reserved * for use by hypervisor/software. */ - u8 reserved_sw[32]; + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h index 7aee6244ab6a..044f0f872ba9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -32,6 +32,20 @@ struct svm_test_data { uint64_t msr_gpa; }; +static inline void vmmcall(void) +{ + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. + */ + __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} + #define stgi() \ __asm__ __volatile__( \ "stgi\n" \ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 71b290b6469d..5f0c0a29c556 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -437,11 +437,16 @@ static inline int vmresume(void) static inline void vmcall(void) { - /* Currently, L1 destroys our GPRs during vmexits. */ - __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); + /* + * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle + * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended + * use of this function is to exit to L1 from L2. Clobber all other + * GPRs as L1 doesn't correctly preserve them during vmexits. + */ + __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); } static inline int vmread(uint64_t encoding, uint64_t *value) @@ -517,14 +522,6 @@ struct vmx_pages { uint64_t vmwrite_gpa; void *vmwrite; - void *vp_assist_hva; - uint64_t vp_assist_gpa; - void *vp_assist; - - void *enlightened_vmcs_hva; - uint64_t enlightened_vmcs_gpa; - void *enlightened_vmcs; - void *eptp_hva; uint64_t eptp_gpa; void *eptp; @@ -572,7 +569,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); -bool kvm_vm_has_ept(struct kvm_vm *vm); +bool kvm_cpu_has_ept(void); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index f42c6ac6d71d..b3b00be1ef82 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -289,7 +289,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); /* Export shared structure test_args to guest */ - ucall_init(vm, NULL); sync_global_to_guest(vm, test_args); ret = sem_init(&test_stage_updated, 0, 0); @@ -417,7 +416,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(ret == 0, "Error in sem_destroy"); free(vcpu_threads); - ucall_uninit(vm); kvm_vm_free(vm); } @@ -461,8 +459,8 @@ int main(int argc, char *argv[]) p.test_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 's': diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6f5551368944..5972a23b2765 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -11,6 +11,7 @@ #include "guest_modes.h" #include "kvm_util.h" #include "processor.h" +#include <linux/bitfield.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -76,13 +77,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; } static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -133,12 +136,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ + uint64_t attr_idx = MT_NORMAL; _virt_pg_map(vm, vaddr, paddr, attr_idx); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; @@ -169,11 +172,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) TEST_FAIL("Page table levels must be 2, 3, or 4"); } - return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + return ptep; unmapped_gva: TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(1); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep = virt_get_pte_hva(vm, gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) @@ -318,13 +328,16 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + size_t stack_size; + uint64_t stack_vaddr; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + aarch64_vcpu_setup(vcpu, init); vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); @@ -428,8 +441,8 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size); + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, MEM_REGION_DATA); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } @@ -486,24 +499,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = ((val >> 28) & 0xf) != 0xf; - *ps64k = ((val >> 24) & 0xf) == 0; - *ps16k = ((val >> 20) & 0xf) != 0; + *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf; + *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0; + *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0; close(vcpu_fd); close(vm_fd); close(kvm_fd); } -/* - * arm64 doesn't have a true default mode, so start by computing the - * available IPA space and page sizes early. - */ -void __attribute__((constructor)) init_guest_modes(void) -{ - guest_modes_append_default(); -} - void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res) @@ -528,3 +532,22 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"); } + +void kvm_selftest_arch_init(void) +{ + /* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ + guest_modes_append_default(); +} + +void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +{ + /* + * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space + * is [0, 2^(64 - TCR_EL1.T0SZ)). + */ + sparsebit_set_num(vm->vpages_valid, 0, + (1ULL << vm->va_bits) >> vm->page_shift); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index ed237b744690..f212bd8ab93d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -6,108 +6,38 @@ */ #include "kvm_util.h" +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ static vm_vaddr_t *ucall_exit_mmio_addr; -static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) - return false; - - virt_pg_map(vm, gpa, gpa); + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); - ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; - sync_global_to_guest(vm, ucall_exit_mmio_addr); - - return true; -} - -void ucall_init(struct kvm_vm *vm, void *arg) -{ - vm_paddr_t gpa, start, end, step, offset; - unsigned int bits; - bool ret; - - if (arg) { - gpa = (vm_paddr_t)arg; - ret = ucall_mmio_init(vm, gpa); - TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); - return; - } + virt_map(vm, mmio_gva, mmio_gpa, 1); - /* - * Find an address within the allowed physical and virtual address - * spaces, that does _not_ have a KVM memory region associated with - * it. Identity mapping an address like this allows the guest to - * access it, but as KVM doesn't know what to do with it, it - * will assume it's something userspace handles and exit with - * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. - * Here we start with a guess that the addresses around 5/8th - * of the allowed space are unmapped and then work both down and - * up from there in 1/16th allowed space sized steps. - * - * Note, we need to use VA-bits - 1 when calculating the allowed - * virtual address space for an identity mapping because the upper - * half of the virtual address space is the two's complement of the - * lower and won't match physical addresses. - */ - bits = vm->va_bits - 1; - bits = min(vm->pa_bits, bits); - end = 1ul << bits; - start = end * 5 / 8; - step = end / 16; - for (offset = 0; offset < end - start; offset += step) { - if (ucall_mmio_init(vm, start - offset)) - return; - if (ucall_mmio_init(vm, start + offset)) - return; - } - TEST_FAIL("Can't find a ucall mmio address"); -} + vm->ucall_mmio_addr = mmio_gpa; -void ucall_uninit(struct kvm_vm *vm) -{ - ucall_exit_mmio_addr = 0; - sync_global_to_guest(vm, ucall_exit_mmio_addr); + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); } -void ucall(uint64_t cmd, int nargs, ...) +void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = {}; - va_list va; - int i; - - WRITE_ONCE(uc.cmd, cmd); - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); - va_end(va); - - WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); + WRITE_ONCE(*ucall_exit_mmio_addr, uc); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_MMIO && - run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { - vm_vaddr_t gva; - - TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), "Unexpected ucall exit mmio address access"); - memcpy(&gva, run->mmio.data, sizeof(gva)); - memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)(*((uint64_t *)run->mmio.data)); } - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 9f54c098d9d0..820ac2d08c98 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -138,7 +138,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) offset = hdr.e_phoff + (n1 * hdr.e_phentsize); offset_rv = lseek(fd, offset, SEEK_SET); TEST_ASSERT(offset_rv == offset, - "Failed to seek to begining of program header %u,\n" + "Failed to seek to beginning of program header %u,\n" " filename: %s\n" " rv: %jd errno: %i", n1, filename, (intmax_t) offset_rv, errno); @@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart); + vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, + MEM_REGION_CODE); TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..56d5ea949cbb 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -11,6 +11,7 @@ #include "processor.h" #include <assert.h> +#include <sched.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -185,12 +186,27 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) +/* + * Initializes vm->vpages_valid to match the canonical VA space of the + * architecture. + * + * The default implementation is valid for architectures which split the + * range addressed by a single page table into a low and high region + * based on the MSB of the VA. On architectures with this behavior + * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1]. + */ +__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) { - struct kvm_vm *vm; + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, + (1ULL << (vm->va_bits - 1)) >> vm->page_shift); +} - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); +struct kvm_vm *____vm_create(enum vm_guest_mode mode) +{ + struct kvm_vm *vm; vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -276,20 +292,13 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); - sparsebit_set_num(vm->vpages_valid, - 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); - sparsebit_set_num(vm->vpages_valid, - (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, - (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + vm_vaddr_populate_bitmap(vm); /* Limit physical addresses to PA-bits. */ vm->max_gfn = vm_compute_max_gfn(vm); /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); - if (nr_pages != 0) - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - 0, 0, nr_pages, 0); return vm; } @@ -334,15 +343,32 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, { uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, nr_extra_pages); + struct userspace_mem_region *slot0; struct kvm_vm *vm; + int i; - vm = ____vm_create(mode, nr_pages); + pr_debug("%s: mode='%s' pages='%ld'\n", __func__, + vm_guest_mode_string(mode), nr_pages); + + vm = ____vm_create(mode); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; kvm_vm_elf_load(vm, program_invocation_name); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif + /* + * TODO: Add proper defines to protect the library's memslots, and then + * carve out memslot1 for the ucall MMIO address. KVM treats writes to + * read-only memslots as MMIO, and creating a read-only memslot for the + * MMIO region would prevent silently clobbering the MMIO region. + */ + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + + kvm_arch_vm_post_create(vm); + return vm; } @@ -443,6 +469,59 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } +void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +{ + cpu_set_t mask; + int r; + + CPU_ZERO(&mask); + CPU_SET(pcpu, &mask); + r = sched_setaffinity(0, sizeof(mask), &mask); + TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); +} + +static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +{ + uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + + TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), + "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + return pcpu; +} + +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus) +{ + cpu_set_t allowed_mask; + char *cpu, *cpu_list; + char delim[2] = ","; + int i, r; + + cpu_list = strdup(pcpus_string); + TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + + r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); + TEST_ASSERT(!r, "sched_getaffinity() failed"); + + cpu = strtok(cpu_list, delim); + + /* 1. Get all pcpus for vcpus. */ + for (i = 0; i < nr_vcpus; i++) { + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); + cpu = strtok(NULL, delim); + } + + /* 2. Check if the main worker needs to be pinned. */ + if (cpu) { + kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + cpu = strtok(NULL, delim); + } + + TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu); + free(cpu_list); +} + /* * Userspace Memory Region Find * @@ -586,6 +665,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, sparsebit_free(®ion->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + if (region->fd >= 0) { + /* There's an extra map when using shared memory. */ + ret = munmap(region->mmap_alias, region->mmap_size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + close(region->fd); + } free(region); } @@ -923,6 +1008,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm_mem_backing_src_alias(src_type)->name); } + region->backing_src_type = src_type; region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1151,8 +1237,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * TEST_ASSERT failure occurs for invalid input or no area of at least * sz unallocated bytes >= vaddr_min is available. */ -static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min) { uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; @@ -1217,32 +1303,15 @@ va_found: return pgidx_start * vm->page_size; } -/* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. - */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, 0); + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type]); /* * Find an unused range of virtual page addresses of at least @@ -1256,14 +1325,37 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, - vaddr >> vm->page_shift); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; } /* + * VM Virtual Address Allocate + * + * Input Args: + * vm - Virtual Machine + * sz - Size in bytes + * vaddr_min - Minimum starting virtual address + * + * Output Args: None + * + * Return: + * Starting guest virtual address + * + * Allocates at least sz bytes within the virtual address space of the vm + * given by vm. The allocated bytes are mapped to a virtual address >= + * the address given by vaddr_min. Note that each allocation uses a + * a unique set of pages, with the minimum real allocation being at least + * a page. The allocated physical space comes from the TEST_DATA memory region. + */ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +{ + return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); +} + +/* * VM Virtual Address Allocate Pages * * Input Args: @@ -1282,6 +1374,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +{ + return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); +} + /* * VM Virtual Address Allocate Page * @@ -1328,6 +1425,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, while (npages--) { virt_pg_map(vm, vaddr, paddr); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + vaddr += page_size; paddr += page_size; } @@ -1506,7 +1605,7 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - uint32_t page_size = vcpu->vm->page_size; + uint32_t page_size = getpagesize(); uint32_t size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); @@ -1847,7 +1946,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) { - return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); } /* @@ -2021,3 +2121,19 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, break; } } + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ +} + +__weak void kvm_selftest_arch_init(void) +{ +} + +void __attribute((constructor)) kvm_selftest_init(void) +{ + /* Tell stdout not to buffer its content. */ + setbuf(stdout, NULL); + + kvm_selftest_arch_init(); +} diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/memstress.c index 9618b37c66f7..5f1d3173c238 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -2,13 +2,15 @@ /* * Copyright (C) 2020, Google LLC. */ +#define _GNU_SOURCE + #include <inttypes.h> #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" -struct perf_test_args perf_test_args; +struct memstress_args memstress_args; /* * Guest virtual memory offset of the testing memory slot. @@ -31,7 +33,7 @@ struct vcpu_thread { static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; /* The function run by each vCPU thread, as provided by the test. */ -static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); +static void (*vcpu_thread_fn)(struct memstress_vcpu_args *); /* Set to true once all vCPU threads are up and running. */ static bool all_vcpu_threads_running; @@ -42,14 +44,19 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. */ -void perf_test_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(uint32_t vcpu_idx) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; + struct guest_random_state rand_state; uint64_t gva; uint64_t pages; + uint64_t addr; + uint64_t page; int i; + rand_state = new_guest_random_state(args->random_seed + vcpu_idx); + gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -58,9 +65,14 @@ void perf_test_guest_code(uint32_t vcpu_idx) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * pta->guest_page_size); + if (args->random_access) + page = guest_random_u32(&rand_state) % pages; + else + page = i; + + addr = gva + (page * args->guest_page_size); - if (i % pta->wr_fract == 0) + if (guest_random_u32(&rand_state) % 100 < args->write_percent) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -70,17 +82,17 @@ void perf_test_guest_code(uint32_t vcpu_idx) } } -void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, +void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], uint64_t vcpu_memory_bytes, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args; int i; for (i = 0; i < nr_vcpus; i++) { - vcpu_args = &pta->vcpu_args[i]; + vcpu_args = &args->vcpu_args[i]; vcpu_args->vcpu = vcpus[i]; vcpu_args->vcpu_idx = i; @@ -89,29 +101,29 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, vcpu_args->gva = guest_test_virt_mem + (i * vcpu_memory_bytes); vcpu_args->pages = vcpu_memory_bytes / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes); + args->guest_page_size; + vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes); } else { vcpu_args->gva = guest_test_virt_mem; vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa; + args->guest_page_size; + vcpu_args->gpa = args->gpa; } vcpu_args_set(vcpus[i], 1, i); pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", i, vcpu_args->gpa, vcpu_args->gpa + - (vcpu_args->pages * pta->guest_page_size)); + (vcpu_args->pages * args->guest_page_size)); } } -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; + struct memstress_args *args = &memstress_args; struct kvm_vm *vm; uint64_t guest_num_pages, slot0_pages = 0; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); @@ -121,20 +133,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); /* By default vCPUs will write to memory. */ - pta->wr_fract = 1; + args->write_percent = 100; /* * Snapshot the non-huge page size. This is used by the guest code to * access/dirty pages at the logging granularity. */ - pta->guest_page_size = vm_guest_mode_params[mode].page_size; + args->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size); + (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size); TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", @@ -144,8 +156,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * If using nested, allocate extra pages for the nested page tables and * in-memory data structures. */ - if (pta->nested) - slot0_pages += perf_test_nested_pages(nr_vcpus); + if (args->nested) + slot0_pages += memstress_nested_pages(nr_vcpus); /* * Pass guest_num_pages to populate the page tables for test memory. @@ -153,9 +165,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * effect as KVM allows aliasing HVAs in meslots. */ vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, - perf_test_guest_code, vcpus); + memstress_guest_code, vcpus); - pta->vm = vm; + args->vm = vm; /* Put the test region at the top guest physical memory. */ region_end_gfn = vm->max_gfn + 1; @@ -165,8 +177,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * When running vCPUs in L2, restrict the test region to 48 bits to * avoid needing 5-level page tables to identity map L2. */ - if (pta->nested) - region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size); + if (args->nested) + region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size); #endif /* * If there should be more memory in the guest test region than there @@ -178,63 +190,72 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, " nr_vcpus: %d wss: %" PRIx64 "]\n", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); - pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size; - pta->gpa = align_down(pta->gpa, backing_src_pagesz); + args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; + args->gpa = align_down(args->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - pta->gpa = align_down(pta->gpa, 1 << 20); + args->gpa = align_down(args->gpa, 1 << 20); #endif - pta->size = guest_num_pages * pta->guest_page_size; + args->size = guest_num_pages * args->guest_page_size; pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", - pta->gpa, pta->gpa + pta->size); + args->gpa, args->gpa + args->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; + vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, - PERF_TEST_MEM_SLOT_INDEX + i, + MEMSTRESS_MEM_SLOT_INDEX + i, region_pages, 0); } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages); - perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, + memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); - if (pta->nested) { + if (args->nested) { pr_info("Configuring vCPUs to run in L2 (nested).\n"); - perf_test_setup_nested(vm, nr_vcpus, vcpus); + memstress_setup_nested(vm, nr_vcpus, vcpus); } - ucall_init(vm, NULL); - /* Export the shared variables to the guest. */ - sync_global_to_guest(vm, perf_test_args); + sync_global_to_guest(vm, memstress_args); return vm; } -void perf_test_destroy_vm(struct kvm_vm *vm) +void memstress_destroy_vm(struct kvm_vm *vm) { - ucall_uninit(vm); kvm_vm_free(vm); } -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) { - perf_test_args.wr_fract = wr_fract; - sync_global_to_guest(vm, perf_test_args); + memstress_args.write_percent = write_percent; + sync_global_to_guest(vm, memstress_args.write_percent); } -uint64_t __weak perf_test_nested_pages(int nr_vcpus) +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) +{ + memstress_args.random_seed = random_seed; + sync_global_to_guest(vm, memstress_args.random_seed); +} + +void memstress_set_random_access(struct kvm_vm *vm, bool random_access) +{ + memstress_args.random_access = random_access; + sync_global_to_guest(vm, memstress_args.random_access); +} + +uint64_t __weak memstress_nested_pages(int nr_vcpus) { return 0; } -void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) +void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) { pr_info("%s() not support on this architecture, skipping.\n", __func__); exit(KSFT_SKIP); @@ -243,6 +264,10 @@ void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_v static void *vcpu_thread_main(void *data) { struct vcpu_thread *vcpu = data; + int vcpu_idx = vcpu->vcpu_idx; + + if (memstress_args.pin_vcpus) + kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -255,18 +280,19 @@ static void *vcpu_thread_main(void *data) while (!READ_ONCE(all_vcpu_threads_running)) ; - vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]); + vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]); return NULL; } -void perf_test_start_vcpu_threads(int nr_vcpus, - void (*vcpu_fn)(struct perf_test_vcpu_args *)) +void memstress_start_vcpu_threads(int nr_vcpus, + void (*vcpu_fn)(struct memstress_vcpu_args *)) { int i; vcpu_thread_fn = vcpu_fn; WRITE_ONCE(all_vcpu_threads_running, false); + WRITE_ONCE(memstress_args.stop_vcpus, false); for (i = 0; i < nr_vcpus; i++) { struct vcpu_thread *vcpu = &vcpu_threads[i]; @@ -285,10 +311,12 @@ void perf_test_start_vcpu_threads(int nr_vcpus, WRITE_ONCE(all_vcpu_threads_running, true); } -void perf_test_join_vcpu_threads(int nr_vcpus) +void memstress_join_vcpu_threads(int nr_vcpus) { int i; + WRITE_ONCE(memstress_args.stop_vcpus, true); + for (i = 0; i < nr_vcpus; i++) pthread_join(vcpu_threads[i].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 604478151212..d146ca71e0c0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -55,13 +55,15 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -279,15 +281,18 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code) { int r; - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + size_t stack_size; + unsigned long stack_vaddr; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; + stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 087b9740bc8f..9a3476a2dfca 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,11 +10,7 @@ #include "kvm_util.h" #include "processor.h" -void ucall_init(struct kvm_vm *vm, void *arg) -{ -} - -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } @@ -44,47 +40,22 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, return ret; } -void ucall(uint64_t cmd, int nargs, ...) +void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, KVM_RISCV_SELFTESTS_SBI_UCALL, - (vm_vaddr_t)&uc, 0, 0, 0, 0, 0); + uc, 0, 0, 0, 0, 0); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_RISCV_SBI && run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { switch (run->riscv_sbi.function_id) { case KVM_RISCV_SELFTESTS_SBI_UCALL: - memcpy(&ucall, - addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); - - break; + return (void *)run->riscv_sbi.args[0]; case KVM_RISCV_SELFTESTS_SBI_UNEXP: vcpu_dump(stderr, vcpu, 2); TEST_ASSERT(0, "Unexpected trap taken by guest"); @@ -93,6 +64,5 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) break; } } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 89d7340d9cbd..15945121daf1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) return; paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); vm->pgd = paddr; @@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 73dc4e21190f..a7f02dc372cf 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,40 +6,19 @@ */ #include "kvm_util.h" -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { -} - -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory"); + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && @@ -47,13 +26,7 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) (run->s390_sieic.ipb >> 16) == 0x501) { int reg = run->s390_sieic.ipa & 0xf; - memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)run->s.regs.gprs[reg]; } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 6d23878bbfe1..5c22fa4c2825 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,23 @@ #include "test_util.h" /* + * Random number generator that is usable from guest code. This is the + * Park-Miller LCG using standard constants. + */ + +struct guest_random_state new_guest_random_state(uint32_t seed) +{ + struct guest_random_state s = {.seed = seed}; + return s; +} + +uint32_t guest_random_u32(struct guest_random_state *state) +{ + state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + return state->seed; +} + +/* * Parses "[0-9]+[kmgt]?". */ size_t parse_size(const char *size) @@ -334,3 +351,22 @@ long get_run_delay(void) return val[1]; } + +int atoi_paranoid(const char *num_str) +{ + char *end_ptr; + long num; + + errno = 0; + num = strtol(num_str, &end_ptr, 0); + TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str); + TEST_ASSERT(num_str != end_ptr, + "strtol(\"%s\") didn't find a valid integer.", num_str); + TEST_ASSERT(*end_ptr == '\0', + "strtol(\"%s\") failed to parse trailing characters \"%s\".", + num_str, end_ptr); + TEST_ASSERT(num >= INT_MIN && num <= INT_MAX, + "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX); + + return num; +} diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c new file mode 100644 index 000000000000..2f0e2ea941cc --- /dev/null +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "kvm_util.h" +#include "linux/types.h" +#include "linux/bitmap.h" +#include "linux/atomic.h" + +#define GUEST_UCALL_FAILED -1 + +struct ucall_header { + DECLARE_BITMAP(in_use, KVM_MAX_VCPUS); + struct ucall ucalls[KVM_MAX_VCPUS]; +}; + +/* + * ucall_pool holds per-VM values (global data is duplicated by each VM), it + * must not be accessed from host code. + */ +static struct ucall_header *ucall_pool; + +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + struct ucall_header *hdr; + struct ucall *uc; + vm_vaddr_t vaddr; + int i; + + vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA); + hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + memset(hdr, 0, sizeof(*hdr)); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + uc = &hdr->ucalls[i]; + uc->hva = uc; + } + + write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + + ucall_arch_init(vm, mmio_gpa); +} + +static struct ucall *ucall_alloc(void) +{ + struct ucall *uc; + int i; + + if (!ucall_pool) + goto ucall_failed; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!test_and_set_bit(i, ucall_pool->in_use)) { + uc = &ucall_pool->ucalls[i]; + memset(uc->args, 0, sizeof(uc->args)); + return uc; + } + } + +ucall_failed: + /* + * If the vCPU cannot grab a ucall structure, make a bare ucall with a + * magic value to signal to get_ucall() that things went sideways. + * GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here. + */ + ucall_arch_do_ucall(GUEST_UCALL_FAILED); + return NULL; +} + +static void ucall_free(struct ucall *uc) +{ + /* Beware, here be pointer arithmetic. */ + clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall *uc; + va_list va; + int i; + + uc = ucall_alloc(); + + WRITE_ONCE(uc->cmd, cmd); + + nargs = min(nargs, UCALL_MAX_ARGS); + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + struct ucall ucall; + void *addr; + + if (!uc) + uc = &ucall; + + addr = ucall_arch_get_ucall(vcpu); + if (addr) { + TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED, + "Guest failed to allocate ucall struct"); + + memcpy(uc, addr, sizeof(*uc)); + vcpu_run_complete_io(vcpu); + } else { + memset(uc, 0, sizeof(*uc)); + } + + return uc->cmd; +} diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c new file mode 100644 index 000000000000..92cef20902f1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM userfaultfd util + * Adapted from demand_paging_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <poll.h> +#include <pthread.h> +#include <linux/userfaultfd.h> +#include <sys/syscall.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "memstress.h" +#include "userfaultfd_util.h" + +#ifdef __NR_userfaultfd + +static void *uffd_handler_thread_fn(void *arg) +{ + struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; + int uffd = uffd_desc->uffd; + int pipefd = uffd_desc->pipefds[0]; + useconds_t delay = uffd_desc->delay; + int64_t pages = 0; + struct timespec start; + struct timespec ts_diff; + + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { + struct uffd_msg msg; + struct pollfd pollfd[2]; + char tmp_chr; + int r; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd; + pollfd[1].events = POLLIN; + + r = poll(pollfd, 2, -1); + switch (r) { + case -1: + pr_info("poll err"); + continue; + case 0: + continue; + case 1: + break; + default: + pr_info("Polling uffd returned %d", r); + return NULL; + } + + if (pollfd[0].revents & POLLERR) { + pr_info("uffd revents has POLLERR"); + return NULL; + } + + if (pollfd[1].revents & POLLIN) { + r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(r == 1, + "Error reading pipefd in UFFD thread\n"); + return NULL; + } + + if (!(pollfd[0].revents & POLLIN)) + continue; + + r = read(uffd, &msg, sizeof(msg)); + if (r == -1) { + if (errno == EAGAIN) + continue; + pr_info("Read of uffd got errno %d\n", errno); + return NULL; + } + + if (r != sizeof(msg)) { + pr_info("Read on uffd returned unexpected size: %d bytes", r); + return NULL; + } + + if (!(msg.event & UFFD_EVENT_PAGEFAULT)) + continue; + + if (delay) + usleep(delay); + r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); + if (r < 0) + return NULL; + pages++; + } + + ts_diff = timespec_elapsed(start); + PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", + pages, ts_diff.tv_sec, ts_diff.tv_nsec, + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + + return NULL; +} + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler) +{ + struct uffd_desc *uffd_desc; + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); + int uffd; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + int ret; + + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", + is_minor ? "MINOR" : "MISSING", + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + + uffd_desc = malloc(sizeof(struct uffd_desc)); + TEST_ASSERT(uffd_desc, "malloc failed"); + + /* In order to get minor faults, prefault via the alias. */ + if (is_minor) + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, + "ioctl UFFDIO_API failed: %" PRIu64, + (uint64_t)uffdio_api.api); + + uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.len = len; + uffdio_register.mode = uffd_mode; + TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, + "ioctl UFFDIO_REGISTER failed"); + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == + expected_ioctls, "missing userfaultfd ioctls"); + + ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd"); + + uffd_desc->uffd_mode = uffd_mode; + uffd_desc->uffd = uffd; + uffd_desc->delay = delay; + uffd_desc->handler = handler; + pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, + uffd_desc); + + PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", + hva, hva + len); + + return uffd_desc; +} + +void uffd_stop_demand_paging(struct uffd_desc *uffd) +{ + char c = 0; + int ret; + + ret = write(uffd->pipefds[1], &c, 1); + TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + + ret = pthread_join(uffd->thread, NULL); + TEST_ASSERT(ret == 0, "Pthread_join failed."); + + close(uffd->uffd); + + close(uffd->pipefds[1]); + close(uffd->pipefds[0]); + + free(uffd); +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c new file mode 100644 index 000000000000..efb7e7a1354d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hyper-V specific functions. + * + * Copyright (C) 2021, Red Hat Inc. + */ +#include <stdint.h> +#include "processor.h" +#include "hyperv.h" + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva) +{ + vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); + + /* Setup of a region of guest memory for the VP Assist page. */ + hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); + hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + + /* Setup of a region of guest memory for the partition assist page. */ + hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); + hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); + hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); + + *p_hv_pages_gva = hv_pages_gva; + return hv; +} + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c index 0f344a7c89c4..d61e623afc8c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to perf_test_util.c. + * x86_64-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. */ @@ -11,25 +11,25 @@ #include "test_util.h" #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "vmx.h" -void perf_test_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(uint64_t vcpu_id) { - perf_test_guest_code(vcpu_id); + memstress_guest_code(vcpu_id); vmcall(); } -extern char perf_test_l2_guest_entry[]; +extern char memstress_l2_guest_entry[]; __asm__( -"perf_test_l2_guest_entry:" +"memstress_l2_guest_entry:" " mov (%rsp), %rdi;" -" call perf_test_l2_guest_code;" +" call memstress_l2_guest_code;" " ud2;" ); -static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) +static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -42,14 +42,14 @@ static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; *rsp = vcpu_id; - prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp); + prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_DONE(); } -uint64_t perf_test_nested_pages(int nr_vcpus) +uint64_t memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -59,7 +59,7 @@ uint64_t perf_test_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; @@ -72,12 +72,12 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) */ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); - start = align_down(perf_test_args.gpa, PG_SIZE_1G); - end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G); + start = align_down(memstress_args.gpa, PG_SIZE_1G); + end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); nested_identity_map_1g(vmx, vm, start, end - start); } -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; @@ -85,12 +85,13 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc int vcpu_id; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vmx = vcpu_alloc_vmx(vm, &vmx_gva); if (vcpu_id == 0) { - perf_test_setup_ept(vmx, vm); + memstress_setup_ept(vmx, vm); vmx0 = vmx; } else { /* Share the same EPT table across all vCPUs. */ @@ -100,11 +101,11 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc } /* - * Override the vCPU to run perf_test_l1_guest_code() which will - * bounce it into L2 before calling perf_test_guest_code(). + * Override the vCPU to run memstress_l1_guest_code() which will + * bounce it into L2 before calling memstress_guest_code(). */ vcpu_regs_get(vcpus[vcpu_id], ®s); - regs.rip = (unsigned long) perf_test_l1_guest_code; + regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], ®s); vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 41c1c73c464d..acfa1d01e7df 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -131,23 +131,28 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) } } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, - int level) +static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, + uint64_t vaddr, int level) { - uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); + uint64_t pt_gpa = PTE_GET_PA(*parent_pte); + uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", + level + 1, vaddr); + return &page_table[index]; } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t pt_pfn, + uint64_t *parent_pte, uint64_t vaddr, uint64_t paddr, int current_level, int target_level) { - uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; @@ -197,21 +202,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, - vaddr, paddr, PG_LEVEL_512G, level); + pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); + pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); + pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. */ - pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); @@ -241,30 +245,25 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, - struct kvm_vcpu *vcpu, - uint64_t vaddr) +static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - struct kvm_sregs sregs; - uint64_t rsvd_mask = 0; + if (*pte & PTE_LARGE_MASK) { + TEST_ASSERT(*level == PG_LEVEL_NONE || + *level == current_level, + "Unexpected hugepage at level %d\n", current_level); + *level = current_level; + } - /* Set the high bits in the reserved mask. */ - if (vm->pa_bits < 52) - rsvd_mask = GENMASK_ULL(51, vm->pa_bits); + return *level == current_level; +} - /* - * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries - * with 4-Level Paging and 5-Level Paging". - * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, - * the XD flag (bit 63) is reserved. - */ - vcpu_sregs_get(vcpu, &sregs); - if ((sregs.efer & EFER_NX) == 0) { - rsvd_mask |= PTE_NX_MASK; - } +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level) +{ + uint64_t *pml4e, *pdpe, *pde; + + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + "Invalid PG_LEVEL_* '%d'", *level); TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -279,54 +278,26 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), "Canonical check failed. The virtual address is invalid."); - index[0] = (vaddr >> 12) & 0x1ffu; - index[1] = (vaddr >> 21) & 0x1ffu; - index[2] = (vaddr >> 30) & 0x1ffu; - index[3] = (vaddr >> 39) & 0x1ffu; - - pml4e = addr_gpa2hva(vm, vm->pgd); - TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, - "Expected pml4e to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0, - "Unexpected reserved bits set."); - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, - "Expected pdpe to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), - "Expected pdpe to map a pde not a 1-GByte page."); - TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); + pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); + if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) + return pml4e; - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, - "Expected pde to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK), - "Expected pde to map a pte not a 2-MByte page."); - TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); + pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); + if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) + return pdpe; - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, - "Expected pte to be present for gva: 0x%08lx", vaddr); + pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); + if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) + return pde; - return &pte[index[0]]; + return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); } -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr) +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) { - uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr); + int level = PG_LEVEL_4K; - return *(uint64_t *)pte; -} - -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte) -{ - uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr); - - *(uint64_t *)new_pte = pte; + return __vm_get_page_table_entry(vm, vaddr, &level); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) @@ -512,47 +483,23 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - index[0] = (gva >> 12) & 0x1ffu; - index[1] = (gva >> 21) & 0x1ffu; - index[2] = (gva >> 30) & 0x1ffu; - index[3] = (gva >> 39) & 0x1ffu; - - if (!vm->pgd_created) - goto unmapped_gva; - pml4e = addr_gpa2hva(vm, vm->pgd); - if (!(pml4e[index[3]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - if (!(pdpe[index[2]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - if (!(pde[index[1]] & PTE_PRESENT_MASK)) - goto unmapped_gva; + int level = PG_LEVEL_NONE; + uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - if (!(pte[index[0]] & PTE_PRESENT_MASK)) - goto unmapped_gva; + TEST_ASSERT(*pte & PTE_PRESENT_MASK, + "Leaf PTE not PRESENT for gva: 0x%08lx", gva); - return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); - -unmapped_gva: - TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(EXIT_FAILURE); + /* + * No need for a hugepage mask on the PTE, x86-64 requires the "unused" + * address bits to be zero. + */ + return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level)); } static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) { if (!vm->gdt) - vm->gdt = vm_vaddr_alloc_page(vm); + vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); dt->base = vm->gdt; dt->limit = getpagesize(); @@ -562,7 +509,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, int selector) { if (!vm->tss) - vm->tss = vm_vaddr_alloc_page(vm); + vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); memset(segp, 0, sizeof(*segp)); segp->base = vm->tss; @@ -605,38 +552,9 @@ static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } -void __vm_xsave_require_permission(int bit, const char *name) +void kvm_arch_vm_post_create(struct kvm_vm *vm) { - int kvm_fd; - u64 bitmask; - long rc; - struct kvm_device_attr attr = { - .group = 0, - .attr = KVM_X86_XCOMP_GUEST_SUPP, - .addr = (unsigned long) &bitmask - }; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); - - kvm_fd = open_kvm_dev_path_or_exit(); - rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); - close(kvm_fd); - - if (rc == -1 && (errno == ENXIO || errno == EINVAL)) - __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); - - TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - - __TEST_REQUIRE(bitmask & (1ULL << bit), - "Required XSAVE feature '%s' not supported", name); - - TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); - - rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); - TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & (1ULL << bit), - "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", - bitmask); + vm_create_irqchip(vm); } struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, @@ -647,8 +565,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, vm_vaddr_t stack_vaddr; struct kvm_vcpu *vcpu; - stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -683,25 +602,29 @@ void vcpu_arch_free(struct kvm_vcpu *vcpu) free(vcpu->cpuid); } +/* Do not use kvm_supported_cpuid directly except for validity checks. */ +static void *kvm_supported_cpuid; + const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { - static struct kvm_cpuid2 *cpuid; int kvm_fd; - if (cpuid) - return cpuid; + if (kvm_supported_cpuid) + return kvm_supported_cpuid; - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); kvm_fd = open_kvm_dev_path_or_exit(); - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, + (struct kvm_cpuid2 *)kvm_supported_cpuid); close(kvm_fd); - return cpuid; + return kvm_supported_cpuid; } -bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_feature feature) +static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -714,12 +637,25 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, * order, but kvm_x86_cpu_feature matches that mess, so yay * pointer shenanigans! */ - if (entry->function == feature.function && - entry->index == feature.index) - return (&entry->eax)[feature.reg] & BIT(feature.bit); + if (entry->function == function && entry->index == index) + return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; } - return false; + return 0; +} + +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature) +{ + return __kvm_cpu_has(cpuid, feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) +{ + return __kvm_cpu_has(cpuid, property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); } uint64_t kvm_get_feature_msr(uint64_t msr_index) @@ -741,6 +677,41 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) return buffer.entry.data; } +void __vm_xsave_require_permission(int bit, const char *name) +{ + int kvm_fd; + u64 bitmask; + long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + TEST_ASSERT(!kvm_supported_cpuid, + "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM"); + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); + + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + + __TEST_REQUIRE(bitmask & (1ULL << bit), + "Required XSAVE feature '%s' not supported", name); + + TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & (1ULL << bit), + "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", + bitmask); +} + void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) { TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); @@ -1059,18 +1030,12 @@ bool is_amd_cpu(void) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { - const struct kvm_cpuid_entry2 *entry; - bool pae; - - /* SDM 4.1.4 */ - if (kvm_get_cpuid_max_extended() < 0x80000008) { - pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); - *pa_bits = pae ? 36 : 32; + if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { + *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; *va_bits = 32; } else { - entry = kvm_get_supported_cpuid_entry(0x80000008); - *pa_bits = entry->eax & 0xff; - *va_bits = (entry->eax >> 8) & 0xff; + *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); } } @@ -1103,6 +1068,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) regs->rip = regs->r11; regs->r9 = regs->vector; + regs->r10 = regs->error_code; return true; } @@ -1132,8 +1098,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) extern void *idt_handlers; int i; - vm->idt = vm_vaddr_alloc_page(vm); - vm->handlers = vm_vaddr_alloc_page(vm); + vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, @@ -1265,7 +1231,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint32_t eax, ebx, ecx, edx, max_ext_leaf; + uint8_t maxphyaddr; max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; @@ -1279,8 +1245,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; - cpuid(1, &eax, &ebx, &ecx, &edx); - if (x86_family(eax) < 0x17) + if (this_cpu_family() < 0x17) goto done; /* @@ -1288,17 +1253,14 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. */ - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - max_ext_leaf = eax; - if (max_ext_leaf < 0x80000008) + if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) goto done; - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; - if (max_ext_leaf >= 0x8000001f) { - cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); - max_pfn >>= (ebx >> 6) & 0x3f; - } + maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; + + if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) + max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); ht_gfn = max_pfn - num_ht_pages; done: diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index e5f0f9e0d3ee..4d41dc63cc9e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,52 +8,25 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { -} - -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; vcpu_regs_get(vcpu, ®s); - memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)regs.rdi; } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index d21049c38fc5..59d97531c9b1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -109,18 +109,6 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); - /* Setup of a region of guest memory for the VP Assist page. */ - vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm); - vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); - vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); - - /* Setup of a region of guest memory for the enlightened VMCS. */ - vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); - vmx->enlightened_vmcs_hva = - addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); - vmx->enlightened_vmcs_gpa = - addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs); - *p_vmx_gva = vmx_gva; return vmx; } @@ -171,26 +159,18 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { - if (!enable_evmcs) { - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; - - if (vmptrld(vmx->vmcs_gpa)) - return false; - - /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = - vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; - } else { - if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, - vmx->enlightened_vmcs)) - return false; - current_evmcs->revision_id = EVMCS_VERSION; - } + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; return true; } @@ -544,26 +524,22 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); } -bool kvm_vm_has_ept(struct kvm_vm *vm) +bool kvm_cpu_has_ept(void) { - struct kvm_vcpu *vcpu; uint64_t ctrl; - vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list); - TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n"); - - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) return false; - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; return ctrl & SECONDARY_EXEC_ENABLE_EPT; } void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { - TEST_REQUIRE(kvm_vm_has_ept(vm)); + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 9a6e4f3ad6b5..feaf2be20ff2 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -11,6 +11,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/atomic.h> +#include <linux/sizes.h> #include "kvm_util.h" #include "test_util.h" @@ -162,8 +163,7 @@ int main(int argc, char *argv[]) * just below the 4gb boundary. This test could create memory at * 1gb-3gb,but it's simpler to skip straight to 4gb. */ - const uint64_t size_1gb = (1 << 30); - const uint64_t start_gpa = (4ull * size_1gb); + const uint64_t start_gpa = SZ_4G; const int first_slot = 1; struct timespec time_start, time_run1, time_reset, time_run2; @@ -180,29 +180,26 @@ int main(int argc, char *argv[]) * are quite common for x86, requires changing only max_mem (KVM allows * 32k memslots, 32k * 2gb == ~64tb of guest memory). */ - slot_size = 2 * size_1gb; + slot_size = SZ_2G; max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_slots > first_slot, "KVM is broken"); /* All KVM MMUs should be able to survive a 128gb guest. */ - max_mem = 128 * size_1gb; + max_mem = 128ull * SZ_1G; calc_default_nr_vcpus(); while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { switch (opt) { case 'c': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); + nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'm': - max_mem = atoi(optarg) * size_1gb; - TEST_ASSERT(max_mem > 0, "memory size must be >0"); + max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G; break; case 's': - slot_size = atoi(optarg) * size_1gb; - TEST_ASSERT(slot_size > 0, "slot size must be >0"); + slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G; break; case 'H': hugepages = true; @@ -245,7 +242,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += size_1gb) + for (i = 0; i < slot_size; i += SZ_1G) __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); #else for (i = 0; i < slot_size; i += vm->page_size) @@ -260,7 +257,7 @@ int main(int argc, char *argv[]) vcpus = NULL; pr_info("Running with %lugb of guest memory and %u vCPUs\n", - (gpa - start_gpa) / size_1gb, nr_vcpus); + (gpa - start_gpa) / SZ_1G, nr_vcpus); rendezvous_with_vcpus(&time_start, "spawning"); rendezvous_with_vcpus(&time_run1, "run 1"); diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index bb1d17a1171b..9855c41ca811 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -21,7 +21,7 @@ #include <linux/bitops.h> #include <linux/userfaultfd.h> -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "test_util.h" #include "guest_modes.h" @@ -34,9 +34,7 @@ static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; -static bool run_vcpus = true; - -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; struct kvm_run *run; @@ -45,7 +43,7 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) run = vcpu->run; /* Let the guest access its memory until a stop signal is received */ - while (READ_ONCE(run_vcpus)) { + while (!READ_ONCE(memstress_args.stop_vcpus)) { ret = _vcpu_run(vcpu); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); @@ -72,10 +70,10 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, int i; /* - * Add the dummy memslot just below the perf_test_util memslot, which is + * Add the dummy memslot just below the memstress memslot, which is * at the top of the guest physical address space. */ - gpa = perf_test_args.gpa - pages * vm->page_size; + gpa = memstress_args.gpa - pages * vm->page_size; for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -87,8 +85,8 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, } struct test_params { - useconds_t memslot_modification_delay; - uint64_t nr_memslot_modifications; + useconds_t delay; + uint64_t nr_iterations; bool partition_vcpu_memory_access; }; @@ -97,25 +95,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vm *vm; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); pr_info("Finished creating vCPUs\n"); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - add_remove_memslot(vm, p->memslot_modification_delay, - p->nr_memslot_modifications); - - run_vcpus = false; + add_remove_memslot(vm, p->delay, p->nr_iterations); - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) @@ -144,9 +139,8 @@ int main(int argc, char *argv[]) int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int opt; struct test_params p = { - .memslot_modification_delay = 0, - .nr_memslot_modifications = - DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, + .delay = 0, + .nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, .partition_vcpu_memory_access = true }; @@ -158,16 +152,14 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'd': - p.memslot_modification_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(p.memslot_modification_delay >= 0, - "A negative delay is not supported."); + p.delay = atoi_non_negative("Delay", optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; @@ -175,7 +167,7 @@ int main(int argc, char *argv[]) p.partition_vcpu_memory_access = false; break; case 'i': - p.nr_memslot_modifications = atoi(optarg); + p.nr_iterations = atoi_positive("Number of iterations", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 44995446d942..e6587e193490 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -20,20 +20,20 @@ #include <unistd.h> #include <linux/compiler.h> +#include <linux/sizes.h> #include <test_util.h> #include <kvm_util.h> #include <processor.h> -#define MEM_SIZE ((512U << 20) + 4096) -#define MEM_SIZE_PAGES (MEM_SIZE / 4096) -#define MEM_GPA 0x10000000UL +#define MEM_EXTRA_SIZE SZ_64K + +#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE) +#define MEM_GPA SZ_256M #define MEM_AUX_GPA MEM_GPA #define MEM_SYNC_GPA MEM_AUX_GPA -#define MEM_TEST_GPA (MEM_AUX_GPA + 4096) -#define MEM_TEST_SIZE (MEM_SIZE - 4096) -static_assert(MEM_SIZE % 4096 == 0, "invalid mem size"); -static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); +#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE) +#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE) /* * 32 MiB is max size that gets well over 100 iterations on 509 slots. @@ -41,44 +41,38 @@ static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); * 8194 slots in use can then be tested (although with slightly * limited resolution). */ -#define MEM_SIZE_MAP ((32U << 20) + 4096) -#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096) -#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096) -#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096) -static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size"); -static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size"); +#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE) +#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE) /* * 128 MiB is min size that fills 32k slots with at least one page in each * while at the same time gets 100+ iterations in such test + * + * 2 MiB chunk size like a typical huge page */ -#define MEM_TEST_UNMAP_SIZE (128U << 20) -#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096) -/* 2 MiB chunk size like a typical huge page */ -#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12)) -static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE, - "invalid unmap test region size"); -static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0, - "invalid unmap test region size"); -static_assert(MEM_TEST_UNMAP_SIZE_PAGES % - (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0, - "invalid unmap test region size"); +#define MEM_TEST_UNMAP_SIZE SZ_128M +#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M /* * For the move active test the middle of the test area is placed on * a memslot boundary: half lies in the memslot being moved, half in * other memslot(s). * - * When running this test with 32k memslots (32764, really) each memslot - * contains 4 pages. - * The last one additionally contains the remaining 21 pages of memory, - * for the total size of 25 pages. - * Hence, the maximum size here is 50 pages. + * We have different number of memory slots, excluding the reserved + * memory slot 0, on various architectures and configurations. The + * memory size in this test is calculated by picking the maximal + * last memory slot's memory size, with alignment to the largest + * supported page size (64KB). In this way, the selected memory + * size for this test is compatible with test_memslot_move_prepare(). + * + * architecture slots memory-per-slot memory-on-last-slot + * -------------------------------------------------------------- + * x86-4KB 32763 16KB 160KB + * arm64-4KB 32766 16KB 112KB + * arm64-16KB 32766 16KB 112KB + * arm64-64KB 8192 64KB 128KB */ -#define MEM_TEST_MOVE_SIZE_PAGES (50) -#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096) +#define MEM_TEST_MOVE_SIZE (3 * SZ_64K) #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE) static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, "invalid move test region size"); @@ -100,6 +94,7 @@ struct vm_data { }; struct sync_area { + uint32_t guest_page_size; atomic_bool start_flag; atomic_bool exit_flag; atomic_bool sync_flag; @@ -192,14 +187,15 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) uint64_t gpage, pgoffs; uint32_t slot, slotoffs; void *base; + uint32_t guest_page_size = data->vm->page_size; TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); - TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096, + TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, "Too high gpa to translate"); gpa -= MEM_GPA; - gpage = gpa / 4096; - pgoffs = gpa % 4096; + gpage = gpa / guest_page_size; + pgoffs = gpa % guest_page_size; slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); slotoffs = gpage - (slot * data->pages_per_slot); @@ -217,14 +213,16 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) } base = data->hva_slots[slot]; - return (uint8_t *)base + slotoffs * 4096 + pgoffs; + return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; } static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot) { + uint32_t guest_page_size = data->vm->page_size; + TEST_ASSERT(slot < data->nslots, "Too high slot number"); - return MEM_GPA + slot * data->pages_per_slot * 4096; + return MEM_GPA + slot * data->pages_per_slot * guest_page_size; } static struct vm_data *alloc_vm(void) @@ -241,82 +239,114 @@ static struct vm_data *alloc_vm(void) return data; } +static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, + uint64_t pages_per_slot, uint64_t rempages) +{ + if (!pages_per_slot) + return false; + + if ((pages_per_slot * guest_page_size) % host_page_size) + return false; + + if ((rempages * guest_page_size) % host_page_size) + return false; + + return true; +} + + +static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) +{ + uint32_t guest_page_size = data->vm->page_size; + uint64_t mempages, pages_per_slot, rempages; + uint64_t slots; + + mempages = data->npages; + slots = data->nslots; + while (--slots > 1) { + pages_per_slot = mempages / slots; + if (!pages_per_slot) + continue; + + rempages = mempages % pages_per_slot; + if (check_slot_pages(host_page_size, guest_page_size, + pages_per_slot, rempages)) + return slots + 1; /* slot 0 is reserved */ + } + + return 0; +} + static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, - void *guest_code, uint64_t mempages, + void *guest_code, uint64_t mem_size, struct timespec *slot_runtime) { - uint32_t max_mem_slots; - uint64_t rempages; + uint64_t mempages, rempages; uint64_t guest_addr; - uint32_t slot; + uint32_t slot, host_page_size, guest_page_size; struct timespec tstart; struct sync_area *sync; - max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); - TEST_ASSERT(max_mem_slots > 1, - "KVM_CAP_NR_MEMSLOTS should be greater than 1"); - TEST_ASSERT(nslots > 1 || nslots == -1, - "Slot count cap should be greater than 1"); - if (nslots != -1) - max_mem_slots = min(max_mem_slots, (uint32_t)nslots); - pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots); + host_page_size = getpagesize(); + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + mempages = mem_size / guest_page_size; - TEST_ASSERT(mempages > 1, - "Can't test without any memory"); + data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); + TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size"); data->npages = mempages; - data->nslots = max_mem_slots - 1; - data->pages_per_slot = mempages / data->nslots; - if (!data->pages_per_slot) { - *maxslots = mempages + 1; + TEST_ASSERT(data->npages > 1, "Can't test without any memory"); + data->nslots = nslots; + data->pages_per_slot = data->npages / data->nslots; + rempages = data->npages % data->nslots; + if (!check_slot_pages(host_page_size, guest_page_size, + data->pages_per_slot, rempages)) { + *maxslots = get_max_slots(data, host_page_size); return false; } - rempages = mempages % data->nslots; data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); TEST_ASSERT(data->hva_slots, "malloc() fail"); data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); - ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", - max_mem_slots - 1, data->pages_per_slot, rempages); + data->nslots, data->pages_per_slot, rempages); clock_gettime(CLOCK_MONOTONIC, &tstart); - for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) { + for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { uint64_t npages; npages = data->pages_per_slot; - if (slot == max_mem_slots - 1) + if (slot == data->nslots) npages += rempages; vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, guest_addr, slot, npages, 0); - guest_addr += npages * 4096; + guest_addr += npages * guest_page_size; } *slot_runtime = timespec_elapsed(tstart); - for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) { + for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { uint64_t npages; uint64_t gpa; npages = data->pages_per_slot; - if (slot == max_mem_slots - 2) + if (slot == data->nslots) npages += rempages; - gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, - slot + 1); + gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, "vm_phy_pages_alloc() failed\n"); - data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr); - memset(data->hva_slots[slot], 0, npages * 4096); + data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); + memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); - guest_addr += npages * 4096; + guest_addr += npages * guest_page_size; } - virt_map(data->vm, MEM_GPA, MEM_GPA, mempages); + virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages); sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); atomic_init(&sync->start_flag, false); @@ -415,6 +445,7 @@ static bool guest_perform_sync(void) static void guest_code_test_memslot_move(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); GUEST_SYNC(0); @@ -425,7 +456,7 @@ static void guest_code_test_memslot_move(void) uintptr_t ptr; for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; - ptr += 4096) + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; /* @@ -443,6 +474,7 @@ static void guest_code_test_memslot_move(void) static void guest_code_test_memslot_map(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -452,14 +484,16 @@ static void guest_code_test_memslot_map(void) uintptr_t ptr; for (ptr = MEM_TEST_GPA; - ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; - ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; + ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) @@ -506,6 +540,9 @@ static void guest_code_test_memslot_unmap(void) static void guest_code_test_memslot_rw(void) { + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + GUEST_SYNC(0); guest_spin_until_start(); @@ -514,14 +551,14 @@ static void guest_code_test_memslot_rw(void) uintptr_t ptr; for (ptr = MEM_TEST_GPA; - ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) *(uint64_t *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; - for (ptr = MEM_TEST_GPA + 4096 / 2; - ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) { + for (ptr = MEM_TEST_GPA + page_size / 2; + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { uint64_t val = *(uint64_t *)ptr; GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); @@ -539,6 +576,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, struct sync_area *sync, uint64_t *maxslots, bool isactive) { + uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; movesrcgpa = vm_slot2gpa(data, data->nslots - 1); @@ -547,7 +585,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint64_t lastpages; vm_gpa2hva(data, movesrcgpa, &lastpages); - if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) { + if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { *maxslots = 0; return false; } @@ -593,8 +631,9 @@ static void test_memslot_do_unmap(struct vm_data *data, uint64_t offsp, uint64_t count) { uint64_t gpa, ctr; + uint32_t guest_page_size = data->vm->page_size; - for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) { + for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { uint64_t npages; void *hva; int ret; @@ -602,12 +641,12 @@ static void test_memslot_do_unmap(struct vm_data *data, hva = vm_gpa2hva(data, gpa, &npages); TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa); npages = min(npages, count - ctr); - ret = madvise(hva, npages * 4096, MADV_DONTNEED); + ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED); TEST_ASSERT(!ret, "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64, hva, gpa); ctr += npages; - gpa += npages * 4096; + gpa += npages * guest_page_size; } TEST_ASSERT(ctr == count, "madvise(MADV_DONTNEED) should exactly cover all of the requested area"); @@ -618,11 +657,12 @@ static void test_memslot_map_unmap_check(struct vm_data *data, { uint64_t gpa; uint64_t *val; + uint32_t guest_page_size = data->vm->page_size; if (!map_unmap_verify) return; - gpa = MEM_TEST_GPA + offsp * 4096; + gpa = MEM_TEST_GPA + offsp * guest_page_size; val = (typeof(val))vm_gpa2hva(data, gpa, NULL); TEST_ASSERT(*val == valexp, "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")", @@ -632,12 +672,14 @@ static void test_memslot_map_unmap_check(struct vm_data *data, static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; + /* * Unmap the second half of the test area while guest writes to (maps) * the first half. */ - test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2, - MEM_TEST_MAP_SIZE_PAGES / 2); + test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2); /* * Wait for the guest to finish writing the first half of the test @@ -648,10 +690,8 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) */ host_perform_sync(sync); test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); - test_memslot_map_unmap_check(data, - MEM_TEST_MAP_SIZE_PAGES / 2 - 1, - MEM_TEST_VAL_1); - test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2); + test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1); + test_memslot_do_unmap(data, 0, guest_pages / 2); /* @@ -664,16 +704,16 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) * the test area. */ host_perform_sync(sync); - test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2, - MEM_TEST_VAL_2); - test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1, - MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2); } static void test_memslot_unmap_loop_common(struct vm_data *data, struct sync_area *sync, uint64_t chunk) { + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; uint64_t ctr; /* @@ -685,42 +725,49 @@ static void test_memslot_unmap_loop_common(struct vm_data *data, */ host_perform_sync(sync); test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); - for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk) + for (ctr = 0; ctr < guest_pages / 2; ctr += chunk) test_memslot_do_unmap(data, ctr, chunk); /* Likewise, but for the opposite host / guest areas */ host_perform_sync(sync); - test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2, - MEM_TEST_VAL_2); - for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2; - ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk) + test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); + for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk) test_memslot_do_unmap(data, ctr, chunk); } static void test_memslot_unmap_loop(struct vm_data *data, struct sync_area *sync) { - test_memslot_unmap_loop_common(data, sync, 1); + uint32_t host_page_size = getpagesize(); + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_chunk_pages = guest_page_size >= host_page_size ? + 1 : host_page_size / guest_page_size; + + test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_unmap_loop_chunked(struct vm_data *data, struct sync_area *sync) { - test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES); + uint32_t guest_page_size = data->vm->page_size; + uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; + + test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) { uint64_t gptr; + uint32_t guest_page_size = data->vm->page_size; - for (gptr = MEM_TEST_GPA + 4096 / 2; - gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) + for (gptr = MEM_TEST_GPA + guest_page_size / 2; + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; host_perform_sync(sync); for (gptr = MEM_TEST_GPA; - gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) { + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) { uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); uint64_t val = *vptr; @@ -749,7 +796,7 @@ static bool test_execute(int nslots, uint64_t *maxslots, struct timespec *slot_runtime, struct timespec *guest_runtime) { - uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES; + uint64_t mem_size = tdata->mem_size ? : MEM_SIZE; struct vm_data *data; struct sync_area *sync; struct timespec tstart; @@ -764,6 +811,7 @@ static bool test_execute(int nslots, uint64_t *maxslots, sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); + sync->guest_page_size = data->vm->page_size; if (tdata->prepare && !tdata->prepare(data, sync, maxslots)) { ret = false; @@ -797,19 +845,19 @@ exit_free: static const struct test_data tests[] = { { .name = "map", - .mem_size = MEM_SIZE_MAP_PAGES, + .mem_size = MEM_SIZE_MAP, .guest_code = guest_code_test_memslot_map, .loop = test_memslot_map_loop, }, { .name = "unmap", - .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop, }, { .name = "unmap chunked", - .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, .guest_code = guest_code_test_memslot_unmap, .loop = test_memslot_unmap_loop_chunked, }, @@ -867,9 +915,46 @@ static void help(char *name, struct test_args *targs) pr_info("%d: %s\n", ctr, tests[ctr].name); } +static bool check_memory_sizes(void) +{ + uint32_t host_page_size = getpagesize(); + uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + + if (host_page_size > SZ_64K || guest_page_size > SZ_64K) { + pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n", + host_page_size, guest_page_size); + return false; + } + + if (MEM_SIZE % guest_page_size || + MEM_TEST_SIZE % guest_page_size) { + pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n"); + return false; + } + + if (MEM_SIZE_MAP % guest_page_size || + MEM_TEST_MAP_SIZE % guest_page_size || + (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 || + (MEM_TEST_MAP_SIZE / guest_page_size) % 2) { + pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n"); + return false; + } + + if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE || + MEM_TEST_UNMAP_SIZE % guest_page_size || + (MEM_TEST_UNMAP_SIZE / guest_page_size) % + (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) { + pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n"); + return false; + } + + return true; +} + static bool parse_args(int argc, char *argv[], struct test_args *targs) { + uint32_t max_mem_slots; int opt; while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { @@ -885,40 +970,28 @@ static bool parse_args(int argc, char *argv[], map_unmap_verify = true; break; case 's': - targs->nslots = atoi(optarg); - if (targs->nslots <= 0 && targs->nslots != -1) { - pr_info("Slot count cap has to be positive or -1 for no cap\n"); + targs->nslots = atoi_paranoid(optarg); + if (targs->nslots <= 1 && targs->nslots != -1) { + pr_info("Slot count cap must be larger than 1 or -1 for no cap\n"); return false; } break; case 'f': - targs->tfirst = atoi(optarg); - if (targs->tfirst < 0) { - pr_info("First test to run has to be non-negative\n"); - return false; - } + targs->tfirst = atoi_non_negative("First test", optarg); break; case 'e': - targs->tlast = atoi(optarg); - if (targs->tlast < 0 || targs->tlast >= NTESTS) { + targs->tlast = atoi_non_negative("Last test", optarg); + if (targs->tlast >= NTESTS) { pr_info("Last test to run has to be non-negative and less than %zu\n", NTESTS); return false; } break; case 'l': - targs->seconds = atoi(optarg); - if (targs->seconds < 0) { - pr_info("Test length in seconds has to be non-negative\n"); - return false; - } + targs->seconds = atoi_non_negative("Test length", optarg); break; case 'r': - targs->runs = atoi(optarg); - if (targs->runs <= 0) { - pr_info("Runs per test has to be positive\n"); - return false; - } + targs->runs = atoi_positive("Runs per test", optarg); break; } } @@ -933,6 +1006,21 @@ static bool parse_args(int argc, char *argv[], return false; } + max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); + if (max_mem_slots <= 1) { + pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n"); + return false; + } + + /* Memory slot 0 is reserved */ + if (targs->nslots == -1) + targs->nslots = max_mem_slots - 1; + else + targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1; + + pr_info_v("Allowed Number of memory slots: %"PRIu32"\n", + targs->nslots + 1); + return true; } @@ -1007,8 +1095,8 @@ int main(int argc, char *argv[]) struct test_result rbestslottime; int tctr; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); + if (!check_memory_sizes()) + return -1; if (!parse_args(argc, argv, &targs)) return -1; diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 6f88da7e60be..f74e76d03b7e 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -41,18 +41,6 @@ static void guest_code(void) GUEST_SYNC(0); } -/* - * We have to perform direct system call for getcpu() because it's - * not available until glic 2.29. - */ -static void sys_getcpu(unsigned *cpu) -{ - int r; - - r = syscall(__NR_getcpu, cpu, NULL, NULL); - TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", errno, strerror(errno)); -} - static int next_cpu(int cpu) { /* @@ -205,9 +193,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); @@ -224,7 +209,6 @@ int main(int argc, char *argv[]) * CPU affinity. */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); @@ -253,7 +237,9 @@ int main(int argc, char *argv[]) * across the seq_cnt reads. */ smp_rmb(); - sys_getcpu(&cpu); + r = sys_getcpu(&cpu, NULL); + TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", + errno, strerror(errno)); rseq_cpu = rseq_current_cpu_raw(); smp_rmb(); } while (snapshot != atomic_read(&seq_cnt)); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9113696d5178..3fd81e58f40c 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -760,8 +760,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 19486084eb30..e41e2cb8ffa9 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -296,8 +296,6 @@ int main(int argc, char *argv[]) bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); int idx; - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 3fdb6e2598eb..2ddde41c44ba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -231,9 +231,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 0d55f508d595..2ef1d1b72ce4 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -392,9 +392,6 @@ int main(int argc, char *argv[]) int i, loops; #endif - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - #ifdef __x86_64__ /* * FIXME: the zero-memslot test fails on aarch64 and s390x because @@ -407,7 +404,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ if (argc > 1) - loops = atoi(argv[1]); + loops = atoi_positive("Number of iterations", argv[1]); else loops = 10; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index db8967f1a17b..c87f38712073 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -266,7 +266,6 @@ int main(int ac, char **av) gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); - ucall_init(vm, NULL); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 1c274933912b..7f5b330b6a1b 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -121,7 +121,6 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_main); check_preconditions(vcpu); - ucall_init(vm, NULL); enter_guest(vcpu); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index dadcbad10a1d..bd72c6eb3b67 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -39,11 +39,6 @@ #define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) #define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) -#define TILE_CPUID 0x1d -#define XSTATE_CPUID 0xd -#define TILE_PALETTE_CPUID_SUBLEAVE 0x1 -#define XSTATE_USER_STATE_SUBLEAVE 0x0 - #define XSAVE_HDR_OFFSET 512 struct xsave_data { @@ -129,71 +124,26 @@ static bool check_xsave_supports_xtile(void) return __xgetbv(0) & XFEATURE_MASK_XTILE; } -static bool enum_xtile_config(void) +static void check_xtile_info(void) { - u32 eax, ebx, ecx, edx; - - __cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx || !ecx) - return false; - - xtile.max_names = ebx >> 16; - if (xtile.max_names < NUM_TILES) - return false; - - xtile.bytes_per_tile = eax >> 16; - if (xtile.bytes_per_tile < TILE_SIZE) - return false; - - xtile.bytes_per_row = ebx; - xtile.max_rows = ecx; - - return true; -} - -static bool enum_xsave_tile(void) -{ - u32 eax, ebx, ecx, edx; - - __cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx) - return false; - - xtile.xsave_offset = ebx; - xtile.xsave_size = eax; - - return true; -} - -static bool check_xsave_size(void) -{ - u32 eax, ebx, ecx, edx; - bool valid = false; - - __cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (ebx && ebx <= XSAVE_SIZE) - valid = true; - - return valid; -} - -static bool check_xtile_info(void) -{ - bool ret = false; - - if (!check_xsave_size()) - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); - if (!enum_xsave_tile()) - return ret; - - if (!enum_xtile_config()) - return ret; - - if (sizeof(struct tile_data) >= xtile.xsave_size) - ret = true; + xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET); + GUEST_ASSERT(xtile.xsave_offset == 2816); + xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size); - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS)); + xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS); + GUEST_ASSERT(xtile.max_names == 8); + xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW); + GUEST_ASSERT(xtile.bytes_per_row == 64); + xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS); + GUEST_ASSERT(xtile.max_rows == 16); } static void set_tilecfg(struct tile_config *cfg) @@ -238,16 +188,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, { init_regs(); check_cpuid_xsave(); - GUEST_ASSERT(check_xsave_supports_xtile()); - GUEST_ASSERT(check_xtile_info()); - - /* check xtile configs */ - GUEST_ASSERT(xtile.xsave_offset == 2816); - GUEST_ASSERT(xtile.xsave_size == 8192); - GUEST_ASSERT(xtile.max_names == 8); - GUEST_ASSERT(xtile.bytes_per_tile == 1024); - GUEST_ASSERT(xtile.bytes_per_row == 64); - GUEST_ASSERT(xtile.max_rows == 16); + check_xsave_supports_xtile(); + check_xtile_info(); GUEST_SYNC(1); /* xfd=0, enable amx */ @@ -307,18 +249,24 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + /* + * Note, all off-by-default features must be enabled before anything + * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has(). + */ vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT); - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); - /* Get xsave/restore max size */ - xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx; + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), + "KVM should enumerate max XSAVE size when XSAVE is supported"); + xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); run = vcpu->run; vcpu_regs_get(vcpu, ®s1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index a6aeee2e62e4..2fc3ad9c887e 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -43,15 +43,6 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) } -static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid) -{ - u32 eax, ebx, ecx, edx; - - cpuid(0x40000000, &eax, &ebx, &ecx, &edx); - - GUEST_ASSERT(eax == 0x40000001); -} - static void guest_main(struct kvm_cpuid2 *guest_cpuid) { GUEST_SYNC(1); @@ -60,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - test_cpuid_40000000(guest_cpuid); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); GUEST_DONE(); } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 4208487652f8..1027a671c7d3 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -57,9 +57,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c deleted file mode 100644 index 236e11755ba6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ /dev/null @@ -1,193 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020, Google LLC. - * - * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. - */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -#define MAXPHYADDR 36 - -#define MEM_REGION_GVA 0x0000123456789000 -#define MEM_REGION_GPA 0x0000000700000000 -#define MEM_REGION_SLOT 10 -#define MEM_REGION_SIZE PAGE_SIZE - -static void guest_code(void) -{ - __asm__ __volatile__("flds (%[addr])" - :: [addr]"r"(MEM_REGION_GVA)); - - GUEST_DONE(); -} - -/* - * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, - * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". - */ -#define GET_RM(insn_byte) (insn_byte & 0x7) -#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3) -#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6) - -/* Ensure we are dealing with a simple 2-byte flds instruction. */ -static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) -{ - return insn_size >= 2 && - insn_bytes[0] == 0xd9 && - GET_REG(insn_bytes[1]) == 0x0 && - GET_MOD(insn_bytes[1]) == 0x0 && - /* Ensure there is no SIB byte. */ - GET_RM(insn_bytes[1]) != 0x4 && - /* Ensure there is no displacement byte. */ - GET_RM(insn_bytes[1]) != 0x5; -} - -static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - uint8_t *insn_bytes; - uint8_t insn_size; - uint64_t flags; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Unexpected suberror: %u", - run->emulation_failure.suberror); - - if (run->emulation_failure.ndata >= 1) { - flags = run->emulation_failure.flags; - if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) && - run->emulation_failure.ndata >= 3) { - insn_size = run->emulation_failure.insn_size; - insn_bytes = run->emulation_failure.insn_bytes; - - TEST_ASSERT(insn_size <= 15 && insn_size > 0, - "Unexpected instruction size: %u", - insn_size); - - TEST_ASSERT(is_flds(insn_bytes, insn_size), - "Unexpected instruction. Expected 'flds' (0xd9 /0)"); - - /* - * If is_flds() succeeded then the instruction bytes - * contained an flds instruction that is 2-bytes in - * length (ie: no prefix, no SIB, no displacement). - */ - vcpu_regs_get(vcpu, ®s); - regs.rip += 2; - vcpu_regs_set(vcpu, ®s); - } - } -} - -static void do_guest_assert(struct ucall *uc) -{ - REPORT_GUEST_ASSERT(*uc); -} - -static void check_for_guest_assert(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (vcpu->run->exit_reason == KVM_EXIT_IO && - get_ucall(vcpu, &uc) == UCALL_ABORT) { - do_guest_assert(&uc); - } -} - -static void process_ucall_done(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - check_for_guest_assert(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, - "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", - uc.cmd, UCALL_DONE); -} - -static uint64_t process_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - break; - case UCALL_ABORT: - do_guest_assert(&uc); - break; - case UCALL_DONE: - process_ucall_done(vcpu); - break; - default: - TEST_ASSERT(false, "Unexpected ucall"); - } - - return uc.cmd; -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t gpa, pte; - uint64_t *hva; - int rc; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); - - rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); - TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); - vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - MEM_REGION_GPA, MEM_REGION_SLOT, - MEM_REGION_SIZE / PAGE_SIZE, 0); - gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, - MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); - virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); - hva = addr_gpa2hva(vm, MEM_REGION_GPA); - memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA); - vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36)); - - vcpu_run(vcpu); - process_exit_on_emulation_error(vcpu); - vcpu_run(vcpu); - - TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE"); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c new file mode 100644 index 000000000000..37c61f712fd5 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + * + * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" + +#define MMIO_GPA 0x700000000 +#define MMIO_GVA MMIO_GPA + +static void guest_code(void) +{ + /* Execute flds with an MMIO address to force KVM to emulate it. */ + flds(MMIO_GVA); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + virt_map(vm, MMIO_GVA, MMIO_GPA, 1); + + vcpu_run(vcpu); + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h new file mode 100644 index 000000000000..e43a7df25f2c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_FLDS_EMULATION_H +#define SELFTEST_KVM_FLDS_EMULATION_H + +#include "kvm_util.h" + +#define FLDS_MEM_EAX ".byte 0xd9, 0x00" + +/* + * flds is an instruction that the KVM instruction emulator is known not to + * support. This can be used in guest code along with a mechanism to force + * KVM to emulate the instruction (e.g. by providing an MMIO address) to + * exercise emulation failures. + */ +static inline void flds(uint64_t address) +{ + __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); +} + +static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + uint8_t *insn_bytes; + uint64_t flags; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Unexpected suberror: %u", + run->emulation_failure.suberror); + + flags = run->emulation_failure.flags; + TEST_ASSERT(run->emulation_failure.ndata >= 3 && + flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, + "run->emulation_failure is missing instruction bytes"); + + TEST_ASSERT(run->emulation_failure.insn_size >= 2, + "Expected a 2-byte opcode for 'flds', got %d bytes", + run->emulation_failure.insn_size); + + insn_bytes = run->emulation_failure.insn_bytes; + TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + insn_bytes[0], insn_bytes[1]); + + vcpu_regs_get(vcpu, ®s); + regs.rip += 2; + vcpu_regs_set(vcpu, ®s); +} + +#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index e804eb08dff9..5c27efbf405e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -134,9 +134,6 @@ int main(int argc, char *argv[]) const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index 99bc202243d2..af29e5776d40 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -16,6 +16,7 @@ #include "kvm_util.h" +#include "hyperv.h" #include "vmx.h" static int ud_count; @@ -30,24 +31,19 @@ static void guest_nmi_handler(struct ex_regs *regs) { } -/* Exits to L1 destroy GRPs! */ -static inline void rdmsr_fs_base(void) +static inline void rdmsr_from_l2(uint32_t msr) { - __asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); -} -static inline void rdmsr_gs_base(void) -{ - __asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); } +/* Exit to L1 from L2 with RDMSR instruction */ void l2_guest_code(void) { + u64 unused; + GUEST_SYNC(7); GUEST_SYNC(8); @@ -58,42 +54,58 @@ void l2_guest_code(void) vmcall(); /* MSR-Bitmap tests */ - rdmsr_fs_base(); /* intercepted */ - rdmsr_fs_base(); /* intercepted */ - rdmsr_gs_base(); /* not intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ vmcall(); - rdmsr_gs_base(); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ + + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, + &unused); /* Done, exit to L1 and never come back. */ vmcall(); } -void guest_code(struct vmx_pages *vmx_pages) +void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, + vm_vaddr_t hv_hcall_page_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); + x2apic_enable(); GUEST_SYNC(1); GUEST_SYNC(2); - enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + evmcs_enable(); - GUEST_ASSERT(vmx_pages->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_SYNC(3); - GUEST_ASSERT(load_vmcs(vmx_pages)); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(load_evmcs(hv_pages)); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); GUEST_SYNC(4); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); GUEST_SYNC(5); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); current_evmcs->revision_id = -1u; GUEST_ASSERT(vmlaunch()); current_evmcs->revision_id = EVMCS_VERSION; @@ -102,8 +114,18 @@ void guest_code(struct vmx_pages *vmx_pages) vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | PIN_BASED_NMI_EXITING); + /* L2 TLB flush setup */ + current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; + current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + current_evmcs->hv_vm_id = 1; + current_evmcs->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); + GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); + GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); /* * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is @@ -120,7 +142,7 @@ void guest_code(struct vmx_pages *vmx_pages) /* Intercept RDMSR 0xc0000100 */ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); - set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); + __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); current_evmcs->guest_rip += 2; /* rdmsr */ @@ -132,7 +154,7 @@ void guest_code(struct vmx_pages *vmx_pages) current_evmcs->guest_rip += 2; /* rdmsr */ /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); + __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; GUEST_ASSERT(!vmresume()); @@ -146,12 +168,24 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); current_evmcs->guest_rip += 2; /* rdmsr */ + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. + */ + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); + GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_SYNC(11); /* Try enlightened vmptrld with an incorrect GPA */ - evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); + evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(ud_count == 1); GUEST_DONE(); @@ -198,7 +232,8 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0; + vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -212,11 +247,16 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + vcpu_set_hv_cpuid(vcpu); vcpu_enable_evmcs(vcpu); vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 05b32e550a80..3163c3e8db0a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -13,25 +13,6 @@ #include "processor.h" #include "hyperv.h" -#define LINUX_OS_ID ((u64)0x8100 << 48) - -static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, uint64_t *hv_status) -{ - uint8_t vector; - - /* Note both the hypercall and the "asm safe" clobber r9-r11. */ - asm volatile("mov %[output_address], %%r8\n\t" - KVM_ASM_SAFE("vmcall") - : "=a" (*hv_status), - "+c" (control), "+d" (input_address), - KVM_ASM_SAFE_OUTPUTS(vector) - : [output_address] "r"(output_address), - "a" (-EFAULT) - : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); - return vector; -} - struct msr_data { uint32_t idx; bool available; @@ -71,7 +52,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) GUEST_ASSERT(hcall->control); - wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { @@ -81,7 +62,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) input = output = 0; } - vector = hypercall(hcall->control, input, output, &res); + vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); } else { @@ -169,7 +150,7 @@ static void guest_test_msrs_access(void) */ msr->idx = HV_X64_MSR_GUEST_OS_ID; msr->write = 1; - msr->write_val = LINUX_OS_ID; + msr->write_val = HYPERV_LINUX_OS_ID; msr->available = 1; break; case 3: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c new file mode 100644 index 000000000000..0cbb0e646ef8 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <pthread.h> +#include <inttypes.h> + +#include "kvm_util.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define RECEIVER_VCPU_ID_1 2 +#define RECEIVER_VCPU_ID_2 65 + +#define IPI_VECTOR 0xfe + +static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[2]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + +static inline void hv_init(vm_vaddr_t pgs_gpa) +{ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); +} + +static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + u32 vcpu_id; + + x2apic_enable(); + hv_init(pgs_gpa); + + vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + /* Signal sender vCPU we're ready */ + ipis_rcvd[vcpu_id] = (u64)-1; + + for (;;) + asm volatile("sti; hlt; cli"); +} + +static void guest_ipi_handler(struct ex_regs *regs) +{ + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + + ipis_rcvd[vcpu_id]++; + wrmsr(HV_X64_MSR_EOI, 1); +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 100000000; i++) + asm volatile("nop"); +} + +static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) +{ + struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; + struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; + int stage = 1, ipis_expected[2] = {0}; + + hv_init(pgs_gpa); + GUEST_SYNC(stage++); + + /* Wait for receiver vCPUs to come up */ + while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2]) + nop_loop(); + ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0; + + /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + ipi->vector = IPI_VECTOR; + ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ + hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 0; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; + ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); + ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); + hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), + pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */ + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ + memset(hcall_page, 0, 4096); + ipi_ex->vector = IPI_VECTOR; + ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + /* + * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL. + */ + ipi_ex->vp_set.valid_bank_mask = 0; + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); + hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT, + IPI_VECTOR, HV_GENERIC_SET_ALL); + nop_loop(); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); + GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); + GUEST_SYNC(stage++); + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + int old, r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + + TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id); + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + unsigned int exit_reason; + vm_vaddr_t hcall_page; + pthread_t threads[2]; + int stage = 1, r; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + vm_init_descriptor_tables(vm); + + vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); + vcpu_init_descriptor_tables(vcpu[1]); + vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); + vcpu_init_descriptor_tables(vcpu[2]); + vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); + + vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); + vcpu_set_hv_cpuid(vcpu[0]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", r); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create failed errno=%d", errno); + + while (true) { + vcpu_run(vcpu[0]); + + exit_reason = vcpu[0]->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return r; +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index a380ad7bb9b3..68a7d354ea07 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -23,59 +23,78 @@ #define L2_GUEST_STACK_SIZE 256 -struct hv_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31) +/* Exit to L1 from L2 with RDMSR instruction */ +static inline void rdmsr_from_l2(uint32_t msr) +{ + /* Currently, L1 doesn't preserve GPRs during vmexits. */ + __asm__ __volatile__ ("rdmsr" : : "c"(msr) : + "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); +} void l2_guest_code(void) { + u64 unused; + GUEST_SYNC(3); /* Exit to L1 */ vmmcall(); /* MSR-Bitmap tests */ - rdmsr(MSR_FS_BASE); /* intercepted */ - rdmsr(MSR_FS_BASE); /* intercepted */ - rdmsr(MSR_GS_BASE); /* not intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ vmmcall(); - rdmsr(MSR_GS_BASE); /* intercepted */ + rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ GUEST_SYNC(5); + /* L2 TLB flush tests */ + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + rdmsr_from_l2(MSR_FS_BASE); + /* + * Note: hypercall status (RAX) is not preserved correctly by L1 after + * synthetic vmexit, use unchecked version. + */ + __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS, &unused); + /* Done, exit to L1 and never come back. */ vmmcall(); } -static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) +static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, + struct hyperv_test_pages *hv_pages, + vm_vaddr_t pgs_gpa) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; GUEST_SYNC(1); - wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); GUEST_ASSERT(svm->vmcb_gpa); /* Prepare for L2 execution. */ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + /* L2 TLB flush setup */ + hve->partition_assist_page = hv_pages->partition_assist_gpa; + hve->hv_enlightenments_control.nested_flush_hypercall = 1; + hve->hv_vm_id = 1; + hve->hv_vp_id = 1; + current_vp_assist->nested_control.features.directhypercall = 1; + *(u32 *)(hv_pages->partition_assist) = 0; + GUEST_SYNC(2); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); @@ -84,7 +103,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) /* Intercept RDMSR 0xc0000100 */ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT; - set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); + __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ @@ -96,20 +115,34 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) vmcb->save.rip += 2; /* rdmsr */ /* Intercept RDMSR 0xc0000101 without telling KVM about it */ - set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); + __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ - vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS; + vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); vmcb->save.rip += 3; /* vmcall */ /* Now tell KVM we've changed MSR-Bitmap */ - vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS; + vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS; run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); vmcb->save.rip += 2; /* rdmsr */ + + /* + * L2 TLB flush test. First VMCALL should be handled directly by L0, + * no VMCALL exit expected. + */ + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + /* Enable synthetic vmexit */ + *(u32 *)(hv_pages->partition_assist) = 1; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL); + GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH); + run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); GUEST_SYNC(6); @@ -119,8 +152,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0; - + vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; + vm_vaddr_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; @@ -134,7 +167,13 @@ int main(int argc, char *argv[]) vcpu_set_hv_cpuid(vcpu); run = vcpu->run; vcpu_alloc_svm(vm, &nested_gva); - vcpu_args_set(vcpu, 1, nested_gva); + vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + + hcall_page = vm_vaddr_alloc_pages(vm, 1); + memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); + + vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); + vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); for (stage = 1;; stage++) { vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c new file mode 100644 index 000000000000..68f97ff720a7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <asm/barrier.h> +#include <pthread.h> +#include <inttypes.h> + +#include "kvm_util.h" +#include "processor.h" +#include "hyperv.h" +#include "test_util.h" +#include "vmx.h" + +#define WORKER_VCPU_ID_1 2 +#define WORKER_VCPU_ID_2 65 + +#define NTRY 100 +#define NTEST_PAGES 2 + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ +struct hv_tlb_flush { + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +} __packed; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +} __packed; + +/* + * Pass the following info to 'workers' and 'sender' + * - Hypercall page's GVA + * - Hypercall page's GPA + * - Test pages GVA + * - GVAs of the test pages' PTEs + */ +struct test_data { + vm_vaddr_t hcall_gva; + vm_paddr_t hcall_gpa; + vm_vaddr_t test_pages; + vm_vaddr_t test_pages_pte[NTEST_PAGES]; +}; + +/* 'Worker' vCPU code checking the contents of the test page */ +static void worker_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); + void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES; + u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64)); + u64 expected, val; + + x2apic_enable(); + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + + for (;;) { + cpu_relax(); + + expected = READ_ONCE(*this_cpu); + + /* + * Make sure the value in the test page is read after reading + * the expectation for the first time. Pairs with wmb() in + * prepare_to_test(). + */ + rmb(); + + val = READ_ONCE(*(u64 *)data->test_pages); + + /* + * Make sure the value in the test page is read after before + * reading the expectation for the second time. Pairs with wmb() + * post_test(). + */ + rmb(); + + /* + * '0' indicates the sender is between iterations, wait until + * the sender is ready for this vCPU to start checking again. + */ + if (!expected) + continue; + + /* + * Re-read the per-vCPU byte to ensure the sender didn't move + * onto a new iteration. + */ + if (expected != READ_ONCE(*this_cpu)) + continue; + + GUEST_ASSERT(val == expected); + } +} + +/* + * Write per-CPU info indicating what each 'worker' CPU is supposed to see in + * test page. '0' means don't check. + */ +static void set_expected_val(void *addr, u64 val, int vcpu_id) +{ + void *exp_page = addr + PAGE_SIZE * NTEST_PAGES; + + *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val; +} + +/* + * Update PTEs swapping two test pages. + * TODO: use swap()/xchg() when these are provided. + */ +static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) +{ + uint64_t tmp = *(uint64_t *)pte_gva1; + + *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; + *(uint64_t *)pte_gva2 = tmp; +} + +/* + * TODO: replace the silly NOP loop with a proper udelay() implementation. + */ +static inline void do_delay(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +/* + * Prepare to test: 'disable' workers by setting the expectation to '0', + * clear hypercall input page and then swap two test pages. + */ +static inline void prepare_to_test(struct test_data *data) +{ + /* Clear hypercall input page */ + memset((void *)data->hcall_gva, 0, PAGE_SIZE); + + /* 'Disable' workers */ + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2); + + /* Make sure workers are 'disabled' before we swap PTEs. */ + wmb(); + + /* Make sure workers have enough time to notice */ + do_delay(); + + /* Swap test page mappings */ + swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]); +} + +/* + * Finalize the test: check hypercall resule set the expected val for + * 'worker' CPUs and give them some time to test. + */ +static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) +{ + /* Make sure we change the expectation after swapping PTEs */ + wmb(); + + /* Set the expectation for workers, '0' means don't test */ + set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1); + set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2); + + /* Make sure workers have enough time to test */ + do_delay(); +} + +#define TESTVAL1 0x0101010101010101 +#define TESTVAL2 0x0202020202020202 + +/* Main vCPU doing the test */ +static void sender_guest_code(vm_vaddr_t test_data) +{ + struct test_data *data = (struct test_data *)test_data; + struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; + struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; + vm_paddr_t hcall_gpa = data->hcall_gpa; + int i, stage = 1; + + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa); + + /* "Slow" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->processor_mask = 0; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa, + hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS; + flush->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + hcall_gpa, hcall_gpa + PAGE_SIZE); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + /* "Fast" hypercalls */ + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->processor_mask = BIT(WORKER_VCPU_ID_1); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | + HV_HYPERCALL_FAST_BIT, 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush->processor_mask, 1); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0, + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_ALL_PROCESSORS); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [1] */ + flush_ex->gva_list[1] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) | + BIT_ULL(WORKER_VCPU_ID_1 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : + TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) | + BIT_ULL(WORKER_VCPU_ID_2 / 64); + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64); + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64); + /* bank_contents and gva_list occupy the same space, thus [2] */ + flush_ex->gva_list[2] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (2 << HV_HYPERCALL_VARHEAD_OFFSET) | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | + HV_HYPERCALL_FAST_BIT, + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_SYNC(stage++); + + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */ + for (i = 0; i < NTRY; i++) { + prepare_to_test(data); + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL; + flush_ex->gva_list[0] = (u64)data->test_pages; + hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2); + hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | + HV_HYPERCALL_FAST_BIT | + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES); + post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, + i % 2 ? TESTVAL1 : TESTVAL2); + } + + GUEST_DONE(); +} + +static void *vcpu_thread(void *arg) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg; + struct ucall uc; + int old; + int r; + unsigned int exit_reason; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + vcpu_run(vcpu); + exit_reason = vcpu->run->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", + vcpu->id, exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + default: + TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu[3]; + unsigned int exit_reason; + pthread_t threads[2]; + vm_vaddr_t test_data_page, gva; + vm_paddr_t gpa; + uint64_t *pte; + struct test_data *data; + struct ucall uc; + int stage = 1, r, i; + + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); + + /* Test data page */ + test_data_page = vm_vaddr_alloc_page(vm); + data = (struct test_data *)addr_gva2hva(vm, test_data_page); + + /* Hypercall input/output */ + data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); + data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); + memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); + + /* + * Test pages: the first one is filled with '0x01's, the second with '0x02's + * and the test will swap their mappings. The third page keeps the indication + * about the current state of mappings. + */ + data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); + for (i = 0; i < NTEST_PAGES; i++) + memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), + (u8)(i + 1), PAGE_SIZE); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1); + set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2); + + /* + * Get PTE pointers for test pages and map them inside the guest. + * Use separate page for each PTE for simplicity. + */ + gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); + for (i = 0; i < NTEST_PAGES; i++) { + pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); + gpa = addr_hva2gpa(vm, pte); + __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); + } + + /* + * Sender vCPU which performs the test: swaps test pages, sets expectation + * for 'workers' and issues TLB flush hypercalls. + */ + vcpu_args_set(vcpu[0], 1, test_data_page); + vcpu_set_hv_cpuid(vcpu[0]); + + /* Create worker vCPUs which check the contents of the test pages */ + vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code); + vcpu_args_set(vcpu[1], 1, test_data_page); + vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1); + vcpu_set_hv_cpuid(vcpu[1]); + + vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code); + vcpu_args_set(vcpu[2], 1, test_data_page); + vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2); + vcpu_set_hv_cpuid(vcpu[2]); + + r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]); + TEST_ASSERT(!r, "pthread_create() failed"); + + r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]); + TEST_ASSERT(!r, "pthread_create() failed"); + + while (true) { + vcpu_run(vcpu[0]); + exit_reason = vcpu[0]->run->exit_reason; + + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + exit_reason, exit_reason_str(exit_reason)); + + switch (get_ucall(vcpu[0], &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Unexpected stage: %ld (%d expected)\n", + uc.args[1], stage); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + stage++; + } + +done: + cancel_join_vcpu_thread(threads[0], vcpu[1]); + cancel_join_vcpu_thread(threads[1], vcpu[2]); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 59ffe7fd354f..251794f83719 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,10 +241,10 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi(optarg); + reclaim_period_ms = atoi_positive("Reclaim period", optarg); break; case 't': - token = atoi(optarg); + token = atoi_paranoid(optarg); break; case 'r': reboot_permissions = true; @@ -257,7 +257,6 @@ int main(int argc, char **argv) } TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); - TEST_REQUIRE(reclaim_period_ms > 0); __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 76417c7d687b..310a104d94f0 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,9 +72,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; uint64_t msr_platform_info; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index ea4e259a1e2e..2de98fce7edd 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -21,29 +21,6 @@ #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_ebx { - struct { - unsigned int no_unhalted_core_cycles:1; - unsigned int no_instructions_retired:1; - unsigned int no_unhalted_reference_cycles:1; - unsigned int no_llc_reference:1; - unsigned int no_llc_misses:1; - unsigned int no_branch_instruction_retired:1; - unsigned int no_branch_misses_retired:1; - } split; - unsigned int full; -}; - /* End of stuff taken from perf_event.h. */ /* Oddly, this isn't in perf_event.h. */ @@ -380,46 +357,31 @@ static void test_pmu_config_disable(void (*guest_code)(void)) } /* - * Check for a non-zero PMU version, at least one general-purpose - * counter per logical processor, an EBX bit vector of length greater - * than 5, and EBX[5] clear. - */ -static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry) -{ - union cpuid10_eax eax = { .full = entry->eax }; - union cpuid10_ebx ebx = { .full = entry->ebx }; - - return eax.split.version_id && eax.split.num_counters > 0 && - eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && - !ebx.split.no_branch_instruction_retired; -} - -/* - * Note that CPUID leaf 0xa is Intel-specific. This leaf should be - * clear on AMD hardware. + * On Intel, check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, and support for counting the number of branch + * instructions retired. */ static bool use_intel_pmu(void) { - const struct kvm_cpuid_entry2 *entry; - - entry = kvm_get_supported_cpuid_entry(0xa); - return is_intel_cpu() && check_intel_pmu_leaf(entry); + return is_intel_cpu() && + kvm_cpu_property(X86_PROPERTY_PMU_VERSION) && + kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) && + kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t eax) +static bool is_zen1(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; + return family == 0x17 && model <= 0x0f; } -static bool is_zen2(uint32_t eax) +static bool is_zen2(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && - x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; + return family == 0x17 && model >= 0x30 && model <= 0x3f; } -static bool is_zen3(uint32_t eax) +static bool is_zen3(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; + return family == 0x19 && model <= 0x0f; } /* @@ -432,13 +394,13 @@ static bool is_zen3(uint32_t eax) */ static bool use_amd_pmu(void) { - const struct kvm_cpuid_entry2 *entry; + uint32_t family = kvm_cpu_family(); + uint32_t model = kvm_cpu_model(); - entry = kvm_get_supported_cpuid_entry(1); return is_amd_cpu() && - (is_zen1(entry->eax) || - is_zen2(entry->eax) || - is_zen3(entry->eax)); + (is_zen1(family, model) || + is_zen2(family, model) || + is_zen3(family, model)); } int main(int argc, char *argv[]) @@ -447,9 +409,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 2bb08bf2125d..a284fcef6ed7 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -82,9 +82,6 @@ int main(int argc, char *argv[]) uint64_t cr4; int rc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - /* * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and * use it to verify all supported CR4 bits can be set prior to defining diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c new file mode 100644 index 000000000000..06edf00a97d6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Test that KVM emulates instructions in response to EPT violations when + * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define MAXPHYADDR 36 + +#define MEM_REGION_GVA 0x0000123456789000 +#define MEM_REGION_GPA 0x0000000700000000 +#define MEM_REGION_SLOT 10 +#define MEM_REGION_SIZE PAGE_SIZE + +static void guest_code(bool tdp_enabled) +{ + uint64_t error_code; + uint64_t vector; + + vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); + + /* + * When TDP is enabled, flds will trigger an emulation failure, exit to + * userspace, and then the selftest host "VMM" skips the instruction. + * + * When TDP is disabled, no instruction emulation is required so flds + * should generate #PF(RSVD). + */ + if (tdp_enabled) { + GUEST_ASSERT(!vector); + } else { + GUEST_ASSERT_EQ(vector, PF_VECTOR); + GUEST_ASSERT(error_code & PFERR_RSVD_MASK); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + uint64_t *pte; + uint64_t *hva; + uint64_t gpa; + int rc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); + + rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); + TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + MEM_REGION_GPA, MEM_REGION_SLOT, + MEM_REGION_SIZE / PAGE_SIZE, 0); + gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, + MEM_REGION_GPA, MEM_REGION_SLOT); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); + memset(hva, 0, PAGE_SIZE); + + pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); + *pte |= BIT_ULL(MAXPHYADDR); + + vcpu_run(vcpu); + + /* + * When TDP is enabled, KVM must emulate in response the guest physical + * address that is illegal from the guest's perspective, but is legal + * from hardware's perspeective. This should result in an emulation + * failure exit to userspace since KVM doesn't support emulating flds. + */ + if (kvm_is_tdp_enabled()) { + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 1f136a81858e..cb38a478e1f6 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; int stage, stage_reported; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index e637d7736012..b34980d45648 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -41,8 +41,17 @@ static void guest_int_handler(struct ex_regs *regs) static void l2_guest_code_int(void) { GUEST_ASSERT_1(int_fired == 1, int_fired); - vmmcall(); - ud2(); + + /* + * Same as the vmmcall() function, but with a ud2 sneaked after the + * vmmcall. The caller injects an exception with the return address + * increased by 2, so the "pop rbp" must be after the ud2 and we cannot + * use vmmcall() directly. + */ + __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); GUEST_ASSERT_1(bp_fired == 1, bp_fired); hlt(); @@ -194,9 +203,6 @@ done: int main(int argc, char *argv[]) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 9b6db0b0b13e..d2f9b5bdfab2 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -90,9 +90,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu_events events; int rv, cap; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 7316521428f8..91076c9787b4 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -56,9 +56,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index a4f06370a245..25fa55344a10 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -733,16 +733,98 @@ static void test_msr_permission_bitmap(void) kvm_vm_free(vm); } -int main(int argc, char *argv[]) +#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ +({ \ + int r = __vm_ioctl(vm, cmd, arg); \ + \ + if (flag & valid_mask) \ + TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \ + else \ + TEST_ASSERT(r == -1 && errno == EINVAL, \ + "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \ + #cmd, flag, r, errno, strerror(errno)); \ +}) + +static void run_user_space_msr_flag_test(struct kvm_vm *vm) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); + struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR }; + int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + + for (i = 0; i < nflags; i++) { + cap.args[0] = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap, + BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK); + } +} + +static void run_msr_filter_flag_test(struct kvm_vm *vm) +{ + u64 deny_bits = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = 0, + .bitmap = (uint8_t *)&deny_bits, + }, + }, + }; + int nflags; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + nflags = sizeof(filter.flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK); + } + filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; + nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.ranges[0].flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK); + } +} + +/* Test that attempts to write to the unused bits in a flag fails. */ +static void test_user_exit_msr_flags(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ + run_user_space_msr_flag_test(vm); + + /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */ + run_msr_filter_flag_test(vm); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ test_msr_filter_allow(); test_msr_filter_deny(); test_msr_permission_bitmap(); + test_user_exit_msr_flags(); + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 2d8c23d639f7..f0456fb031b1 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) bool done = false; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c index 322d561b4260..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c @@ -67,6 +67,52 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull); } +static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, + uint64_t msr_bit, + struct kvm_x86_cpu_feature feature) +{ + uint64_t val; + + vcpu_clear_cpuid_feature(vcpu, feature); + + val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED); + vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val); + + if (!kvm_cpu_has(feature)) + return; + + vcpu_set_cpuid_feature(vcpu, feature); +} + +static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) +{ + uint64_t supported_bits = FEAT_CTL_LOCKED | + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | + FEAT_CTL_SGX_LC_ENABLED | + FEAT_CTL_SGX_ENABLED | + FEAT_CTL_LMCE_ENABLED; + int bit, r; + + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX); + __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE); + + for_each_clear_bit(bit, &supported_bits, 64) { + r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit)); + TEST_ASSERT(r == 0, + "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit); + } +} + int main(void) { struct kvm_vcpu *vcpu; @@ -79,6 +125,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, NULL); vmx_save_restore_msrs_test(vcpu); + ia32_feature_control_msr_test(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 069589c52f41..c280ba1e6572 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -20,16 +20,6 @@ #define PMU_CAP_FW_WRITES (1ULL << 13) #define PMU_CAP_LBR_FMT 0x3f -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - union perf_capabilities { struct { u64 lbr_format:6; @@ -53,11 +43,9 @@ static void guest_code(void) int main(int argc, char *argv[]) { - const struct kvm_cpuid_entry2 *entry_a_0; struct kvm_vm *vm; struct kvm_vcpu *vcpu; int ret; - union cpuid10_eax eax; union perf_capabilities host_cap; uint64_t val; @@ -69,11 +57,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); - TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa); - entry_a_0 = kvm_get_supported_cpuid_entry(0xa); - - eax.full = entry_a_0->eax; - __TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU"); + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); /* testcase 1, set capabilities when we have PDCM bit */ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 5943187e8594..ff8ecdf32ae0 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -49,11 +49,6 @@ enum { NUM_VMX_PAGES, }; -struct kvm_single_msr { - struct kvm_msrs header; - struct kvm_msr_entry entry; -} __attribute__((packed)); - /* The virtual machine object. */ static struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 6f7a5ef66718..d7d37dae3eeb 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -114,7 +114,9 @@ static void test_icr(struct xapic_vcpu *x) * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; - for (i = vcpu->id + 1; i < 0xff; i++) { + for (i = 0; i < 0xff; i++) { + if (i == vcpu->id) + continue; for (j = 0; j < 8; j++) __test_icr(x, i << (32 + 24) | icr | (j << 8)); } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 2a5727188c8d..13c75dc18c10 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -26,17 +26,17 @@ #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 -#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) +#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) #define DUMMY_REGION_SLOT 11 #define SHINFO_ADDR (SHINFO_REGION_GPA) -#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) -#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) +#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) #define SHINFO_VADDR (SHINFO_REGION_GVA) -#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) #define EVTCHN_VECTOR 0x10 @@ -88,14 +88,20 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); struct vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[4]; + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; /* Extra field for overrun check */ }; +struct compat_vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[5]; +} __attribute__((__packed__));; + struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ }; struct vcpu_info { @@ -428,6 +434,7 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; + struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; bool verbose; @@ -440,6 +447,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); @@ -449,8 +457,8 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); - virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); + SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); @@ -475,6 +483,19 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + if (do_runstate_flag) { + struct kvm_xen_hvm_attr ruf = { + .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, + .u.runstate_update_flag = 1, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); + + ruf.u.runstate_update_flag = 0; + vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); + TEST_ASSERT(ruf.u.runstate_update_flag == 1, + "Failed to read back RUNSTATE_UPDATE_FLAG attr"); + } + struct kvm_xen_hvm_attr ha = { .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, @@ -942,6 +963,10 @@ int main(int argc, char *argv[]) } done: + evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; + evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); + alarm(0); clock_gettime(CLOCK_REALTIME, &max_ts); @@ -999,22 +1024,91 @@ int main(int argc, char *argv[]) runstate_names[i], rs->time[i]); } } - TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); - TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, - "State entry time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, - "Running time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, - "Runnable time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, - "Blocked time mismatch"); - TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, - "Offline time mismatch"); - - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); + + /* + * Exercise runstate info at all points across the page boundary, in + * 32-bit and 64-bit mode. In particular, test the case where it is + * configured in 32-bit mode and then switched to 64-bit mode while + * active, which takes it onto the second page. + */ + unsigned long runstate_addr; + struct compat_vcpu_runstate_info *crs; + for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; + runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { + + rs = addr_gpa2hva(vm, runstate_addr); + crs = (void *)rs; + + memset(rs, 0xa5, sizeof(*rs)); + + /* Set to compatibility mode */ + lm.u.long_mode = 0; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + /* Set runstate to new address (kernel will write it) */ + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = runstate_addr, + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); + + if (verbose) + printf("Compatibility runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + TEST_ASSERT(crs->state_entry_time == crs->time[0] + + crs->time[1] + crs->time[2] + crs->time[3], + "runstate times don't add up"); + + + /* Now switch to 64-bit mode */ + lm.u.long_mode = 1; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); + + memset(rs, 0xa5, sizeof(*rs)); + + /* Don't change the address, just trigger a write */ + struct kvm_xen_vcpu_attr adj = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, + .u.runstate.state = (uint64_t)-1 + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); + + if (verbose) + printf("64-bit runstate at %08lx\n", runstate_addr); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, + "Structure overrun"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } } + kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index da9290817866..792c3f0a59b4 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -75,7 +75,7 @@ TEST(abi_version) const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(2, landlock_create_ruleset(NULL, 0, + ASSERT_EQ(3, landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, @@ -263,23 +263,6 @@ TEST(ruleset_fd_transfer) .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR, }; int ruleset_fd_tx, dir_fd; - union { - /* Aligned ancillary data buffer. */ - char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))]; - struct cmsghdr _align; - } cmsg_tx = {}; - char data_tx = '.'; - struct iovec io = { - .iov_base = &data_tx, - .iov_len = sizeof(data_tx), - }; - struct msghdr msg = { - .msg_iov = &io, - .msg_iovlen = 1, - .msg_control = &cmsg_tx.buf, - .msg_controllen = sizeof(cmsg_tx.buf), - }; - struct cmsghdr *cmsg; int socket_fds[2]; pid_t child; int status; @@ -298,33 +281,20 @@ TEST(ruleset_fd_transfer) &path_beneath_attr, 0)); ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); - cmsg = CMSG_FIRSTHDR(&msg); - ASSERT_NE(NULL, cmsg); - cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx)); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx)); - /* Sends the ruleset FD over a socketpair and then close it. */ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds)); - ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0)); + ASSERT_EQ(0, send_fd(socket_fds[0], ruleset_fd_tx)); ASSERT_EQ(0, close(socket_fds[0])); ASSERT_EQ(0, close(ruleset_fd_tx)); child = fork(); ASSERT_LE(0, child); if (child == 0) { - int ruleset_fd_rx; + const int ruleset_fd_rx = recv_fd(socket_fds[1]); - *(char *)msg.msg_iov->iov_base = '\0'; - ASSERT_EQ(sizeof(data_tx), - recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); - ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base); + ASSERT_LE(0, ruleset_fd_rx); ASSERT_EQ(0, close(socket_fds[1])); - cmsg = CMSG_FIRSTHDR(&msg); - ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx))); - memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx)); /* Enforces the received ruleset on the child. */ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 7ba18eb23783..d7987ae8d7fc 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -10,6 +10,7 @@ #include <errno.h> #include <linux/landlock.h> #include <sys/capability.h> +#include <sys/socket.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> @@ -17,6 +18,10 @@ #include "../kselftest_harness.h" +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + /* * TEST_F_FORK() is useful when a test drop privileges but the corresponding * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory @@ -140,14 +145,12 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) } /* We cannot put such helpers in a library because of kselftest_harness.h . */ -__attribute__((__unused__)) static void -disable_caps(struct __test_metadata *const _metadata) +static void __maybe_unused disable_caps(struct __test_metadata *const _metadata) { _init_caps(_metadata, false); } -__attribute__((__unused__)) static void -drop_caps(struct __test_metadata *const _metadata) +static void __maybe_unused drop_caps(struct __test_metadata *const _metadata) { _init_caps(_metadata, true); } @@ -176,14 +179,80 @@ static void _effective_cap(struct __test_metadata *const _metadata, } } -__attribute__((__unused__)) static void -set_cap(struct __test_metadata *const _metadata, const cap_value_t caps) +static void __maybe_unused set_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) { _effective_cap(_metadata, caps, CAP_SET); } -__attribute__((__unused__)) static void -clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps) +static void __maybe_unused clear_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) { _effective_cap(_metadata, caps, CAP_CLEAR); } + +/* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */ +static int __maybe_unused recv_fd(int usock) +{ + int fd_rx; + union { + /* Aligned ancillary data buffer. */ + char buf[CMSG_SPACE(sizeof(fd_rx))]; + struct cmsghdr _align; + } cmsg_rx = {}; + char data = '\0'; + struct iovec io = { + .iov_base = &data, + .iov_len = sizeof(data), + }; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = &cmsg_rx.buf, + .msg_controllen = sizeof(cmsg_rx.buf), + }; + struct cmsghdr *cmsg; + int res; + + res = recvmsg(usock, &msg, MSG_CMSG_CLOEXEC); + if (res < 0) + return -errno; + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg->cmsg_len != CMSG_LEN(sizeof(fd_rx))) + return -EIO; + + memcpy(&fd_rx, CMSG_DATA(cmsg), sizeof(fd_rx)); + return fd_rx; +} + +/* Sends an FD on a UNIX socket. Returns 0 on success or -errno. */ +static int __maybe_unused send_fd(int usock, int fd_tx) +{ + union { + /* Aligned ancillary data buffer. */ + char buf[CMSG_SPACE(sizeof(fd_tx))]; + struct cmsghdr _align; + } cmsg_tx = {}; + char data_tx = '.'; + struct iovec io = { + .iov_base = &data_tx, + .iov_len = sizeof(data_tx), + }; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = &cmsg_tx.buf, + .msg_controllen = sizeof(cmsg_tx.buf), + }; + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + + cmsg->cmsg_len = CMSG_LEN(sizeof(fd_tx)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &fd_tx, sizeof(fd_tx)); + + if (sendmsg(usock, &msg, 0) < 0) + return -errno; + return 0; +} diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 45de42a027c5..b6c4be3faf7a 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -11,6 +11,7 @@ #include <fcntl.h> #include <linux/landlock.h> #include <sched.h> +#include <stdio.h> #include <string.h> #include <sys/capability.h> #include <sys/mount.h> @@ -58,6 +59,7 @@ static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1"; static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2"; static const char dir_s3d1[] = TMP_DIR "/s3d1"; +static const char file1_s3d1[] = TMP_DIR "/s3d1/f1"; /* dir_s3d2 is a mount point. */ static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; @@ -83,10 +85,45 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; * │ ├── f1 * │ └── f2 * └── s3d1 + * ├── f1 * └── s3d2 * └── s3d3 */ +static bool fgrep(FILE *const inf, const char *const str) +{ + char line[32]; + const int slen = strlen(str); + + while (!feof(inf)) { + if (!fgets(line, sizeof(line), inf)) + break; + if (strncmp(line, str, slen)) + continue; + + return true; + } + + return false; +} + +static bool supports_overlayfs(void) +{ + bool res; + FILE *const inf = fopen("/proc/filesystems", "r"); + + /* + * Consider that the filesystem is supported if we cannot get the + * supported ones. + */ + if (!inf) + return true; + + res = fgrep(inf, "nodev\toverlay\n"); + fclose(inf); + return res; +} + static void mkdir_parents(struct __test_metadata *const _metadata, const char *const path) { @@ -208,6 +245,7 @@ static void create_layout1(struct __test_metadata *const _metadata) create_file(_metadata, file1_s2d3); create_file(_metadata, file2_s2d3); + create_file(_metadata, file1_s3d1); create_directory(_metadata, dir_s3d2); set_cap(_metadata, CAP_SYS_ADMIN); ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700")); @@ -230,6 +268,7 @@ static void remove_layout1(struct __test_metadata *const _metadata) EXPECT_EQ(0, remove_path(file1_s2d2)); EXPECT_EQ(0, remove_path(file1_s2d1)); + EXPECT_EQ(0, remove_path(file1_s3d1)); EXPECT_EQ(0, remove_path(dir_s3d3)); set_cap(_metadata, CAP_SYS_ADMIN); umount(dir_s3d2); @@ -406,9 +445,10 @@ TEST_F_FORK(layout1, inval) #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ - LANDLOCK_ACCESS_FS_READ_FILE) + LANDLOCK_ACCESS_FS_READ_FILE | \ + LANDLOCK_ACCESS_FS_TRUNCATE) -#define ACCESS_LAST LANDLOCK_ACCESS_FS_REFER +#define ACCESS_LAST LANDLOCK_ACCESS_FS_TRUNCATE #define ACCESS_ALL ( \ ACCESS_FILE | \ @@ -422,7 +462,7 @@ TEST_F_FORK(layout1, inval) LANDLOCK_ACCESS_FS_MAKE_FIFO | \ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ LANDLOCK_ACCESS_FS_MAKE_SYM | \ - ACCESS_LAST) + LANDLOCK_ACCESS_FS_REFER) /* clang-format on */ @@ -3157,6 +3197,463 @@ TEST_F_FORK(layout1, proc_pipe) ASSERT_EQ(0, close(pipe_fds[1])); } +/* Invokes truncate(2) and returns its errno or 0. */ +static int test_truncate(const char *const path) +{ + if (truncate(path, 10) < 0) + return errno; + return 0; +} + +/* + * Invokes creat(2) and returns its errno or 0. + * Closes the opened file descriptor on success. + */ +static int test_creat(const char *const path) +{ + int fd = creat(path, 0600); + + if (fd < 0) + return errno; + + /* + * Mixing error codes from close(2) and creat(2) should not lead to any + * (access type) confusion for this test. + */ + if (close(fd) < 0) + return errno; + return 0; +} + +/* + * Exercises file truncation when it's not restricted, + * as it was the case before LANDLOCK_ACCESS_FS_TRUNCATE existed. + */ +TEST_F_FORK(layout1, truncate_unhandled) +{ + const char *const file_r = file1_s1d1; + const char *const file_w = file2_s1d1; + const char *const file_none = file1_s1d2; + const struct rule rules[] = { + { + .path = file_r, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = file_w, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + /* Implicitly: No rights for file_none. */ + {}, + }; + + const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE; + int ruleset_fd; + + /* Enable Landlock. */ + ruleset_fd = create_ruleset(_metadata, handled, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Checks read right: truncate and open with O_TRUNC work, unless the + * file is attempted to be opened for writing. + */ + EXPECT_EQ(0, test_truncate(file_r)); + EXPECT_EQ(0, test_open(file_r, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_r, O_WRONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_creat(file_r)); + + /* + * Checks write right: truncate and open with O_TRUNC work, unless the + * file is attempted to be opened for reading. + */ + EXPECT_EQ(0, test_truncate(file_w)); + EXPECT_EQ(EACCES, test_open(file_w, O_RDONLY | O_TRUNC)); + EXPECT_EQ(0, test_open(file_w, O_WRONLY | O_TRUNC)); + EXPECT_EQ(0, test_creat(file_w)); + + /* + * Checks "no rights" case: truncate works but all open attempts fail, + * including creat. + */ + EXPECT_EQ(0, test_truncate(file_none)); + EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_creat(file_none)); +} + +TEST_F_FORK(layout1, truncate) +{ + const char *const file_rwt = file1_s1d1; + const char *const file_rw = file2_s1d1; + const char *const file_rt = file1_s1d2; + const char *const file_t = file2_s1d2; + const char *const file_none = file1_s1d3; + const char *const dir_t = dir_s2d1; + const char *const file_in_dir_t = file1_s2d1; + const char *const dir_w = dir_s3d1; + const char *const file_in_dir_w = file1_s3d1; + const struct rule rules[] = { + { + .path = file_rwt, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE, + }, + { + .path = file_rw, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = file_rt, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE, + }, + { + .path = file_t, + .access = LANDLOCK_ACCESS_FS_TRUNCATE, + }, + /* Implicitly: No access rights for file_none. */ + { + .path = dir_t, + .access = LANDLOCK_ACCESS_FS_TRUNCATE, + }, + { + .path = dir_w, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE; + int ruleset_fd; + + /* Enable Landlock. */ + ruleset_fd = create_ruleset(_metadata, handled, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks read, write and truncate rights: truncation works. */ + EXPECT_EQ(0, test_truncate(file_rwt)); + EXPECT_EQ(0, test_open(file_rwt, O_RDONLY | O_TRUNC)); + EXPECT_EQ(0, test_open(file_rwt, O_WRONLY | O_TRUNC)); + + /* Checks read and write rights: no truncate variant works. */ + EXPECT_EQ(EACCES, test_truncate(file_rw)); + EXPECT_EQ(EACCES, test_open(file_rw, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_rw, O_WRONLY | O_TRUNC)); + + /* + * Checks read and truncate rights: truncation works. + * + * Note: Files can get truncated using open() even with O_RDONLY. + */ + EXPECT_EQ(0, test_truncate(file_rt)); + EXPECT_EQ(0, test_open(file_rt, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_rt, O_WRONLY | O_TRUNC)); + + /* Checks truncate right: truncate works, but can't open file. */ + EXPECT_EQ(0, test_truncate(file_t)); + EXPECT_EQ(EACCES, test_open(file_t, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_t, O_WRONLY | O_TRUNC)); + + /* Checks "no rights" case: No form of truncation works. */ + EXPECT_EQ(EACCES, test_truncate(file_none)); + EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC)); + + /* + * Checks truncate right on directory: truncate works on contained + * files. + */ + EXPECT_EQ(0, test_truncate(file_in_dir_t)); + EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_RDONLY | O_TRUNC)); + EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_WRONLY | O_TRUNC)); + + /* + * Checks creat in dir_w: This requires the truncate right when + * overwriting an existing file, but does not require it when the file + * is new. + */ + EXPECT_EQ(EACCES, test_creat(file_in_dir_w)); + + ASSERT_EQ(0, unlink(file_in_dir_w)); + EXPECT_EQ(0, test_creat(file_in_dir_w)); +} + +/* Invokes ftruncate(2) and returns its errno or 0. */ +static int test_ftruncate(int fd) +{ + if (ftruncate(fd, 10) < 0) + return errno; + return 0; +} + +TEST_F_FORK(layout1, ftruncate) +{ + /* + * This test opens a new file descriptor at different stages of + * Landlock restriction: + * + * without restriction: ftruncate works + * something else but truncate restricted: ftruncate works + * truncate restricted and permitted: ftruncate works + * truncate restricted and not permitted: ftruncate fails + * + * Whether this works or not is expected to depend on the time when the + * FD was opened, not to depend on the time when ftruncate() was + * called. + */ + const char *const path = file1_s1d1; + const __u64 handled1 = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE; + const struct rule layer1[] = { + { + .path = path, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + const __u64 handled2 = LANDLOCK_ACCESS_FS_TRUNCATE; + const struct rule layer2[] = { + { + .path = path, + .access = LANDLOCK_ACCESS_FS_TRUNCATE, + }, + {}, + }; + const __u64 handled3 = LANDLOCK_ACCESS_FS_TRUNCATE | + LANDLOCK_ACCESS_FS_WRITE_FILE; + const struct rule layer3[] = { + { + .path = path, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {}, + }; + int fd_layer0, fd_layer1, fd_layer2, fd_layer3, ruleset_fd; + + fd_layer0 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + + ruleset_fd = create_ruleset(_metadata, handled1, layer1); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd_layer1 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + EXPECT_EQ(0, test_ftruncate(fd_layer1)); + + ruleset_fd = create_ruleset(_metadata, handled2, layer2); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd_layer2 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + EXPECT_EQ(0, test_ftruncate(fd_layer1)); + EXPECT_EQ(0, test_ftruncate(fd_layer2)); + + ruleset_fd = create_ruleset(_metadata, handled3, layer3); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd_layer3 = open(path, O_WRONLY); + EXPECT_EQ(0, test_ftruncate(fd_layer0)); + EXPECT_EQ(0, test_ftruncate(fd_layer1)); + EXPECT_EQ(0, test_ftruncate(fd_layer2)); + EXPECT_EQ(EACCES, test_ftruncate(fd_layer3)); + + ASSERT_EQ(0, close(fd_layer0)); + ASSERT_EQ(0, close(fd_layer1)); + ASSERT_EQ(0, close(fd_layer2)); + ASSERT_EQ(0, close(fd_layer3)); +} + +/* clang-format off */ +FIXTURE(ftruncate) {}; +/* clang-format on */ + +FIXTURE_SETUP(ftruncate) +{ + prepare_layout(_metadata); + create_file(_metadata, file1_s1d1); +} + +FIXTURE_TEARDOWN(ftruncate) +{ + EXPECT_EQ(0, remove_path(file1_s1d1)); + cleanup_layout(_metadata); +} + +FIXTURE_VARIANT(ftruncate) +{ + const __u64 handled; + const __u64 permitted; + const int expected_open_result; + const int expected_ftruncate_result; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, w_w) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE, + .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .expected_open_result = 0, + .expected_ftruncate_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, t_t) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .expected_open_result = 0, + .expected_ftruncate_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, wt_w) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .expected_open_result = 0, + .expected_ftruncate_result = EACCES, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, wt_wt) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE, + .expected_open_result = 0, + .expected_ftruncate_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ftruncate, wt_t) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, + .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .expected_open_result = EACCES, +}; + +TEST_F_FORK(ftruncate, open_and_ftruncate) +{ + const char *const path = file1_s1d1; + const struct rule rules[] = { + { + .path = path, + .access = variant->permitted, + }, + {}, + }; + int fd, ruleset_fd; + + /* Enable Landlock. */ + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd = open(path, O_WRONLY); + EXPECT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0)); + if (fd >= 0) { + EXPECT_EQ(variant->expected_ftruncate_result, + test_ftruncate(fd)); + ASSERT_EQ(0, close(fd)); + } +} + +TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) +{ + int child, fd, status; + int socket_fds[2]; + + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, + socket_fds)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + /* + * Enables Landlock in the child process, open a file descriptor + * where truncation is forbidden and send it to the + * non-landlocked parent process. + */ + const char *const path = file1_s1d1; + const struct rule rules[] = { + { + .path = path, + .access = variant->permitted, + }, + {}, + }; + int fd, ruleset_fd; + + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + fd = open(path, O_WRONLY); + ASSERT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0)); + + if (fd >= 0) { + ASSERT_EQ(0, send_fd(socket_fds[0], fd)); + ASSERT_EQ(0, close(fd)); + } + + ASSERT_EQ(0, close(socket_fds[0])); + + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + if (variant->expected_open_result == 0) { + fd = recv_fd(socket_fds[1]); + ASSERT_LE(0, fd); + + EXPECT_EQ(variant->expected_ftruncate_result, + test_ftruncate(fd)); + ASSERT_EQ(0, close(fd)); + } + + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + ASSERT_EQ(0, close(socket_fds[0])); + ASSERT_EQ(0, close(socket_fds[1])); +} + +TEST(memfd_ftruncate) +{ + int fd; + + fd = memfd_create("name", MFD_CLOEXEC); + ASSERT_LE(0, fd); + + /* + * Checks that ftruncate is permitted on file descriptors that are + * created in ways other than open(2). + */ + EXPECT_EQ(0, test_ftruncate(fd)); + + ASSERT_EQ(0, close(fd)); +} + /* clang-format off */ FIXTURE(layout1_bind) {}; /* clang-format on */ @@ -3539,6 +4036,9 @@ FIXTURE(layout2_overlay) {}; FIXTURE_SETUP(layout2_overlay) { + if (!supports_overlayfs()) + SKIP(return, "overlayfs is not supported"); + prepare_layout(_metadata); create_directory(_metadata, LOWER_BASE); @@ -3575,6 +4075,9 @@ FIXTURE_SETUP(layout2_overlay) FIXTURE_TEARDOWN(layout2_overlay) { + if (!supports_overlayfs()) + SKIP(return, "overlayfs is not supported"); + EXPECT_EQ(0, remove_path(lower_do1_fl3)); EXPECT_EQ(0, remove_path(lower_dl1_fl2)); EXPECT_EQ(0, remove_path(lower_fl1)); @@ -3606,6 +4109,9 @@ FIXTURE_TEARDOWN(layout2_overlay) TEST_F_FORK(layout2_overlay, no_restriction) { + if (!supports_overlayfs()) + SKIP(return, "overlayfs is not supported"); + ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY)); ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY)); ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY)); @@ -3769,6 +4275,9 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) size_t i; const char *path_entry; + if (!supports_overlayfs()) + SKIP(return, "overlayfs is not supported"); + /* Sets rules on base directories (i.e. outside overlay scope). */ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); ASSERT_LE(0, ruleset_fd); diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index c28ef98ff3ac..55e7871631a1 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -19,6 +19,12 @@ #include "common.h" +/* Copied from security/yama/yama_lsm.c */ +#define YAMA_SCOPE_DISABLED 0 +#define YAMA_SCOPE_RELATIONAL 1 +#define YAMA_SCOPE_CAPABILITY 2 +#define YAMA_SCOPE_NO_ATTACH 3 + static void create_domain(struct __test_metadata *const _metadata) { int ruleset_fd; @@ -60,6 +66,25 @@ static int test_ptrace_read(const pid_t pid) return 0; } +static int get_yama_ptrace_scope(void) +{ + int ret; + char buf[2] = {}; + const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY); + + if (fd < 0) + return 0; + + if (read(fd, buf, 1) < 0) { + close(fd); + return -1; + } + + ret = atoi(buf); + close(fd); + return ret; +} + /* clang-format off */ FIXTURE(hierarchy) {}; /* clang-format on */ @@ -232,8 +257,51 @@ TEST_F(hierarchy, trace) pid_t child, parent; int status, err_proc_read; int pipe_child[2], pipe_parent[2]; + int yama_ptrace_scope; char buf_parent; long ret; + bool can_read_child, can_trace_child, can_read_parent, can_trace_parent; + + yama_ptrace_scope = get_yama_ptrace_scope(); + ASSERT_LE(0, yama_ptrace_scope); + + if (yama_ptrace_scope > YAMA_SCOPE_DISABLED) + TH_LOG("Incomplete tests due to Yama restrictions (scope %d)", + yama_ptrace_scope); + + /* + * can_read_child is true if a parent process can read its child + * process, which is only the case when the parent process is not + * isolated from the child with a dedicated Landlock domain. + */ + can_read_child = !variant->domain_parent; + + /* + * can_trace_child is true if a parent process can trace its child + * process. This depends on two conditions: + * - The parent process is not isolated from the child with a dedicated + * Landlock domain. + * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL). + */ + can_trace_child = can_read_child && + yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL; + + /* + * can_read_parent is true if a child process can read its parent + * process, which is only the case when the child process is not + * isolated from the parent with a dedicated Landlock domain. + */ + can_read_parent = !variant->domain_child; + + /* + * can_trace_parent is true if a child process can trace its parent + * process. This depends on two conditions: + * - The child process is not isolated from the parent with a dedicated + * Landlock domain. + * - Yama is disabled (YAMA_SCOPE_DISABLED). + */ + can_trace_parent = can_read_parent && + yama_ptrace_scope <= YAMA_SCOPE_DISABLED; /* * Removes all effective and permitted capabilities to not interfere @@ -264,16 +332,21 @@ TEST_F(hierarchy, trace) /* Waits for the parent to be in a domain, if any. */ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); - /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */ + /* Tests PTRACE_MODE_READ on the parent. */ err_proc_read = test_ptrace_read(parent); + if (can_read_parent) { + EXPECT_EQ(0, err_proc_read); + } else { + EXPECT_EQ(EACCES, err_proc_read); + } + + /* Tests PTRACE_ATTACH on the parent. */ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0); - if (variant->domain_child) { + if (can_trace_parent) { + EXPECT_EQ(0, ret); + } else { EXPECT_EQ(-1, ret); EXPECT_EQ(EPERM, errno); - EXPECT_EQ(EACCES, err_proc_read); - } else { - EXPECT_EQ(0, ret); - EXPECT_EQ(0, err_proc_read); } if (ret == 0) { ASSERT_EQ(parent, waitpid(parent, &status, 0)); @@ -283,11 +356,11 @@ TEST_F(hierarchy, trace) /* Tests child PTRACE_TRACEME. */ ret = ptrace(PTRACE_TRACEME); - if (variant->domain_parent) { + if (can_trace_child) { + EXPECT_EQ(0, ret); + } else { EXPECT_EQ(-1, ret); EXPECT_EQ(EPERM, errno); - } else { - EXPECT_EQ(0, ret); } /* @@ -296,7 +369,7 @@ TEST_F(hierarchy, trace) */ ASSERT_EQ(1, write(pipe_child[1], ".", 1)); - if (!variant->domain_parent) { + if (can_trace_child) { ASSERT_EQ(0, raise(SIGSTOP)); } @@ -321,7 +394,7 @@ TEST_F(hierarchy, trace) ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1)); /* Tests child PTRACE_TRACEME. */ - if (!variant->domain_parent) { + if (can_trace_child) { ASSERT_EQ(child, waitpid(child, &status, 0)); ASSERT_EQ(1, WIFSTOPPED(status)); ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); @@ -331,17 +404,23 @@ TEST_F(hierarchy, trace) EXPECT_EQ(ESRCH, errno); } - /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */ + /* Tests PTRACE_MODE_READ on the child. */ err_proc_read = test_ptrace_read(child); + if (can_read_child) { + EXPECT_EQ(0, err_proc_read); + } else { + EXPECT_EQ(EACCES, err_proc_read); + } + + /* Tests PTRACE_ATTACH on the child. */ ret = ptrace(PTRACE_ATTACH, child, NULL, 0); - if (variant->domain_parent) { + if (can_trace_child) { + EXPECT_EQ(0, ret); + } else { EXPECT_EQ(-1, ret); EXPECT_EQ(EPERM, errno); - EXPECT_EQ(EACCES, err_proc_read); - } else { - EXPECT_EQ(0, ret); - EXPECT_EQ(0, err_proc_read); } + if (ret == 0) { ASSERT_EQ(child, waitpid(child, &status, 0)); ASSERT_EQ(1, WIFSTOPPED(status)); diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a3ea3d4a206d..f7900e75d230 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,7 +20,7 @@ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) ifeq ($(CROSS_COMPILE),) ifeq ($(CLANG_TARGET_FLAGS),) -$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk +$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk) else CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) endif # CLANG_TARGET_FLAGS @@ -123,6 +123,11 @@ endef clean: $(CLEAN) +# Enables to extend CFLAGS and LDFLAGS from command line, e.g. +# make USERCFLAGS=-Werror USERLDFLAGS=-static +CFLAGS += $(USERCFLAGS) +LDFLAGS += $(USERLDFLAGS) + # When make O= with kselftest target from main level # the following aren't defined. # diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile index 60826d7d37d4..471d83e61d95 100644 --- a/tools/testing/selftests/media_tests/Makefile +++ b/tools/testing/selftests/media_tests/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -CFLAGS += -I../ -I../../../../usr/include/ +CFLAGS += -I../ $(KHDR_INCLUDES) TEST_GEN_PROGS := media_device_test media_device_open video_device_test include ../lib.mk diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile index 34d1c81a2324..fc840e06ff56 100644 --- a/tools/testing/selftests/membarrier/Makefile +++ b/tools/testing/selftests/membarrier/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ +CFLAGS += -g $(KHDR_INCLUDES) LDLIBS += -lpthread TEST_GEN_PROGS := membarrier_test_single_thread \ diff --git a/tools/testing/selftests/membarrier/membarrier_test_impl.h b/tools/testing/selftests/membarrier/membarrier_test_impl.h index 186be69f0a59..af89855adb7b 100644 --- a/tools/testing/selftests/membarrier/membarrier_test_impl.h +++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h @@ -9,11 +9,38 @@ #include "../kselftest.h" +static int registrations; + static int sys_membarrier(int cmd, int flags) { return syscall(__NR_membarrier, cmd, flags); } +static int test_membarrier_get_registrations(int cmd) +{ + int ret, flags = 0; + const char *test_name = + "sys membarrier MEMBARRIER_CMD_GET_REGISTRATIONS"; + + registrations |= cmd; + + ret = sys_membarrier(MEMBARRIER_CMD_GET_REGISTRATIONS, 0); + if (ret < 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } else if (ret != registrations) { + ksft_exit_fail_msg( + "%s test: flags = %d, ret = %d, registrations = %d\n", + test_name, flags, ret, registrations); + } + ksft_test_result_pass( + "%s test: flags = %d, ret = %d, registrations = %d\n", + test_name, flags, ret, registrations); + + return 0; +} + static int test_membarrier_cmd_fail(void) { int cmd = -1, flags = 0; @@ -113,6 +140,8 @@ static int test_membarrier_register_private_expedited_success(void) ksft_test_result_pass( "%s test: flags = %d\n", test_name, flags); + + test_membarrier_get_registrations(cmd); return 0; } @@ -170,6 +199,8 @@ static int test_membarrier_register_private_expedited_sync_core_success(void) ksft_test_result_pass( "%s test: flags = %d\n", test_name, flags); + + test_membarrier_get_registrations(cmd); return 0; } @@ -204,6 +235,8 @@ static int test_membarrier_register_global_expedited_success(void) ksft_test_result_pass( "%s test: flags = %d\n", test_name, flags); + + test_membarrier_get_registrations(cmd); return 0; } diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c index ac5613e5b0eb..a9cc17facfb3 100644 --- a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c +++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c @@ -62,7 +62,7 @@ static int test_mt_membarrier(void) int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(13); + ksft_set_plan(16); test_membarrier_query(); diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c index c1c963902854..4cdc8b1d124c 100644 --- a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c +++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c @@ -12,7 +12,9 @@ int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(13); + ksft_set_plan(18); + + test_membarrier_get_registrations(/*cmd=*/0); test_membarrier_query(); @@ -20,5 +22,7 @@ int main(int argc, char **argv) test_membarrier_success(); + test_membarrier_get_registrations(/*cmd=*/0); + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 4da8b565fa32..163b6f68631c 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -D_FILE_OFFSET_BITS=64 -CFLAGS += -I../../../../include/uapi/ -CFLAGS += -I../../../../include/ -CFLAGS += -I../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := memfd_test TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile index 2250f7dcb81e..0c0d7b1234c1 100644 --- a/tools/testing/selftests/mount_setattr/Makefile +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for mount selftests. -CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread +CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread -TEST_GEN_FILES += mount_setattr_test +TEST_GEN_PROGS := mount_setattr_test include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 8c5fea68ae67..582669ca38e9 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -103,13 +103,6 @@ #else #define __NR_mount_setattr 442 #endif - -struct mount_attr { - __u64 attr_set; - __u64 attr_clr; - __u64 propagation; - __u64 userns_fd; -}; #endif #ifndef __NR_open_tree diff --git a/tools/testing/selftests/move_mount_set_group/Makefile b/tools/testing/selftests/move_mount_set_group/Makefile index 80c2d86812b0..94235846b6f9 100644 --- a/tools/testing/selftests/move_mount_set_group/Makefile +++ b/tools/testing/selftests/move_mount_set_group/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for mount selftests. -CFLAGS = -g -I../../../../usr/include/ -Wall -O2 +CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 TEST_GEN_FILES += move_mount_set_group_test diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 162c41e9bcae..1562aa7d60b0 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -888,6 +888,17 @@ TEST_F(NCI, deinit) &msg); ASSERT_EQ(rc, 0); EXPECT_EQ(get_dev_enable_state(&msg), 0); + + /* Test that operations that normally send packets to the driver + * don't cause issues when the device is already closed. + * Note: the send of NFC_CMD_DEV_UP itself still succeeds it's just + * that the device won't actually be up. + */ + close(self->virtual_nci_fd); + self->virtual_nci_fd = -1; + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_UP, self->dev_idex); + EXPECT_EQ(rc, 0); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 3d7adee7a3e6..a6911cae368c 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +bind_timewait +csum cmsg_sender +diag_uid fin_ack_lat gro hwtstamp_config @@ -25,6 +28,7 @@ rxtimestamp sk_bind_sendto_listen sk_connect_zero_addr socket +so_incoming_cpu so_netns_cookie so_txtime stress_reuseport_listen diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 69c58362c0ed..6cd8993454d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -38,6 +38,7 @@ TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += srv6_hencap_red_l3vpn_test.sh TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh +TEST_PROGS += srv6_end_flavors_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh @@ -45,6 +46,8 @@ TEST_PROGS += arp_ndisc_untracked_subnets.sh TEST_PROGS += stress_reuseport_listen.sh TEST_PROGS += l2_tos_ttl_inherit.sh TEST_PROGS += bind_bhash.sh +TEST_PROGS += ip_local_port_range.sh +TEST_PROGS += rps_default_mask.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest @@ -71,14 +74,65 @@ TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_PROGS += test_ingress_egress_chaining.sh +TEST_GEN_PROGS += so_incoming_cpu +TEST_PROGS += sctp_vrf.sh +TEST_GEN_FILES += sctp_hello +TEST_GEN_FILES += csum +TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += ip_local_port_range TEST_FILES := settings include ../lib.mk -include bpf/Makefile - $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread + +# Rules to generate bpf obj nat6to4.o +CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + +CCINCLUDE += -I../bpf +CCINCLUDE += -I../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + mkdir -p $@ + +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) + $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN := $(SCRATCH_DIR) diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 969620ae9928..1e4b397cece6 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,3 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob unix_connect +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c new file mode 100644 index 000000000000..5b88f7129fea --- /dev/null +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <unistd.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(diag_uid) +{ + int netlink_fd; + int unix_fd; + __u32 inode; + __u64 cookie; +}; + +FIXTURE_VARIANT(diag_uid) +{ + int unshare; + int udiag_show; +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid) +{ + .unshare = 0, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid_unshare) +{ + .unshare = CLONE_NEWUSER, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_SETUP(diag_uid) +{ + struct stat file_stat; + socklen_t optlen; + int ret; + + if (variant->unshare) + ASSERT_EQ(unshare(variant->unshare), 0); + + self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + ASSERT_NE(self->netlink_fd, -1); + + self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_NE(self->unix_fd, -1); + + ret = fstat(self->unix_fd, &file_stat); + ASSERT_EQ(ret, 0); + + self->inode = file_stat.st_ino; + + optlen = sizeof(self->cookie); + ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(diag_uid) +{ + close(self->netlink_fd); + close(self->unix_fd); +} + +int send_request(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self, + const FIXTURE_VARIANT(diag_uid) *variant) +{ + struct { + struct nlmsghdr nlh; + struct unix_diag_req udr; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .udr = { + .sdiag_family = AF_UNIX, + .udiag_ino = self->inode, + .udiag_cookie = { + (__u32)self->cookie, + (__u32)(self->cookie >> 32) + }, + .udiag_show = variant->udiag_show + } + }; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return sendmsg(self->netlink_fd, &msg, 0); +} + +void render_response(struct __test_metadata *_metadata, + struct unix_diag_req *udr, __u32 len) +{ + unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr)); + struct rtattr *attr; + uid_t uid; + + ASSERT_GT(len, sizeof(*udr)); + ASSERT_EQ(udr->sdiag_family, AF_UNIX); + + attr = (struct rtattr *)(udr + 1); + ASSERT_NE(RTA_OK(attr, rta_len), 0); + ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID); + + uid = *(uid_t *)RTA_DATA(attr); + ASSERT_EQ(uid, getuid()); +} + +void receive_response(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self) +{ + long buf[8192 / sizeof(long)]; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct unix_diag_req *udr; + struct nlmsghdr *nlh; + int ret; + + ret = recvmsg(self->netlink_fd, &msg, 0); + ASSERT_GT(ret, 0); + + nlh = (struct nlmsghdr *)buf; + ASSERT_NE(NLMSG_OK(nlh, ret), 0); + ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY); + + render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len); + + nlh = NLMSG_NEXT(nlh, ret); + ASSERT_EQ(NLMSG_OK(nlh, ret), 0); +} + +TEST_F(diag_uid, 1) +{ + int ret; + + ret = send_request(_metadata, self, variant); + ASSERT_GT(ret, 0); + + receive_response(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index b5af08af8559..4a110bb01e53 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -18,14 +18,15 @@ readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 readonly V6_ADDR1=2001:db8:91::2 nsid=100 +ret=0 cleanup_v6() { ip netns del me ip netns del peer - sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 - sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } create_ns() @@ -61,7 +62,7 @@ setup_v6() { if [ $? -ne 0 ]; then cleanup_v6 echo "failed" - exit + exit 1 fi # Set veth2 down, which will put veth1 in NOCARRIER state @@ -88,7 +89,7 @@ setup_v4() { if [ $? -ne 0 ]; then cleanup_v4 echo "failed" - exit + exit 1 fi # Set veth1 down, which will put veth0 in NOCARRIER state @@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v4 @@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then fi run_all_tests +exit $ret diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c new file mode 100644 index 000000000000..cb9fdf51ea59 --- /dev/null +++ b/tools/testing/selftests/net/bind_timewait.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <sys/socket.h> +#include <netinet/in.h> + +#include "../kselftest_harness.h" + +FIXTURE(bind_timewait) +{ + struct sockaddr_in addr; + socklen_t addrlen; +}; + +FIXTURE_VARIANT(bind_timewait) +{ + __u32 addr_const; +}; + +FIXTURE_VARIANT_ADD(bind_timewait, localhost) +{ + .addr_const = INADDR_LOOPBACK +}; + +FIXTURE_VARIANT_ADD(bind_timewait, addrany) +{ + .addr_const = INADDR_ANY +}; + +FIXTURE_SETUP(bind_timewait) +{ + self->addr.sin_family = AF_INET; + self->addr.sin_port = 0; + self->addr.sin_addr.s_addr = htonl(variant->addr_const); + self->addrlen = sizeof(self->addr); +} + +FIXTURE_TEARDOWN(bind_timewait) +{ +} + +void create_timewait_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_timewait) *self) +{ + int server_fd, client_fd, child_fd, ret; + struct sockaddr_in addr; + socklen_t addrlen; + + server_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(server_fd, 0); + + ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + ret = listen(server_fd, 1); + ASSERT_EQ(ret, 0); + + ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + + client_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(client_fd, 0); + + ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GT(child_fd, 0); + + close(child_fd); + close(client_fd); + close(server_fd); +} + +TEST_F(bind_timewait, 1) +{ + int fd, ret; + + create_timewait_socket(_metadata, self); + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(fd, 0); + + ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EADDRINUSE); + + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile deleted file mode 100644 index 8ccaf8732eb2..000000000000 --- a/tools/testing/selftests/net/bpf/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -CLANG ?= clang -CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../../lib -CCINCLUDE += -I../../../../../usr/include/ - -TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o -all: $(TEST_CUSTOM_PROGS) - -$(OUTPUT)/%.o: %.c - $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@ - -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 2d89cb0ad288..330d0b1ceced 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -6,7 +6,7 @@ ksft_skip=4 NS=ns IP6=2001:db8:1::1/64 TGT6=2001:db8:1::2 -TMPF=`mktemp` +TMPF=$(mktemp --suffix ".pcap") cleanup() { diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 75dd83e39207..24b21b15ed3f 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -110,7 +110,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) static void cs_parse_args(int argc, char *argv[]) { - char o; + int o; while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index ead7963b9bf0..cc9fd55ab869 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -3,6 +3,9 @@ CONFIG_NET_NS=y CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y +CONFIG_RPS=y +CONFIG_SYSFS=y +CONFIG_PROC_SYSCTL=y CONFIG_NET_VRF=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y @@ -43,5 +46,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4=y +CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c new file mode 100644 index 000000000000..82a1c1839da6 --- /dev/null +++ b/tools/testing/selftests/net/csum.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP. + * + * The test runs on two machines to exercise the NIC. For this reason it + * is not integrated in kselftests. + * + * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS)) + * + * Rx: + * + * The sender sends packets with a known checksum field using PF_INET(6) + * SOCK_RAW sockets. + * + * good packet: $CMD [-t] + * bad packet: $CMD [-t] -E + * + * The receiver reads UDP packets with a UDP socket. This is not an + * option for TCP packets ('-t'). Optionally insert an iptables filter + * to avoid these entering the real protocol stack. + * + * The receiver also reads all packets with a PF_PACKET socket, to + * observe whether both good and bad packets arrive on the host. And to + * read the optional TP_STATUS_CSUM_VALID bit. This requires setting + * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY. + * + * Tx: + * + * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx + * checksum offload. + * + * The sender can sends packets with a UDP socket. + * + * Optionally crafts a packet that sums up to zero to verify that the + * device writes negative zero 0xFFFF in this case to distinguish from + * 0x0000 (checksum disabled), as required by RFC 768. Hit this case + * by choosing a specific source port. + * + * good packet: $CMD -U + * zero csum: $CMD -U -Z + * + * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR, + * to cover more protocols. PF_PACKET requires passing src and dst mac + * addresses. + * + * good packet: $CMD -s $smac -d $dmac -p [-t] + * + * Argument '-z' sends UDP packets with a 0x000 checksum disabled field, + * to verify that the NIC passes these packets unmodified. + * + * Argument '-e' adds a transport mode encapsulation header between + * network and transport header. This will fail for devices that parse + * headers. Should work on devices that implement protocol agnostic tx + * checksum offload (NETIF_F_HW_CSUM). + * + * Argument '-r $SEED' optionally randomizes header, payload and length + * to increase coverage between packets sent. SEED 1 further chooses a + * different seed for each run (and logs this for reproducibility). It + * is advised to enable this for extra coverage in continuous testing. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +static bool cfg_bad_csum; +static int cfg_family = PF_INET6; +static int cfg_num_pkt = 4; +static bool cfg_do_rx = true; +static bool cfg_do_tx = true; +static bool cfg_encap; +static char *cfg_ifname = "eth0"; +static char *cfg_mac_dst; +static char *cfg_mac_src; +static int cfg_proto = IPPROTO_UDP; +static int cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static uint16_t cfg_port_dst = 34000; +static uint16_t cfg_port_src = 33000; +static uint16_t cfg_port_src_encap = 33001; +static unsigned int cfg_random_seed; +static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */ +static bool cfg_send_pfpacket; +static bool cfg_send_udp; +static int cfg_timeout_ms = 2000; +static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */ +static bool cfg_zero_sum; /* create packet that adds up to zero */ + +static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6}; +static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6}; + +#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr)) +#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr)) +#define MAX_PAYLOAD_LEN 1024 + +/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */ +struct udp_encap_hdr { + uint8_t nexthdr; + uint8_t padding[3]; +}; + +/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */ +static void *iph_addr_p; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL); +} + +static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum) +{ + uint16_t *words = (uint16_t *)data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + + if (len & 1) + sum += ((unsigned char *)data)[len - 1]; + + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t checksum(void *th, uint16_t proto, size_t len) +{ + uint32_t sum; + int alen; + + alen = cfg_family == PF_INET6 ? 32 : 8; + + sum = checksum_nofold(iph_addr_p, alen, 0); + sum += htons(proto); + sum += htons(len); + + /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */ + if (cfg_do_tx && cfg_send_pfpacket) + return ~checksum_fold(NULL, 0, sum); + else + return checksum_fold(th, len, sum); +} + +static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len) +{ + struct iphdr *iph = _iph; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = proto; + iph->saddr = cfg_saddr4.sin_addr.s_addr; + iph->daddr = cfg_daddr4.sin_addr.s_addr; + iph->tot_len = htons(sizeof(*iph) + len); + iph->check = checksum_fold(iph, sizeof(*iph), 0); + + iph_addr_p = &iph->saddr; + + return iph + 1; +} + +static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len) +{ + struct ipv6hdr *ip6h = _ip6h; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 64; + ip6h->saddr = cfg_saddr6.sin6_addr; + ip6h->daddr = cfg_daddr6.sin6_addr; + + iph_addr_p = &ip6h->saddr; + + return ip6h + 1; +} + +static void *build_packet_udp(void *_uh) +{ + struct udphdr *uh = _uh; + + uh->source = htons(cfg_port_src); + uh->dest = htons(cfg_port_dst); + uh->len = htons(sizeof(*uh) + cfg_payload_len); + uh->check = 0; + + /* choose source port so that uh->check adds up to zero */ + if (cfg_zero_sum) { + uh->source = 0; + uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + fprintf(stderr, "tx: changing sport: %hu -> %hu\n", + cfg_port_src, ntohs(uh->source)); + cfg_port_src = ntohs(uh->source); + } + + if (cfg_zero_disable) + uh->check = 0; + else + uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + if (cfg_bad_csum) + uh->check = ~uh->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check); + return uh + 1; +} + +static void *build_packet_tcp(void *_th) +{ + struct tcphdr *th = _th; + + th->source = htons(cfg_port_src); + th->dest = htons(cfg_port_dst); + th->doff = 5; + th->check = 0; + + th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len); + + if (cfg_bad_csum) + th->check = ~th->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check); + return th + 1; +} + +static char *build_packet_udp_encap(void *_uh) +{ + struct udphdr *uh = _uh; + struct udp_encap_hdr *eh = _uh + sizeof(*uh); + + /* outer dst == inner dst, to simplify BPF filter + * outer src != inner src, to demultiplex on recv + */ + uh->dest = htons(cfg_port_dst); + uh->source = htons(cfg_port_src_encap); + uh->check = 0; + uh->len = htons(sizeof(*uh) + + sizeof(*eh) + + sizeof(struct tcphdr) + + cfg_payload_len); + + eh->nexthdr = IPPROTO_TCP; + + return build_packet_tcp(eh + 1); +} + +static char *build_packet(char *buf, int max_len, int *len) +{ + uint8_t proto; + char *off; + int tlen; + + if (cfg_random_seed) { + int *buf32 = (void *)buf; + int i; + + for (i = 0; i < (max_len / sizeof(int)); i++) + buf32[i] = rand(); + } else { + memset(buf, cfg_payload_char, max_len); + } + + if (cfg_proto == IPPROTO_UDP) + tlen = sizeof(struct udphdr) + cfg_payload_len; + else + tlen = sizeof(struct tcphdr) + cfg_payload_len; + + if (cfg_encap) { + proto = IPPROTO_UDP; + tlen += ENC_HEADER_LEN; + } else { + proto = cfg_proto; + } + + if (cfg_family == PF_INET) + off = build_packet_ipv4(buf, proto, tlen); + else + off = build_packet_ipv6(buf, proto, tlen); + + if (cfg_encap) + off = build_packet_udp_encap(off); + else if (cfg_proto == IPPROTO_UDP) + off = build_packet_udp(off); + else + off = build_packet_tcp(off); + + /* only pass the payload, but still compute headers for cfg_zero_sum */ + if (cfg_send_udp) { + *len = cfg_payload_len; + return off; + } + + *len = off - buf + cfg_payload_len; + return buf; +} + +static int open_inet(int ipproto, int protocol) +{ + int fd; + + fd = socket(cfg_family, ipproto, protocol); + if (fd == -1) + error(1, errno, "socket inet"); + + if (cfg_family == PF_INET6) { + /* may have been updated by cfg_zero_sum */ + cfg_saddr6.sin6_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6))) + error(1, errno, "bind dgram 6"); + if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "connect dgram 6"); + } else { + /* may have been updated by cfg_zero_sum */ + cfg_saddr4.sin_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4))) + error(1, errno, "bind dgram 4"); + if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "connect dgram 4"); + } + + return fd; +} + +static int open_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket packet"); + + if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt packet_vnet_ndr"); + + return fd; +} + +static void send_inet(int fd, const char *buf, int len) +{ + int ret; + + ret = write(fd, buf, len); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %d", ret); +} + +static void eth_str_to_addr(const char *str, unsigned char *eth) +{ + if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ð[0], ð[1], ð[2], ð[3], ð[4], ð[5]) != 6) + error(1, 0, "cannot parse mac addr %s", str); +} + +static void send_packet(int fd, const char *buf, int len) +{ + struct virtio_net_hdr vh = {0}; + struct sockaddr_ll addr = {0}; + struct msghdr msg = {0}; + struct ethhdr eth; + struct iovec iov[3]; + int ret; + + addr.sll_family = AF_PACKET; + addr.sll_halen = ETH_ALEN; + addr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!addr.sll_ifindex) + error(1, errno, "if_nametoindex %s", cfg_ifname); + + vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (cfg_family == PF_INET6) { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + addr.sll_protocol = htons(ETH_P_IPV6); + } else { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr); + addr.sll_protocol = htons(ETH_P_IP); + } + + if (cfg_encap) + vh.csum_start += ENC_HEADER_LEN; + + if (cfg_proto == IPPROTO_TCP) { + vh.csum_offset = __builtin_offsetof(struct tcphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct tcphdr); + } else { + vh.csum_offset = __builtin_offsetof(struct udphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct udphdr); + } + + eth_str_to_addr(cfg_mac_src, eth.h_source); + eth_str_to_addr(cfg_mac_dst, eth.h_dest); + eth.h_proto = addr.sll_protocol; + + iov[0].iov_base = &vh; + iov[0].iov_len = sizeof(vh); + + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + + iov[2].iov_base = (void *)buf; + iov[2].iov_len = len; + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "sendmsg packet"); + if (ret != sizeof(vh) + sizeof(eth) + len) + error(1, errno, "sendmsg packet: %u", ret); +} + +static int recv_prepare_udp(void) +{ + int fd; + + fd = socket(cfg_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF r"); + + if (cfg_family == PF_INET6) { + if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "bind r"); + } else { + if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "bind r"); + } + + return fd; +} + +/* Filter out all traffic that is not cfg_proto with our destination port. + * + * Otherwise background noise may cause PF_PACKET receive queue overflow, + * dropping the expected packets and failing the test. + */ +static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static void recv_prepare_packet_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + + if (cfg_family == AF_INET) + __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol), + sizeof(struct iphdr) + off_dport); + else + __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr), + sizeof(struct ipv6hdr) + off_dport); +} + +static void recv_prepare_packet_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + + if (cfg_family == PF_INET) + laddr.sll_protocol = htons(ETH_P_IP); + else + laddr.sll_protocol = htons(ETH_P_IPV6); + + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, 0, "if_nametoindex %s", cfg_ifname); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind pf_packet"); +} + +static int recv_prepare_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF p"); + + /* enable auxdata to recv checksum status (valid vs unknown) */ + if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one))) + error(1, errno, "setsockopt auxdata"); + + /* install filter to restrict packet flow to match */ + recv_prepare_packet_filter(fd); + + /* bind to address family to start packet flow */ + recv_prepare_packet_bind(fd); + + return fd; +} + +static int recv_udp(int fd) +{ + static char buf[MAX_PAYLOAD_LEN]; + int ret, count = 0; + + while (1) { + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv r"); + + fprintf(stderr, "rx: udp: len=%u\n", ret); + count++; + } + + return count; +} + +static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field) +{ + uint16_t csum; + + csum = checksum(th, cfg_proto, len); + + fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n", + sport, len, csum_field, csum); + + /* csum must be zero unless cfg_bad_csum indicates bad csum */ + if (csum && !cfg_bad_csum) { + fprintf(stderr, "pkt: bad csum\n"); + return 1; + } else if (cfg_bad_csum && !csum) { + fprintf(stderr, "pkt: good csum, while bad expected\n"); + return 1; + } + + if (cfg_zero_sum && csum_field != 0xFFFF) { + fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field); + return 1; + } + + return 0; +} + +static int recv_verify_packet_tcp(void *th, int len) +{ + struct tcphdr *tcph = th; + + if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst)) + return -1; + + return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check); +} + +static int recv_verify_packet_udp_encap(void *th, int len) +{ + struct udp_encap_hdr *eh = th; + + if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP) + return -1; + + return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh)); +} + +static int recv_verify_packet_udp(void *th, int len) +{ + struct udphdr *udph = th; + + if (len < sizeof(*udph)) + return -1; + + if (udph->dest != htons(cfg_port_dst)) + return -1; + + if (udph->source == htons(cfg_port_src_encap)) + return recv_verify_packet_udp_encap(udph + 1, + len - sizeof(*udph)); + + return recv_verify_csum(th, len, ntohs(udph->source), udph->check); +} + +static int recv_verify_packet_ipv4(void *nh, int len) +{ + struct iphdr *iph = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*iph) || iph->protocol != proto) + return -1; + + iph_addr_p = &iph->saddr; + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); + else + return recv_verify_packet_udp(iph + 1, len - sizeof(*iph)); +} + +static int recv_verify_packet_ipv6(void *nh, int len) +{ + struct ipv6hdr *ip6h = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) + return -1; + + iph_addr_p = &ip6h->saddr; + + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + else + return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); +} + +/* return whether auxdata includes TP_STATUS_CSUM_VALID */ +static bool recv_verify_packet_csum(struct msghdr *msg) +{ + struct tpacket_auxdata *aux = NULL; + struct cmsghdr *cm; + + if (msg->msg_flags & MSG_CTRUNC) + error(1, 0, "cmsg: truncated"); + + for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level != SOL_PACKET || + cm->cmsg_type != PACKET_AUXDATA) + error(1, 0, "cmsg: level=%d type=%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) + error(1, 0, "cmsg: len=%lu expected=%lu", + cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); + + aux = (void *)CMSG_DATA(cm); + } + + if (!aux) + error(1, 0, "cmsg: no auxdata"); + + return aux->tp_status & TP_STATUS_CSUM_VALID; +} + +static int recv_packet(int fd) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + unsigned long total = 0, bad_csums = 0, bad_validations = 0; + char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + struct pkt *buf = (void *)_buf; + struct msghdr msg = {0}; + struct iovec iov; + int len, ret; + + iov.iov_base = _buf; + iov.iov_len = sizeof(_buf); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + while (1) { + msg.msg_flags = 0; + + len = recvmsg(fd, &msg, MSG_DONTWAIT); + if (len == -1 && errno == EAGAIN) + break; + if (len == -1) + error(1, errno, "recv p"); + + if (cfg_family == PF_INET6) + ret = recv_verify_packet_ipv6(buf, len); + else + ret = recv_verify_packet_ipv4(buf, len); + + if (ret == -1 /* skip: non-matching */) + continue; + + total++; + if (ret == 1) + bad_csums++; + + /* Fail if kernel returns valid for known bad csum. + * Do not fail if kernel does not validate a good csum: + * Absence of validation does not imply invalid. + */ + if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); + bad_validations++; + } + } + + if (bad_csums || bad_validations) + error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n", + total, bad_csums, bad_validations); + + return total; +} + +static void parse_args(int argc, char *const argv[]) +{ + const char *daddr = NULL, *saddr = NULL; + int c; + + while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + break; + case '6': + cfg_family = PF_INET6; + break; + case 'd': + cfg_mac_dst = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'e': + cfg_encap = true; + break; + case 'E': + cfg_bad_csum = true; + break; + case 'i': + cfg_ifname = optarg; + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'L': + cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000; + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'r': + cfg_random_seed = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_send_pfpacket = true; + break; + case 'R': + /* only Rx: used with two machine tests */ + cfg_do_tx = false; + break; + case 's': + cfg_mac_src = optarg; + break; + case 'S': + saddr = optarg; + break; + case 't': + cfg_proto = IPPROTO_TCP; + break; + case 'T': + /* only Tx: used with two machine tests */ + cfg_do_rx = false; + break; + case 'u': + cfg_proto = IPPROTO_UDP; + break; + case 'U': + /* send using real udp socket, + * to exercise tx checksum offload + */ + cfg_send_udp = true; + break; + case 'z': + cfg_zero_disable = true; + break; + case 'Z': + cfg_zero_sum = true; + break; + default: + error(1, 0, "unknown arg %c", c); + } + } + + if (!daddr || !saddr) + error(1, 0, "Must pass -D <daddr> and -S <saddr>"); + + if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst)) + error(1, 0, "Transmit with pf_packet requires mac addresses"); + + if (cfg_payload_len > MAX_PAYLOAD_LEN) + error(1, 0, "Payload length exceeds max"); + + if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable)) + error(1, 0, "Only UDP supports zero csum"); + + if (cfg_zero_sum && !cfg_send_udp) + error(1, 0, "Zero checksum conversion requires -U for tx csum offload"); + if (cfg_zero_sum && cfg_bad_csum) + error(1, 0, "Cannot combine zero checksum conversion and invalid checksum"); + if (cfg_zero_sum && cfg_random_seed) + error(1, 0, "Cannot combine zero checksum conversion with randomization"); + + if (cfg_family == PF_INET6) { + cfg_saddr6.sin6_port = htons(cfg_port_src); + cfg_daddr6.sin6_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -S"); + } else { + cfg_saddr4.sin_port = htons(cfg_port_src); + cfg_daddr4.sin_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -S"); + } + + if (cfg_do_tx && cfg_random_seed) { + /* special case: time-based seed */ + if (cfg_random_seed == 1) + cfg_random_seed = (unsigned int)gettimeofday_ms(); + srand(cfg_random_seed); + fprintf(stderr, "randomization seed: %u\n", cfg_random_seed); + } +} + +static void do_tx(void) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + char *buf; + int fd, len, i; + + buf = build_packet(_buf, sizeof(_buf), &len); + + if (cfg_send_pfpacket) + fd = open_packet(); + else if (cfg_send_udp) + fd = open_inet(SOCK_DGRAM, 0); + else + fd = open_inet(SOCK_RAW, IPPROTO_RAW); + + for (i = 0; i < cfg_num_pkt; i++) { + if (cfg_send_pfpacket) + send_packet(fd, buf, len); + else + send_inet(fd, buf, len); + + /* randomize each packet individually to increase coverage */ + if (cfg_random_seed) { + cfg_payload_len = rand() % MAX_PAYLOAD_LEN; + buf = build_packet(_buf, sizeof(_buf), &len); + } + } + + if (close(fd)) + error(1, errno, "close tx"); +} + +static void do_rx(int fdp, int fdr) +{ + unsigned long count_udp = 0, count_pkt = 0; + long tleft, tstop; + struct pollfd pfd; + + tstop = gettimeofday_ms() + cfg_timeout_ms; + tleft = cfg_timeout_ms; + + do { + pfd.events = POLLIN; + pfd.fd = fdp; + if (poll(&pfd, 1, tleft) == -1) + error(1, errno, "poll"); + + if (pfd.revents & POLLIN) + count_pkt += recv_packet(fdp); + + if (cfg_proto == IPPROTO_UDP) + count_udp += recv_udp(fdr); + + tleft = tstop - gettimeofday_ms(); + } while (tleft > 0); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdp)) + error(1, errno, "close p"); + + if (count_pkt < cfg_num_pkt) + error(1, 0, "rx: missing packets at pf_packet: %lu < %u", + count_pkt, cfg_num_pkt); + + if (cfg_proto == IPPROTO_UDP) { + if (cfg_bad_csum && count_udp) + error(1, 0, "rx: unexpected packets at udp"); + if (!cfg_bad_csum && !count_udp) + error(1, 0, "rx: missing packets at udp"); + } +} + +int main(int argc, char *const argv[]) +{ + int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */ + + parse_args(argc, argv); + + /* open receive sockets before transmitting */ + if (cfg_do_rx) { + fdp = recv_prepare_packet(); + fdr = recv_prepare_udp(); + } + + if (cfg_do_tx) + do_tx(); + + if (cfg_do_rx) + do_rx(fdp, fdr); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index c245476fa29d..63c3eaec8d30 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -10,8 +10,10 @@ ret=0 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} IP="ip -netns testns" +IP_PEER="ip -netns peerns" RTABLE=100 +RTABLE_PEER=101 GW_IP4=192.51.100.2 SRC_IP=192.51.100.3 GW_IP6=2001:db8:1::2 @@ -20,7 +22,9 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 -TESTS="fib_rule6 fib_rule4" +TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" + +SELFTEST_PATH="" log_test() { @@ -52,6 +56,31 @@ log_section() echo "######################################################################" } +check_nettest() +{ + if which nettest > /dev/null 2>&1; then + return 0 + fi + + # Add the selftest directory to PATH if not already done + if [ "${SELFTEST_PATH}" = "" ]; then + SELFTEST_PATH="$(dirname $0)" + PATH="${PATH}:${SELFTEST_PATH}" + + # Now retry with the new path + if which nettest > /dev/null 2>&1; then + return 0 + fi + + if [ "${ret}" -eq 0 ]; then + ret="${ksft_skip}" + fi + echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" + fi + + return 1 +} + setup() { set -e @@ -72,6 +101,39 @@ cleanup() ip netns del testns } +setup_peer() +{ + set -e + + ip netns add peerns + $IP_PEER link set dev lo up + + ip link add name veth0 netns testns type veth \ + peer name veth1 netns peerns + $IP link set dev veth0 up + $IP_PEER link set dev veth1 up + + $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0 + $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1 + + $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad + $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad + + $IP_PEER address add 198.51.100.11/32 dev lo + $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11 + + $IP_PEER address add 2001:db8::1:11/128 dev lo + $IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11 + + set +e +} + +cleanup_peer() +{ + $IP link del dev veth0 + ip netns del peerns +} + fib_check_iproute_support() { ip rule help 2>&1 | grep -q $1 @@ -190,6 +252,37 @@ fib_rule6_test() fi } +# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly +# taken into account when connecting the socket and when sending packets. +fib_rule6_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -6 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ + -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + fib_rule4_del() { $IP rule del $1 @@ -296,6 +389,37 @@ fib_rule4_test() fi } +# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken +# into account when connecting the socket and when sending packets. +fib_rule4_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -4 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + run_fibrule_tests() { log_section "IPv4 fib rule" @@ -345,6 +469,8 @@ do case $t in fib_rule6_test|fib_rule6) fib_rule6_test;; fib_rule4_test|fib_rule4) fib_rule4_test;; + fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; + fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 2271a8727f62..70ea8798b1f6 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1711,13 +1711,21 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1735,6 +1743,35 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup @@ -2028,6 +2065,8 @@ EOF ################################################################################ # main +trap cleanup EXIT + while getopts :t:pPhv o do case $o in diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index a9c5c1be5088..91201ab3c4fc 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -3,6 +3,8 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ + bridge_mdb_host.sh \ + bridge_mdb_max.sh \ bridge_mdb_port_down.sh \ bridge_mld.sh \ bridge_port_isolation.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 1162836f8f32..2aa66d2a1702 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -96,9 +96,6 @@ cleanup() switch_destroy - # Always cleanup the mcast group - ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null - h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh index 5b02b6b60ce7..dc92d32464f6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -1,7 +1,16 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="locked_port_ipv4 locked_port_ipv6 locked_port_vlan" +ALL_TESTS=" + locked_port_ipv4 + locked_port_ipv6 + locked_port_vlan + locked_port_mab + locked_port_mab_roam + locked_port_mab_config + locked_port_mab_flush +" + NUM_NETIFS=4 CHECK_TC="no" source lib.sh @@ -166,6 +175,150 @@ locked_port_ipv6() log_test "Locked port ipv6" } +locked_port_mab() +{ + RET=0 + check_port_mab_support || return 0 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work before locking port" + + bridge link set dev $swp1 learning on locked on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on a locked port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 &> /dev/null + check_fail $? "FDB entry created before enabling MAB" + + bridge link set dev $swp1 learning on locked on mab on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on MAB enabled port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "Locked FDB entry not created" + + bridge fdb replace `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after replacing FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_fail $? "FDB entry marked as locked after replacement" + + bridge fdb del `mac_get $h1` dev $swp1 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB" +} + +# Check that entries cannot roam to a locked port, but that entries can roam +# to an unlocked port. +locked_port_mab_roam() +{ + local mac=a0:b0:c0:c0:b0:a0 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "No locked entry on first injection" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp2" + check_err $? "Entry did not roam to an unlocked port" + + bridge fdb get $mac br br0 vlan 1 | grep -q "locked" + check_fail $? "Entry roamed with locked flag on" + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp1" + check_fail $? "Entry roamed back to locked port" + + bridge fdb del $mac vlan 1 dev $swp2 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB roam" +} + +# Check that MAB can only be enabled on a port that is both locked and has +# learning enabled. +locked_port_mab_config() +{ + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked off mab on &> /dev/null + check_fail $? "MAB enabled while port is unlocked" + + bridge link set dev $swp1 learning off locked on mab on &> /dev/null + check_fail $? "MAB enabled while port has learning disabled" + + bridge link set dev $swp1 learning on locked on mab on + check_err $? "Failed to enable MAB when port is locked and has learning enabled" + + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB configuration" +} + +# Check that locked FDB entries are flushed from a port when MAB is disabled. +locked_port_mab_flush() +{ + local locked_mac1=00:01:02:03:04:05 + local unlocked_mac1=00:01:02:03:04:06 + local locked_mac2=00:01:02:03:04:07 + local unlocked_mac2=00:01:02:03:04:08 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + bridge link set dev $swp2 learning on locked on mab on + + # Create regular and locked FDB entries on each port. + bridge fdb add $unlocked_mac1 dev $swp1 vlan 1 master static + bridge fdb add $unlocked_mac2 dev $swp2 vlan 1 master static + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $locked_mac1 -b rand + bridge fdb get $locked_mac1 br br0 vlan 1 | grep "dev $swp1" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on first port" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $locked_mac2 -b rand + bridge fdb get $locked_mac2 br br0 vlan 1 | grep "dev $swp2" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on second port" + + # Disable MAB on the first port and check that only the first locked + # FDB entry was flushed. + bridge link set dev $swp1 mab off + + bridge fdb get $unlocked_mac1 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on first port was flushed after disabling MAB" + + bridge fdb get $unlocked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on second port was flushed after disabling MAB" + + bridge fdb get $locked_mac1 br br0 vlan 1 &> /dev/null + check_fail $? "Locked FDB entry on first port was not flushed after disabling MAB" + + bridge fdb get $locked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Locked FDB entry on second port was flushed after disabling MAB" + + bridge fdb del $unlocked_mac2 dev $swp2 vlan 1 master static + bridge fdb del $unlocked_mac1 dev $swp1 vlan 1 master static + + bridge link set dev $swp2 learning on locked off mab off + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB FDB flush" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index b1ba6876dd86..ae3f9462a2b6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -1,42 +1,107 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# -# Verify that adding host mdb entries work as intended for all types of -# multicast filters: ipv4, ipv6, and mac -ALL_TESTS="mdb_add_del_test" -NUM_NETIFS=2 +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR0 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ -TEST_GROUP_IP4="225.1.2.3" -TEST_GROUP_IP6="ff02::42" -TEST_GROUP_MAC="01:00:01:c0:ff:ee" +ALL_TESTS=" + cfg_test + fwd_test + ctrl_test +" +NUM_NETIFS=4 source lib.sh +source tc_common.sh h1_create() { - simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 } h1_destroy() { - simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 + vlan_destroy $h1 20 + vlan_destroy $h1 10 + simple_if_fini $h1 } -switch_create() +h2_create() { - # Enable multicast filtering - ip link add dev br0 type bridge mcast_snooping 1 + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} - ip link set dev $swp1 master br0 +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + simple_if_fini $h2 +} +switch_create() +{ + ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 + bridge vlan add vid 10 dev br0 self + bridge vlan add vid 20 dev br0 self ip link set dev br0 up + + ip link set dev $swp1 master br0 ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + tc qdisc add dev br0 clsact + tc qdisc add dev $h2 clsact } switch_destroy() { + tc qdisc del dev $h2 clsact + tc qdisc del dev br0 clsact + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + bridge vlan del vid 20 dev br0 self + bridge vlan del vid 10 dev br0 self ip link del dev br0 } @@ -45,9 +110,14 @@ setup_prepare() h1=${NETIFS[p1]} swp1=${NETIFS[p2]} + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + vrf_prepare + forwarding_enable h1_create + h2_create switch_create } @@ -56,48 +126,1090 @@ cleanup() pre_cleanup switch_destroy + h2_destroy h1_destroy + forwarding_restore vrf_cleanup } -do_mdb_add_del() +cfg_test_host_common() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local state=$1; shift + local invalid_state=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port br0 grp $grp $state vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp" + check_err $? "Failed to add $name host entry" + + bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null + check_fail $? "Managed to replace $name host entry" + + bridge mdb del dev br0 port br0 grp $grp $state vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp" + check_fail $? "Failed to delete $name host entry" + + # Check error cases. + bridge mdb add dev br0 port br0 grp $grp $invalid_state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a $invalid_state state" + + bridge mdb add dev br0 port br0 grp $grp src $src $state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a source" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + filter_mode exclude &> /dev/null + check_fail $? "Managed to add $name host entry with a filter mode" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add $name host entry with a source list" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + proto 123 &> /dev/null + check_fail $? "Managed to add $name host entry with a protocol" + + log_test "Common host entries configuration tests ($name)" +} + +# Check configuration of host entries from all types. +cfg_test_host() +{ + echo + log_info "# Host entries configuration tests" + + cfg_test_host_common "IPv4" "239.1.1.1" "192.0.2.1" "temp" "permanent" + cfg_test_host_common "IPv6" "ff0e::1" "2001:db8:1::1" "temp" "permanent" + cfg_test_host_common "L2" "01:02:03:04:05:06" "00:00:00:00:00:01" \ + "permanent" "temp" +} + +cfg_test_port_common() +{ + local name=$1;shift + local grp_key=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_err $? "Failed to add $name entry" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_err $? "Failed to replace $name entry" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_fail $? "Failed to delete $name entry" + + # Check default protocol and replacement. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static" + check_err $? "$name entry not added with default \"static\" protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + proto 123 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123" + check_err $? "Failed to replace protocol of $name entry" + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + # Check behavior when VLAN is not specified. + bridge mdb add dev br0 port $swp1 $grp_key permanent + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_err $? "$name entry with VLAN 10 not added when VLAN was not specified" + bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + check_err $? "$name entry with VLAN 20 not added when VLAN was not specified" + + bridge mdb del dev br0 port $swp1 $grp_key permanent + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified" + bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified" + + # Check behavior when bridge port is down. + ip link set dev $swp1 down + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + check_err $? "Failed to add $name permanent entry when bridge port is down" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 &> /dev/null + check_fail $? "Managed to add $name temporary entry when bridge port is down" + + ip link set dev $swp1 up + setup_wait_dev $swp1 + + # Check error cases. + ip link set dev br0 down + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name entry when bridge is down" + ip link set dev br0 up + + ip link set dev br0 type bridge mcast_snooping 0 + bridge mdb add dev br0 port $swp1 $grp_key permanent vid \ + 10 &> /dev/null + check_fail $? "Managed to add $name entry when multicast snooping is disabled" + ip link set dev br0 type bridge mcast_snooping 1 + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 5000 \ + &> /dev/null + check_fail $? "Managed to add $name entry with an invalid VLAN" + + log_test "Common port group entries configuration tests ($name)" +} + +src_list_create() +{ + local src_prefix=$1; shift + local num_srcs=$1; shift + local src_list + local i + + for i in $(seq 1 $num_srcs); do + src_list=${src_list},${src_prefix}${i} + done + + echo $src_list | cut -c 2- +} + +__cfg_test_port_ip_star_g() +{ + local name=$1; shift + local grp=$1; shift + local invalid_grp=$1; shift + local src_prefix=$1; shift + local src1=${src_prefix}1 + local src2=${src_prefix}2 + local src3=${src_prefix}3 + local max_srcs=31 + local num_srcs + + RET=0 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude" + check_err $? "Default filter mode is not \"exclude\"" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check basic add and delete behavior. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + check_err $? "(*, G) entry not created" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry not created" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + check_fail $? "(*, G) entry not deleted" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_fail $? "(S, G) entry not deleted" + + ## State (permanent / temp) tests. + + # Check that group and source timer are not set for permanent entries. + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "permanent" + check_err $? "(*, G) entry not added as \"permanent\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "permanent" + check_err $? "(S, G) entry not added as \"permanent\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_err $? "(*, G) \"permanent\" entry has a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_err $? "\"permanent\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is set for temporary (*, G) EXCLUDE, but not + # the source timer. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_err $? "\"blocked\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is not set for temporary (*, G) INCLUDE, but + # that the source timer is set. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) entry not added as \"temp\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_err $? "(*, G) INCLUDE entry has a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_fail $? "Source entry does not have a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is never set for (S, G) entries. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q " 0.00" + check_err $? "(S, G) entry has a pending group timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Filter mode (include / exclude) tests. + + # Check that (*, G) INCLUDE entries are added with correct filter mode + # and that (S, G) entries are not marked as "blocked". + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode include source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "include" + check_err $? "(*, G) INCLUDE not added with \"include\" filter mode" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_fail $? "(S, G) entry marked as \"blocked\" when should not" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that (*, G) EXCLUDE entries are added with correct filter mode + # and that (S, G) entries are marked as "blocked". + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "exclude" + check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_err $? "(S, G) entry not marked as \"blocked\" when should" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Protocol tests. + + # Check that (*, G) and (S, G) entries are added with the specified + # protocol. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "zebra" + check_err $? "(*, G) entry not added with \"zebra\" protocol" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "zebra" + check_err $? "(S, G) entry not marked added with \"zebra\" protocol" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Replace tests. + + # Check that state can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "permanent" + check_err $? "(*, G) entry not marked as \"permanent\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "permanent" + check_err $? "(S, G) entry not marked as \"permanent\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) entry not marked as \"temp\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) entry not marked as \"temp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that filter mode can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "include" + check_err $? "(*, G) not marked with \"include\" filter mode after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_fail $? "(S, G) marked as \"blocked\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "exclude" + check_err $? "(*, G) not marked with \"exclude\" filter mode after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_err $? "(S, G) not marked as \"blocked\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that sources can be added to and removed from the source list. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src2,$src3 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry for source $src1 not created after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + check_err $? "(S, G) entry for source $src2 not created after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + check_err $? "(S, G) entry for source $src3 not created after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src3 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry for source $src1 not created after second replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + check_fail $? "(S, G) entry for source $src2 created after second replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + check_err $? "(S, G) entry for source $src3 not created after second replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that protocol can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto bgp + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "bgp" + check_err $? "(*, G) protocol not changed to \"bgp\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "bgp" + check_err $? "(S, G) protocol not changed to \"bgp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Star exclude tests. + + # Check star exclude functionality. When adding a new EXCLUDE (*, G), + # it needs to be also added to all (S, G) entries for proper + # replication. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode include source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \ + grep "$src1" | grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10 + + ## Error cases tests. + + bridge mdb add dev br0 port $swp1 grp $invalid_grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid group" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + &> /dev/null + check_fail $? "Managed to add an INCLUDE entry with an empty source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $grp &> /dev/null + check_fail $? "Managed to add an entry with an invalid source in source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list and no filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp src $src2 vid 10 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1,$src2,$src3 &> /dev/null + check_fail $? "Managed to add a source that already has a forwarding entry" + bridge mdb del dev br0 port $swp1 grp $grp src $src2 vid 10 + + # Check maximum number of sources. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $max_srcs) + num_srcs=$(bridge -d mdb show dev br0 vid 10 | grep "$grp" | \ + grep "src" | wc -l) + [[ $num_srcs -eq $max_srcs ]] + check_err $? "Failed to configure maximum number of sources ($max_srcs)" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $((max_srcs + 1))) \ + &> /dev/null + check_fail $? "Managed to exceed maximum number of sources ($max_srcs)" + + log_test "$name (*, G) port group entries configuration tests" +} + +cfg_test_port_ip_star_g() +{ + echo + log_info "# Port group entries configuration tests - (*, G)" + + cfg_test_port_common "IPv4 (*, G)" "grp 239.1.1.1" + cfg_test_port_common "IPv6 (*, G)" "grp ff0e::1" + __cfg_test_port_ip_star_g "IPv4" "239.1.1.1" "224.0.0.1" "192.0.2." + __cfg_test_port_ip_star_g "IPv6" "ff0e::1" "ff02::1" "2001:db8:1::" +} + +__cfg_test_port_ip_sg() { - local group=$1 - local flag=$2 + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local grp_key="grp $grp src $src" RET=0 - bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null - check_err $? "Failed adding $group to br0, port br0" - if [ -z "$flag" ]; then - flag="temp" + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include" + check_err $? "Default filter mode is not \"include\"" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that entries can be added as both permanent and temp and that + # group timer is set correctly. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "permanent" + check_err $? "Entry not added as \"permanent\" when should" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_err $? "\"permanent\" entry has a pending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "temp" + check_err $? "Entry not added as \"temp\" when should" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_fail $? "\"temp\" entry has an unpending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check error cases. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + bridge mdb add dev br0 port $swp1 grp $grp src $grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid source" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 temp + bridge mdb add dev br0 port $swp1 $grp_key vid 10 permanent &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that we can replace available attributes. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123 + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "111" + check_err $? "Failed to replace protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "permanent" + check_err $? "Entry not marked as \"permanent\" after replace" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_err $? "Entry has a pending group timer after replace" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "temp" + check_err $? "Entry not marked as \"temp\" after replace" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_fail $? "Entry has an unpending group timer after replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check star exclude functionality. When adding a (S, G), all matching + # (*, G) ports need to be added to it. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \ + grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + log_test "$name (S, G) port group entries configuration tests" +} + +cfg_test_port_ip_sg() +{ + echo + log_info "# Port group entries configuration tests - (S, G)" + + cfg_test_port_common "IPv4 (S, G)" "grp 239.1.1.1 src 192.0.2.1" + cfg_test_port_common "IPv6 (S, G)" "grp ff0e::1 src 2001:db8:1::1" + __cfg_test_port_ip_sg "IPv4" "239.1.1.1" "192.0.2.1" + __cfg_test_port_ip_sg "IPv6" "ff0e::1" "2001:db8:1::1" +} + +cfg_test_port_ip() +{ + cfg_test_port_ip_star_g + cfg_test_port_ip_sg +} + +__cfg_test_port_l2() +{ + local grp="01:02:03:04:05:06" + + RET=0 + + bridge meb add dev br0 port $swp grp 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with unicast MAC" + + bridge mdb add dev br0 port $swp grp $grp src 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with a source" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + source_list 00:01:02:03:04:05 &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + log_test "L2 (*, G) port group entries configuration tests" +} + +cfg_test_port_l2() +{ + echo + log_info "# Port group entries configuration tests - L2" + + cfg_test_port_common "L2 (*, G)" "grp 01:02:03:04:05:06" + __cfg_test_port_l2 +} + +# Check configuration of regular (port) entries of all types. +cfg_test_port() +{ + cfg_test_port_ip + cfg_test_port_l2 +} + +ipv4_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "239.1.1.$i" + done +} + +ipv6_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "ff0e::$(printf %x $i)" + done +} + +l2_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "01:00:00:00:00:$(printf %02x $i)" + done +} + +cfg_test_dump_common() +{ + local name=$1; shift + local fn=$1; shift + local max_bridges=2 + local max_grps=256 + local max_ports=32 + local num_entries + local batch_file + local grp + local i j + + RET=0 + + # Create net devices. + for i in $(seq 1 $max_bridges); do + ip link add name br-test${i} up type bridge vlan_filtering 1 \ + mcast_snooping 1 + for j in $(seq 1 $max_ports); do + ip link add name br-test${i}-du${j} up \ + master br-test${i} type dummy + done + done + + # Create batch file with MDB entries. + batch_file=$(mktemp) + for i in $(seq 1 $max_bridges); do + for j in $(seq 1 $max_ports); do + for grp in $($fn $max_grps); do + echo "mdb add dev br-test${i} \ + port br-test${i}-du${j} grp $grp \ + permanent vid 1" >> $batch_file + done + done + done + + # Program the batch file and check for expected number of entries. + bridge -b $batch_file + for i in $(seq 1 $max_bridges); do + num_entries=$(bridge mdb show dev br-test${i} | \ + grep "permanent" | wc -l) + [[ $num_entries -eq $((max_grps * max_ports)) ]] + check_err $? "Wrong number of entries in br-test${i}" + done + + # Cleanup. + rm $batch_file + for i in $(seq 1 $max_bridges); do + ip link del dev br-test${i} + for j in $(seq $max_ports); do + ip link del dev br-test${i}-du${j} + done + done + + log_test "$name large scale dump tests" +} + +# Check large scale dump. +cfg_test_dump() +{ + echo + log_info "# Large scale dump tests" + + cfg_test_dump_common "IPv4" ipv4_grps_get + cfg_test_dump_common "IPv6" ipv6_grps_get + cfg_test_dump_common "L2" l2_grps_get +} + +cfg_test() +{ + cfg_test_host + cfg_test_port + cfg_test_dump +} + +__fwd_test_host_ip() +{ + local grp=$1; shift + local src=$1; shift + local mode=$1; shift + local name + local eth_type + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" fi - bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null - check_err $? "$group not added with $flag flag" + tc filter add dev br0 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp src_ip $src \ + action drop - bridge mdb del dev br0 port br0 grp $group 2>/dev/null - check_err $? "Failed deleting $group from br0, port br0" + # Packet should only be flooded to multicast router ports when there is + # no matching MDB entry. The bridge is not configured as a multicast + # router port. + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after flood" - bridge mdb show dev br0 | grep -q $group >/dev/null - check_err_fail 1 $? "$group still in mdb after delete" + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $grp temp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after installing a regular entry" - log_test "MDB add/del group $group to bridge port br0" + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $grp temp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $grp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + tc filter del dev br0 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name host entries forwarding tests" } -mdb_add_del_test() +fwd_test_host_ip() { - do_mdb_add_del $TEST_GROUP_MAC permanent - do_mdb_add_del $TEST_GROUP_IP4 - do_mdb_add_del $TEST_GROUP_IP6 + __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4" + __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6" +} + +fwd_test_host_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev br0 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + # Packet should be flooded and locally received when there is no + # matching MDB entry. + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + + tc filter del dev br0 ingress protocol all pref 1 handle 1 flower + + log_test "L2 host entries forwarding tests" +} + +fwd_test_host() +{ + # Disable multicast router on the bridge to ensure that packets are + # only locally received when a matching host entry is present. + ip link set dev br0 type bridge mcast_router 0 + + fwd_test_host_ip + fwd_test_host_l2 + + ip link set dev br0 type bridge mcast_router 1 +} + +__fwd_test_port_ip() +{ + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mode=$1; shift + local filter_mode=$1; shift + local name + local eth_type + local src_list + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" + fi + + # The valid source is the one we expect to get packets from after + # adding the entry. + if [[ $filter_mode == "include" ]]; then + src_list=$valid_src + else + src_list=$invalid_src + fi + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $valid_src action drop + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 2 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $invalid_src action drop + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet from valid source received on H2 before adding entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode $filter_mode source_list $src_list + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet from valid source not received on H2 after adding entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 after adding entry" + + bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \ + filter_mode exclude + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source not received on H2 after allowing all sources" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source not received on H2 after allowing all sources" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source received on H2 after deleting entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 2 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name port group \"$filter_mode\" entries forwarding tests" +} + +fwd_test_port_ip() +{ + __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude" + __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "exclude" + __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include" + __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "include" +} + +fwd_test_port_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev $h2 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet not received on H2 after adding entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol all pref 1 handle 1 flower + + log_test "L2 port entries forwarding tests" +} + +fwd_test_port() +{ + # Disable multicast flooding to ensure that packets are only forwarded + # out of a port when a matching port group entry is present. + bridge link set dev $swp2 mcast_flood off + + fwd_test_port_ip + fwd_test_port_l2 + + bridge link set dev $swp2 mcast_flood on +} + +fwd_test() +{ + echo + log_info "# Forwarding tests" + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. + ip -6 address add fe80::1/64 nodad dev br0 + ip link set dev br0 type bridge mcast_querier 1 + # Wait the default Query Response Interval (10 seconds) for the bridge + # to determine that there are no other queriers in the network. + sleep 10 + + fwd_test_host + fwd_test_port + + ip link set dev br0 type bridge mcast_querier 0 + ip -6 address del fe80::1/64 dev br0 +} + +ctrl_igmpv3_is_in_test() +{ + RET=0 + + # Add a permanent entry and check that it is not affected by the + # received IGMP packet. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 \ + filter_mode include source_list 192.0.2.1 + + # IS_IN ( 192.0.2.2 ) + $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q + + bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2 + check_fail $? "Permanent entry affected by IGMP packet" + + # Replace the permanent entry with a temporary one and check that after + # processing the IGMP packet, a new source is added to the list along + # with a new forwarding entry. + bridge mdb replace dev br0 port $swp1 grp 239.1.1.1 temp vid 10 \ + filter_mode include source_list 192.0.2.1 + + # IS_IN ( 192.0.2.2 ) + $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q + + bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \ + grep -q 192.0.2.2 + check_err $? "Source not add to source list" + + bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \ + grep -q "src 192.0.2.2" + check_err $? "(S, G) entry not created for new source" + + bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10 + + log_test "IGMPv3 MODE_IS_INCLUDE tests" +} + +ctrl_mldv2_is_in_test() +{ + RET=0 + + # Add a permanent entry and check that it is not affected by the + # received MLD packet. + bridge mdb add dev br0 port $swp1 grp ff0e::1 permanent vid 10 \ + filter_mode include source_list 2001:db8:1::1 + + # IS_IN ( 2001:db8:1::2 ) + local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2) + $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \ + -t ip hop=1,next=0,p="$p" -q + + bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ + grep -q 2001:db8:1::2 + check_fail $? "Permanent entry affected by MLD packet" + + # Replace the permanent entry with a temporary one and check that after + # processing the MLD packet, a new source is added to the list along + # with a new forwarding entry. + bridge mdb replace dev br0 port $swp1 grp ff0e::1 temp vid 10 \ + filter_mode include source_list 2001:db8:1::1 + + # IS_IN ( 2001:db8:1::2 ) + $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \ + -t ip hop=1,next=0,p="$p" -q + + bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \ + grep -q 2001:db8:1::2 + check_err $? "Source not add to source list" + + bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ + grep -q "src 2001:db8:1::2" + check_err $? "(S, G) entry not created for new source" + + bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10 + + log_test "MLDv2 MODE_IS_INCLUDE tests" +} + +ctrl_test() +{ + echo + log_info "# Control packets tests" + + ctrl_igmpv3_is_in_test + ctrl_mldv2_is_in_test } trap cleanup EXIT setup_prepare setup_wait - tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh new file mode 100755 index 000000000000..b1ba6876dd86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that adding host mdb entries work as intended for all types of +# multicast filters: ipv4, ipv6, and mac + +ALL_TESTS="mdb_add_del_test" +NUM_NETIFS=2 + +TEST_GROUP_IP4="225.1.2.3" +TEST_GROUP_IP6="ff02::42" +TEST_GROUP_MAC="01:00:01:c0:ff:ee" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 + + ip link set dev $swp1 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +do_mdb_add_del() +{ + local group=$1 + local flag=$2 + + RET=0 + bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null + check_err $? "Failed adding $group to br0, port br0" + + if [ -z "$flag" ]; then + flag="temp" + fi + + bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null + check_err $? "$group not added with $flag flag" + + bridge mdb del dev br0 port br0 grp $group 2>/dev/null + check_err $? "Failed deleting $group from br0, port br0" + + bridge mdb show dev br0 | grep -q $group >/dev/null + check_err_fail 1 $? "$group still in mdb after delete" + + log_test "MDB add/del group $group to bridge port br0" +} + +mdb_add_del_test() +{ + do_mdb_add_del $TEST_GROUP_MAC permanent + do_mdb_add_del $TEST_GROUP_IP4 + do_mdb_add_del $TEST_GROUP_IP6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh new file mode 100755 index 000000000000..ae255b662ba3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh @@ -0,0 +1,1336 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR0 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + test_8021d + test_8021q + test_8021qvs +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +switch_create_8021d() +{ + log_info "802.1d tests" + + ip link add name br0 type bridge vlan_filtering 0 \ + mcast_snooping 1 \ + mcast_igmp_version 3 mcast_mld_version 2 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + bridge link set dev $swp1 fastleave on + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up +} + +switch_create_8021q() +{ + local br_flags=$1; shift + + log_info "802.1q $br_flags${br_flags:+ }tests" + + ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 1 $br_flags \ + mcast_igmp_version 3 mcast_mld_version 2 + bridge vlan add vid 10 dev br0 self + bridge vlan add vid 20 dev br0 self + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + bridge link set dev $swp1 fastleave on + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 +} + +switch_create_8021qvs() +{ + switch_create_8021q "mcast_vlan_snooping 1" + bridge vlan global set dev br0 vid 10 mcast_igmp_version 3 + bridge vlan global set dev br0 vid 10 mcast_mld_version 2 + bridge vlan global set dev br0 vid 20 mcast_igmp_version 3 + bridge vlan global set dev br0 vid 20 mcast_mld_version 2 +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy 2>/dev/null + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +cfg_src_list() +{ + local IPs=("$@") + local IPstr=$(echo ${IPs[@]} | tr '[:space:]' , | sed 's/,$//') + + echo ${IPstr:+source_list }${IPstr} +} + +cfg_group_op() +{ + local op=$1; shift + local locus=$1; shift + local GRP=$1; shift + local state=$1; shift + local IPs=("$@") + + local source_list=$(cfg_src_list ${IPs[@]}) + + # Everything besides `bridge mdb' uses the "dev X vid Y" syntax, + # so we use it here as well and convert. + local br_locus=$(echo "$locus" | sed 's/^dev /port /') + + bridge mdb $op dev br0 $br_locus grp $GRP $state \ + filter_mode include $source_list +} + +cfg4_entries_op() +{ + local op=$1; shift + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local GRP=239.1.1.${grp} + local IPs=$(seq -f 192.0.2.%g 1 $((n - 1))) + cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]} +} + +cfg4_entries_add() +{ + cfg4_entries_op add "$@" +} + +cfg4_entries_del() +{ + cfg4_entries_op del "$@" +} + +cfg6_entries_op() +{ + local op=$1; shift + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local GRP=ff0e::${grp} + local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1)))) + cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]} +} + +cfg6_entries_add() +{ + cfg6_entries_op add "$@" +} + +cfg6_entries_del() +{ + cfg6_entries_op del "$@" +} + +locus_dev_peer() +{ + local dev_kw=$1; shift + local dev=$1; shift + local vid_kw=$1; shift + local vid=$1; shift + + echo "$h1.${vid:-10}" +} + +locus_dev() +{ + local dev_kw=$1; shift + local dev=$1; shift + + echo $dev +} + +ctl4_entries_add() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local IPs=$(seq -f 192.0.2.%g 1 $((n - 1))) + local peer=$(locus_dev_peer $locus) + local GRP=239.1.1.${grp} + $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \ + -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q + sleep 1 + + local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l) + if ((nn != n)); then + echo mcast_max_groups > /dev/stderr + false + fi +} + +ctl4_entries_del() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local peer=$(locus_dev_peer $locus) + local GRP=239.1.1.${grp} + $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \ + -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q + sleep 1 + ! bridge mdb show dev br0 | grep -q $GRP +} + +ctl6_entries_add() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1)))) + local peer=$(locus_dev_peer $locus) + local SIP=fe80::1 + local GRP=ff0e::${grp} + local p=$(mldv2_is_in_get $SIP $GRP $IPs) + $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q + sleep 1 + + local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l) + if ((nn != n)); then + echo mcast_max_groups > /dev/stderr + false + fi +} + +ctl6_entries_del() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local peer=$(locus_dev_peer $locus) + local SIP=fe80::1 + local GRP=ff0e::${grp} + local p=$(mldv1_done_get $SIP $GRP) + $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q + sleep 1 + ! bridge mdb show dev br0 | grep -q $GRP +} + +bridge_maxgroups_errmsg_check_cfg() +{ + local msg=$1; shift + local needle=$1; shift + + echo "$msg" | grep -q mcast_max_groups + check_err $? "Adding MDB entries failed for the wrong reason: $msg" +} + +bridge_maxgroups_errmsg_check_cfg4() +{ + bridge_maxgroups_errmsg_check_cfg "$@" +} + +bridge_maxgroups_errmsg_check_cfg6() +{ + bridge_maxgroups_errmsg_check_cfg "$@" +} + +bridge_maxgroups_errmsg_check_ctl4() +{ + : +} + +bridge_maxgroups_errmsg_check_ctl6() +{ + : +} + +bridge_port_ngroups_get() +{ + local locus=$1; shift + + bridge -j -d link show $locus | + jq '.[].mcast_n_groups' +} + +bridge_port_maxgroups_get() +{ + local locus=$1; shift + + bridge -j -d link show $locus | + jq '.[].mcast_max_groups' +} + +bridge_port_maxgroups_set() +{ + local locus=$1; shift + local max=$1; shift + + bridge link set dev $(locus_dev $locus) mcast_max_groups $max +} + +bridge_port_vlan_ngroups_get() +{ + local locus=$1; shift + + bridge -j -d vlan show $locus | + jq '.[].vlans[].mcast_n_groups' +} + +bridge_port_vlan_maxgroups_get() +{ + local locus=$1; shift + + bridge -j -d vlan show $locus | + jq '.[].vlans[].mcast_max_groups' +} + +bridge_port_vlan_maxgroups_set() +{ + local locus=$1; shift + local max=$1; shift + + bridge vlan set $locus mcast_max_groups $max +} + +test_ngroups_reporting() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + + local n0=$(bridge_${context}_ngroups_get "$locus") + ${CFG}_entries_add "$locus" temp 5 + check_err $? "Couldn't add MDB entries" + local n1=$(bridge_${context}_ngroups_get "$locus") + + ((n1 == n0 + 5)) + check_err $? "Number of groups was $n0, now is $n1, but $((n0 + 5)) expected" + + ${CFG}_entries_del "$locus" temp 5 + check_err $? "Couldn't delete MDB entries" + local n2=$(bridge_${context}_ngroups_get "$locus") + + ((n2 == n0)) + check_err $? "Number of groups was $n0, now is $n2, but should be back to $n0" + + log_test "$CFG: $context: ngroups reporting" +} + +test_8021d_ngroups_reporting_cfg4() +{ + test_ngroups_reporting cfg4 port "dev $swp1" +} + +test_8021d_ngroups_reporting_ctl4() +{ + test_ngroups_reporting ctl4 port "dev $swp1" +} + +test_8021d_ngroups_reporting_cfg6() +{ + test_ngroups_reporting cfg6 port "dev $swp1" +} + +test_8021d_ngroups_reporting_ctl6() +{ + test_ngroups_reporting ctl6 port "dev $swp1" +} + +test_8021q_ngroups_reporting_cfg4() +{ + test_ngroups_reporting cfg4 port "dev $swp1 vid 10" +} + +test_8021q_ngroups_reporting_ctl4() +{ + test_ngroups_reporting ctl4 port "dev $swp1 vid 10" +} + +test_8021q_ngroups_reporting_cfg6() +{ + test_ngroups_reporting cfg6 port "dev $swp1 vid 10" +} + +test_8021q_ngroups_reporting_ctl6() +{ + test_ngroups_reporting ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_cfg4() +{ + test_ngroups_reporting cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_ctl4() +{ + test_ngroups_reporting ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_cfg6() +{ + test_ngroups_reporting cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_ctl6() +{ + test_ngroups_reporting ctl6 port_vlan "dev $swp1 vid 10" +} + +test_ngroups_cross_vlan() +{ + local CFG=$1; shift + + local locus1="dev $swp1 vid 10" + local locus2="dev $swp1 vid 20" + + RET=0 + + local n10=$(bridge_port_vlan_ngroups_get "$locus1") + local n20=$(bridge_port_vlan_ngroups_get "$locus2") + ${CFG}_entries_add "$locus1" temp 5 111 + check_err $? "Couldn't add MDB entries to VLAN 10" + local n11=$(bridge_port_vlan_ngroups_get "$locus1") + local n21=$(bridge_port_vlan_ngroups_get "$locus2") + + ((n11 == n10 + 5)) + check_err $? "Number of groups at VLAN 10 was $n10, now is $n11, but 5 entries added on VLAN 10, $((n10 + 5)) expected" + + ((n21 == n20)) + check_err $? "Number of groups at VLAN 20 was $n20, now is $n21, but no change expected on VLAN 20" + + ${CFG}_entries_add "$locus2" temp 5 112 + check_err $? "Couldn't add MDB entries to VLAN 20" + local n12=$(bridge_port_vlan_ngroups_get "$locus1") + local n22=$(bridge_port_vlan_ngroups_get "$locus2") + + ((n12 == n11)) + check_err $? "Number of groups at VLAN 10 was $n11, now is $n12, but no change expected on VLAN 10" + + ((n22 == n21 + 5)) + check_err $? "Number of groups at VLAN 20 was $n21, now is $n22, but 5 entries added on VLAN 20, $((n21 + 5)) expected" + + ${CFG}_entries_del "$locus1" temp 5 111 + check_err $? "Couldn't delete MDB entries from VLAN 10" + ${CFG}_entries_del "$locus2" temp 5 112 + check_err $? "Couldn't delete MDB entries from VLAN 20" + local n13=$(bridge_port_vlan_ngroups_get "$locus1") + local n23=$(bridge_port_vlan_ngroups_get "$locus2") + + ((n13 == n10)) + check_err $? "Number of groups at VLAN 10 was $n10, now is $n13, but should be back to $n10" + + ((n23 == n20)) + check_err $? "Number of groups at VLAN 20 was $n20, now is $n23, but should be back to $n20" + + log_test "$CFG: port_vlan: isolation of port and per-VLAN ngroups" +} + +test_8021qvs_ngroups_cross_vlan_cfg4() +{ + test_ngroups_cross_vlan cfg4 +} + +test_8021qvs_ngroups_cross_vlan_ctl4() +{ + test_ngroups_cross_vlan ctl4 +} + +test_8021qvs_ngroups_cross_vlan_cfg6() +{ + test_ngroups_cross_vlan cfg6 +} + +test_8021qvs_ngroups_cross_vlan_ctl6() +{ + test_ngroups_cross_vlan ctl6 +} + +test_maxgroups_zero() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + local max + + max=$(bridge_${context}_maxgroups_get "$locus") + ((max == 0)) + check_err $? "Max groups on $locus should be 0, but $max reported" + + bridge_${context}_maxgroups_set "$locus" 100 + check_err $? "Failed to set max to 100" + max=$(bridge_${context}_maxgroups_get "$locus") + ((max == 100)) + check_err $? "Max groups expected to be 100, but $max reported" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "Couldn't set maximum to 0" + + # Test that setting 0 explicitly still serves as infinity. + ${CFG}_entries_add "$locus" temp 5 + check_err $? "Adding 5 MDB entries failed but should have passed" + ${CFG}_entries_del "$locus" temp 5 + check_err $? "Couldn't delete MDB entries" + + log_test "$CFG: $context maxgroups: reporting and treatment of 0" +} + +test_8021d_maxgroups_zero_cfg4() +{ + test_maxgroups_zero cfg4 port "dev $swp1" +} + +test_8021d_maxgroups_zero_ctl4() +{ + test_maxgroups_zero ctl4 port "dev $swp1" +} + +test_8021d_maxgroups_zero_cfg6() +{ + test_maxgroups_zero cfg6 port "dev $swp1" +} + +test_8021d_maxgroups_zero_ctl6() +{ + test_maxgroups_zero ctl6 port "dev $swp1" +} + +test_8021q_maxgroups_zero_cfg4() +{ + test_maxgroups_zero cfg4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_zero_ctl4() +{ + test_maxgroups_zero ctl4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_zero_cfg6() +{ + test_maxgroups_zero cfg6 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_zero_ctl6() +{ + test_maxgroups_zero ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_cfg4() +{ + test_maxgroups_zero cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_ctl4() +{ + test_maxgroups_zero ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_cfg6() +{ + test_maxgroups_zero cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_ctl6() +{ + test_maxgroups_zero ctl6 port_vlan "dev $swp1 vid 10" +} + +test_maxgroups_zero_cross_vlan() +{ + local CFG=$1; shift + + local locus0="dev $swp1" + local locus1="dev $swp1 vid 10" + local locus2="dev $swp1 vid 20" + local max + + RET=0 + + bridge_port_vlan_maxgroups_set "$locus1" 100 + check_err $? "$locus1: Failed to set max to 100" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 0)) + check_err $? "$locus0: Max groups expected to be 0, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 0)) + check_err $? "$locus2: Max groups expected to be 0, but $max reported" + + bridge_port_vlan_maxgroups_set "$locus2" 100 + check_err $? "$locus2: Failed to set max to 100" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 0)) + check_err $? "$locus0: Max groups expected to be 0, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 100)) + check_err $? "$locus2: Max groups expected to be 100, but $max reported" + + bridge_port_maxgroups_set "$locus0" 100 + check_err $? "$locus0: Failed to set max to 100" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 100)) + check_err $? "$locus0: Max groups expected to be 100, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 100)) + check_err $? "$locus2: Max groups expected to be 100, but $max reported" + + bridge_port_vlan_maxgroups_set "$locus1" 0 + check_err $? "$locus1: Failed to set max to 0" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 100)) + check_err $? "$locus0: Max groups expected to be 100, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 100)) + check_err $? "$locus2: Max groups expected to be 100, but $max reported" + + bridge_port_vlan_maxgroups_set "$locus2" 0 + check_err $? "$locus2: Failed to set max to 0" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 100)) + check_err $? "$locus0: Max groups expected to be 100, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 0)) + check_err $? "$locus2: Max groups expected to be 0 but $max reported" + + bridge_port_maxgroups_set "$locus0" 0 + check_err $? "$locus0: Failed to set max to 0" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 0)) + check_err $? "$locus0: Max groups expected to be 0, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 0)) + check_err $? "$locus2: Max groups expected to be 0, but $max reported" + + log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN maximums" +} + +test_8021qvs_maxgroups_zero_cross_vlan_cfg4() +{ + test_maxgroups_zero_cross_vlan cfg4 +} + +test_8021qvs_maxgroups_zero_cross_vlan_ctl4() +{ + test_maxgroups_zero_cross_vlan ctl4 +} + +test_8021qvs_maxgroups_zero_cross_vlan_cfg6() +{ + test_maxgroups_zero_cross_vlan cfg6 +} + +test_8021qvs_maxgroups_zero_cross_vlan_ctl6() +{ + test_maxgroups_zero_cross_vlan ctl6 +} + +test_maxgroups_too_low() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + + local n=$(bridge_${context}_ngroups_get "$locus") + local msg + + ${CFG}_entries_add "$locus" temp 5 111 + check_err $? "$locus: Couldn't add MDB entries" + + bridge_${context}_maxgroups_set "$locus" $((n+2)) + check_err $? "$locus: Setting maxgroups to $((n+2)) failed" + + msg=$(${CFG}_entries_add "$locus" temp 2 112 2>&1) + check_fail $? "$locus: Adding more entries passed when max<n" + bridge_maxgroups_errmsg_check_cfg "$msg" + + ${CFG}_entries_del "$locus" temp 5 111 + check_err $? "$locus: Couldn't delete MDB entries" + + ${CFG}_entries_add "$locus" temp 2 112 + check_err $? "$locus: Adding more entries failed" + + ${CFG}_entries_del "$locus" temp 2 112 + check_err $? "$locus: Deleting more entries failed" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "$locus: Couldn't set maximum to 0" + + log_test "$CFG: $context maxgroups: configure below ngroups" +} + +test_8021d_maxgroups_too_low_cfg4() +{ + test_maxgroups_too_low cfg4 port "dev $swp1" +} + +test_8021d_maxgroups_too_low_ctl4() +{ + test_maxgroups_too_low ctl4 port "dev $swp1" +} + +test_8021d_maxgroups_too_low_cfg6() +{ + test_maxgroups_too_low cfg6 port "dev $swp1" +} + +test_8021d_maxgroups_too_low_ctl6() +{ + test_maxgroups_too_low ctl6 port "dev $swp1" +} + +test_8021q_maxgroups_too_low_cfg4() +{ + test_maxgroups_too_low cfg4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_low_ctl4() +{ + test_maxgroups_too_low ctl4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_low_cfg6() +{ + test_maxgroups_too_low cfg6 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_low_ctl6() +{ + test_maxgroups_too_low ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_cfg4() +{ + test_maxgroups_too_low cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_ctl4() +{ + test_maxgroups_too_low ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_cfg6() +{ + test_maxgroups_too_low cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_ctl6() +{ + test_maxgroups_too_low ctl6 port_vlan "dev $swp1 vid 10" +} + +test_maxgroups_too_many_entries() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + + local n=$(bridge_${context}_ngroups_get "$locus") + local msg + + # Configure a low maximum + bridge_${context}_maxgroups_set "$locus" $((n+1)) + check_err $? "$locus: Couldn't set maximum" + + # Try to add more entries than the configured maximum + msg=$(${CFG}_entries_add "$locus" temp 5 2>&1) + check_fail $? "Adding 5 MDB entries passed, but should have failed" + bridge_maxgroups_errmsg_check_${CFG} "$msg" + + # When adding entries through the control path, as many as possible + # get created. That's consistent with the mcast_hash_max behavior. + # So there, drop the entries explicitly. + if [[ ${CFG%[46]} == ctl ]]; then + ${CFG}_entries_del "$locus" temp 17 2>&1 + fi + + local n2=$(bridge_${context}_ngroups_get "$locus") + ((n2 == n)) + check_err $? "Number of groups was $n, but after a failed attempt to add MDB entries it changed to $n2" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "$locus: Couldn't set maximum to 0" + + log_test "$CFG: $context maxgroups: add too many MDB entries" +} + +test_8021d_maxgroups_too_many_entries_cfg4() +{ + test_maxgroups_too_many_entries cfg4 port "dev $swp1" +} + +test_8021d_maxgroups_too_many_entries_ctl4() +{ + test_maxgroups_too_many_entries ctl4 port "dev $swp1" +} + +test_8021d_maxgroups_too_many_entries_cfg6() +{ + test_maxgroups_too_many_entries cfg6 port "dev $swp1" +} + +test_8021d_maxgroups_too_many_entries_ctl6() +{ + test_maxgroups_too_many_entries ctl6 port "dev $swp1" +} + +test_8021q_maxgroups_too_many_entries_cfg4() +{ + test_maxgroups_too_many_entries cfg4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_many_entries_ctl4() +{ + test_maxgroups_too_many_entries ctl4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_many_entries_cfg6() +{ + test_maxgroups_too_many_entries cfg6 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_many_entries_ctl6() +{ + test_maxgroups_too_many_entries ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_cfg4() +{ + test_maxgroups_too_many_entries cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_ctl4() +{ + test_maxgroups_too_many_entries ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_cfg6() +{ + test_maxgroups_too_many_entries cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_ctl6() +{ + test_maxgroups_too_many_entries ctl6 port_vlan "dev $swp1 vid 10" +} + +test_maxgroups_too_many_cross_vlan() +{ + local CFG=$1; shift + + RET=0 + + local locus0="dev $swp1" + local locus1="dev $swp1 vid 10" + local locus2="dev $swp1 vid 20" + local n1=$(bridge_port_vlan_ngroups_get "$locus1") + local n2=$(bridge_port_vlan_ngroups_get "$locus2") + local msg + + if ((n1 > n2)); then + local tmp=$n1 + n1=$n2 + n2=$tmp + + tmp="$locus1" + locus1="$locus2" + locus2="$tmp" + fi + + # Now 0 <= n1 <= n2. + ${CFG}_entries_add "$locus2" temp 5 112 + check_err $? "Couldn't add 5 entries" + + n2=$(bridge_port_vlan_ngroups_get "$locus2") + # Now 0 <= n1 < n2-1. + + # Setting locus1'maxgroups to n2-1 should pass. The number is + # smaller than both the absolute number of MDB entries, and in + # particular than number of locus2's number of entries, but it is + # large enough to cover locus1's entries. Thus we check that + # individual VLAN's ngroups are independent. + bridge_port_vlan_maxgroups_set "$locus1" $((n2-1)) + check_err $? "Setting ${locus1}'s maxgroups to $((n2-1)) failed" + + msg=$(${CFG}_entries_add "$locus1" temp $n2 111 2>&1) + check_fail $? "$locus1: Adding $n2 MDB entries passed, but should have failed" + bridge_maxgroups_errmsg_check_${CFG} "$msg" + + bridge_port_maxgroups_set "$locus0" $((n1 + n2 + 2)) + check_err $? "$locus0: Couldn't set maximum" + + msg=$(${CFG}_entries_add "$locus1" temp 5 111 2>&1) + check_fail $? "$locus1: Adding 5 MDB entries passed, but should have failed" + bridge_maxgroups_errmsg_check_${CFG} "$msg" + + # IGMP/MLD packets can cause several entries to be added, before + # the maximum is hit and the rest is then bounced. Remove what was + # committed, if anything. + ${CFG}_entries_del "$locus1" temp 5 111 2>/dev/null + + ${CFG}_entries_add "$locus1" temp 2 111 + check_err $? "$locus1: Adding 2 MDB entries failed, but should have passed" + + ${CFG}_entries_del "$locus1" temp 2 111 + check_err $? "Couldn't delete MDB entries" + + ${CFG}_entries_del "$locus2" temp 5 112 + check_err $? "Couldn't delete MDB entries" + + bridge_port_vlan_maxgroups_set "$locus1" 0 + check_err $? "$locus1: Couldn't set maximum to 0" + + bridge_port_maxgroups_set "$locus0" 0 + check_err $? "$locus0: Couldn't set maximum to 0" + + log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN ngroups" +} + +test_8021qvs_maxgroups_too_many_cross_vlan_cfg4() +{ + test_maxgroups_too_many_cross_vlan cfg4 +} + +test_8021qvs_maxgroups_too_many_cross_vlan_ctl4() +{ + test_maxgroups_too_many_cross_vlan ctl4 +} + +test_8021qvs_maxgroups_too_many_cross_vlan_cfg6() +{ + test_maxgroups_too_many_cross_vlan cfg6 +} + +test_8021qvs_maxgroups_too_many_cross_vlan_ctl6() +{ + test_maxgroups_too_many_cross_vlan ctl6 +} + +test_vlan_attributes() +{ + local locus=$1; shift + local expect=$1; shift + + RET=0 + + local max=$(bridge_port_vlan_maxgroups_get "$locus") + local n=$(bridge_port_vlan_ngroups_get "$locus") + + eval "[[ $max $expect ]]" + check_err $? "$locus: maxgroups attribute expected to be $expect, but was $max" + + eval "[[ $n $expect ]]" + check_err $? "$locus: ngroups attribute expected to be $expect, but was $n" + + log_test "port_vlan: presence of ngroups and maxgroups attributes" +} + +test_8021q_vlan_attributes() +{ + test_vlan_attributes "dev $swp1 vid 10" "== null" +} + +test_8021qvs_vlan_attributes() +{ + test_vlan_attributes "dev $swp1 vid 10" "-ge 0" +} + +test_toggle_vlan_snooping() +{ + local mode=$1; shift + + RET=0 + + local CFG=cfg4 + local context=port_vlan + local locus="dev $swp1 vid 10" + + ${CFG}_entries_add "$locus" $mode 5 + check_err $? "Couldn't add MDB entries" + + bridge_${context}_maxgroups_set "$locus" 100 + check_err $? "Failed to set max to 100" + + ip link set dev br0 type bridge mcast_vlan_snooping 0 + sleep 1 + ip link set dev br0 type bridge mcast_vlan_snooping 1 + + local n=$(bridge_${context}_ngroups_get "$locus") + local nn=$(bridge mdb show dev br0 | grep $swp1 | wc -l) + ((nn == n)) + check_err $? "mcast_n_groups expected to be $nn, but $n reported" + + local max=$(bridge_${context}_maxgroups_get "$locus") + ((max == 100)) + check_err $? "Max groups expected to be 100 but $max reported" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "Failed to set max to 0" + + log_test "$CFG: $context: $mode: mcast_vlan_snooping toggle" +} + +test_toggle_vlan_snooping_temp() +{ + test_toggle_vlan_snooping temp +} + +test_toggle_vlan_snooping_permanent() +{ + test_toggle_vlan_snooping permanent +} + +# ngroup test suites + +test_8021d_ngroups_cfg4() +{ + test_8021d_ngroups_reporting_cfg4 +} + +test_8021d_ngroups_ctl4() +{ + test_8021d_ngroups_reporting_ctl4 +} + +test_8021d_ngroups_cfg6() +{ + test_8021d_ngroups_reporting_cfg6 +} + +test_8021d_ngroups_ctl6() +{ + test_8021d_ngroups_reporting_ctl6 +} + +test_8021q_ngroups_cfg4() +{ + test_8021q_ngroups_reporting_cfg4 +} + +test_8021q_ngroups_ctl4() +{ + test_8021q_ngroups_reporting_ctl4 +} + +test_8021q_ngroups_cfg6() +{ + test_8021q_ngroups_reporting_cfg6 +} + +test_8021q_ngroups_ctl6() +{ + test_8021q_ngroups_reporting_ctl6 +} + +test_8021qvs_ngroups_cfg4() +{ + test_8021qvs_ngroups_reporting_cfg4 + test_8021qvs_ngroups_cross_vlan_cfg4 +} + +test_8021qvs_ngroups_ctl4() +{ + test_8021qvs_ngroups_reporting_ctl4 + test_8021qvs_ngroups_cross_vlan_ctl4 +} + +test_8021qvs_ngroups_cfg6() +{ + test_8021qvs_ngroups_reporting_cfg6 + test_8021qvs_ngroups_cross_vlan_cfg6 +} + +test_8021qvs_ngroups_ctl6() +{ + test_8021qvs_ngroups_reporting_ctl6 + test_8021qvs_ngroups_cross_vlan_ctl6 +} + +# maxgroups test suites + +test_8021d_maxgroups_cfg4() +{ + test_8021d_maxgroups_zero_cfg4 + test_8021d_maxgroups_too_low_cfg4 + test_8021d_maxgroups_too_many_entries_cfg4 +} + +test_8021d_maxgroups_ctl4() +{ + test_8021d_maxgroups_zero_ctl4 + test_8021d_maxgroups_too_low_ctl4 + test_8021d_maxgroups_too_many_entries_ctl4 +} + +test_8021d_maxgroups_cfg6() +{ + test_8021d_maxgroups_zero_cfg6 + test_8021d_maxgroups_too_low_cfg6 + test_8021d_maxgroups_too_many_entries_cfg6 +} + +test_8021d_maxgroups_ctl6() +{ + test_8021d_maxgroups_zero_ctl6 + test_8021d_maxgroups_too_low_ctl6 + test_8021d_maxgroups_too_many_entries_ctl6 +} + +test_8021q_maxgroups_cfg4() +{ + test_8021q_maxgroups_zero_cfg4 + test_8021q_maxgroups_too_low_cfg4 + test_8021q_maxgroups_too_many_entries_cfg4 +} + +test_8021q_maxgroups_ctl4() +{ + test_8021q_maxgroups_zero_ctl4 + test_8021q_maxgroups_too_low_ctl4 + test_8021q_maxgroups_too_many_entries_ctl4 +} + +test_8021q_maxgroups_cfg6() +{ + test_8021q_maxgroups_zero_cfg6 + test_8021q_maxgroups_too_low_cfg6 + test_8021q_maxgroups_too_many_entries_cfg6 +} + +test_8021q_maxgroups_ctl6() +{ + test_8021q_maxgroups_zero_ctl6 + test_8021q_maxgroups_too_low_ctl6 + test_8021q_maxgroups_too_many_entries_ctl6 +} + +test_8021qvs_maxgroups_cfg4() +{ + test_8021qvs_maxgroups_zero_cfg4 + test_8021qvs_maxgroups_zero_cross_vlan_cfg4 + test_8021qvs_maxgroups_too_low_cfg4 + test_8021qvs_maxgroups_too_many_entries_cfg4 + test_8021qvs_maxgroups_too_many_cross_vlan_cfg4 +} + +test_8021qvs_maxgroups_ctl4() +{ + test_8021qvs_maxgroups_zero_ctl4 + test_8021qvs_maxgroups_zero_cross_vlan_ctl4 + test_8021qvs_maxgroups_too_low_ctl4 + test_8021qvs_maxgroups_too_many_entries_ctl4 + test_8021qvs_maxgroups_too_many_cross_vlan_ctl4 +} + +test_8021qvs_maxgroups_cfg6() +{ + test_8021qvs_maxgroups_zero_cfg6 + test_8021qvs_maxgroups_zero_cross_vlan_cfg6 + test_8021qvs_maxgroups_too_low_cfg6 + test_8021qvs_maxgroups_too_many_entries_cfg6 + test_8021qvs_maxgroups_too_many_cross_vlan_cfg6 +} + +test_8021qvs_maxgroups_ctl6() +{ + test_8021qvs_maxgroups_zero_ctl6 + test_8021qvs_maxgroups_zero_cross_vlan_ctl6 + test_8021qvs_maxgroups_too_low_ctl6 + test_8021qvs_maxgroups_too_many_entries_ctl6 + test_8021qvs_maxgroups_too_many_cross_vlan_ctl6 +} + +# other test suites + +test_8021qvs_toggle_vlan_snooping() +{ + test_toggle_vlan_snooping_temp + test_toggle_vlan_snooping_permanent +} + +# test groups + +test_8021d() +{ + # Tests for vlan_filtering 0 mcast_vlan_snooping 0. + + switch_create_8021d + setup_wait + + test_8021d_ngroups_cfg4 + test_8021d_ngroups_ctl4 + test_8021d_ngroups_cfg6 + test_8021d_ngroups_ctl6 + test_8021d_maxgroups_cfg4 + test_8021d_maxgroups_ctl4 + test_8021d_maxgroups_cfg6 + test_8021d_maxgroups_ctl6 + + switch_destroy +} + +test_8021q() +{ + # Tests for vlan_filtering 1 mcast_vlan_snooping 0. + + switch_create_8021q + setup_wait + + test_8021q_vlan_attributes + test_8021q_ngroups_cfg4 + test_8021q_ngroups_ctl4 + test_8021q_ngroups_cfg6 + test_8021q_ngroups_ctl6 + test_8021q_maxgroups_cfg4 + test_8021q_maxgroups_ctl4 + test_8021q_maxgroups_cfg6 + test_8021q_maxgroups_ctl6 + + switch_destroy +} + +test_8021qvs() +{ + # Tests for vlan_filtering 1 mcast_vlan_snooping 1. + + switch_create_8021qvs + setup_wait + + test_8021qvs_vlan_attributes + test_8021qvs_ngroups_cfg4 + test_8021qvs_ngroups_ctl4 + test_8021qvs_ngroups_cfg6 + test_8021qvs_ngroups_ctl6 + test_8021qvs_maxgroups_cfg4 + test_8021qvs_maxgroups_ctl4 + test_8021qvs_maxgroups_cfg6 + test_8021qvs_maxgroups_ctl6 + test_8021qvs_toggle_vlan_snooping + + switch_destroy +} + +trap cleanup EXIT + +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index 8748d1b1d95b..72dfbeaf56b9 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -59,6 +59,9 @@ switch_create() switch_destroy() { + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp1 down diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 601990c6881b..f1de525cfa55 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -503,25 +503,30 @@ devlink_trap_drop_cleanup() tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } -devlink_trap_stats_test() +devlink_trap_stats_check() { - local test_name=$1; shift local trap_name=$1; shift local send_one="$@" local t0_packets local t1_packets - RET=0 - t0_packets=$(devlink_trap_rx_packets_get $trap_name) $send_one && sleep 1 t1_packets=$(devlink_trap_rx_packets_get $trap_name) - if [[ $t1_packets -eq $t0_packets ]]; then - check_err 1 "Trap stats did not increase" - fi + [[ $t1_packets -ne $t0_packets ]] +} + +devlink_trap_stats_test() +{ + local test_name=$1; shift + + RET=0 + + devlink_trap_stats_check "$@" + check_err $? "Trap stats did not increase" log_test "$test_name" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 3ffb9d6c0950..d47499ba81c7 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -137,6 +137,14 @@ check_locked_port_support() fi } +check_port_mab_support() +{ + if ! bridge -d link show | grep -q "mab"; then + echo "SKIP: iproute2 too old; MacAuth feature not supported." + return $ksft_skip + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit $ksft_skip @@ -516,27 +524,6 @@ cmd_jq() [ ! -z "$output" ] } -lldpad_app_wait_set() -{ - local dev=$1; shift - - while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do - echo "$dev: waiting for lldpad to push pending APP updates" - sleep 5 - done -} - -lldpad_app_wait_del() -{ - # Give lldpad a chance to push down the changes. If the device is downed - # too soon, the updates will be left pending. However, they will have - # been struck off the lldpad's DB already, so we won't be able to tell - # they are pending. Then on next test iteration this would cause - # weirdness as newly-added APP rules conflict with the old ones, - # sometimes getting stuck in an "unknown" state. - sleep 5 -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then @@ -906,14 +893,14 @@ sysctl_set() local value=$1; shift SYSCTL_ORIG[$key]=$(sysctl -n $key) - sysctl -qw $key=$value + sysctl -qw $key="$value" } sysctl_restore() { local key=$1; shift - sysctl -qw $key=${SYSCTL_ORIG["$key"]} + sysctl -qw $key="${SYSCTL_ORIG[$key]}" } forwarding_enable() @@ -1684,3 +1671,219 @@ hw_stats_monitor_test() log_test "${type}_stats notifications" } + +ipv4_to_bytes() +{ + local IP=$1; shift + + printf '%02x:' ${IP//./ } | + sed 's/:$//' +} + +# Convert a given IPv6 address, `IP' such that the :: token, if present, is +# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal +# digits. An optional `BYTESEP' parameter can be given to further separate +# individual bytes of each 16-bit group. +expand_ipv6() +{ + local IP=$1; shift + local bytesep=$1; shift + + local cvt_ip=${IP/::/_} + local colons=${cvt_ip//[^:]/} + local allcol=::::::: + # IP where :: -> the appropriate number of colons: + local allcol_ip=${cvt_ip/_/${allcol:${#colons}}} + + echo $allcol_ip | tr : '\n' | + sed s/^/0000/ | + sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' | + tr '\n' : | + sed 's/:$//' +} + +ipv6_to_bytes() +{ + local IP=$1; shift + + expand_ipv6 "$IP" : +} + +u16_to_bytes() +{ + local u16=$1; shift + + printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/' +} + +# Given a mausezahn-formatted payload (colon-separated bytes given as %02x), +# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be, +# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any) +# stands for 00:00. +payload_template_calc_checksum() +{ + local payload=$1; shift + + ( + # Set input radix. + echo "16i" + # Push zero for the initial checksum. + echo 0 + + # Pad the payload with a terminating 00: in case we get an odd + # number of bytes. + echo "${payload%:}:00:" | + sed 's/CHECKSUM/00:00/g' | + tr '[:lower:]' '[:upper:]' | + # Add the word to the checksum. + sed 's/\(..\):\(..\):/\1\2+\n/g' | + # Strip the extra odd byte we pushed if left unconverted. + sed 's/\(..\):$//' + + echo "10000 ~ +" # Calculate and add carry. + echo "FFFF r - p" # Bit-flip and print. + ) | + dc | + tr '[:upper:]' '[:lower:]' +} + +payload_template_expand_checksum() +{ + local payload=$1; shift + local checksum=$1; shift + + local ckbytes=$(u16_to_bytes $checksum) + + echo "$payload" | sed "s/CHECKSUM/$ckbytes/g" +} + +payload_template_nbytes() +{ + local payload=$1; shift + + payload_template_expand_checksum "${payload%:}" 0 | + sed 's/:/\n/g' | wc -l +} + +igmpv3_is_in_get() +{ + local GRP=$1; shift + local sources=("$@") + + local igmpv3 + local nsources=$(u16_to_bytes ${#sources[@]}) + + # IS_IN ( $sources ) + igmpv3=$(: + )"22:"$( : Type - Membership Report + )"00:"$( : Reserved + )"CHECKSUM:"$( : Checksum + )"00:00:"$( : Reserved + )"00:01:"$( : Number of Group Records + )"01:"$( : Record Type - IS_IN + )"00:"$( : Aux Data Len + )"${nsources}:"$( : Number of Sources + )"$(ipv4_to_bytes $GRP):"$( : Multicast Address + )"$(for src in "${sources[@]}"; do + ipv4_to_bytes $src + echo -n : + done)"$( : Source Addresses + ) + local checksum=$(payload_template_calc_checksum "$igmpv3") + + payload_template_expand_checksum "$igmpv3" $checksum +} + +igmpv2_leave_get() +{ + local GRP=$1; shift + + local payload=$(: + )"17:"$( : Type - Leave Group + )"00:"$( : Max Resp Time - not meaningful + )"CHECKSUM:"$( : Checksum + )"$(ipv4_to_bytes $GRP)"$( : Group Address + ) + local checksum=$(payload_template_calc_checksum "$payload") + + payload_template_expand_checksum "$payload" $checksum +} + +mldv2_is_in_get() +{ + local SIP=$1; shift + local GRP=$1; shift + local sources=("$@") + + local hbh + local icmpv6 + local nsources=$(u16_to_bytes ${#sources[@]}) + + hbh=$(: + )"3a:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + ) + + icmpv6=$(: + )"8f:"$( : Type - MLDv2 Report + )"00:"$( : Code + )"CHECKSUM:"$( : Checksum + )"00:00:"$( : Reserved + )"00:01:"$( : Number of Group Records + )"01:"$( : Record Type - IS_IN + )"00:"$( : Aux Data Len + )"${nsources}:"$( : Number of Sources + )"$(ipv6_to_bytes $GRP):"$( : Multicast address + )"$(for src in "${sources[@]}"; do + ipv6_to_bytes $src + echo -n : + done)"$( : Source Addresses + ) + + local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6)) + local sudohdr=$(: + )"$(ipv6_to_bytes $SIP):"$( : SIP + )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address + )"${len}:"$( : Upper-layer length + )"00:3a:"$( : Zero and next-header + ) + local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6}) + + payload_template_expand_checksum "$hbh$icmpv6" $checksum +} + +mldv1_done_get() +{ + local SIP=$1; shift + local GRP=$1; shift + + local hbh + local icmpv6 + + hbh=$(: + )"3a:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + ) + + icmpv6=$(: + )"84:"$( : Type - MLDv1 Done + )"00:"$( : Code + )"CHECKSUM:"$( : Checksum + )"00:00:"$( : Max Resp Delay - not meaningful + )"00:00:"$( : Reserved + )"$(ipv6_to_bytes $GRP):"$( : Multicast address + ) + + local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6)) + local sudohdr=$(: + )"$(ipv6_to_bytes $SIP):"$( : SIP + )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address + )"${len}:"$( : Upper-layer length + )"00:3a:"$( : Zero and next-header + ) + local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6}) + + payload_template_expand_checksum "$hbh$icmpv6" $checksum +} diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 1e0a62f638fe..a96cff8e7219 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -3,7 +3,8 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ - gact_trap_test mirred_egress_to_ingress_test" + gact_trap_test mirred_egress_to_ingress_test \ + mirred_egress_to_ingress_tcp_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -198,6 +199,52 @@ mirred_egress_to_ingress_test() log_test "mirred_egress_to_ingress ($tcflags)" } +mirred_egress_to_ingress_tcp_test() +{ + mirred_e2i_tf1=$(mktemp) mirred_e2i_tf2=$(mktemp) + + RET=0 + dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$mirred_e2i_tf1 + tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ + $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \ + action ct commit nat src addr 192.0.2.2 pipe \ + action ct clear pipe \ + action ct commit nat dst addr 192.0.2.1 pipe \ + action ct clear pipe \ + action skbedit ptype host pipe \ + action mirred ingress redirect dev $h1 + tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \ + $tcflags ip_proto icmp \ + action mirred ingress redirect dev $h1 + tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \ + ip_proto icmp \ + action drop + + ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & + local rpid=$! + ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 + wait -n $rpid + cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 + check_err $? "server output check failed" + + $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \ + -t icmp "ping,id=42,seq=5" -q + tc_check_packets "dev $h1 egress" 101 10 + check_err $? "didn't mirred redirect ICMP" + tc_check_packets "dev $h1 ingress" 102 10 + check_err $? "didn't drop mirred ICMP" + local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) + test ${overlimits} = 10 + check_err $? "wrong overlimits, expected 10 got ${overlimits}" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower + tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower + + rm -f $mirred_e2i_tf1 $mirred_e2i_tf2 + log_test "mirred_egress_to_ingress_tcp ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} @@ -223,6 +270,8 @@ setup_prepare() cleanup() { + local tf + pre_cleanup switch_destroy @@ -233,6 +282,8 @@ cleanup() ip link set $swp2 address $swp2origmac ip link set $swp1 address $swp1origmac + + for tf in $mirred_e2i_tf1 $mirred_e2i_tf2; do rm -f $tf; done } mirred_egress_redirect_test() diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile new file mode 100644 index 000000000000..92c1d9d080cd --- /dev/null +++ b/tools/testing/selftests/net/hsr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +TEST_PROGS := hsr_ping.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config new file mode 100644 index 000000000000..22061204fb69 --- /dev/null +++ b/tools/testing/selftests/net/hsr/config @@ -0,0 +1,4 @@ +CONFIG_IPV6=y +CONFIG_NET_SCH_NETEM=m +CONFIG_HSR=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh new file mode 100755 index 000000000000..df9143538708 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +ksft_skip=4 +ipv6=true + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage $0 + exit 1 + ;; +esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" + +cleanup() +{ + local netns + for netns in "$ns1" "$ns2" "$ns3" ;do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +echo "INFO: preparing interfaces." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# +# Interfaces +ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" +ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" +ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + +# HSRv0. +ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 +ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 +ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + +# IP for HSR +ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 +ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad +ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 +ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad +ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 +ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + +# All Links up +ip -net "$ns1" link set ns1eth1 up +ip -net "$ns1" link set ns1eth2 up +ip -net "$ns1" link set hsr1 up + +ip -net "$ns2" link set ns2eth1 up +ip -net "$ns2" link set ns2eth2 up +ip -net "$ns2" link set hsr2 up + +ip -net "$ns3" link set ns3eth1 up +ip -net "$ns3" link set ns3eth2 up +ip -net "$ns3" link set hsr3 up + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + return 0 +} + +do_ping_long() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 10" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + VAL="$(echo $OUT | cut -d' ' -f1-8)" + if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ] + then + echo "$netns -> $connect_addr ping TEST [ FAIL ]" + echo "Expect to send and receive 10 packets and no duplicates." + echo "Full message: ${OUT}." + ret=1 + return 1 + fi + + return 0 +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + exit ${ret} + fi +} + + +echo "INFO: Initial validation ping." +# Each node has to be able each one. +do_ping "$ns1" 100.64.0.2 +do_ping "$ns2" 100.64.0.1 +do_ping "$ns3" 100.64.0.1 +stop_if_error "Initial validation failed." + +do_ping "$ns1" 100.64.0.3 +do_ping "$ns2" 100.64.0.3 +do_ping "$ns3" 100.64.0.2 + +do_ping "$ns1" dead:beef:1::2 +do_ping "$ns1" dead:beef:1::3 +do_ping "$ns2" dead:beef:1::1 +do_ping "$ns2" dead:beef:1::2 +do_ping "$ns3" dead:beef:1::1 +do_ping "$ns3" dead:beef:1::2 + +stop_if_error "Initial validation failed." + +# Wait until supervisor all supervision frames have been processed and the node +# entries have been merged. Otherwise duplicate frames will be observed which is +# valid at this stage. +WAIT=5 +while [ ${WAIT} -gt 0 ] +do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let WAIT = WAIT - 1 +done + +# Just a safety delay in case the above check didn't handle it. +sleep 1 + +echo "INFO: Longer ping test." +do_ping_long "$ns1" 100.64.0.2 +do_ping_long "$ns1" dead:beef:1::2 +do_ping_long "$ns1" 100.64.0.3 +do_ping_long "$ns1" dead:beef:1::3 + +stop_if_error "Longer ping test failed." + +do_ping_long "$ns2" 100.64.0.1 +do_ping_long "$ns2" dead:beef:1::1 +do_ping_long "$ns2" 100.64.0.3 +do_ping_long "$ns2" dead:beef:1::2 +stop_if_error "Longer ping test failed." + +do_ping_long "$ns3" 100.64.0.1 +do_ping_long "$ns3" dead:beef:1::1 +do_ping_long "$ns3" 100.64.0.2 +do_ping_long "$ns3" dead:beef:1::2 +stop_if_error "Longer ping test failed." + +echo "INFO: Cutting one link." +do_ping_long "$ns1" 100.64.0.3 & + +sleep 3 +ip -net "$ns3" link set ns3eth1 down +wait + +ip -net "$ns3" link set ns3eth1 up + +stop_if_error "Failed with one link down." + +echo "INFO: Delay the link and drop a few packages." +tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms +tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + +do_ping_long "$ns1" 100.64.0.2 +do_ping_long "$ns1" 100.64.0.3 + +stop_if_error "Failed with delay and packetloss." + +do_ping_long "$ns2" 100.64.0.1 +do_ping_long "$ns2" 100.64.0.3 + +stop_if_error "Failed with delay and packetloss." + +do_ping_long "$ns3" 100.64.0.1 +do_ping_long "$ns3" 100.64.0.2 +stop_if_error "Failed with delay and packetloss." + +echo "INFO: All good." +exit $ret diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c new file mode 100644 index 000000000000..75e3fdacdf73 --- /dev/null +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2023 Cloudflare + +/* Test IP_LOCAL_PORT_RANGE socket option: IPv4 + IPv6, TCP + UDP. + * + * Tests assume that net.ipv4.ip_local_port_range is [40000, 49999]. + * Don't run these directly but with ip_local_port_range.sh script. + */ + +#include <fcntl.h> +#include <netinet/ip.h> + +#include "../kselftest_harness.h" + +#ifndef IP_LOCAL_PORT_RANGE +#define IP_LOCAL_PORT_RANGE 51 +#endif + +static __u32 pack_port_range(__u16 lo, __u16 hi) +{ + return (hi << 16) | (lo << 0); +} + +static void unpack_port_range(__u32 range, __u16 *lo, __u16 *hi) +{ + *lo = range & 0xffff; + *hi = range >> 16; +} + +static int get_so_domain(int fd) +{ + int domain, err; + socklen_t len; + + len = sizeof(domain); + err = getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len); + if (err) + return -1; + + return domain; +} + +static int bind_to_loopback_any_port(int fd) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + + memset(&addr, 0, sizeof(addr)); + switch (get_so_domain(fd)) { + case AF_INET: + addr.v4.sin_family = AF_INET; + addr.v4.sin_port = htons(0); + addr.v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_len = sizeof(addr.v4); + break; + case AF_INET6: + addr.v6.sin6_family = AF_INET6; + addr.v6.sin6_port = htons(0); + addr.v6.sin6_addr = in6addr_loopback; + addr_len = sizeof(addr.v6); + break; + default: + return -1; + } + + return bind(fd, &addr.sa, addr_len); +} + +static int get_sock_port(int fd) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + int err; + + addr_len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + err = getsockname(fd, &addr.sa, &addr_len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static int get_ip_local_port_range(int fd, __u32 *range) +{ + socklen_t len; + __u32 val; + int err; + + len = sizeof(val); + err = getsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val, &len); + if (err) + return -1; + + *range = val; + return 0; +} + +FIXTURE(ip_local_port_range) {}; + +FIXTURE_SETUP(ip_local_port_range) +{ +} + +FIXTURE_TEARDOWN(ip_local_port_range) +{ +} + +FIXTURE_VARIANT(ip_local_port_range) { + int so_domain; + int so_type; + int so_protocol; +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_tcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_udp) { + .so_domain = AF_INET, + .so_type = SOCK_DGRAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_SCTP, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_udp) { + .so_domain = AF_INET6, + .so_type = SOCK_DGRAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_SCTP, +}; + +TEST_F(ip_local_port_range, invalid_option_value) +{ + __u16 val16; + __u32 val32; + __u64 val64; + int fd, err; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + /* Too few bytes */ + val16 = 40000; + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val16, sizeof(val16)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + /* Empty range: low port > high port */ + val32 = pack_port_range(40222, 40111); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val32, sizeof(val32)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + /* Too many bytes */ + val64 = pack_port_range(40333, 40444); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val64, sizeof(val64)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_F(ip_local_port_range, port_range_out_of_netns_range) +{ + const struct test { + __u16 range_lo; + __u16 range_hi; + } tests[] = { + { 30000, 39999 }, /* socket range below netns range */ + { 50000, 59999 }, /* socket range above netns range */ + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + /* Bind a couple of sockets, not just one, to check + * that the range wasn't clamped to a single port from + * the netns range. That is [40000, 40000] or [49999, + * 49999], respectively for each test case. + */ + int fds[2], i; + + TH_LOG("lo %5hu, hi %5hu", t->range_lo, t->range_hi); + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + int fd, err, port; + __u32 range; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("#%d: socket failed", i); + + range = pack_port_range(t->range_lo, t->range_hi); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("#%d: setsockopt(IP_LOCAL_PORT_RANGE) failed", i); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("#%d: bind failed", i); + + /* Check that socket port range outside of ephemeral range is ignored */ + port = get_sock_port(fd); + ASSERT_GE(port, 40000) TH_LOG("#%d: expected port within netns range", i); + ASSERT_LE(port, 49999) TH_LOG("#%d: expected port within netns range", i); + + fds[i] = fd; + } + + for (i = 0; i < ARRAY_SIZE(fds); i++) + ASSERT_TRUE(close(fds[i]) == 0) TH_LOG("#%d: close failed", i); + } +} + +TEST_F(ip_local_port_range, single_port_range) +{ + const struct test { + __u16 range_lo; + __u16 range_hi; + __u16 expected; + } tests[] = { + /* single port range within ephemeral range */ + { 45000, 45000, 45000 }, + /* first port in the ephemeral range (clamp from above) */ + { 0, 40000, 40000 }, + /* last port in the ephemeral range (clamp from below) */ + { 49999, 0, 49999 }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + int fd, err, port; + __u32 range; + + TH_LOG("lo %5hu, hi %5hu, expected %5hu", + t->range_lo, t->range_hi, t->expected); + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(t->range_lo, t->range_hi); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_EQ(port, t->expected) TH_LOG("unexpected local port"); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); + } +} + +TEST_F(ip_local_port_range, exhaust_8_port_range) +{ + __u8 port_set = 0; + int i, fd, err; + __u32 range; + __u16 port; + int fds[8]; + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40000, 40007); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_GE(port, 40000) TH_LOG("expected port within sockopt range"); + ASSERT_LE(port, 40007) TH_LOG("expected port within sockopt range"); + + port_set |= 1 << (port - 40000); + fds[i] = fd; + } + + /* Check that all every port from the test range is in use */ + ASSERT_EQ(port_set, 0xff) TH_LOG("expected all ports to be busy"); + + /* Check that bind() fails because the whole range is busy */ + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40000, 40007); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(err) TH_LOG("expected bind to fail"); + ASSERT_EQ(errno, EADDRINUSE); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + err = close(fds[i]); + ASSERT_TRUE(!err) TH_LOG("close failed"); + } +} + +TEST_F(ip_local_port_range, late_bind) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + const int one = 1; + int fd, err; + __u32 range; + __u16 port; + + if (variant->so_protocol == IPPROTO_SCTP) + SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT"); + + fd = socket(variant->so_domain, variant->so_type, 0); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40100, 40199); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_BIND_ADDRESS_NO_PORT) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_EQ(port, 0) TH_LOG("getsockname failed"); + + /* Invalid destination */ + memset(&addr, 0, sizeof(addr)); + switch (variant->so_domain) { + case AF_INET: + addr.v4.sin_family = AF_INET; + addr.v4.sin_port = htons(0); + addr.v4.sin_addr.s_addr = htonl(INADDR_ANY); + addr_len = sizeof(addr.v4); + break; + case AF_INET6: + addr.v6.sin6_family = AF_INET6; + addr.v6.sin6_port = htons(0); + addr.v6.sin6_addr = in6addr_any; + addr_len = sizeof(addr.v6); + break; + default: + ASSERT_TRUE(false) TH_LOG("unsupported socket domain"); + } + + /* connect() doesn't need to succeed for late bind to happen */ + connect(fd, &addr.sa, addr_len); + + port = get_sock_port(fd); + ASSERT_GE(port, 40100); + ASSERT_LE(port, 40199); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_F(ip_local_port_range, get_port_range) +{ + __u16 lo, hi; + __u32 range; + int fd, err; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + /* Get range before it will be set */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 0) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 0) TH_LOG("unexpected high port"); + + range = pack_port_range(12345, 54321); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + /* Get range after it has been set */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 12345) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 54321) TH_LOG("unexpected high port"); + + /* Unset the port range */ + range = pack_port_range(0, 0); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + /* Get range after it has been unset */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 0) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 0) TH_LOG("unexpected high port"); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh new file mode 100755 index 000000000000..6c6ad346eaa0 --- /dev/null +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,19 +12,27 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +69,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +101,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +135,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +208,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +232,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +249,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +270,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +295,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +312,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,15 +333,17 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -351,11 +379,35 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -385,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 515859a5168b..ef628b16fe9b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 @@ -16,6 +17,11 @@ flush_pids() sleep 1.1 ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null + + for _ in $(seq 10); do + [ -z "$(ip netns pids "${ns}")" ] && break + sleep 0.1 + done } cleanup() @@ -36,15 +42,20 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +get_msk_inuse() +{ + ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}' +} + __chk_nr() { - local condition="$1" + local command="$1" local expected=$2 local msg nr shift 2 msg=$* - nr=$(ss -inmHMN $ns | $condition) + nr=$(eval $command) printf "%-50s" "$msg" if [ $nr != $expected ]; then @@ -56,9 +67,17 @@ __chk_nr() test_cnt=$((test_cnt+1)) } +__chk_msk_nr() +{ + local condition=$1 + shift 1 + + __chk_nr "ss -inmHMN $ns | $condition" $* +} + chk_msk_nr() { - __chk_nr "grep -c token:" $* + __chk_msk_nr "grep -c token:" $* } wait_msk_nr() @@ -96,12 +115,12 @@ wait_msk_nr() chk_msk_fallback_nr() { - __chk_nr "grep -c fallback" $* + __chk_msk_nr "grep -c fallback" $* } chk_msk_remote_key_nr() { - __chk_nr "grep -c remote_key" $* + __chk_msk_nr "grep -c remote_key" $* } __chk_listen() @@ -141,6 +160,26 @@ chk_msk_listen() nr=$(ss -Ml $filter | wc -l) } +chk_msk_inuse() +{ + local expected=$1 + local listen_nr + + shift 1 + + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) + expected=$((expected + listen_nr)) + + for _ in $(seq 10); do + if [ $(get_msk_inuse) -eq $expected ];then + break + fi + sleep 0.1 + done + + __chk_nr get_msk_inuse $expected $* +} + # $1: ns, $2: port wait_local_port_listen() { @@ -194,8 +233,10 @@ wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" +chk_msk_inuse 2 "....chk 2 msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" echo "a" | \ timeout ${timeout_test} \ @@ -210,8 +251,11 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" +chk_msk_inuse 1 "....chk 1 msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" + NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ @@ -231,6 +275,9 @@ for I in `seq 1 $NR_CLIENTS`; do done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" + exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index e54653ea2ed4..b25a31445ded 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -83,6 +83,7 @@ struct cfg_cmsg_types { struct cfg_sockopt_types { unsigned int transparent:1; + unsigned int mptfo:1; }; struct tcp_inq_state { @@ -90,6 +91,13 @@ struct tcp_inq_state { bool expect_eof; }; +struct wstate { + char buf[8192]; + unsigned int len; + unsigned int off; + unsigned int total_len; +}; + static struct tcp_inq_state tcp_inq; static struct cfg_cmsg_types cfg_cmsg_types; @@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf) } } +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) + perror("TCP_FASTOPEN"); +} + static int do_ulp_so(int sock, const char *name) { return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); @@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr, if (cfg_sockopt_types.transparent) set_transparent(sock, pf); + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr, static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto, - struct addrinfo **peer) + struct addrinfo **peer, + int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + int syn_copied = 0; int sock = -1; hints.ai_family = pf; @@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr, if (cfg_mark) set_mark(sock, cfg_mark); - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { - *peer = a; - break; /* success */ + if (cfg_sockopt_types.mptfo) { + if (!winfo->total_len) + winfo->total_len = winfo->len = read(infd, winfo->buf, + sizeof(winfo->buf)); + + syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied >= 0) { + winfo->off = syn_copied; + winfo->len -= syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + perror("connect()"); + close(sock); + sock = -1; } - - perror("connect()"); - close(sock); - sock = -1; } freeaddrinfo(addr); @@ -571,14 +612,14 @@ static void shut_wr(int fd) shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; - char wbuf[8192]; + unsigned int total_wlen = 0, total_rlen = 0; set_nonblock(peerfd, true); @@ -586,7 +627,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after char rbuf[8192]; ssize_t len; - if (fds.events == 0) + if (fds.events == 0 || quit) break; switch (poll(&fds, 1, poll_timeout)) { @@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after } if (fds.revents & POLLOUT) { - if (wlen == 0) { - woff = 0; - wlen = read(infd, wbuf, sizeof(wbuf)); + if (winfo->len == 0) { + winfo->off = 0; + winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } - if (wlen > 0) { + if (winfo->len > 0) { ssize_t bw; /* limit the total amount of written data to the trunc value */ - if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) - wlen = cfg_truncate - total_wlen; + if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) + winfo->len = cfg_truncate - total_wlen; - bw = do_rnd_write(peerfd, wbuf + woff, wlen); + bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { if (cfg_rcv_trunc) return 0; @@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after return 111; } - woff += bw; - wlen -= bw; + winfo->off += bw; + winfo->len -= bw; total_wlen += bw; - } else if (wlen == 0) { + } else if (winfo->len == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; @@ -692,7 +733,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after } /* leave some time for late join/announce */ - if (cfg_remove) + if (cfg_remove && !quit) usleep(cfg_wait); return 0; @@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int spool_buf(int fd, struct wstate *winfo) +{ + while (winfo->len) { + int ret = write(fd, winfo->buf + winfo->off, winfo->len); + + if (ret < 0) { + perror("write"); + return 4; + } + winfo->off += ret; + winfo->len -= ret; + } + return 0; +} + +static int do_mmap(int infd, int outfd, unsigned int size, + struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); - ssize_t ret = 0, off = 0; + ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { @@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) return 1; } - rem = size; + ret = spool_buf(outfd, winfo); + if (ret < 0) + return ret; + + rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); @@ -772,8 +833,16 @@ static int get_infd_size(int fd) return (int)count; } -static int do_sendfile(int infd, int outfd, unsigned int count) +static int do_sendfile(int infd, int outfd, unsigned int count, + struct wstate *winfo) { + int ret = spool_buf(outfd, winfo); + + if (ret < 0) + return ret; + + count -= winfo->total_len; + while (count > 0) { ssize_t r; @@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, + struct wstate *winfo) { int err; @@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); if (err) return err; @@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; @@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); } else { - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; @@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } -static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; struct timespec start, end; @@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) switch (cfg_mode) { case CFG_MODE_POLL: - ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, + winfo); break; case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; default: @@ -999,6 +1072,7 @@ static void maybe_close(int fd) int main_loop_s(int listensock) { struct sockaddr_storage ss; + struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; @@ -1033,7 +1107,8 @@ again: SOCK_TEST_TCPULP(remotesock, 0); - copyfd_io(fd, remotesock, 1, true); + memset(&winfo, 0, sizeof(winfo)); + copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; @@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name) return; } + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); exit(1); } @@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen) int main_loop(void) { - int fd, ret, fd_in = 0; + int fd = 0, ret, fd_in = 0; struct addrinfo *peer; + struct wstate winfo; + + if (cfg_input && cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } - /* listener is ready. */ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); + memset(&winfo, 0, sizeof(winfo)); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; @@ -1186,14 +1273,13 @@ again: if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); - if (cfg_input) { + if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s:%d", cfg_input, errno); } - /* close the client socket open only if we are not going to reconnect */ - ret = copyfd_io(fd_in, fd, 1, 0); + ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) return ret; @@ -1210,6 +1296,7 @@ again: xerror("can't reconnect: %d", errno); if (cfg_input) close(fd_in); + memset(&winfo, 0, sizeof(winfo)); goto again; } else { close(fd); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 621af6895f4d..a43d3e2f59bb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -274,8 +274,7 @@ check_transfer() check_mptcp_disabled() { - local disabled_ns - disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" + local disabled_ns="ns_disabled-$rndh" ip netns add ${disabled_ns} || exit $ksft_skip # net.mptcp.enabled should be enabled by default @@ -762,9 +761,25 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +run_tests_mptfo() +{ + echo "INFO: with MPTFO start" + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 + + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 + echo "INFO: with MPTFO end" +} + run_tests_disconnect() { - local peekmode="$1" local old_cin=$cin local old_sin=$sin @@ -772,7 +787,6 @@ run_tests_disconnect() # force do_transfer to cope with the multiple tranmissions sin="$cin.disconnect" - sin_disconnect=$old_sin cin="$cin.disconnect" cin_disconnect="$old_cin" connect_per_transfer=3 @@ -783,7 +797,6 @@ run_tests_disconnect() # restore previous status sin=$old_sin - sin_disconnect="$cout".disconnect cin=$old_cin cin_disconnect="$cin".disconnect connect_per_transfer=1 @@ -901,6 +914,10 @@ run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" stop_if_error "Tests with peek mode have failed" +# MPTFO (MultiPath TCP Fatopen tests) +run_tests_mptfo +stop_if_error "Tests with MPTFO have failed" + # connect to ns4 ip address, ns2 should intercept/proxy run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent dead:beef:3::1 "tproxy ipv6" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2eeaf4aca644..42e3bd1a05f5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -26,6 +26,10 @@ ip_mptcp=0 check_invert=0 validate_checksum=0 init=0 +evts_ns1="" +evts_ns2="" +evts_ns1_pid=0 +evts_ns2_pid=0 declare -A all_tests declare -a only_tests_ids @@ -59,8 +63,9 @@ init_partial() { capout=$(mktemp) - local rndh - rndh=$(mktemp -u XXXXXX) + local sec rndh + sec=$(date +%s) + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -153,6 +158,8 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) trap cleanup EXIT @@ -164,6 +171,7 @@ cleanup() { rm -f "$cin" "$cout" "$sinfail" rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -rf $evts_ns1 $evts_ns2 cleanup_partial } @@ -319,6 +327,18 @@ reset_with_fail() index 100 || exit 1 } +reset_with_events() +{ + reset "${1}" || return 1 + + :> "$evts_ns1" + :> "$evts_ns2" + ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! +} + fail_test() { ret=1 @@ -472,6 +492,18 @@ kill_wait() wait $1 2>/dev/null } +kill_events_pids() +{ + kill_wait $evts_ns1_pid + kill_wait $evts_ns2_pid +} + +kill_tests_wait() +{ + kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) + wait +} + pm_nl_set_limits() { local ns=$1 @@ -672,10 +704,6 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid local userspace_pm=0 - local evts_ns1 - local evts_ns1_pid - local evts_ns2 - local evts_ns2_pid :> "$cout" :> "$sout" @@ -752,35 +780,17 @@ do_transfer() addr_nr_ns2=${addr_nr_ns2:9} fi - if [ $userspace_pm -eq 1 ]; then - evts_ns1=$(mktemp) - evts_ns2=$(mktemp) - :> "$evts_ns1" - :> "$evts_ns2" - ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & - evts_ns1_pid=$! - ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & - evts_ns2_pid=$! - fi - - local local_addr - if is_v6 "${connect_addr}"; then - local_addr="::" - else - local_addr="0.0.0.0" - fi - extra_srv_args="$extra_args $extra_srv_args" if [ "$test_link_fail" -gt 1 ];then timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args ${local_addr} < "$sinfail" > "$sout" & + $extra_srv_args "::" < "$sinfail" > "$sout" & else timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args ${local_addr} < "$sin" > "$sout" & + $extra_srv_args "::" < "$sin" > "$sout" & fi local spid=$! @@ -829,7 +839,8 @@ do_transfer() if [ $userspace_pm -eq 0 ]; then pm_nl_add_endpoint $ns1 $addr flags signal else - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id @@ -980,12 +991,6 @@ do_transfer() kill $cappid fi - if [ $userspace_pm -eq 1 ]; then - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid - rm -rf $evts_ns1 $evts_ns2 - fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -1688,6 +1693,7 @@ chk_subflow_nr() local subflow_nr=$3 local cnt1 local cnt2 + local dump_stats if [ -n "${need_title}" ]; then printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" @@ -1705,7 +1711,12 @@ chk_subflow_nr() echo "[ ok ]" fi - [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint ) + if [ "${dump_stats}" = 1 ]; then + ss -N $ns1 -tOni + ss -N $ns1 -tOni | grep token + ip -n $ns1 mptcp endpoint + dump_stats + fi } chk_link_usage() @@ -2442,6 +2453,47 @@ v4mapped_tests() fi } +mixed_tests() +{ + if reset "IPv4 sockets do not use IPv6 addresses"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + fi + + # Need an IPv6 mptcp socket to allow subflows of both families + if reset "simult IPv4 and IPv6 subflows"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + chk_join_nr 1 1 1 + fi + + # cross families subflows will not be created even in fullmesh mode + if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + chk_join_nr 1 1 1 + fi + + # fullmesh still tries to create all the possibly subflows with + # matching family + if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow + chk_join_nr 4 4 4 + fi +} + backup_tests() { # single subflow, backup @@ -2507,6 +2559,57 @@ backup_tests() fi } +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED + +AF_INET=2 +AF_INET6=10 + +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + if [ $type ] && [ $type = $e_type ] && + [ $family ] && [ $family = $e_family ] && + [ $saddr ] && [ $saddr = $e_saddr ] && + [ $sport ] && [ $sport = $e_sport ]; then + stdbuf -o0 -e0 printf "[ ok ]\n" + return 0 + fi + fail_test + stdbuf -o0 -e0 printf "[fail]\n" +} + add_addr_ports_tests() { # signal address with port @@ -2531,7 +2634,8 @@ add_addr_ports_tests() fi # single address with port, remove - if reset "remove single address with port"; then + # pm listener events + if reset_with_events "remove single address with port"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 @@ -2539,6 +2643,10 @@ add_addr_ports_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 1 chk_rm_nr 1 1 invert + + verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100 + kill_events_pids fi # subflow and signal with port, remove @@ -2959,22 +3067,24 @@ userspace_tests() fi # userspace pm add & remove address - if reset "userspace pm add & remove address"; then + if reset_with_events "userspace pm add & remove address"; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + kill_events_pids fi # userspace pm create destroy subflow - if reset "userspace pm create destroy subflow"; then + if reset_with_events "userspace pm create destroy subflow"; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow chk_join_nr 1 1 1 chk_rm_nr 0 1 + kill_events_pids fi } @@ -2985,7 +3095,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null & wait_mpj $ns1 pm_nl_check_endpoint 1 "creation" \ @@ -2998,14 +3108,14 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint 0 "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - wait + kill_tests_wait fi if reset "delete and re-add"; then pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & wait_mpj $ns2 pm_nl_del_endpoint $ns2 2 10.0.2.2 @@ -3015,7 +3125,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 - wait + kill_tests_wait fi } @@ -3056,6 +3166,7 @@ all_tests_sorted=( a@add_tests 6@ipv6_tests 4@v4mapped_tests + M@mixed_tests b@backup_tests p@add_addr_ports_tests k@syncookies_tests diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 80d36f7cfee8..1b70c0a304ce 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -10,13 +10,19 @@ ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" -do_all_tests=1 + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns_sbox="ns_sbox-$rndh" add_mark_rules() { local ns=$1 local m=$2 + local t for t in iptables ip6tables; do # just to debug: check we have multiple subflows connection requests ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT @@ -31,12 +37,7 @@ add_mark_rules() init() { - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" - ns_sbox="ns_sbox-$rndh" - + local netns for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up @@ -45,6 +46,7 @@ init() ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 done + local i for i in `seq 1 4`; do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i @@ -74,6 +76,7 @@ init() cleanup() { + local netns for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done @@ -104,15 +107,17 @@ check_mark() local ns=$1 local af=$2 - tables=iptables + local tables=iptables if [ $af -eq 6 ];then tables=ip6tables fi + local counters values counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP) values=${counters%DROP*} + local v for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 @@ -132,9 +137,9 @@ print_file_err() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 cmp "$in" "$out" > /dev/null 2>&1 if [ $? -ne 0 ] ;then @@ -157,18 +162,18 @@ is_v6() do_transfer() { - listener_ns="$1" - connector_ns="$2" - cl_proto="$3" - srv_proto="$4" - connect_addr="$5" + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" - port=12001 + local port=12001 :> "$cout" :> "$sout" - mptcp_connect="./mptcp_connect -r 20" + local mptcp_connect="./mptcp_connect -r 20" local local_addr if is_v6 "${connect_addr}"; then @@ -181,7 +186,7 @@ do_transfer() ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ ${local_addr} < "$sin" > "$sout" & - spid=$! + local spid=$! sleep 1 @@ -190,12 +195,12 @@ do_transfer() $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ $connect_addr < "$cin" > "$cout" & - cpid=$! + local cpid=$! wait $cpid - retc=$? + local retc=$? wait $spid - rets=$? + local rets=$? if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 @@ -230,9 +235,9 @@ do_transfer() make_file() { - name=$1 - who=$2 - size=$3 + local name=$1 + local who=$2 + local size=$3 dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" @@ -265,9 +270,9 @@ do_mptcp_sockopt_tests() run_tests() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" local lret=0 do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} @@ -282,8 +287,8 @@ run_tests() do_tcpinq_test() { - ip netns exec "$ns1" ./mptcp_inq "$@" - lret=$? + ip netns exec "$ns_sbox" ./mptcp_inq "$@" + local lret=$? if [ $lret -ne 0 ];then ret=$lret echo "FAIL: mptcp_inq $@" 1>&2 @@ -298,9 +303,7 @@ do_tcpinq_tests() { local lret=0 - ip netns exec "$ns1" iptables -F - ip netns exec "$ns1" ip6tables -F - + local args for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args lret=$? diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 40aeb5a71a2a..9f22f7e5027d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -148,9 +149,6 @@ do_transfer() :> "$sout" :> "$capout" - local addr_port - addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - if $capture; then local capuser if [ -z $SUDO_USER ] ; then @@ -173,7 +171,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ 0.0.0.0 < "$sin" > "$sout" & local spid=$! @@ -181,7 +179,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $time \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ 10.0.3.3 < "$cin" > "$cout" & local cpid=$! diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3229725b64b0..66c5be25c13d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -11,11 +11,17 @@ ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED REMOVED=7 # MPTCP_EVENT_REMOVED SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED AF_INET=2 AF_INET6=10 -evts_pid=0 +file="" +server_evts="" +client_evts="" +server_evts_pid=0 +client_evts_pid=0 client4_pid=0 server4_pid=0 client6_pid=0 @@ -33,42 +39,44 @@ client_addr_id=${RANDOM:0:2} server_addr_id=${RANDOM:0:2} sec=$(date +%s) -rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) +rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" +print_title() +{ + stdbuf -o0 -e0 printf "INFO: %s\n" "${1}" +} + kill_wait() { + [ $1 -eq 0 ] && return 0 + + kill -SIGUSR1 $1 > /dev/null 2>&1 kill $1 > /dev/null 2>&1 wait $1 2>/dev/null } cleanup() { - echo "cleanup" - - rm -rf $file + print_title "Cleanup" # Terminate the MPTCP connection and related processes - if [ $client4_pid -ne 0 ]; then - kill -SIGUSR1 $client4_pid > /dev/null 2>&1 - fi - if [ $server4_pid -ne 0 ]; then - kill_wait $server4_pid - fi - if [ $client6_pid -ne 0 ]; then - kill -SIGUSR1 $client6_pid > /dev/null 2>&1 - fi - if [ $server6_pid -ne 0 ]; then - kill_wait $server6_pid - fi - if [ $evts_pid -ne 0 ]; then - kill_wait $evts_pid - fi + local pid + for pid in $client4_pid $server4_pid $client6_pid $server6_pid\ + $server_evts_pid $client_evts_pid + do + kill_wait $pid + done + local netns for netns in "$ns1" "$ns2" ;do ip netns del "$netns" done + + rm -rf $file $client_evts $server_evts + + stdbuf -o0 -e0 printf "Done\n" } trap cleanup EXIT @@ -99,6 +107,7 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad ip -net "$ns2" link set ns2eth1 up +print_title "Init" stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2 \t\t\t[OK]\n" make_file() @@ -113,8 +122,9 @@ make_file() make_connection() { - local file - file=$(mktemp) + if [ -z "$file" ]; then + file=$(mktemp) + fi make_file "$file" "client" local is_v6=$1 @@ -132,16 +142,24 @@ make_connection() # Capture netlink events over the two network namespaces running # the MPTCP client and server - local client_evts - client_evts=$(mktemp) + if [ -z "$client_evts" ]; then + client_evts=$(mktemp) + fi :>"$client_evts" + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid + fi ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & - local client_evts_pid=$! - local server_evts - server_evts=$(mktemp) + client_evts_pid=$! + if [ -z "$server_evts" ]; then + server_evts=$(mktemp) + fi :>"$server_evts" + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & - local server_evts_pid=$! + server_evts_pid=$! sleep 0.5 # Run the server @@ -159,7 +177,6 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill_wait $client_evts_pid local client_token local client_port @@ -171,17 +188,21 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill_wait $server_evts_pid - server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") - server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$server_evts") - rm -f "$client_evts" "$server_evts" "$file" + server_token=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + server_serverside=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') + stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t" $is_v6 if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && [ "$server_serverside" = 1 ] then - stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t[OK]\n" $is_v6 + stdbuf -o0 -e0 printf "[OK]\n" else + stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \ + "${client_token}" "${server_token}" \ + "${client_serverside}" "${server_serverside}" exit 1 fi @@ -201,6 +222,48 @@ make_connection() fi } +# $1: var name ; $2: prev ret +check_expected_one() +{ + local var="${1}" + local exp="e_${var}" + local prev_ret="${2}" + + if [ "${!var}" = "${!exp}" ] + then + return 0 + fi + + if [ "${prev_ret}" = "0" ] + then + stdbuf -o0 -e0 printf "[FAIL]\n" + fi + + stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \ + "${var}" "${!var}" "${!exp}" + return 1 +} + +# $@: all var names to check +check_expected() +{ + local ret=0 + local var + + for var in "${@}" + do + check_expected_one "${var}" "${ret}" || ret=1 + done + + if [ ${ret} -eq 0 ] + then + stdbuf -o0 -e0 printf "[OK]\n" + return 0 + fi + + exit 1 +} + verify_announce_event() { local evt=$1 @@ -226,26 +289,16 @@ verify_announce_event() fi dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] && - [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] && - [ "$id" = "$e_id" ] - then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + + check_expected "type" "token" "addr" "dport" "id" } test_announce() { - local evts - evts=$(mktemp) + print_title "Announce tests" + # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # ADD_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1)) @@ -253,82 +306,74 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t" if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" else - stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}" exit 1 fi # ADD_ADDR from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\ + verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" # ADD_ADDR6 from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" # ADD_ADDR from the client to server machine using a new port - :>"$evts" + :>"$server_evts" client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" # ADD_ADDR6 from the server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" # ADD_ADDR from the server to client machine using a new port - :>"$evts" + :>"$client_evts" server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - - kill_wait $evts_pid - rm -f "$evts" } verify_remove_event() @@ -344,26 +389,16 @@ verify_remove_event() type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] && - [ "$id" = "$e_id" ] - then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + + check_expected "type" "token" "id" } test_remove() { - local evts - evts=$(mktemp) + print_title "Remove tests" # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # RM_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1 )) @@ -372,7 +407,7 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\ $client_addr_id local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -386,7 +421,7 @@ test_remove() $invalid_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\ $invalid_id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -395,40 +430,35 @@ test_remove() fi # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR6 from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - - kill_wait $evts_pid + verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ @@ -436,27 +466,24 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\ $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR from the server to client machine - :>"$evts" + :>"$client_evts" server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR6 from the server to client machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - - kill_wait $evts_pid - rm -f "$evts" + verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } verify_subflow_events() @@ -518,27 +545,15 @@ verify_subflow_events() daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") fi - if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] && - [ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] && - [ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] && - [ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ] - then - stdbuf -o0 -e0 printf "[OK]\n" - return 0 - fi - stdbuf -o0 -e0 printf "[FAIL]\n" - exit 1 + check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid" } test_subflows() { - local evts - evts=$(mktemp) + print_title "Subflows v4 or v6 only tests" + # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # Attempt to add a listener at 10.0.2.2:<subflow-port> ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ @@ -551,25 +566,25 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ rport "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine @@ -583,31 +598,31 @@ test_subflows() listener_pid=$! # ADD_ADDR6 from client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW6 from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ "$client_addr_id" "ns1" "ns2" @@ -622,44 +637,39 @@ test_subflows() listener_pid=$! # ADD_ADDR from client to server machine using a new port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id port $new4_port > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\ $new4_port token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ "10.0.2.1" "10.0.2.2" "$new4_port" "23"\ "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ $new4_port token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ "$client4_token" > /dev/null 2>&1 - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # Attempt to add a listener at 10.0.2.1:<subflow-port> ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ @@ -672,24 +682,24 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine @@ -703,17 +713,17 @@ test_subflows() listener_pid=$! # ADD_ADDR6 from server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ "$AF_INET6" "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23"\ "$server_addr_id" "ns2" "ns1" @@ -721,14 +731,14 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\ + verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR6 from server to client machine @@ -742,42 +752,89 @@ test_subflows() listener_pid=$! # ADD_ADDR from server to client machine using a new port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id port $new4_port > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $new4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $new4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ "$server4_token" > /dev/null 2>&1 +} - kill_wait $evts_pid - rm -f "$evts" +test_subflows_v4_v6_mix() +{ + print_title "Subflows v4 and v6 mix tests" + + # Attempt to add a listener at 10.0.2.1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port > /dev/null 2>&1 & + local listener_pid=$! + + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 > /dev/null 2>&1 + stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + kill_wait $listener_pid + + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" > /dev/null 2>&1 + sleep 0.5 } test_prio() { + print_title "Prio tests" + local count # Send MP_PRIO signal from client to server machine @@ -789,7 +846,7 @@ test_prio() count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') [ -z "$count" ] && count=0 if [ $count != 1 ]; then - stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}" exit 1 else stdbuf -o0 -e0 printf "[OK]\n" @@ -800,18 +857,89 @@ test_prio() count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') [ -z "$count" ] && count=0 if [ $count != 1 ]; then - stdbuf -o0 -e0 printf "[FAIL]\n" + stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}" exit 1 else stdbuf -o0 -e0 printf "[OK]\n" fi } +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "CREATE_LISTENER %s:%s\t\t\t\t\t"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "CLOSE_LISTENER %s:%s\t\t\t\t\t"\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + check_expected "type" "family" "saddr" "sport" +} + +test_listener() +{ + print_title "Listener tests" + + # Capture events on the network namespace running the client + :>$client_evts + + # Attempt to add a listener at 10.0.2.2:<subflow-port> + ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\ + $client4_port > /dev/null 2>&1 & + local listener_pid=$! + + verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port + + # ADD_ADDR from client to server machine reusing the subflow port + ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\ + $client_addr_id > /dev/null 2>&1 + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ + rport $client4_port token $server4_token > /dev/null 2>&1 + sleep 0.5 + + # Delete the listener from the client ns, if one was created + kill_wait $listener_pid + + verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port +} + +print_title "Make connections" make_connection make_connection "v6" + test_announce test_remove test_subflows +test_subflows_v4_v6_mix test_prio +test_listener exit 0 diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/nat6to4.c index ac54c36b25fc..ac54c36b25fc 100644 --- a/tools/testing/selftests/net/bpf/nat6to4.c +++ b/tools/testing/selftests/net/nat6to4.c diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 7900fa98eccb..ee9a72982705 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -87,6 +87,7 @@ struct sock_args { int use_setsockopt; int use_freebind; int use_cmsg; + uint8_t dsfield; const char *dev; const char *server_dev; int ifindex; @@ -580,6 +581,36 @@ static int set_reuseaddr(int sd) return rc; } +static int set_dsfield(int sd, int version, int dsfield) +{ + if (!dsfield) + return 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IP_TOS)"); + return -1; + } + break; + + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IPV6_TCLASS)"); + return -1; + } + break; + + default: + log_error("Invalid address family\n"); + return -1; + } + + return 0; +} + static int str_to_uint(const char *str, int min, int max, unsigned int *value) { int number; @@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server) (char *)&one, sizeof(one)) < 0) log_err_errno("Setting SO_BROADCAST error"); + if (set_dsfield(sd, AF_INET, args->dsfield) != 0) + goto out_err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto out_err; else if (args->use_setsockopt && @@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" +#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" #define OPT_FORCE_BIND_KEY_IFINDEX 1001 #define OPT_NO_BIND_KEY_IFINDEX 1002 @@ -1893,6 +1933,8 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -Q dsfield DS Field value of the socket (the IP_TOS or\n" + " IPV6_TCLASS socket option)\n" " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" @@ -1971,6 +2013,13 @@ int main(int argc, char *argv[]) args.has_local_ip = 1; args.client_local_addr_str = optarg; break; + case 'Q': + if (str_to_uint(optarg, 0, 255, &tmp) != 0) { + fprintf(stderr, "Invalid DS Field\n"); + return 1; + } + args.dsfield = tmp; + break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { fprintf(stderr, "Invalid port\n"); diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh new file mode 100755 index 000000000000..0fd0d2db3abc --- /dev/null +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -0,0 +1,74 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly ksft_skip=4 +readonly cpus=$(nproc) +ret=0 + +[ $cpus -gt 2 ] || exit $ksft_skip + +readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask) +readonly TAG="$(mktemp -u XXXXXX)" +readonly VETH="veth${TAG}" +readonly NETNS="ns-${TAG}" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up +} + +cleanup() { + echo $INITIAL_RPS_DEFAULT_MASK > /proc/sys/net/core/rps_default_mask + ip netns del $NETNS +} + +chk_rps() { + local rps_mask expected_rps_mask=$4 + local dev_name=$3 + local netns=$2 + local cmd="cat" + local msg=$1 + + [ -n "$netns" ] && cmd="ip netns exec $netns $cmd" + + rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus) + printf "%-60s" "$msg" + if [ $rps_mask -eq $expected_rps_mask ]; then + echo "[ ok ]" + else + echo "[fail] expected $expected_rps_mask found $rps_mask" + ret=1 + fi +} + +trap cleanup EXIT + +echo 0 > /proc/sys/net/core/rps_default_mask +setup +chk_rps "empty rps_default_mask" $NETNS lo 0 +cleanup + +echo 1 > /proc/sys/net/core/rps_default_mask +setup +chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK + +echo 3 > /proc/sys/net/core/rps_default_mask +chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 + +ip link add name $VETH type veth peer netns $NETNS name $VETH +ip link set dev $VETH up +ip -n $NETNS link set dev $VETH up +chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +ip netns del $NETNS + +setup +chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 + +ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 +ip link add name $VETH type veth peer netns $NETNS name $VETH +chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 +chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 + +exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 0900c5438fbb..275491be3da2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -782,7 +782,7 @@ kci_test_ipsec_offload() tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 check_err $? - ip x p add dir out src $dstip/24 dst $srcip/24 \ + ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 check_err $? diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c new file mode 100644 index 000000000000..f02f1f95d227 --- /dev/null +++ b/tools/testing/selftests/net/sctp_hello.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len) +{ + if (ss->ss_family == AF_INET) { + struct sockaddr_in *a = (struct sockaddr_in *)ss; + + a->sin_addr.s_addr = inet_addr(ip); + a->sin_port = htons(atoi(port)); + *len = sizeof(*a); + } else { + struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss; + + a->sin6_family = AF_INET6; + inet_pton(AF_INET6, ip, &a->sin6_addr); + a->sin6_port = htons(atoi(port)); + *len = sizeof(*a); + } +} + +static int do_client(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + char buf[] = "hello"; + int csk, ret, len; + + if (argc < 5) { + printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]); + return -1; + } + + bzero((void *)&ss, sizeof(ss)); + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (csk < 0) { + printf("failed to create socket\n"); + return -1; + } + + if (argc >= 7) { + set_addr(&ss, argv[5], argv[6], &len); + ret = bind(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = connect(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to connect to peer\n"); + return -1; + } + + ret = send(csk, buf, strlen(buf) + 1, 0); + if (ret < 0) { + printf("failed to send msg %d\n", ret); + return -1; + } + close(csk); + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + int lsk, csk, ret, len; + char buf[20]; + + if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s server|client ...\n", argv[0]); + return -1; + } + + if (!strcmp(argv[1], "client")) + return do_client(argc, argv); + + if (argc < 5) { + printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]); + return -1; + } + + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (lsk < 0) { + printf("failed to create lsk\n"); + return -1; + } + + if (argc >= 6) { + ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE, + argv[5], strlen(argv[5]) + 1); + if (ret < 0) { + printf("failed to bind to device\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = bind(lsk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + + ret = listen(lsk, 5); + if (ret < 0) { + printf("failed to listen on port\n"); + return -1; + } + + csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL); + if (csk < 0) { + printf("failed to accept new client\n"); + return -1; + } + + ret = recv(csk, buf, sizeof(buf), 0); + if (ret <= 0) { + printf("failed to recv msg %d\n", ret); + return -1; + } + close(csk); + close(lsk); + + return 0; +} diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh new file mode 100755 index 000000000000..c721e952e5f3 --- /dev/null +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP VRF. +# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1 +# SERVER_NS +# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 + +CLIENT_NS1="client-ns1" +CLIENT_NS2="client-ns2" +CLIENT_IP4="10.0.0.1" +CLIENT_IP6="2000::1" +CLIENT_PORT=1234 + +SERVER_NS="server-ns" +SERVER_IP4="10.0.0.2" +SERVER_IP6="2000::2" +SERVER_PORT=1234 + +setup() { + modprobe sctp + modprobe sctp_diag + ip netns add $CLIENT_NS1 + ip netns add $CLIENT_NS2 + ip netns add $SERVER_NS + + ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + + ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 + ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 + + ip -n $CLIENT_NS1 link set veth1 up + ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $CLIENT_NS2 link set veth1 up + ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $SERVER_NS link add dummy1 type dummy + ip -n $SERVER_NS link set dummy1 up + ip -n $SERVER_NS link add vrf-1 type vrf table 10 + ip -n $SERVER_NS link add vrf-2 type vrf table 20 + ip -n $SERVER_NS link set vrf-1 up + ip -n $SERVER_NS link set vrf-2 up + ip -n $SERVER_NS link set veth1 master vrf-1 + ip -n $SERVER_NS link set veth2 master vrf-2 + + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2 + + ip -n $SERVER_NS link set veth1 up + ip -n $SERVER_NS link set veth2 up + ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4 + ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6 + ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6 + ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6 +} + +cleanup() { + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns del "$CLIENT_NS1" + ip netns del "$CLIENT_NS2" + ip netns del "$SERVER_NS" +} + +wait_server() { + local IFACE=$1 + local CNT=0 + + until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ + grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do + [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +do_test() { + local CLIENT_NS=$1 + local IFACE=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE 2>&1 >/dev/null & + disown + wait_server $IFACE || return $RET + timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +do_testx() { + local IFACE1=$1 + local IFACE2=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE1 2>&1 >/dev/null & + disown + wait_server $IFACE1 || return $RET + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE2 2>&1 >/dev/null & + disown + wait_server $IFACE2 || return $RET + timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +testup() { + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " + do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " + do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 05: bind veth2 in server, connect from client 1, N " + do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 06: bind veth1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N " + do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y " + do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N " + do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y " + do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" +} + +trap cleanup EXIT +setup && echo "Testing For SCTP VRF:" && \ +CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" && +CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***" +exit $? diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c new file mode 100644 index 000000000000..0e04f9fef986 --- /dev/null +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#define _GNU_SOURCE +#include <sched.h> + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/sysinfo.h> + +#include "../kselftest_harness.h" + +#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */ +#define NR_SERVER self->nproc +#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER) + +FIXTURE(so_incoming_cpu) +{ + int nproc; + int *servers; + union { + struct sockaddr addr; + struct sockaddr_in in_addr; + }; + socklen_t addrlen; +}; + +enum when_to_set { + BEFORE_REUSEPORT, + BEFORE_LISTEN, + AFTER_LISTEN, + AFTER_ALL_LISTEN, +}; + +FIXTURE_VARIANT(so_incoming_cpu) +{ + int when_to_set; +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport) +{ + .when_to_set = BEFORE_REUSEPORT, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen) +{ + .when_to_set = BEFORE_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen) +{ + .when_to_set = AFTER_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen) +{ + .when_to_set = AFTER_ALL_LISTEN, +}; + +FIXTURE_SETUP(so_incoming_cpu) +{ + self->nproc = get_nprocs(); + ASSERT_LE(2, self->nproc); + + self->servers = malloc(sizeof(int) * NR_SERVER); + ASSERT_NE(self->servers, NULL); + + self->in_addr.sin_family = AF_INET; + self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + self->in_addr.sin_port = htons(0); + self->addrlen = sizeof(struct sockaddr_in); +} + +FIXTURE_TEARDOWN(so_incoming_cpu) +{ + int i; + + for (i = 0; i < NR_SERVER; i++) + close(self->servers[i]); + + free(self->servers); +} + +void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu) +{ + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int)); + ASSERT_EQ(ret, 0); +} + +int create_server(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant, + int cpu) +{ + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); + ASSERT_NE(fd, -1); + + if (variant->when_to_set == BEFORE_REUSEPORT) + set_so_incoming_cpu(_metadata, fd, cpu); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + + ret = bind(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == BEFORE_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + /* We don't use CLIENT_PER_SERVER here not to block + * this test at connect() if SO_INCOMING_CPU is broken. + */ + ret = listen(fd, NR_CLIENT); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == AFTER_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + return fd; +} + +void create_servers(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant) +{ + int i, ret; + + for (i = 0; i < NR_SERVER; i++) { + self->servers[i] = create_server(_metadata, self, variant, i); + + if (i == 0) { + ret = getsockname(self->servers[i], &self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + } + } + + if (variant->when_to_set == AFTER_ALL_LISTEN) { + for (i = 0; i < NR_SERVER; i++) + set_so_incoming_cpu(_metadata, self->servers[i], i); + } +} + +void create_clients(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + cpu_set_t cpu_set; + int i, j, fd, ret; + + for (i = 0; i < NR_SERVER; i++) { + CPU_ZERO(&cpu_set); + + CPU_SET(i, &cpu_set); + ASSERT_EQ(CPU_COUNT(&cpu_set), 1); + ASSERT_NE(CPU_ISSET(i, &cpu_set), 0); + + /* Make sure SYN will be processed on the i-th CPU + * and finally distributed to the i-th listener. + */ + sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + ASSERT_EQ(ret, 0); + + for (j = 0; j < CLIENT_PER_SERVER; j++) { + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(fd, -1); + + ret = connect(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + close(fd); + } + } +} + +void verify_incoming_cpu(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + int i, j, fd, cpu, ret, total = 0; + socklen_t len = sizeof(int); + + for (i = 0; i < NR_SERVER; i++) { + for (j = 0; j < CLIENT_PER_SERVER; j++) { + /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ + fd = accept(self->servers[i], &self->addr, &self->addrlen); + ASSERT_NE(fd, -1); + + ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len); + ASSERT_EQ(ret, 0); + ASSERT_EQ(cpu, i); + + close(fd); + total++; + } + } + + ASSERT_EQ(total, NR_CLIENT); + TH_LOG("SO_INCOMING_CPU is very likely to be " + "working correctly with %d sockets.", total); +} + +TEST_F(so_incoming_cpu, test1) +{ + create_servers(_metadata, self, variant); + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test2) +{ + int server; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + close(server); + + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test3) +{ + int server, client; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + + create_clients(_metadata, self); + + /* Never receive any requests */ + client = accept(server, &self->addr, &self->addrlen); + ASSERT_EQ(client, -1); + + verify_incoming_cpu(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh new file mode 100755 index 000000000000..50563443a4ad --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -0,0 +1,869 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> +# +# This script is designed to test the support for "flavors" in the SRv6 End +# behavior. +# +# Flavors defined in RFC8986 [1] represent additional operations that can modify +# or extend the existing SRv6 End, End.X and End.T behaviors. For the sake of +# convenience, we report the list of flavors described in [1] hereafter: +# - Penultimate Segment Pop (PSP); +# - Ultimate Segment Pop (USP); +# - Ultimate Segment Decapsulation (USD). +# +# The End, End.X, and End.T behaviors can support these flavors either +# individually or in combinations. +# Currently in this selftest we consider only the PSP flavor for the SRv6 End +# behavior. However, it is possible to extend the script as soon as other +# flavors will be supported in the kernel. +# +# The purpose of the PSP flavor consists in instructing the penultimate node +# listed in the SRv6 policy to remove (i.e. pop) the outermost SRH from the IPv6 +# header. +# A PSP enabled SRv6 End behavior instance processes the SRH by: +# - decrementing the Segment Left (SL) value from 1 to 0; +# - copying the last SID from the SID List into the IPv6 Destination Address +# (DA); +# - removing the SRH from the extension headers following the IPv6 header. +# +# Once the SRH is removed, the IPv6 packet is forwarded to the destination using +# the IPv6 DA updated during the PSP operation (i.e. the IPv6 DA corresponding +# to the last SID carried by the removed SRH). +# +# Although the PSP flavor can be set for any SRv6 End behavior instance on any +# SR node, it will be active only on such behaviors bound to a penultimate SID +# for a given SRv6 policy. +# SL=2 SL=1 SL=0 +# | | | +# For example, given the SRv6 policy (SID List := <X, Y, Z>): +# - a PSP enabled SRv6 End behavior bound to SID Y will apply the PSP operation +# as Segment Left (SL) is 1, corresponding to the Penultimate Segment of the +# SID List; +# - a PSP enabled SRv6 End behavior bound to SID X will *NOT* apply the PSP +# operation as the Segment Left is 2. This behavior instance will apply the +# "standard" End packet processing, ignoring the configured PSP flavor at +# all. +# +# [1] RFC8986: https://datatracker.ietf.org/doc/html/rfc8986 +# +# Network topology +# ================ +# +# The network topology used in this selftest is depicted hereafter, composed by +# two hosts (hs-1, hs-2) and four routers (rt-1, rt-2, rt-3, rt-4). +# Hosts hs-1 and hs-2 are connected to routers rt-1 and rt-2, respectively, +# allowing them to communicate with each other. +# Traffic exchanged between hs-1 and hs-2 can follow different network paths. +# The network operator, through specific SRv6 Policies can steer traffic to one +# path rather than another. In this selftest this is implemented as follows: +# +# i) The SRv6 H.Insert behavior applies SRv6 Policies on traffic received by +# connected hosts. It pushes the Segment Routing Header (SRH) after the +# IPv6 header. The SRH contains the SID List (i.e. SRv6 Policy) needed for +# steering traffic across the segments/waypoints specified in that list; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The PSP enabled SRv6 End behavior is used to remove the SRH when such +# behavior is configured on a node bound to the Penultimate Segment carried +# by the SID List. +# +# cafe::1 cafe::2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# | | +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in +# the IPv6 operator network. +# +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +---------------------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::ef1 is associated with the SRv6 End behavior with PSP flavor | +# +---------------------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for the SIDs. Reachability of +# SIDs is ensured by proper configuration of the IPv6 operator's network and +# SRv6 routers. +# +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies different SRv6 Policies to the traffic received +# from connected hosts on the basis of the destination addresses. +# In case of SRv6 H.Insert behavior, the SRv6 Policy enforcement consists of +# pushing the SRH (carrying a given SID List) after the existing IPv6 header. +# Note that in the inserting mode, there is no encapsulation at all. +# +# Before applying an SRv6 Policy using the SRv6 H.Insert behavior +# +------+---------+ +# | IPv6 | Payload | +# +------+---------+ +# +# After applying an SRv6 Policy using the SRv6 H.Insert behavior +# +------+-----+---------+ +# | IPv6 | SRH | Payload | +# +------+-----+---------+ +# +# Traffic from hs-1 to hs-2 +# ------------------------- +# +# Packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policy: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::ef1,fcff:2::ef1,cafe::2 +# +# Router rt-1 is configured to enforce the Policy (i.a) through the SRv6 +# H.Insert behavior which pushes the SRH after the existing IPv6 header. This +# Policy steers the traffic from hs-1 across rt-3, rt-4, rt-2 and finally to the +# destination hs-2. +# +# As the packet reaches the router rt-3, the SRv6 End behavior bound to SID +# fcff:3::e is triggered. The behavior updates the Segment Left (from SL=3 to +# SL=2) in the SRH, the IPv6 DA with fcff:4::ef1 and forwards the packet to the +# next router on the path, i.e. rt-4. +# +# When router rt-4 receives the packet, the PSP enabled SRv6 End behavior bound +# to SID fcff:4::ef1 is executed. Since the SL=2, the PSP operation is *NOT* +# kicked in and the behavior applies the default End processing: the Segment +# Left is decreased (from SL=2 to SL=1), the IPv6 DA is updated with the SID +# fcff:2::ef1 and the packet is forwarded to router rt-2. +# +# The PSP enabled SRv6 End behavior on rt-2 is associated with SID fcff:2::ef1 +# and is executed as the packet is received. Because SL=1, the behavior applies +# the PSP processing on the packet as follows: i) SL is decreased, i.e. from +# SL=1 to SL=0; ii) last SID (cafe::2) is copied into the IPv6 DA; iii) the +# outermost SRH is removed from the extension headers following the IPv6 header. +# Once the PSP processing is completed, the packet is forwarded to the host hs-2 +# (destination). +# +# Traffic from hs-2 to hs-1 +# ------------------------- +# +# Packets generated from hs-2 and directed to hs-1 are handled by rt-2 which +# applies the following SRv6 Policy: +# +# i.b) IPv6 traffic, SID List=fcff:1::ef1,cafe::1 +# +# Router rt-2 is configured to enforce the Policy (i.b) through the SRv6 +# H.Insert behavior which pushes the SRH after the existing IPv6 header. This +# Policy steers the traffic from hs-2 across rt-1 and finally to the +# destination hs-1 +# +# +# When the router rt-1 receives the packet, the PSP enabled SRv6 End behavior +# associated with the SID fcff:1::ef1 is triggered. Since the SL=1, +# the PSP operation takes place: i) the SL is decremented; ii) the IPv6 DA is +# set with the last SID; iii) the SRH is removed from the extension headers +# after the IPv6 header. At this point, the packet with IPv6 DA=cafe::1 is sent +# to the destination, i.e. hs-1. + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly RT2HS_DEVNAME="veth1" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv6_TESTS_ADDR=2001:db8::1 +readonly LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly END_PSP_FUNC=0ef1 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Given the description of a router <id:op> as an input, the function returns +# the <id> token which represents the ID of the router. +# i.e. input: "12:psp" +# output: "12" +__get_srv6_rtcfg_id() +{ + local element="$1" + + echo "${element}" | cut -d':' -f1 +} + +# Given the description of a router <id:op> as an input, the function returns +# the <op> token which represents the operation (e.g. End behavior with or +# withouth flavors) configured for the node. + +# Note that when the operation represents an End behavior with a list of +# flavors, the output is the ordered version of that list. +# i.e. input: "5:usp,psp,usd" +# output: "psp,usd,usp" +__get_srv6_rtcfg_op() +{ + local element="$1" + + # return the lexicographically ordered flavors + echo "${element}" | cut -d':' -f2 | sed 's/,/\n/g' | sort | \ + xargs | sed 's/ /,/g' +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link set lo up + + ip -netns "${nsname}" link add ${DUMMY_DEVNAME} type dummy + ip -netns "${nsname}" link set ${DUMMY_DEVNAME} up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that "dev" is a dummy interface chosen for + # the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${DUMMY_DEVNAME}" + + + # all SIDs start with a common locator. Routes and SRv6 Endpoint + # behavior instaces are grouped together in the 'localsid' table. + ip -netns "${nsname}" -6 rule \ + add to "${LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # set default routes to unreachable + ip -netns "${nsname}" -6 route \ + add unreachable default metric 4278198272 \ + dev "${DUMMY_DEVNAME}" +} + +# This helper function builds and installs the SID List (i.e. SRv6 Policy) +# to be applied on incoming packets at the ingress node. Moreover, it +# configures the SRv6 nodes specified in the SID List to process the traffic +# according to the operations required by the Policy itself. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - compact way to represent a list of SRv6 routers with their operations +# (i.e. behaviors) that each of them needs to perform. Every <nodeid:op> +# element constructs a SID that is associated with the behavior <op> on +# the <nodeid> node. The list of such elements forms an SRv6 Policy. +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local policy_rts="$3" + local behavior_cfg + local in_nsname + local rt_nsname + local policy='' + local function + local fullsid + local op_type + local node + local n + + in_nsname="$(get_rtname "${encap_rt}")" + + for n in ${policy_rts}; do + node="$(__get_srv6_rtcfg_id "${n}")" + op_type="$(__get_srv6_rtcfg_op "${n}")" + rt_nsname="$(get_rtname "${node}")" + + case "${op_type}" in + "noflv") + policy="${policy}${LOCATOR_SERVICE}:${node}::${END_FUNC}," + function="${END_FUNC}" + behavior_cfg="End" + ;; + + "psp") + policy="${policy}${LOCATOR_SERVICE}:${node}::${END_PSP_FUNC}," + function="${END_PSP_FUNC}" + behavior_cfg="End flavors psp" + ;; + + *) + break + ;; + esac + + fullsid="${LOCATOR_SERVICE}:${node}::${function}" + + # add SRv6 Endpoint behavior to the selected router + if ! ip -netns "${rt_nsname}" -6 route get "${fullsid}" \ + &>/dev/null; then + ip -netns "${rt_nsname}" -6 route \ + add "${fullsid}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action ${behavior_cfg} \ + dev "${DUMMY_DEVNAME}" + fi + done + + # we need to remove the trailing comma to avoid inserting an empty + # address (::0) in the SID List. + policy="${policy%,}" + + # add SRv6 policy to incoming traffic sent by connected hosts + ip -netns "${in_nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" \ + encap seg6 mode inline segs "${policy}" \ + dev "${DUMMY_DEVNAME}" + + ip -netns "${in_nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End behavior) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + # create a connection between hosts hs-1 and hs-2. + # The path between hs-1 and hs-2 traverses SRv6 aware routers. + # For each direction two path are chosen: + # + # Direction hs-1 -> hs-2 (PSP flavor) + # - rt-1 (SRv6 H.Insert policy) + # - rt-3 (SRv6 End behavior) + # - rt-4 (SRv6 End flavor PSP with SL>1, acting as End behavior) + # - rt-2 (SRv6 End flavor PSP with SL=1) + # + # Direction hs-2 -> hs-1 (PSP flavor) + # - rt-2 (SRv6 H.Insert policy) + # - rt-1 (SRv6 End flavor PSP with SL=1) + setup_rt_policy_ipv6 2 1 "3:noflv 4:psp 2:psp" + setup_rt_policy_ipv6 1 2 "1:psp" + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_srv6_end_flv_psp_tests() +{ + log_section "SRv6 connectivity test hosts (h1 <-> h2, PSP flavor)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 +} + +test_iproute2_supp_or_ksft_skip() +{ + local flavor="$1" + + if ! ip route help 2>&1 | grep -qo "${flavor}"; then + echo "SKIP: Missing SRv6 ${flavor} flavor support in iproute2" + exit "${ksft_skip}" + fi +} + +test_kernel_supp_or_ksft_skip() +{ + local flavor="$1" + local test_netns + + test_netns="kflv-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns to test kernel support for flavors" + exit "${ksft_skip}" + fi + + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: Cannot set up dummy dev to test kernel support for flavors" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + if ! ip -netns "${test_netns}" link \ + set "${DUMMY_DEVNAME}" up; then + echo "SKIP: Cannot activate dummy dev to test kernel support for flavors" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + if ! ip -netns "${test_netns}" -6 route \ + add "${IPv6_TESTS_ADDR}" encap seg6local \ + action End flavors "${flavor}" dev "${DUMMY_DEVNAME}"; then + echo "SKIP: ${flavor} flavor not supported in kernel" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep +test_command_or_ksft_skip cut +test_command_or_ksft_skip sed +test_command_or_ksft_skip sort +test_command_or_ksft_skip xargs + +test_dummy_dev_or_ksft_skip +test_iproute2_supp_or_ksft_skip psp +test_kernel_supp_or_ksft_skip psp + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_srv6_end_flv_psp_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 00f837c9bc6c..46a02bbd31d0 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -137,7 +137,8 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) if (buffer == (void *)-1) { sz = need; buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, + -1, 0); if (buffer != (void *)-1) fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n"); } diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 704997ffc244..8c3ac0a72545 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -293,19 +293,11 @@ setup-vm() { elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then # Add per vni group config with 'bridge vni' api if [ -n "$group" ]; then - if [ "$family" == "v4" ]; then - if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group - else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group - fi - else - if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group - else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group - fi - fi + if [ $mcast -eq 1 ]; then + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + else + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + fi fi fi done diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index 90026a27eac0..9ba03164d73a 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -215,7 +215,7 @@ static char *recv_frame(const struct ring_state *ring, char *frame) } /* A single TPACKET_V3 block can hold multiple frames */ -static void recv_block(struct ring_state *ring) +static bool recv_block(struct ring_state *ring) { struct tpacket_block_desc *block; char *frame; @@ -223,7 +223,7 @@ static void recv_block(struct ring_state *ring) block = (void *)(ring->mmap + ring->idx * ring_block_sz); if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) - return; + return false; frame = (char *)block; frame += block->hdr.bh1.offset_to_first_pkt; @@ -235,6 +235,8 @@ static void recv_block(struct ring_state *ring) block->hdr.bh1.block_status = TP_STATUS_KERNEL; ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; } /* simple test: sleep once unconditionally and then process all rings */ @@ -245,7 +247,7 @@ static void process_rings(void) usleep(1000 * cfg_timeout_msec); for (i = 0; i < num_cpus; i++) - recv_block(&rings[i]); + do {} while (recv_block(&rings[i])); fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", frames_received - frames_nohash - frames_error, @@ -257,12 +259,12 @@ static char *setup_ring(int fd) struct tpacket_req3 req3 = {0}; void *ring; - req3.tp_retire_blk_tov = cfg_timeout_msec; + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; req3.tp_frame_size = 2048; req3.tp_frame_nr = 1 << 10; - req3.tp_block_nr = 2; + req3.tp_block_nr = 16; req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; req3.tp_block_size /= req3.tp_block_nr; diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index 0a49907cd4fe..da5bfd834eff 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -32,7 +32,7 @@ DEV="eth0" # This is determined by reading the RSS indirection table using ethtool. get_rss_cfg_num_rxqs() { echo $(ethtool -x "${DEV}" | - egrep [[:space:]]+[0-9]+:[[:space:]]+ | + grep -E [[:space:]]+[0-9]+:[[:space:]]+ | cut -d: -f2- | awk '{$1=$1};1' | tr ' ' '\n' | diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index c9c4b9d65839..0a6359bed0b9 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -40,8 +40,8 @@ run_one() { ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact - tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action - tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & @@ -88,8 +88,8 @@ if [ ! -f ${BPF_FILE} ]; then exit -1 fi -if [ ! -f bpf/nat6to4.o ]; then - echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first" +if [ ! -f nat6to4.o ]; then + echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index dc932fd65363..640bc43452fa 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m' readonly YELLOW='\033[0;33m' readonly RED='\033[0;31m' readonly NC='\033[0m' # No Color +readonly TESTPORT=8000 readonly KSFT_PASS=0 readonly KSFT_FAIL=1 @@ -56,11 +57,26 @@ trap wake_children EXIT run_one() { local -r args=$@ + local nr_socks=0 + local i=0 + local -r timeout=10 + + ./udpgso_bench_rx -p "$TESTPORT" & + ./udpgso_bench_rx -p "$TESTPORT" -t & + + # Wait for the above test program to get ready to receive connections. + while [ "$i" -lt "$timeout" ]; do + nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")" + [ "$nr_socks" -eq 2 ] && break + i=$((i + 1)) + sleep 1 + done + if [ "$nr_socks" -ne 2 ]; then + echo "timed out while waiting for udpgso_bench_rx" + exit 1 + fi - ./udpgso_bench_rx & - ./udpgso_bench_rx -t & - - ./udpgso_bench_tx ${args} + ./udpgso_bench_tx -p "$TESTPORT" ${args} } run_in_netns() { diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 6a193425c367..f35a924d4a30 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -214,11 +214,10 @@ static void do_verify_udp(const char *data, int len) static int recv_msg(int fd, char *buf, int len, int *gso_size) { - char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + char control[CMSG_SPACE(sizeof(int))] = {0}; struct msghdr msg = {0}; struct iovec iov = {0}; struct cmsghdr *cmsg; - uint16_t *gsosizeptr; int ret; iov.iov_base = buf; @@ -237,8 +236,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size) cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_GRO) { - gsosizeptr = (uint16_t *) CMSG_DATA(cmsg); - *gso_size = *gsosizeptr; + *gso_size = *(int *)CMSG_DATA(cmsg); break; } } @@ -250,7 +248,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size) static void do_flush_udp(int fd) { static char rbuf[ETH_MAX_MTU]; - int ret, len, gso_size, budget = 256; + int ret, len, gso_size = 0, budget = 256; len = cfg_read_all ? sizeof(rbuf) : 0; while (budget--) { @@ -336,6 +334,8 @@ static void parse_opts(int argc, char **argv) cfg_verify = true; cfg_read_all = true; break; + default: + exit(1); } } diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index f1fdaa270291..477392715a9a 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42); static int cfg_port = 8000; static int cfg_runtime_ms = -1; static bool cfg_poll; +static int cfg_poll_loop_timeout_ms = 2000; static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; @@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd) } } -static void flush_errqueue(int fd, const bool do_poll) +static void flush_errqueue(int fd, const bool do_poll, + unsigned long poll_timeout, const bool poll_err) { if (do_poll) { struct pollfd fds = {0}; int ret; fds.fd = fd; - ret = poll(&fds, 1, 500); + ret = poll(&fds, 1, poll_timeout); if (ret == 0) { - if (cfg_verbose) + if ((cfg_verbose) && (poll_err)) fprintf(stderr, "poll timeout\n"); } else if (ret < 0) { error(1, errno, "poll"); @@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll) flush_errqueue_recv(fd); } +static void flush_errqueue_retry(int fd, unsigned long num_sends) +{ + unsigned long tnow, tstop; + bool first_try = true; + + tnow = gettimeofday_ms(); + tstop = tnow + cfg_poll_loop_timeout_ms; + do { + flush_errqueue(fd, true, tstop - tnow, first_try); + first_try = false; + tnow = gettimeofday_ms(); + } while ((stat_zcopies != num_sends) && (tnow < tstop)); +} + static int send_tcp(int fd, char *data) { int ret, done = 0, count = 0; @@ -413,7 +429,8 @@ static int send_udp_segment(int fd, char *data) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", + error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " + "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } @@ -423,7 +440,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c; - while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) { + while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -452,6 +469,9 @@ static void parse_opts(int argc, char **argv) case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; break; + case 'L': + cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000; + break; case 'm': cfg_sendmmsg = true; break; @@ -490,6 +510,8 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + default: + exit(1); } } @@ -677,7 +699,7 @@ int main(int argc, char **argv) num_sends += send_udp(fd, buf[i]); num_msgs++; if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) - flush_errqueue(fd, cfg_poll); + flush_errqueue(fd, cfg_poll, 500, true); if (cfg_msg_nr && num_msgs >= cfg_msg_nr) break; @@ -696,7 +718,7 @@ int main(int argc, char **argv) } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); if (cfg_zerocopy || cfg_tx_tstamp) - flush_errqueue(fd, true); + flush_errqueue_retry(fd, num_sends); if (close(fd)) error(1, errno, "close"); diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh index b48e1833bc89..76645aaf2b58 100755 --- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -35,6 +35,8 @@ cleanup() { for i in 1 2;do ip netns del nsrouter$i;done } +trap cleanup EXIT + ipv4() { echo -n 192.168.$1.2 } @@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF table inet filter { counter unknown { } counter related { } + counter redir4 { } + counter redir6 { } chain input { type filter hook input priority 0; policy accept; - meta l4proto { icmp, icmpv6 } ct state established,untracked accept + icmp type "redirect" ct state "related" counter name "redir4" accept + icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept + + meta l4proto { icmp, icmpv6 } ct state established,untracked accept meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + counter name "unknown" drop } } @@ -279,5 +287,29 @@ else echo "ERROR: icmp error RELATED state test has failed" fi -cleanup +# add 'bad' route, expect icmp REDIRECT to be generated +ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1 +ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1 + +ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null + +expect="packets 1 bytes 112" +check_counter nsclient1 "redir4" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null +expect="packets 1 bytes 192" +check_counter nsclient1 "redir6" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: icmp redirects had RELATED state" +else + echo "ERROR: icmp redirect RELATED state test has failed" +fi + exit $ret diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index a7f62ad4f661..2ffba45a78bf 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -10,12 +10,20 @@ ksft_skip=4 testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" tables="foo bar baz quux" global_ret=0 eret=0 lret=0 +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + check_result() { local r=$1 @@ -43,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -139,11 +148,4 @@ done check_result $lret "add/delete with nftrace enabled" -pkill -9 ping - -wait - -rm -f "$tmp" -ip netns del "$testns" - exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 69ea659caca9..8fe61d3e3cce 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -14,21 +14,25 @@ endif # kernel image names by architecture IMAGE_i386 = arch/x86/boot/bzImage +IMAGE_x86_64 = arch/x86/boot/bzImage IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage IMAGE_mips = vmlinuz IMAGE_riscv = arch/riscv/boot/Image +IMAGE_s390 = arch/s390/boot/bzImage IMAGE = $(IMAGE_$(ARCH)) IMAGE_NAME = $(notdir $(IMAGE)) # default kernel configurations that appear to be usable DEFCONFIG_i386 = defconfig +DEFCONFIG_x86_64 = defconfig DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig DEFCONFIG_mips = malta_defconfig DEFCONFIG_riscv = defconfig +DEFCONFIG_s390 = defconfig DEFCONFIG = $(DEFCONFIG_$(ARCH)) # optional tests to run (default = all) @@ -36,20 +40,24 @@ TEST = # QEMU_ARCH: arch names used by qemu QEMU_ARCH_i386 = i386 +QEMU_ARCH_x86_64 = x86_64 QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm QEMU_ARCH_mips = mipsel # works with malta_defconfig QEMU_ARCH_riscv = riscv64 +QEMU_ARCH_s390 = s390x QEMU_ARCH = $(QEMU_ARCH_$(ARCH)) # QEMU_ARGS : some arch-specific args to pass to qemu QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = $(QEMU_ARGS_$(ARCH)) # OUTPUT is only set when run from the main makefile, otherwise @@ -62,7 +70,8 @@ else Q=@ endif -CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables +CFLAGS_s390 = -m64 +CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables $(CFLAGS_$(ARCH)) LDFLAGS := -s help: @@ -71,6 +80,7 @@ help: @echo " help this help" @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " run-user runs the executable under QEMU (uses \$$ARCH, \$$TEST)" @echo " initramfs prepare the initramfs with nolibc-test" @echo " defconfig create a fresh new default config (uses \$$ARCH)" @echo " kernel (re)build the kernel with the initramfs (uses \$$ARCH)" @@ -95,6 +105,7 @@ all: run sysroot: sysroot/$(ARCH)/include sysroot/$(ARCH)/include: + $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone $(Q)mv sysroot/sysroot sysroot/$(ARCH) @@ -103,6 +114,11 @@ nolibc-test: nolibc-test.c sysroot/$(ARCH)/include $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc +# qemu user-land test +run-user: nolibc-test + $(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + initramfs: nolibc-test $(QUIET_MKDIR)mkdir -p initramfs $(call QUIET_INSTALL, initramfs/init) @@ -133,3 +149,5 @@ clean: $(Q)rm -rf initramfs $(call QUIET_CLEAN, run.out) $(Q)rm -rf run.out + +.PHONY: sysroot/$(ARCH)/include diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 78bced95ac63..c4a0c915139c 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -442,6 +442,35 @@ int test_getdents64(const char *dir) return ret; } +static int test_getpagesize(void) +{ + long x = getpagesize(); + int c; + + if (x < 0) + return x; + +#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) + /* + * x86 family is always 4K page. + */ + c = (x == 4096); +#elif defined(__aarch64__) + /* + * Linux aarch64 supports three values of page size: 4K, 16K, and 64K + * which are selected at kernel compilation time. + */ + c = (x == 4096 || x == (16 * 1024) || x == (64 * 1024)); +#else + /* + * Assuming other architectures must have at least 4K page. + */ + c = (x >= 4096); +#endif + + return !c; +} + /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. */ @@ -502,6 +531,7 @@ int run_syscall(int min, int max) CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; #endif + CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break; @@ -565,6 +595,13 @@ int run_stdlib(int min, int max) CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; + CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; + CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; + CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; + CASE_TEST(memcmp_20_e0); EXPECT_LT(1, memcmp("aaa\x20", "aaa\xe0", 4), 0); break; + CASE_TEST(memcmp_e0_20); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x20", 4), 0); break; + CASE_TEST(memcmp_80_e0); EXPECT_LT(1, memcmp("aaa\x80", "aaa\xe0", 4), 0); break; + CASE_TEST(memcmp_e0_80); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x80", 4), 0); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index fcafa5f0d34c..db93c4ff081a 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include +CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread TEST_GEN_PROGS := sigtrap_threads remove_on_exec diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index edafaca1aeb3..9286a1d22cd3 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -g -I../../../../usr/include/ +CFLAGS += -g $(KHDR_INCLUDES) TEST_GEN_PROGS = regression_enomem diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 778b6cdc8aed..d731e3e76d5b 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h index 13e9b9e28e2c..b703714e7d98 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr.h +++ b/tools/testing/selftests/powerpc/dscr/dscr.h @@ -23,6 +23,7 @@ #include <sys/stat.h> #include <sys/wait.h> +#include "reg.h" #include "utils.h" #define THREADS 100 /* Max threads */ @@ -41,31 +42,23 @@ /* Prilvilege state DSCR access */ inline unsigned long get_dscr(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV)); - - return ret; + return mfspr(SPRN_DSCR_PRIV); } inline void set_dscr(unsigned long val) { - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV)); + mtspr(SPRN_DSCR_PRIV, val); } /* Problem state DSCR access */ inline unsigned long get_dscr_usr(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR)); - - return ret; + return mfspr(SPRN_DSCR); } inline void set_dscr_usr(unsigned long val) { - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + mtspr(SPRN_DSCR, val); } /* Default DSCR access */ diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index fbbdffdb2e5d..f20d1c166d1e 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -24,6 +24,7 @@ static int check_cpu_dscr_default(char *file, unsigned long val) rc = read(fd, buf, sizeof(buf)); if (rc == -1) { perror("read() failed"); + close(fd); return 1; } close(fd); @@ -65,8 +66,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val) if (access(file, F_OK)) continue; - if (check_cpu_dscr_default(file, val)) + if (check_cpu_dscr_default(file, val)) { + closedir(sysfs); return 1; + } } closedir(sysfs); return 0; diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 3312cb1b058d..51729d9a7111 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -24,7 +24,7 @@ #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 -/* Older versions of libc do not not define this */ +/* Older versions of libc do not define this */ #ifndef SEGV_PKUERR #define SEGV_PKUERR 4 #endif diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index bbc05ffc5860..4e8d0ce1ff58 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -329,7 +329,7 @@ static int parent(struct shared_info *info, pid_t pid) core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0); if (core == (void *) -1) { - perror("Error mmaping core file"); + perror("Error mmapping core file"); ret = TEST_FAIL; goto out; } @@ -383,7 +383,7 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) goto out; } - ret = fread(core_pattern, 1, PATH_MAX, f); + ret = fread(core_pattern, 1, PATH_MAX - 1, f); fclose(f); if (!ret) { perror("Error reading core_pattern file"); @@ -391,6 +391,8 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) goto out; } + core_pattern[ret] = '\0'; + /* Check whether we can predict the name of the core file. */ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p")) *changed_ = false; diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index ecde2c199f3b..f75739bbad28 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -17,8 +17,11 @@ * Copyright (C) 2018 Michael Neuling, IBM Corporation. */ +#define _GNU_SOURCE + #include <unistd.h> #include <assert.h> +#include <sched.h> #include <stdio.h> #include <stdlib.h> #include <signal.h> @@ -26,6 +29,7 @@ #include <sys/ioctl.h> #include <sys/wait.h> #include <sys/ptrace.h> +#include <sys/resource.h> #include <sys/sysinfo.h> #include <asm/ptrace.h> #include <elf.h> @@ -140,17 +144,59 @@ static void disable_fds(int *fd, int n) static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) { - int i = 0; + int i, ncpus, cpu, ret = 0; + struct rlimit rlim; + cpu_set_t *mask; + size_t size; + + if (getrlimit(RLIMIT_NOFILE, &rlim)) { + perror("getrlimit"); + return -1; + } + rlim.rlim_cur = 65536; + if (setrlimit(RLIMIT_NOFILE, &rlim)) { + perror("setrlimit"); + return -1; + } + + ncpus = get_nprocs_conf(); + size = CPU_ALLOC_SIZE(ncpus); + mask = CPU_ALLOC(ncpus); + if (!mask) { + perror("malloc"); + return -1; + } + + CPU_ZERO_S(size, mask); - /* Assume online processors are 0 to nprocs for simplisity */ - for (i = 0; i < nprocs; i++) { - fd[i] = perf_cpu_event_open(i, type, addr, len); + if (sched_getaffinity(0, size, mask)) { + perror("sched_getaffinity"); + ret = -1; + goto done; + } + + for (i = 0, cpu = 0; i < nprocs && cpu < ncpus; cpu++) { + if (!CPU_ISSET_S(cpu, size, mask)) + continue; + fd[i] = perf_cpu_event_open(cpu, type, addr, len); if (fd[i] < 0) { + perror("perf_systemwide_event_open"); close_fds(fd, i); - return fd[i]; + ret = fd[i]; + goto done; } + i++; } - return 0; + + if (i < nprocs) { + printf("Error: Number of online cpus reduced since start of test: %d < %d\n", i, nprocs); + close_fds(fd, i); + ret = -1; + } + +done: + CPU_FREE(mask); + return ret; } static inline bool breakpoint_test(int len) @@ -543,15 +589,12 @@ static int test_syswide_multi_diff_addr(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -590,15 +633,12 @@ static int test_syswide_multi_same_addr(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -637,15 +677,12 @@ static int test_syswide_multi_diff_addr_ro_wo(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -684,15 +721,12 @@ static int test_syswide_multi_same_addr_ro_wo(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index a0635a3819aa..1345e9b9af0f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -23,6 +23,7 @@ #include <sys/syscall.h> #include <linux/limits.h> #include "ptrace.h" +#include "reg.h" #define SPRN_PVR 0x11F #define PVR_8xx 0x00500000 @@ -620,10 +621,7 @@ static int ptrace_hwbreak(void) int main(int argc, char **argv, char **envp) { - int pvr = 0; - asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR)); - if (pvr == PVR_8xx) - is_8xx = true; + is_8xx = mfspr(SPRN_PVR) == PVR_8xx; return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 4e0233c0f2b3..04788e5fc504 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -745,10 +745,7 @@ int show_tm_spr(pid_t child, struct tm_spr_regs *out) /* Analyse TEXASR after TM failure */ inline unsigned long get_tfiar(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR)); - return ret; + return mfspr(SPRN_TFIAR); } void analyse_texasr(unsigned long texasr) diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh index dcdb392e8427..bcc7b6b65009 100755 --- a/tools/testing/selftests/powerpc/scripts/hmi.sh +++ b/tools/testing/selftests/powerpc/scripts/hmi.sh @@ -36,7 +36,7 @@ trap "ppc64_cpu --smt-snooze-delay=100" 0 1 # for each chip+core combination # todo - less fragile parsing -egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | +grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | while read chipcore; do chip=$(echo "$chipcore"|awk '{print $3}') core=$(echo "$chipcore"|awk '{print $5}') diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 4d95965cb751..9c5c00e04f63 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -14,6 +14,7 @@ #include <string.h> #include <stdio.h> #include <sys/utsname.h> +#include "reg.h" #include "utils.h" #include "flush_utils.h" @@ -79,5 +80,5 @@ void set_dscr(unsigned long val) init = 1; } - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + mtspr(SPRN_DSCR, val); } diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c index 62a93cc61b7c..6d1a5ee8eb28 100644 --- a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c +++ b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c @@ -79,7 +79,7 @@ int main(void) { int n_tasks = 100, i; - fprintf(stderr, "[No further output means we're allright]\n"); + fprintf(stderr, "[No further output means we're all right]\n"); for (i=0; i<n_tasks; i++) if (fork() == 0) diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c index 79950f9a26fd..d39511eb9b01 100644 --- a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c +++ b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c @@ -83,7 +83,7 @@ int main(void) { int n_tasks = 100, i; - fprintf(stderr, "[No further output means we're allright]\n"); + fprintf(stderr, "[No further output means we're all right]\n"); for (i=0; i<n_tasks; i++) if (fork() == 0) diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index d95b1cb43d9d..7588428b8fcd 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -25,6 +25,7 @@ #undef NDEBUG #include <assert.h> #include <errno.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -41,7 +42,7 @@ * 1: vsyscall VMA is --xp vsyscall=xonly * 2: vsyscall VMA is r-xp vsyscall=emulate */ -static int g_vsyscall; +static volatile int g_vsyscall; static const char *g_proc_pid_maps_vsyscall; static const char *g_proc_pid_smaps_vsyscall; @@ -147,11 +148,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 69551bfa215c..cacbd2a4aec9 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -257,11 +257,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index e7ceabed7f51..7d0aa22bdc12 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -17,6 +17,7 @@ // while shifting across CPUs. #undef NDEBUG #include <assert.h> +#include <errno.h> #include <unistd.h> #include <sys/syscall.h> #include <stdlib.h> @@ -54,7 +55,7 @@ int main(void) len += sizeof(unsigned long); free(m); m = malloc(len); - } while (sys_sched_getaffinity(0, len, m) == -EINVAL); + } while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL); fd = open("/proc/uptime", O_RDONLY); assert(fd >= 0); diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile index ef06de0898b7..8f57f88ecadd 100644 --- a/tools/testing/selftests/ptp/Makefile +++ b/tools/testing/selftests/ptp/Makefile @@ -1,10 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../usr/include/ -TEST_PROGS := testptp +CFLAGS += $(KHDR_INCLUDES) +TEST_GEN_PROGS := testptp LDLIBS += -lrt -all: $(TEST_PROGS) +TEST_PROGS = phc.sh include ../lib.mk - -clean: - rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 2f1f532c39db..96ffa94afb91 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall +CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh index d5a16631b16e..0cf55f1bf654 100755 --- a/tools/testing/selftests/rcutorture/bin/config2csv.sh +++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh @@ -30,9 +30,8 @@ else fi scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" -T=/tmp/config2latex.sh.$$ +T=`mktemp -d /tmp/config2latex.sh.XXXXXX` trap 'rm -rf $T' 0 -mkdir $T cat << '---EOF---' >> $T/p.awk END { diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh index 90016c359e83..b3d2e7efa40c 100755 --- a/tools/testing/selftests/rcutorture/bin/config_override.sh +++ b/tools/testing/selftests/rcutorture/bin/config_override.sh @@ -29,9 +29,8 @@ else exit 1 fi -T=${TMPDIR-/tmp}/config_override.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' | awk ' diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 31584cee84d7..b92dfeb7fbbf 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -7,14 +7,12 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/abat-chk-config.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T -cat $1 > $T/.config +sed -e 's/"//g' < $1 > $T/.config -cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' | -grep -v '^CONFIG_INITRAMFS_SOURCE' | +sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 | awk ' { print "if grep -q \"" $0 "\" < '"$T/.config"'"; diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index d6e5ce084b1c..28bdb3ac7ba6 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -15,9 +15,8 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/configinit.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T # Capture config spec file. diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index 69f8a5958cef..aad51e7c0183 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 66d0414d8e4b..b52d5069563c 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -159,6 +159,9 @@ identify_boot_image () { qemu-system-aarch64) echo arch/arm64/boot/Image ;; + qemu-system-s390x) + echo arch/s390/boot/bzImage + ;; *) echo vmlinux ;; @@ -184,6 +187,9 @@ identify_qemu () { elif echo $u | grep -q aarch64 then echo qemu-system-aarch64 + elif echo $u | grep -q 'IBM S/390' + then + echo qemu-system-s390x elif uname -a | grep -q ppc64 then echo qemu-system-ppc64 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index 0941f1ddab65..8a968fbda02c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -12,9 +12,8 @@ scriptname=$0 args="$*" -T=${TMPDIR-/tmp}/kvm-again.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T if ! test -d tools/testing/selftests/rcutorture/bin then @@ -51,27 +50,56 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh +bootargs= dryrun= dur= default_link="cp -R" -rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`" +resdir="`pwd`/tools/testing/selftests/rcutorture/res" +rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`" +got_datestamp= +got_rundir= startdate="`date`" starttime="`get_starttime`" usage () { echo "Usage: $scriptname $oldrun [ arguments ]:" + echo " --bootargs kernel-boot-arguments" + echo " --datestamp string" echo " --dryrun" echo " --duration minutes | <seconds>s | <hours>h | <days>d" echo " --link hard|soft|copy" echo " --remote" echo " --rundir /new/res/path" + echo "Command line: $scriptname $args" exit 1 } while test $# -gt 0 do case "$1" in + --bootargs|--bootarg) + checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--' + bootargs="$bootargs $2" + shift + ;; + --datestamp) + checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--' + if test -n "$got_rundir" || test -n "$got_datestamp" + then + echo Only one of --datestamp or --rundir may be specified + usage + fi + got_datestamp=y + ds=$2 + rundir="$resdir/$ds" + if test -e "$rundir" + then + echo "--datestamp $2: Already exists." + usage + fi + shift + ;; --dryrun) dryrun=1 ;; @@ -113,6 +141,12 @@ do ;; --rundir) checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error' + if test -n "$got_rundir" || test -n "$got_datestamp" + then + echo Only one of --datestamp or --rundir may be specified + usage + fi + got_rundir=y rundir=$2 if test -e "$rundir" then @@ -122,8 +156,11 @@ do shift ;; *) - echo Unknown argument $1 - usage + if test -n "$1" + then + echo Unknown argument $1 + usage + fi ;; esac shift @@ -156,7 +193,7 @@ do qemu_cmd_dir="`dirname "$i"`" kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`" jitter_dir="`dirname "$kernel_dir"`" - kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i + kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i if test -n "$arg_remote" then echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh index f99b2c146f83..46b08cd16ba5 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh @@ -7,9 +7,8 @@ # # Usage: kvm-assign-cpus.sh /path/to/sysfs -T=/tmp/kvm-assign-cpus.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`" trap 'rm -rf $T' 0 2 -mkdir $T sysfsdir=${1-/sys/devices/system/node} if ! cd "$sysfsdir" > $T/msg 2>&1 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 5ad973dca820..11f8d232b0ee 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -23,9 +23,8 @@ then fi resdir=${2} -T=${TMPDIR-/tmp}/test-linux.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cp ${config_template} $T/config cat << ___EOF___ >> $T/config @@ -45,10 +44,10 @@ fi ncpus="`getconf _NPROCESSORS_ONLN`" make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1 retval=$? -if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out +if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out then echo Kernel build error - egrep "Stop|Error|error:|warning:" < $resdir/Make.out + grep -E "Stop|Error|error:|warning:" < $resdir/Make.out echo Run aborted. exit 3 fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh index ee886b40a5d2..2b56baceb05d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh @@ -18,9 +18,8 @@ then exit 1 fi -T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE PATH=${RCUTORTURE}/bin:$PATH; export PATH diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 88983cba7956..28981007465b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -32,11 +32,11 @@ for i in ${rundir}/*/Make.out do scenariodir="`dirname $i`" scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`" - if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i + if grep -E -q "error:|warning:|^ld: .*undefined reference to" < $i then - egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags + grep -E "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags files="$files $i.diags $i" - elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run" + elif ! test -f ${scenariobasedir}/vmlinux && ! test -f ${scenariobasedir}/vmlinux.xz && ! test -f "${rundir}/re-run" then echo No ${scenariobasedir}/vmlinux file > $i.diags files="$files $i.diags $i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 0789c5606d2a..1df7e695edf7 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -30,7 +30,7 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - TORTURE_SUITE="`cat $i/../torture_suite`" + TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags case "${TORTURE_SUITE}" in diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 9f0a5d5ff2dd..a2328163eba1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -34,19 +34,18 @@ fi shift # Pathnames: -# T: /tmp/kvm-remote.sh.$$ -# resdir: /tmp/kvm-remote.sh.$$/res -# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix) +# T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp +# resdir: /tmp/kvm-remote.sh.NNNNNN/res +# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix) # oldrun: `pwd`/tools/testing/.../res/$otherds # # Pathname segments: -# TD: kvm-remote.sh.$$ +# TD: kvm-remote.sh.NNNNNN # ds: yyyy.mm.dd-hh.mm.ss-remote -TD=kvm-remote.sh.$$ -T=${TMPDIR-/tmp}/$TD +T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T +TD="`basename "$T"`" resdir="$T/res" ds=`date +%Y.%m.%d-%H.%M.%S`-remote diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index 1e29d656501b..c3808c490d92 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -13,9 +13,8 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T echo ---- Running batch $* # Check arguments diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index 44280582c594..76f24cd5825b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -17,9 +17,8 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T resdir="$1" if ! test -d "$resdir" @@ -109,7 +108,7 @@ do if test $kruntime -lt $seconds then echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 - grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 + grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1 killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" if test -n "$killpid" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index f4c8055dbf7a..d2a3710a5f2a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -25,9 +25,8 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T . functions.sh . $CONFIGFRAG/ver_functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh index d40b4e60a50c..75a2610a27f3 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh @@ -3,10 +3,14 @@ # # Transform a qemu-cmd file to allow reuse. # -# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out +# Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out # # bzImage: Kernel and initrd from the same prior kvm.sh run. # console.log: File into which to place console output. +# jitter_dir: Jitter directory for TORTURE_JITTER_START and +# TORTURE_JITTER_STOP environment variables. +# seconds: Run duaration for *.shutdown_secs module parameter. +# bootargs: New kernel boot parameters. Beware of Robert Tables. # # The original qemu-cmd file is provided on standard input. # The transformed qemu-cmd file is on standard output. @@ -17,6 +21,9 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> +T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX` +trap 'rm -rf $T' 0 2 + image="$1" if test -z "$image" then @@ -41,9 +48,17 @@ then echo "Invalid duration, should be numeric in seconds: '$seconds'" exit 1 fi +bootargs="$5" + +# Build awk program. +echo "BEGIN {" > $T/bootarg.awk +echo $bootargs | tr -s ' ' '\012' | + awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk +echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' | + awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk +cat >> $T/bootarg.awk << '___EOF___' +} -awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ - -v seconds="$seconds" ' /^# seconds=/ { if (seconds == "") print $0; @@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ { line = ""; for (i = 1; i <= NF; i++) { - if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) { - sub(/[0-9]*$/, seconds, $i); - if (line == "") - line = $i; - else - line = line " " $i; - } else if (line == "") { + if (line == "") { line = $i; } else { line = line " " $i; @@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ } else if ($i == "-kernel") { i++; line = line " " image; + } else if ($i == "-append") { + for (i++; i <= NF; i++) { + arg = $i; + lq = ""; + rq = ""; + if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) + sub(/[0-9]*$/, seconds, arg); + if (arg ~ /^"/) { + lq = substr(arg, 1, 1); + arg = substr(arg, 2); + } + if (arg ~ /"$/) { + rq = substr(arg, length($i), 1); + arg = substr(arg, 1, length($i) - 1); + } + par = arg; + gsub(/=.*$/, "", par); + j = 1; + while (bootpar[j] != "") { + if (bootpar[j] == par) { + arg = ""; + break; + } + j++; + } + if (line == "") + line = lq arg; + else + line = line " " lq arg; + } + for (j in bootarg) + line = line " " bootarg[j]; + line = line rq; } } print line; -}' +} +___EOF___ + +awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ + -v seconds="$seconds" -f $T/bootarg.awk diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6c734818a875..62f3b0f56e4d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -14,9 +14,8 @@ scriptname=$0 args="$*" -T=${TMPDIR-/tmp}/kvm.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T cd `dirname $scriptname`/../../../../../ @@ -187,7 +186,7 @@ do fi ;; --kconfig|--kconfigs) - checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' + checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; @@ -586,7 +585,7 @@ awk < $T/cfgcpu.pack \ echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script # Extract the tests and their batches from the script. -egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | +grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' | awk ' /^----Start/ { @@ -623,7 +622,7 @@ then elif test "$dryrun" = sched then # Extract the test run schedule from the script. - egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | + grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' nbuilds="`grep 'Starting build\.' $T/script | grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' | diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 70d62fd0d31d..71f0dfbb2a6d 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -64,7 +64,7 @@ ___EOF___ # build using nolibc on supported archs (smaller executable) and fall # back to regular glibc on other ones. if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ - "||__ARM_EABI__||__aarch64__\nyes\n#endif" \ + "||__ARM_EABI__||__aarch64__||__s390x__\nyes\n#endif" \ | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ | grep -q '^yes'; then # architecture supported by nolibc diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 2dbfca3589b1..5a0b7ffcf047 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -15,9 +15,8 @@ F=$1 title=$2 -T=${TMPDIR-/tmp}/parse-build.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`" trap 'rm -rf $T' 0 -mkdir $T . functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 822eb037a057..9ab0f6bc172c 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -65,7 +65,7 @@ then fi grep --binary-files=text 'torture:.*ver:' $file | - egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | + grep -E --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | sed -e 's/^.*ver: //' | awk ' @@ -128,17 +128,17 @@ then then summary="$summary Badness: $n_badness" fi - n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | egrep -c 'WARNING:|Warn'` + n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | grep -E -c 'WARNING:|Warn'` if test "$n_warn" -ne 0 then summary="$summary Warnings: $n_warn" fi - n_bugs=`egrep -c '\bBUG|Oops:' $file` + n_bugs=`grep -E -c '\bBUG|Oops:' $file` if test "$n_bugs" -ne 0 then summary="$summary Bugs: $n_bugs" fi - n_kcsan=`egrep -c 'BUG: KCSAN: ' $file` + n_kcsan=`grep -E -c 'BUG: KCSAN: ' $file` if test "$n_kcsan" -ne 0 then if test "$n_bugs" = "$n_kcsan" @@ -158,7 +158,7 @@ then then summary="$summary lockdep: $n_badness" fi - n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file` + n_stalls=`grep -E -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file` if test "$n_stalls" -ne 0 then summary="$summary Stalls: $n_stalls" diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index d477618e7261..130d0de4c3bb 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture" startdate="`date`" starttime="`get_starttime`" -T=/tmp/torture.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`" trap 'rm -rf $T' 0 2 -mkdir $T echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log @@ -278,6 +277,8 @@ function torture_one { then cat $T/$curflavor.out | tee -a $T/log echo retcode=$retcode | tee -a $T/log + else + echo $resdir > $T/last-resdir fi if test "$retcode" == 0 then @@ -303,10 +304,12 @@ function torture_set { shift curflavor=$flavor torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan torture_one "$@" --kasan + mv $T/last-resdir $T/last-resdir-kasan || : fi if test "$do_kcsan" = "yes" then @@ -317,6 +320,7 @@ function torture_set { cur_kcsan_kmake_args="$kcsan_kmake_args" fi torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + mv $T/last-resdir $T/last-resdir-kcsan || : fi } @@ -326,20 +330,34 @@ then echo " --- allmodconfig:" Start `date` | tee -a $T/log amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" mkdir -p "$amcdir" - echo " --- make clean" > "$amcdir/Make.out" 2>&1 + echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 - echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 - cp .config $amcdir - make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 - echo " --- make " >> "$amcdir/Make.out" 2>&1 - make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 - retcode="$?" - echo $retcode > "$amcdir/Make.exitcode" - if test "$retcode" == 0 + retcode=$? + buildphase='"make clean"' + if test "$retcode" -eq 0 + then + echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1 + cp .config $amcdir + make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 + retcode=$? + buildphase='"make allmodconfig"' + fi + if test "$retcode" -eq 0 + then + echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1 + make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 + retcode="$?" + echo $retcode > "$amcdir/Make.exitcode" + buildphase='"make"' + fi + if test "$retcode" -eq 0 then echo "allmodconfig($retcode)" $amcdir >> $T/successes + echo Success >> $amcdir/log else echo "allmodconfig($retcode)" $amcdir >> $T/failures + echo " --- allmodconfig Test summary:" >> $amcdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log fi fi @@ -379,11 +397,48 @@ then else primlist= fi +firsttime=1 +do_kasan_save="$do_kasan" +do_kcsan_save="$do_kcsan" for prim in $primlist do - torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + if test -n "$firsttime" + then + torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : + if test -f "$T/last-resdir-kasan" + then + mv $T/last-resdir-kasan $T/first-resdir-kasan || : + fi + if test -f "$T/last-resdir-kcsan" + then + mv $T/last-resdir-kcsan $T/first-resdir-kcsan || : + fi + firsttime= + do_kasan= + do_kcsan= + else + torture_bootargs= + for i in $T/first-resdir-* + do + case "$i" in + *-nodebug) + torture_suffix= + ;; + *-kasan) + torture_suffix="-kasan" + ;; + *-kcsan) + torture_suffix="-kcsan" + ;; + esac + torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim" + done + fi done +do_kasan="$do_kasan_save" +do_kcsan="$do_kcsan_save" if test "$do_rcuscale" = yes then @@ -391,11 +446,48 @@ then else primlist= fi +firsttime=1 +do_kasan_save="$do_kasan" +do_kcsan_save="$do_kcsan" for prim in $primlist do - torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + if test -n "$firsttime" + then + torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : + if test -f "$T/last-resdir-kasan" + then + mv $T/last-resdir-kasan $T/first-resdir-kasan || : + fi + if test -f "$T/last-resdir-kcsan" + then + mv $T/last-resdir-kcsan $T/first-resdir-kcsan || : + fi + firsttime= + do_kasan= + do_kcsan= + else + torture_bootargs= + for i in $T/first-resdir-* + do + case "$i" in + *-nodebug) + torture_suffix= + ;; + *-kasan) + torture_suffix="-kasan" + ;; + *-kcsan) + torture_suffix="-kcsan" + ;; + esac + torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim" + done + fi done +do_kasan="$do_kasan_save" +do_kcsan="$do_kcsan_save" if test "$do_kvfree" = "yes" then @@ -458,7 +550,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 - find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo + find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all + find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' | + grep -e '-k[ac]san$' > $T/xz-todo-copy + sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo ncompresses=0 batchno=1 if test -s $T/xz-todo @@ -490,6 +585,24 @@ then echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log fi wait + if test -s $T/xz-todo-copy + then + # The trick here is that we need corresponding + # vmlinux files from corresponding scenarios. + echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log + dirstash="`pwd`" + for i in `cat $T/xz-todo-copy` + do + cd $i + find . -name vmlinux -print > $T/xz-todo-copy-vmlinux + for v in `cat $T/xz-todo-copy-vmlinux` + do + rm -f "$v" + cp -l `cat $i/re-run`/"$i/$v".xz "`dirname "$v"`" + done + cd "$dirstash" + done + fi echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log else diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 5910888ebfe1..16496de5f6ce 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -1,7 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only basic_percpu_ops_test +basic_percpu_ops_mm_cid_test basic_test basic_rseq_op_test param_test param_test_benchmark param_test_compare_twice +param_test_mm_cid +param_test_mm_cid_benchmark +param_test_mm_cid_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 215e1067f037..b357ba24af06 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -4,7 +4,7 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) CLANG_FLAGS += -no-integrated-as endif -CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \ +CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \ $(CLANG_FLAGS) LDLIBS += -lpthread -ldl @@ -12,8 +12,9 @@ LDLIBS += -lpthread -ldl # still track changes to header files and depend on shared object. OVERRIDE_TARGETS = 1 -TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \ - param_test_benchmark param_test_compare_twice +TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ + param_test_benchmark param_test_compare_twice param_test_mm_cid \ + param_test_mm_cid_benchmark param_test_mm_cid_compare_twice TEST_GEN_PROGS_EXTENDED = librseq.so @@ -29,6 +30,9 @@ $(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h $(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ +$(OUTPUT)/basic_percpu_ops_mm_cid_test: basic_percpu_ops_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID_ID $< $(LDLIBS) -lrseq -o $@ + $(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ rseq.h rseq-*.h $(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@ @@ -36,3 +40,15 @@ $(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ $(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ rseq.h rseq-*.h $(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/param_test_mm_cid: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DBENCHMARK $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@ diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 517756afc2a4..887542961968 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -12,6 +12,32 @@ #include "../kselftest.h" #include "rseq.h" +#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID +# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID +static +int get_current_cpu_id(void) +{ + return rseq_current_mm_cid(); +} +static +bool rseq_validate_cpu_id(void) +{ + return rseq_mm_cid_available(); +} +#else +# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID +static +int get_current_cpu_id(void) +{ + return rseq_cpu_start(); +} +static +bool rseq_validate_cpu_id(void) +{ + return rseq_current_cpu_raw() >= 0; +} +#endif + struct percpu_lock_entry { intptr_t v; } __attribute__((aligned(128))); @@ -51,9 +77,9 @@ int rseq_this_cpu_lock(struct percpu_lock *lock) for (;;) { int ret; - cpu = rseq_cpu_start(); - ret = rseq_cmpeqv_storev(&lock->c[cpu].v, - 0, 1, cpu); + cpu = get_current_cpu_id(); + ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, + &lock->c[cpu].v, 0, 1, cpu); if (rseq_likely(!ret)) break; /* Retry if comparison fails or rseq aborts. */ @@ -141,13 +167,14 @@ void this_cpu_list_push(struct percpu_list *list, intptr_t *targetptr, newval, expect; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); /* Load list->c[cpu].head with single-copy atomicity. */ expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); newval = (intptr_t)node; targetptr = (intptr_t *)&list->c[cpu].head; node->next = (struct percpu_list_node *)expect; - ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); + ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, expect, newval, cpu); if (rseq_likely(!ret)) break; /* Retry if comparison fails or rseq aborts. */ @@ -170,12 +197,13 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, long offset; int ret, cpu; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); targetptr = (intptr_t *)&list->c[cpu].head; expectnot = (intptr_t)NULL; offset = offsetof(struct percpu_list_node, next); load = (intptr_t *)&head; - ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, + ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, expectnot, offset, load, cpu); if (rseq_likely(!ret)) { if (_cpu) @@ -295,6 +323,10 @@ int main(int argc, char **argv) errno, strerror(errno)); goto error; } + if (!rseq_validate_cpu_id()) { + fprintf(stderr, "Error: cpu id getter unavailable\n"); + goto error; + } printf("spinlock\n"); test_percpu_spinlock(); printf("percpu_list\n"); diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c index d8efbfb89193..295eea16466f 100644 --- a/tools/testing/selftests/rseq/basic_test.c +++ b/tools/testing/selftests/rseq/basic_test.c @@ -22,6 +22,8 @@ void test_cpu_pointer(void) CPU_ZERO(&test_affinity); for (i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, &affinity)) { + int node; + CPU_SET(i, &test_affinity); sched_setaffinity(0, sizeof(test_affinity), &test_affinity); @@ -29,6 +31,8 @@ void test_cpu_pointer(void) assert(rseq_current_cpu() == i); assert(rseq_current_cpu_raw() == i); assert(rseq_cpu_start() == i); + node = rseq_fallback_current_node(); + assert(rseq_current_node_id() == node); CPU_CLR(i, &test_affinity); } } diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h index 876eb6a7f75b..f47092bddeba 100644 --- a/tools/testing/selftests/rseq/compiler.h +++ b/tools/testing/selftests/rseq/compiler.h @@ -27,4 +27,10 @@ */ #define rseq_after_asm_goto() asm volatile ("" : : : "memory") +/* Combine two tokens. */ +#define RSEQ__COMBINE_TOKENS(_tokena, _tokenb) \ + _tokena##_tokenb +#define RSEQ_COMBINE_TOKENS(_tokena, _tokenb) \ + RSEQ__COMBINE_TOKENS(_tokena, _tokenb) + #endif /* RSEQ_COMPILER_H_ */ diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index ef29bc16f358..bf951a490bb4 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -16,6 +16,7 @@ #include <signal.h> #include <errno.h> #include <stddef.h> +#include <stdbool.h> static inline pid_t rseq_gettid(void) { @@ -36,13 +37,9 @@ static int opt_modulo, verbose; static int opt_yield, opt_signal, opt_sleep, opt_disable_rseq, opt_threads = 200, - opt_disable_mod = 0, opt_test = 's', opt_mb = 0; + opt_disable_mod = 0, opt_test = 's'; -#ifndef RSEQ_SKIP_FASTPATH static long long opt_reps = 5000; -#else -static long long opt_reps = 100; -#endif static __thread __attribute__((tls_model("initial-exec"))) unsigned int signals_delivered; @@ -268,6 +265,63 @@ unsigned int yield_mod_cnt, nr_abort; #include "rseq.h" +static enum rseq_mo opt_mo = RSEQ_MO_RELAXED; + +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +#define TEST_MEMBARRIER + +static int sys_membarrier(int cmd, int flags, int cpu_id) +{ + return syscall(__NR_membarrier, cmd, flags, cpu_id); +} +#endif + +#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID +# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID +static +int get_current_cpu_id(void) +{ + return rseq_current_mm_cid(); +} +static +bool rseq_validate_cpu_id(void) +{ + return rseq_mm_cid_available(); +} +# ifdef TEST_MEMBARRIER +/* + * Membarrier does not currently support targeting a mm_cid, so + * issue the barrier on all cpus. + */ +static +int rseq_membarrier_expedited(int cpu) +{ + return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + 0, 0); +} +# endif /* TEST_MEMBARRIER */ +#else +# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID +static +int get_current_cpu_id(void) +{ + return rseq_cpu_start(); +} +static +bool rseq_validate_cpu_id(void) +{ + return rseq_current_cpu_raw() >= 0; +} +# ifdef TEST_MEMBARRIER +static +int rseq_membarrier_expedited(int cpu) +{ + return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu); +} +# endif /* TEST_MEMBARRIER */ +#endif + struct percpu_lock_entry { intptr_t v; } __attribute__((aligned(128))); @@ -355,8 +409,14 @@ static int rseq_this_cpu_lock(struct percpu_lock *lock) for (;;) { int ret; - cpu = rseq_cpu_start(); - ret = rseq_cmpeqv_storev(&lock->c[cpu].v, + cpu = get_current_cpu_id(); + if (cpu < 0) { + fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n", + getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu); + abort(); + } + ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, + &lock->c[cpu].v, 0, 1, cpu); if (rseq_likely(!ret)) break; @@ -473,8 +533,9 @@ void *test_percpu_inc_thread(void *arg) do { int cpu; - cpu = rseq_cpu_start(); - ret = rseq_addv(&data->c[cpu].count, 1, cpu); + cpu = get_current_cpu_id(); + ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU, + &data->c[cpu].count, 1, cpu); } while (rseq_unlikely(ret)); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) @@ -543,13 +604,14 @@ void this_cpu_list_push(struct percpu_list *list, intptr_t *targetptr, newval, expect; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); /* Load list->c[cpu].head with single-copy atomicity. */ expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); newval = (intptr_t)node; targetptr = (intptr_t *)&list->c[cpu].head; node->next = (struct percpu_list_node *)expect; - ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); + ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, expect, newval, cpu); if (rseq_likely(!ret)) break; /* Retry if comparison fails or rseq aborts. */ @@ -575,13 +637,14 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, long offset; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); targetptr = (intptr_t *)&list->c[cpu].head; expectnot = (intptr_t)NULL; offset = offsetof(struct percpu_list_node, next); load = (intptr_t *)&head; - ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, - offset, load, cpu); + ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, expectnot, + offset, load, cpu); if (rseq_likely(!ret)) { node = head; break; @@ -719,7 +782,7 @@ bool this_cpu_buffer_push(struct percpu_buffer *buffer, intptr_t offset; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); if (offset == buffer->c[cpu].buflen) break; @@ -727,14 +790,9 @@ bool this_cpu_buffer_push(struct percpu_buffer *buffer, targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; newval_final = offset + 1; targetptr_final = &buffer->c[cpu].offset; - if (opt_mb) - ret = rseq_cmpeqv_trystorev_storev_release( - targetptr_final, offset, targetptr_spec, - newval_spec, newval_final, cpu); - else - ret = rseq_cmpeqv_trystorev_storev(targetptr_final, - offset, targetptr_spec, newval_spec, - newval_final, cpu); + ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU, + targetptr_final, offset, targetptr_spec, + newval_spec, newval_final, cpu); if (rseq_likely(!ret)) { result = true; break; @@ -757,7 +815,7 @@ struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, intptr_t offset; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); /* Load offset with single-copy atomicity. */ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); if (offset == 0) { @@ -767,7 +825,8 @@ struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); newval = offset - 1; targetptr = (intptr_t *)&buffer->c[cpu].offset; - ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, + ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, offset, (intptr_t *)&buffer->c[cpu].array[offset - 1], (intptr_t)head, newval, cpu); if (rseq_likely(!ret)) @@ -924,7 +983,7 @@ bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, size_t copylen; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); /* Load offset with single-copy atomicity. */ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); if (offset == buffer->c[cpu].buflen) @@ -935,15 +994,11 @@ bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, copylen = sizeof(item); newval_final = offset + 1; targetptr_final = &buffer->c[cpu].offset; - if (opt_mb) - ret = rseq_cmpeqv_trymemcpy_storev_release( - targetptr_final, offset, - destptr, srcptr, copylen, - newval_final, cpu); - else - ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, - offset, destptr, srcptr, copylen, - newval_final, cpu); + ret = rseq_cmpeqv_trymemcpy_storev( + opt_mo, RSEQ_PERCPU, + targetptr_final, offset, + destptr, srcptr, copylen, + newval_final, cpu); if (rseq_likely(!ret)) { result = true; break; @@ -968,7 +1023,7 @@ bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, size_t copylen; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); /* Load offset with single-copy atomicity. */ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); if (offset == 0) @@ -979,8 +1034,8 @@ bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, copylen = sizeof(*item); newval_final = offset - 1; targetptr_final = &buffer->c[cpu].offset; - ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, - offset, destptr, srcptr, copylen, + ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr_final, offset, destptr, srcptr, copylen, newval_final, cpu); if (rseq_likely(!ret)) { result = true; @@ -1155,7 +1210,7 @@ static int set_signal_handler(void) } /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ -#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +#ifdef TEST_MEMBARRIER struct test_membarrier_thread_args { int stop; intptr_t percpu_list_ptr; @@ -1182,9 +1237,10 @@ void *test_membarrier_worker_thread(void *arg) int ret; do { - int cpu = rseq_cpu_start(); + int cpu = get_current_cpu_id(); - ret = rseq_offset_deref_addv(&args->percpu_list_ptr, + ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU, + &args->percpu_list_ptr, sizeof(struct percpu_list_entry) * cpu, 1, cpu); } while (rseq_unlikely(ret)); } @@ -1221,11 +1277,6 @@ void test_membarrier_free_percpu_list(struct percpu_list *list) free(list->c[i].head); } -static int sys_membarrier(int cmd, int flags, int cpu_id) -{ - return syscall(__NR_membarrier, cmd, flags, cpu_id); -} - /* * The manager thread swaps per-cpu lists that worker threads see, * and validates that there are no unexpected modifications. @@ -1264,8 +1315,7 @@ void *test_membarrier_manager_thread(void *arg) /* Make list_b "active". */ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); - if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, - MEMBARRIER_CMD_FLAG_CPU, cpu_a) && + if (rseq_membarrier_expedited(cpu_a) && errno != ENXIO /* missing CPU */) { perror("sys_membarrier"); abort(); @@ -1288,8 +1338,7 @@ void *test_membarrier_manager_thread(void *arg) /* Make list_a "active". */ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); - if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, - MEMBARRIER_CMD_FLAG_CPU, cpu_b) && + if (rseq_membarrier_expedited(cpu_b) && errno != ENXIO /* missing CPU*/) { perror("sys_membarrier"); abort(); @@ -1360,7 +1409,7 @@ void test_membarrier(void) abort(); } } -#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ +#else /* TEST_MEMBARRIER */ void test_membarrier(void) { fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " @@ -1517,7 +1566,7 @@ int main(int argc, char **argv) verbose = 1; break; case 'M': - opt_mb = 1; + opt_mo = RSEQ_MO_RELEASE; break; default: show_usage(argc, argv); @@ -1537,6 +1586,10 @@ int main(int argc, char **argv) if (!opt_disable_rseq && rseq_register_current_thread()) goto error; + if (!opt_disable_rseq && !rseq_validate_cpu_id()) { + fprintf(stderr, "Error: cpu id getter unavailable\n"); + goto error; + } switch (opt_test) { case 's': printf_verbose("spinlock\n"); diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h index a8c44d9af71f..fb4ec8a75dd4 100644 --- a/tools/testing/selftests/rseq/rseq-abi.h +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -146,6 +146,28 @@ struct rseq_abi { * this thread. */ __u32 flags; + + /* + * Restartable sequences node_id field. Updated by the kernel. Read by + * user-space with single-copy atomicity semantics. This field should + * only be read by the thread which registered this data structure. + * Aligned on 32-bit. Contains the current NUMA node ID. + */ + __u32 node_id; + + /* + * Restartable sequences mm_cid field. Updated by the kernel. Read by + * user-space with single-copy atomicity semantics. This field should + * only be read by the thread which registered this data structure. + * Aligned on 32-bit. Contains the current thread's concurrency ID + * (allocated uniquely within a memory map). + */ + __u32 mm_cid; + + /* + * Flexible array member at end of structure, after last feature field. + */ + char end[]; } __attribute__((aligned(4 * sizeof(__u64)))); #endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq-arm-bits.h b/tools/testing/selftests/rseq/rseq-arm-bits.h new file mode 100644 index 000000000000..4f03cb395462 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-arm-bits.h @@ -0,0 +1,505 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-arm-bits.h + * + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expectnot], r0\n\t" + "beq %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expectnot], r0\n\t" + "beq %l[error2]\n\t" +#endif + "str r0, %[load]\n\t" + "add r0, %[voffp]\n\t" + "ldr r0, [r0]\n\t" + /* final store */ + "str r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + "ldr r0, %[v]\n\t" + "add r0, %[count]\n\t" + /* final store */ + "str r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + "ldr r0, %[v2]\n\t" + "cmp %[expect2], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" + "ldr r0, %[v2]\n\t" + "cmp %[expect2], r0\n\t" + "bne %l[error3]\n\t" +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne %l[error2]\n\t" +#endif + /* try store */ + "str %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "dmb\n\t" /* full mb provides store-release */ +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint32_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + "str %[src], %[rseq_scratch0]\n\t" + "str %[dst], %[rseq_scratch1]\n\t" + "str %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + "ldr r0, %[v]\n\t" + "cmp %[expect], r0\n\t" + "bne 7f\n\t" +#endif + /* try memcpy */ + "cmp %[len], #0\n\t" \ + "beq 333f\n\t" \ + "222:\n\t" \ + "ldrb %%r0, [%[src]]\n\t" \ + "strb %%r0, [%[dst]]\n\t" \ + "adds %[src], #1\n\t" \ + "adds %[dst], #1\n\t" \ + "subs %[len], #1\n\t" \ + "bne 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "dmb\n\t" /* full mb provides store-release */ +#endif + /* final store */ + "str %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + "ldr %[len], %[rseq_scratch2]\n\t" + "ldr %[dst], %[rseq_scratch1]\n\t" + "ldr %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "r0", "memory", "cc" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 893a11eca9d5..8414fc3eac15 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -2,7 +2,7 @@ /* * rseq-arm.h * - * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */ /* @@ -79,10 +79,6 @@ do { \ RSEQ_WRITE_ONCE(*p, v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ post_commit_offset, abort_ip) \ ".pushsection __rseq_cs, \"aw\"\n\t" \ @@ -147,681 +143,34 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[error2]\n\t" -#endif - /* final store */ - "str %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expectnot], r0\n\t" - "beq %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - "ldr r0, %[v]\n\t" - "cmp %[expectnot], r0\n\t" - "beq %l[error2]\n\t" -#endif - "str r0, %[load]\n\t" - "add r0, %[voffp]\n\t" - "ldr r0, [r0]\n\t" - /* final store */ - "str r0, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expectnot] "r" (expectnot), - [voffp] "Ir" (voffp), - [load] "m" (*load) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) -#endif - "ldr r0, %[v]\n\t" - "add r0, %[count]\n\t" - /* final store */ - "str r0, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(4) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [count] "Ir" (count) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[error2]\n\t" -#endif - /* try store */ - "str %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - /* final store */ - "str %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +/* Per-cpu-id indexing. */ - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[error2]\n\t" -#endif - /* try store */ - "str %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - "dmb\n\t" /* full mb provides store-release */ - /* final store */ - "str %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-arm-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-arm-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) - "ldr r0, %[v2]\n\t" - "cmp %[expect2], r0\n\t" - "bne %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne %l[error2]\n\t" - "ldr r0, %[v2]\n\t" - "cmp %[expect2], r0\n\t" - "bne %l[error3]\n\t" -#endif - /* final store */ - "str %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* cmp2 input */ - [v2] "m" (*v2), - [expect2] "r" (expect2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("1st expected value comparison failed"); -error3: - rseq_after_asm_goto(); - rseq_bug("2nd expected value comparison failed"); -#endif -} +/* Per-mm-cid indexing. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uint32_t rseq_scratch[3]; +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-arm-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-arm-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - "str %[src], %[rseq_scratch0]\n\t" - "str %[dst], %[rseq_scratch1]\n\t" - "str %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne 7f\n\t" -#endif - /* try memcpy */ - "cmp %[len], #0\n\t" \ - "beq 333f\n\t" \ - "222:\n\t" \ - "ldrb %%r0, [%[src]]\n\t" \ - "strb %%r0, [%[dst]]\n\t" \ - "adds %[src], #1\n\t" \ - "adds %[dst], #1\n\t" \ - "subs %[len], #1\n\t" \ - "bne 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - /* final store */ - "str %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t" - "b 8f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - abort, 1b, 2b, 4f) - RSEQ_ASM_DEFINE_CMPFAIL(5, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - error2) -#endif - "8:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uint32_t rseq_scratch[3]; - - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - "str %[src], %[rseq_scratch0]\n\t" - "str %[dst], %[rseq_scratch1]\n\t" - "str %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) - "ldr r0, %[v]\n\t" - "cmp %[expect], r0\n\t" - "bne 7f\n\t" -#endif - /* try memcpy */ - "cmp %[len], #0\n\t" \ - "beq 333f\n\t" \ - "222:\n\t" \ - "ldrb %%r0, [%[src]]\n\t" \ - "strb %%r0, [%[dst]]\n\t" \ - "adds %[src], #1\n\t" \ - "adds %[dst], #1\n\t" \ - "subs %[len], #1\n\t" \ - "bne 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - "dmb\n\t" /* full mb provides store-release */ - /* final store */ - "str %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t" - "b 8f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - abort, 1b, 2b, 4f) - RSEQ_ASM_DEFINE_CMPFAIL(5, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - /* teardown */ - "ldr %[len], %[rseq_scratch2]\n\t" - "ldr %[dst], %[rseq_scratch1]\n\t" - "ldr %[src], %[rseq_scratch0]\n\t", - error2) -#endif - "8:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - RSEQ_INJECT_INPUT - : "r0", "memory", "cc" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +/* APIs which are not based on cpu ids. */ -#endif /* !RSEQ_SKIP_FASTPATH */ +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-arm-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-arm64-bits.h b/tools/testing/selftests/rseq/rseq-arm64-bits.h new file mode 100644 index 000000000000..cc7226b1efe1 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-arm64-bits.h @@ -0,0 +1,392 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-arm64-bits.h + * + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com> + */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [expectnot] "r" (expectnot), + [load] "Qo" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "Qo" (*v), + [expect] "r" (expect), + [v2] "Qo" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [v2] "Qo" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index cbe190a4d005..85b90977e7e6 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -2,7 +2,7 @@ /* * rseq-arm64.h * - * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com> */ @@ -85,10 +85,6 @@ do { \ } \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - #define RSEQ_ASM_TMP_REG32 "w15" #define RSEQ_ASM_TMP_REG "x15" #define RSEQ_ASM_TMP_REG_2 "x14" @@ -204,492 +200,34 @@ do { \ " cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \ "333:\n" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "Qo" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) -#endif - RSEQ_ASM_OP_R_LOAD(v) - RSEQ_ASM_OP_R_STORE(load) - RSEQ_ASM_OP_R_LOAD_OFF(voffp) - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "Qo" (*v), - [expectnot] "r" (expectnot), - [load] "Qo" (*load), - [voffp] "r" (voffp) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +/* Per-cpu-id indexing. */ -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-arm64-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) -#endif - RSEQ_ASM_OP_R_LOAD(v) - RSEQ_ASM_OP_R_ADD(count) - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "Qo" (*v), - [count] "r" (count) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - RSEQ_ASM_OP_STORE(newv2, v2) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "Qo" (*v), - [newv] "r" (newv), - [v2] "Qo" (*v2), - [newv2] "r" (newv2) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - RSEQ_ASM_OP_STORE(newv2, v2) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "Qo" (*v), - [newv] "r" (newv), - [v2] "Qo" (*v2), - [newv2] "r" (newv2) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) - RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) - RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) -#endif - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "Qo" (*v), - [expect] "r" (expect), - [v2] "Qo" (*v2), - [expect2] "r" (expect2), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -error3: - rseq_after_asm_goto(); - rseq_bug("2nd expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "Qo" (*v), - [newv] "r" (newv), - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2]) -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "Qo" (*v), - [newv] "r" (newv), - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-arm64-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-arm64-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-arm64-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID + +/* APIs which are not based on cpu ids. */ -#endif /* !RSEQ_SKIP_FASTPATH */ +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-arm64-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-bits-reset.h b/tools/testing/selftests/rseq/rseq-bits-reset.h new file mode 100644 index 000000000000..e8655089f9cb --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-bits-reset.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-bits-reset.h + * + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#undef RSEQ_TEMPLATE_IDENTIFIER +#undef RSEQ_TEMPLATE_CPU_ID_FIELD +#undef RSEQ_TEMPLATE_CPU_ID_OFFSET +#undef RSEQ_TEMPLATE_SUFFIX diff --git a/tools/testing/selftests/rseq/rseq-bits-template.h b/tools/testing/selftests/rseq/rseq-bits-template.h new file mode 100644 index 000000000000..65698d6a6cf9 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-bits-template.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-bits-template.h + * + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#ifdef RSEQ_TEMPLATE_CPU_ID +# define RSEQ_TEMPLATE_CPU_ID_OFFSET RSEQ_CPU_ID_OFFSET +# define RSEQ_TEMPLATE_CPU_ID_FIELD cpu_id +# ifdef RSEQ_TEMPLATE_MO_RELEASE +# define RSEQ_TEMPLATE_SUFFIX _release_cpu_id +# elif defined (RSEQ_TEMPLATE_MO_RELAXED) +# define RSEQ_TEMPLATE_SUFFIX _relaxed_cpu_id +# else +# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead." +# endif +#elif defined(RSEQ_TEMPLATE_MM_CID) +# define RSEQ_TEMPLATE_CPU_ID_OFFSET RSEQ_MM_CID_OFFSET +# define RSEQ_TEMPLATE_CPU_ID_FIELD mm_cid +# ifdef RSEQ_TEMPLATE_MO_RELEASE +# define RSEQ_TEMPLATE_SUFFIX _release_mm_cid +# elif defined (RSEQ_TEMPLATE_MO_RELAXED) +# define RSEQ_TEMPLATE_SUFFIX _relaxed_mm_cid +# else +# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead." +# endif +#elif defined (RSEQ_TEMPLATE_CPU_ID_NONE) +# ifdef RSEQ_TEMPLATE_MO_RELEASE +# define RSEQ_TEMPLATE_SUFFIX _release +# elif defined (RSEQ_TEMPLATE_MO_RELAXED) +# define RSEQ_TEMPLATE_SUFFIX _relaxed +# else +# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead." +# endif +#else +# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead." +#endif + +#define RSEQ_TEMPLATE_IDENTIFIER(x) RSEQ_COMBINE_TOKENS(x, RSEQ_TEMPLATE_SUFFIX) + diff --git a/tools/testing/selftests/rseq/rseq-mips-bits.h b/tools/testing/selftests/rseq/rseq-mips-bits.h new file mode 100644 index 000000000000..6c48af4d0944 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-mips-bits.h @@ -0,0 +1,462 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Author: Paul Burton <paul.burton@mips.com> + * (C) Copyright 2018 MIPS Tech LLC + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[error2]\n\t" +#endif + LONG_S " $4, %[load]\n\t" + LONG_ADDI " $4, %[voffp]\n\t" + LONG_L " $4, 0($4)\n\t" + /* final store */ + LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + LONG_L " $4, %[v]\n\t" + LONG_ADDI " $4, %[count]\n\t" + /* final store */ + LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" + LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[error3]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "sync\n\t" /* full sync provides store-release */ +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uintptr_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 7f\n\t" +#endif + /* try memcpy */ + "beqz %[len], 333f\n\t" \ + "222:\n\t" \ + "lb $4, 0(%[src])\n\t" \ + "sb $4, 0(%[dst])\n\t" \ + LONG_ADDI " %[src], 1\n\t" \ + LONG_ADDI " %[dst], 1\n\t" \ + LONG_ADDI " %[len], -1\n\t" \ + "bnez %[len], 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "sync\n\t" /* full sync provides store-release */ +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index 878739fae2fd..50b950cf9585 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -2,9 +2,7 @@ /* * Author: Paul Burton <paul.burton@mips.com> * (C) Copyright 2018 MIPS Tech LLC - * - * Based on rseq-arm.h: - * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */ /* @@ -60,10 +58,6 @@ do { \ RSEQ_WRITE_ONCE(*p, v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - #if _MIPS_SZLONG == 64 # define LONG ".dword" # define LONG_LA "dla" @@ -154,624 +148,34 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[error2]\n\t" -#endif - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "beq $4, %[expectnot], %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_L " $4, %[v]\n\t" - "beq $4, %[expectnot], %l[error2]\n\t" -#endif - LONG_S " $4, %[load]\n\t" - LONG_ADDI " $4, %[voffp]\n\t" - LONG_L " $4, 0($4)\n\t" - /* final store */ - LONG_S " $4, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expectnot] "r" (expectnot), - [voffp] "Ir" (voffp), - [load] "m" (*load) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} +/* Per-cpu-id indexing. */ -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-mips-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) -#endif - LONG_L " $4, %[v]\n\t" - LONG_ADDI " $4, %[count]\n\t" - /* final store */ - LONG_S " $4, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(4) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [count] "Ir" (count) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-mips-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +/* Per-mm-cid indexing. */ - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[error2]\n\t" -#endif - /* try store */ - LONG_S " %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-mips-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-mips-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[error2]\n\t" -#endif - /* try store */ - LONG_S " %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - "sync\n\t" /* full sync provides store-release */ - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) - LONG_L " $4, %[v2]\n\t" - "bne $4, %[expect2], %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], %l[error2]\n\t" - LONG_L " $4, %[v2]\n\t" - "bne $4, %[expect2], %l[error3]\n\t" -#endif - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - "b 5f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) - "5:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* cmp2 input */ - [v2] "m" (*v2), - [expect2] "r" (expect2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("1st expected value comparison failed"); -error3: - rseq_bug("2nd expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uintptr_t rseq_scratch[3]; - - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - LONG_S " %[src], %[rseq_scratch0]\n\t" - LONG_S " %[dst], %[rseq_scratch1]\n\t" - LONG_S " %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], 7f\n\t" -#endif - /* try memcpy */ - "beqz %[len], 333f\n\t" \ - "222:\n\t" \ - "lb $4, 0(%[src])\n\t" \ - "sb $4, 0(%[dst])\n\t" \ - LONG_ADDI " %[src], 1\n\t" \ - LONG_ADDI " %[dst], 1\n\t" \ - LONG_ADDI " %[len], -1\n\t" \ - "bnez %[len], 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t" - "b 8f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - abort, 1b, 2b, 4f) - RSEQ_ASM_DEFINE_CMPFAIL(5, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - error2) -#endif - "8:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uintptr_t rseq_scratch[3]; - - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - LONG_S " %[src], %[rseq_scratch0]\n\t" - LONG_S " %[dst], %[rseq_scratch1]\n\t" - LONG_S " %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) - LONG_L " $4, %[v]\n\t" - "bne $4, %[expect], 7f\n\t" -#endif - /* try memcpy */ - "beqz %[len], 333f\n\t" \ - "222:\n\t" \ - "lb $4, 0(%[src])\n\t" \ - "sb $4, 0(%[dst])\n\t" \ - LONG_ADDI " %[src], 1\n\t" \ - LONG_ADDI " %[dst], 1\n\t" \ - LONG_ADDI " %[len], -1\n\t" \ - "bnez %[len], 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - "sync\n\t" /* full sync provides store-release */ - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t" - "b 8f\n\t" - RSEQ_ASM_DEFINE_ABORT(3, 4, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - abort, 1b, 2b, 4f) - RSEQ_ASM_DEFINE_CMPFAIL(5, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - error2) -#endif - "8:\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - RSEQ_INJECT_INPUT - : "$4", "memory" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} +/* APIs which are not based on cpu ids. */ -#endif /* !RSEQ_SKIP_FASTPATH */ +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-mips-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-ppc-bits.h b/tools/testing/selftests/rseq/rseq-ppc-bits.h new file mode 100644 index 000000000000..98e69eae1e62 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-ppc-bits.h @@ -0,0 +1,454 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-ppc-bits.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com> + */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v not equal to @expectnot */ + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v not equal to @expectnot */ + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) +#endif + /* load the value of @v */ + RSEQ_ASM_OP_R_LOAD(v) + /* store it in @load */ + RSEQ_ASM_OP_R_STORE(load) + /* dereference voffp(v) */ + RSEQ_ASM_OP_R_LOADX(voffp) + /* final store the value at voffp(v) */ + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "b" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + /* load the value of @v */ + RSEQ_ASM_OP_R_LOAD(v) + /* add @count to it */ + RSEQ_ASM_OP_R_ADD(count) + /* final store */ + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) + /* cmp @v2 equal to @expct2 */ + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) + /* cmp @v2 equal to @expct2 */ + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* try store */ + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + /* for 'release' */ + "lwsync\n\t" +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* setup for mempcy */ + "mr %%r19, %[len]\n\t" + "mr %%r20, %[src]\n\t" + "mr %%r21, %[dst]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + /* cmp cpuid */ + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + /* cmp @v equal to @expect */ + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + /* try memcpy */ + RSEQ_ASM_OP_R_MEMCPY() + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + /* for 'release' */ + "lwsync\n\t" +#endif + /* final store */ + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) + RSEQ_INJECT_ASM(6) + /* teardown */ + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", "cc", "r17", "r18", "r19", "r20", "r21" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index bab8e0b9fb11..dc9190facee9 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -2,7 +2,7 @@ /* * rseq-ppc.h * - * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com> */ @@ -36,10 +36,6 @@ do { \ RSEQ_WRITE_ONCE(*p, v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - /* * The __rseq_cs_ptr_array and __rseq_cs sections can be used by debuggers to * better handle single-stepping through the restartable critical sections. @@ -209,583 +205,34 @@ do { \ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - /* final store */ - RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v not equal to @expectnot */ - RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v not equal to @expectnot */ - RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) -#endif - /* load the value of @v */ - RSEQ_ASM_OP_R_LOAD(v) - /* store it in @load */ - RSEQ_ASM_OP_R_STORE(load) - /* dereference voffp(v) */ - RSEQ_ASM_OP_R_LOADX(voffp) - /* final store the value at voffp(v) */ - RSEQ_ASM_OP_R_FINAL_STORE(v, 2) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expectnot] "r" (expectnot), - [voffp] "b" (voffp), - [load] "m" (*load) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) -#endif - /* load the value of @v */ - RSEQ_ASM_OP_R_LOAD(v) - /* add @count to it */ - RSEQ_ASM_OP_R_ADD(count) - /* final store */ - RSEQ_ASM_OP_R_FINAL_STORE(v, 2) - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [count] "r" (count) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - /* try store */ - RSEQ_ASM_OP_STORE(newv2, v2) - RSEQ_INJECT_ASM(5) - /* final store */ - RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - /* try store */ - RSEQ_ASM_OP_STORE(newv2, v2) - RSEQ_INJECT_ASM(5) - /* for 'release' */ - "lwsync\n\t" - /* final store */ - RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) - /* cmp @v2 equal to @expct2 */ - RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) - /* cmp @v2 equal to @expct2 */ - RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) -#endif - /* final store */ - RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* cmp2 input */ - [v2] "m" (*v2), - [expect2] "r" (expect2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("1st expected value comparison failed"); -error3: - rseq_after_asm_goto(); - rseq_bug("2nd expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* setup for mempcy */ - "mr %%r19, %[len]\n\t" - "mr %%r20, %[src]\n\t" - "mr %%r21, %[dst]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - /* try memcpy */ - RSEQ_ASM_OP_R_MEMCPY() - RSEQ_INJECT_ASM(5) - /* final store */ - RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) - RSEQ_INJECT_ASM(6) - /* teardown */ - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17", "r18", "r19", "r20", "r21" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* setup for mempcy */ - "mr %%r19, %[len]\n\t" - "mr %%r20, %[src]\n\t" - "mr %%r21, %[dst]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - /* cmp cpuid */ - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - /* cmp @v equal to @expect */ - RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) -#endif - /* try memcpy */ - RSEQ_ASM_OP_R_MEMCPY() - RSEQ_INJECT_ASM(5) - /* for 'release' */ - "lwsync\n\t" - /* final store */ - RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) - RSEQ_INJECT_ASM(6) - /* teardown */ - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len) - RSEQ_INJECT_INPUT - : "memory", "cc", "r17", "r18", "r19", "r20", "r21" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -#endif /* !RSEQ_SKIP_FASTPATH */ +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-ppc-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-ppc-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-ppc-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-ppc-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID + +/* APIs which are not based on cpu ids. */ + +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-ppc-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h new file mode 100644 index 000000000000..de31a0143139 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h @@ -0,0 +1,410 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expectnot] "r" (expectnot), + [load] "m" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]") + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]") +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [v2] "m" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [ptr] "r" (ptr), + [off] "er" (off), + [inc] "er" (inc) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 3a391c9bf468..17932a79e066 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -49,10 +49,6 @@ do { \ RSEQ_WRITE_ONCE(*(p), v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ post_commit_offset, abort_ip) \ ".pushsection __rseq_cs, \"aw\"\n" \ @@ -169,509 +165,34 @@ do { \ RSEQ_ASM_OP_R_ADD(inc) \ __rseq_str(post_commit_label) ":\n" -static inline __always_inline -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +/* Per-cpu-id indexing. */ - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") -#endif - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __always_inline -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]") -#endif - RSEQ_ASM_OP_R_LOAD(v) - RSEQ_ASM_OP_R_STORE(load) - RSEQ_ASM_OP_R_LOAD_OFF(voffp) - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expectnot] "r" (expectnot), - [load] "m" (*load), - [voffp] "r" (voffp) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-riscv-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED -static inline __always_inline -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-riscv-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") -#endif - RSEQ_ASM_OP_R_LOAD(v) - RSEQ_ASM_OP_R_ADD(count) - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [count] "r" (count) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -#endif -} - -static inline __always_inline -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") -#endif - RSEQ_ASM_OP_STORE(newv2, v2) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "m" (*v), - [newv] "r" (newv), - [v2] "m" (*v2), - [newv2] "r" (newv2) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __always_inline -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") -#endif - RSEQ_ASM_OP_STORE(newv2, v2) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "m" (*v), - [newv] "r" (newv), - [v2] "m" (*v2), - [newv2] "r" (newv2) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __always_inline -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) - RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]") - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") - RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]") -#endif - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expect] "r" (expect), - [v2] "m" (*v2), - [expect2] "r" (expect2), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -error3: - rseq_bug("2nd expected value comparison failed"); -#endif -} - -static inline __always_inline -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") -#endif - RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "m" (*v), - [newv] "r" (newv), - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, - RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __always_inline -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") - RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") -#endif - RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) - RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [expect] "r" (expect), - [v] "m" (*v), - [newv] "r" (newv), - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, - RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -cmpfail: - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -error2: - rseq_bug("expected value comparison failed"); -#endif -} +/* Per-mm-cid indexing. */ -#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-riscv-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED -/* - * pval = *(ptr+off) - * *pval += inc; - */ -static inline __always_inline -int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") -#endif - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") -#endif - RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [ptr] "r" (ptr), - [off] "er" (off), - [inc] "er" (inc) - RSEQ_INJECT_INPUT - : "memory", RSEQ_ASM_TMP_REG_1 - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-riscv-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID + +/* APIs which are not based on cpu ids. */ -#endif /* !RSEQ_SKIP_FASTPATH */ +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-riscv-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-s390-bits.h b/tools/testing/selftests/rseq/rseq-s390-bits.h new file mode 100644 index 000000000000..0cf17d9f170a --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-s390-bits.h @@ -0,0 +1,474 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " %%r1, %[v]\n\t" + LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " %%r1, %[v]\n\t" + LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + LONG_S " %%r1, %[load]\n\t" + LONG_ADD_R " %%r1, %[voffp]\n\t" + LONG_L " %%r1, 0(%%r1)\n\t" + /* final store */ + LONG_S " %%r1, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "r" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0", "r1" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + LONG_L " %%r0, %[v]\n\t" + LONG_ADD_R " %%r0, %[count]\n\t" + /* final store */ + LONG_S " %%r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" + LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[error3]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint64_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_CMP " %[expect], %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + LONG_LT_R " %[len], %[len]\n\t" + "jz 333f\n\t" + "222:\n\t" + "ic %%r0,0(%[src])\n\t" + "stc %%r0,0(%[dst])\n\t" + LONG_ADDI " %[src], 1\n\t" + LONG_ADDI " %[dst], 1\n\t" + LONG_ADDI " %[len], -1\n\t" + "jnz 222b\n\t" + "333:\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 4e6dc5f0cb42..46c92598acc7 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -28,10 +28,6 @@ do { \ RSEQ_WRITE_ONCE(*p, v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - #ifdef __s390x__ #define LONG_L "lg" @@ -134,477 +130,34 @@ do { \ "jg %l[" __rseq_str(cmpfail_label) "]\n\t" \ ".popsection\n\t" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +/* Per-cpu-id indexing. */ - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_CMP " %[expect], %[v]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_CMP " %[expect], %[v]\n\t" - "jnz %l[error2]\n\t" -#endif - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r0" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-s390-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED -/* - * Compare @v against @expectnot. When it does _not_ match, load @v - * into @load, and store the content of *@v + voffp into @v. - */ -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_L " %%r1, %[v]\n\t" - LONG_CMP_R " %%r1, %[expectnot]\n\t" - "je %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_L " %%r1, %[v]\n\t" - LONG_CMP_R " %%r1, %[expectnot]\n\t" - "je %l[error2]\n\t" -#endif - LONG_S " %%r1, %[load]\n\t" - LONG_ADD_R " %%r1, %[voffp]\n\t" - LONG_L " %%r1, 0(%%r1)\n\t" - /* final store */ - LONG_S " %%r1, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expectnot] "r" (expectnot), - [voffp] "r" (voffp), - [load] "m" (*load) - RSEQ_INJECT_INPUT - : "memory", "cc", "r0", "r1" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-s390-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) +/* Per-mm-cid indexing. */ - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) -#endif - LONG_L " %%r0, %[v]\n\t" - LONG_ADD_R " %%r0, %[count]\n\t" - /* final store */ - LONG_S " %%r0, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [count] "r" (count) - RSEQ_INJECT_INPUT - : "memory", "cc", "r0" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-s390-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-s390-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_CMP " %[expect], %[v]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_CMP " %[expect], %[v]\n\t" - "jnz %l[error2]\n\t" -#endif - /* try store */ - LONG_S " %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r0" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -/* s390 is TSO. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu); -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_CMP " %[expect], %[v]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) - LONG_CMP " %[expect2], %[v2]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) - LONG_CMP " %[expect], %[v]\n\t" - "jnz %l[error2]\n\t" - LONG_CMP " %[expect2], %[v2]\n\t" - "jnz %l[error3]\n\t" -#endif - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* cmp2 input */ - [v2] "m" (*v2), - [expect2] "r" (expect2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - RSEQ_INJECT_INPUT - : "memory", "cc", "r0" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("1st expected value comparison failed"); -error3: - rseq_after_asm_goto(); - rseq_bug("2nd expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uint64_t rseq_scratch[3]; - - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - LONG_S " %[src], %[rseq_scratch0]\n\t" - LONG_S " %[dst], %[rseq_scratch1]\n\t" - LONG_S " %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) - RSEQ_INJECT_ASM(3) - LONG_CMP " %[expect], %[v]\n\t" - "jnz 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) - LONG_CMP " %[expect], %[v]\n\t" - "jnz 7f\n\t" -#endif - /* try memcpy */ - LONG_LT_R " %[len], %[len]\n\t" - "jz 333f\n\t" - "222:\n\t" - "ic %%r0,0(%[src])\n\t" - "stc %%r0,0(%[dst])\n\t" - LONG_ADDI " %[src], 1\n\t" - LONG_ADDI " %[dst], 1\n\t" - LONG_ADDI " %[len], -1\n\t" - "jnz 222b\n\t" - "333:\n\t" - RSEQ_INJECT_ASM(5) - /* final store */ - LONG_S " %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t" - RSEQ_ASM_DEFINE_ABORT(4, - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - abort) - RSEQ_ASM_DEFINE_CMPFAIL(5, - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - LONG_L " %[len], %[rseq_scratch2]\n\t" - LONG_L " %[dst], %[rseq_scratch1]\n\t" - LONG_L " %[src], %[rseq_scratch0]\n\t", - error2) -#endif - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_get_abi()->cpu_id), - [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - RSEQ_INJECT_INPUT - : "memory", "cc", "r0" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +/* APIs which are not based on cpu ids. */ -/* s390 is TSO. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len, - newv, cpu); -} -#endif /* !RSEQ_SKIP_FASTPATH */ +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-s390-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h deleted file mode 100644 index 7b53dac1fcdd..000000000000 --- a/tools/testing/selftests/rseq/rseq-skip.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * rseq-skip.h - * - * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> - */ - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - return -1; -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - return -1; -} diff --git a/tools/testing/selftests/rseq/rseq-x86-bits.h b/tools/testing/selftests/rseq/rseq-x86-bits.h new file mode 100644 index 000000000000..8a9431eec467 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-x86-bits.h @@ -0,0 +1,993 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-x86-bits.h + * + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + */ + +#include "rseq-bits-template.h" + +#ifdef __x86_64__ + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "cmpq %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "movq %[v], %%rbx\n\t" + "cmpq %%rbx, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "movq %[v], %%rbx\n\t" + "cmpq %%rbx, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + "movq %%rbx, %[load]\n\t" + "addq %[voffp], %%rbx\n\t" + "movq (%%rbx), %%rbx\n\t" + /* final store */ + "movq %%rbx, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "er" (voffp), + [load] "m" (*load) + : "memory", "cc", "rax", "rbx" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) +#endif + /* final store */ + "addq %[count], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [v] "m" (*v), + [count] "er" (count) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, long off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) +#endif + /* get p+v */ + "movq %[ptr], %%rbx\n\t" + "addq %[off], %%rbx\n\t" + /* get pv */ + "movq (%%rbx), %%rcx\n\t" + /* *pv += inc */ + "addq %[inc], (%%rcx)\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [ptr] "m" (*ptr), + [off] "er" (off), + [inc] "er" (inc) + : "memory", "cc", "rax", "rbx", "rcx" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + "cmpq %[v2], %[expect2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "cmpq %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" + "cmpq %[v2], %[expect2]\n\t" + "jnz %l[error3]\n\t" +#endif + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "cmpq %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + "movq %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint64_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + "movq %[src], %[rseq_scratch0]\n\t" + "movq %[dst], %[rseq_scratch1]\n\t" + "movq %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "cmpq %[v], %[expect]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 6f) + "cmpq %[v], %[expect]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + "test %[len], %[len]\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[src]), %%al\n\t" \ + "movb %%al, (%[dst])\n\t" \ + "inc %[src]\n\t" \ + "inc %[dst]\n\t" \ + "dec %[len]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + /* final store */ + "movq %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + "movq %[rseq_scratch2], %[len]\n\t" + "movq %[rseq_scratch1], %[dst]\n\t" + "movq %[rseq_scratch0], %[src]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + : "memory", "cc", "rax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#elif defined(__i386__) + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "cmpl %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "cmpl %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + "movl %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + long voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "movl %[v], %%ebx\n\t" + "cmpl %%ebx, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "movl %[v], %%ebx\n\t" + "cmpl %%ebx, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + "movl %%ebx, %[load]\n\t" + "addl %[voffp], %%ebx\n\t" + "movl (%%ebx), %%ebx\n\t" + /* final store */ + "movl %%ebx, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "ir" (voffp), + [load] "m" (*load) + : "memory", "cc", "eax", "ebx" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) +#endif + /* final store */ + "addl %[count], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [v] "m" (*v), + [count] "ir" (count) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "cmpl %[v], %[expect]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + "cmpl %[expect2], %[v2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "cmpl %[v], %[expect]\n\t" + "jnz %l[error2]\n\t" + "cmpl %[expect2], %[v2]\n\t" + "jnz %l[error3]\n\t" +#endif + "movl %[newv], %%eax\n\t" + /* final store */ + "movl %%eax, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "m" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("1st expected value comparison failed"); +error3: + rseq_after_asm_goto(); + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "movl %[expect], %%eax\n\t" + "cmpl %[v], %%eax\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) + "movl %[expect], %%eax\n\t" + "cmpl %[v], %%eax\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + "movl %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "lock; addl $0,-128(%%esp)\n\t" +#endif + /* final store */ + "movl %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "m" (expect), + [newv] "r" (newv) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif + +} + +/* TODO: implement a faster memcpy. */ +static inline __attribute__((always_inline)) +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint32_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) +#endif + "movl %[src], %[rseq_scratch0]\n\t" + "movl %[dst], %[rseq_scratch1]\n\t" + "movl %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f) + RSEQ_INJECT_ASM(3) + "movl %[expect], %%eax\n\t" + "cmpl %%eax, %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 6f) + "movl %[expect], %%eax\n\t" + "cmpl %%eax, %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + "test %[len], %[len]\n\t" \ + "jz 333f\n\t" \ + "222:\n\t" \ + "movb (%[src]), %%al\n\t" \ + "movb %%al, (%[dst])\n\t" \ + "inc %[src]\n\t" \ + "inc %[dst]\n\t" \ + "dec %[len]\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + "lock; addl $0,-128(%%esp)\n\t" +#endif + "movl %[newv], %%eax\n\t" + /* final store */ + "movl %%eax, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + "movl %[rseq_scratch2], %[len]\n\t" + "movl %[rseq_scratch1], %[dst]\n\t" + "movl %[rseq_scratch0], %[src]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_offset] "r" (rseq_offset), + /* final store input */ + [v] "m" (*v), + [expect] "m" (expect), + [newv] "m" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + : "memory", "cc", "eax" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_after_asm_goto(); + return 0; +abort: + rseq_after_asm_goto(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_after_asm_goto(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_after_asm_goto(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_after_asm_goto(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#endif + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index bd01dc41ca13..fb65ef54b0fb 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -2,9 +2,13 @@ /* * rseq-x86.h * - * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */ +#ifndef RSEQ_H +#error "Never use <rseq-x86.h> directly; include <rseq.h> instead." +#endif + #include <stdint.h> /* @@ -22,9 +26,10 @@ * address through a "r" input operand. */ -/* Offset of cpu_id and rseq_cs fields in struct rseq. */ +/* Offset of cpu_id, rseq_cs, and mm_cid fields in struct rseq. */ #define RSEQ_CPU_ID_OFFSET 4 #define RSEQ_CS_OFFSET 8 +#define RSEQ_MM_CID_OFFSET 24 #ifdef __x86_64__ @@ -50,10 +55,6 @@ do { \ RSEQ_WRITE_ONCE(*p, v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ ".pushsection __rseq_cs, \"aw\"\n\t" \ @@ -112,525 +113,6 @@ do { \ "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \ ".popsection\n\t" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpq %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "cmpq %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" -#endif - /* final store */ - "movq %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - : "memory", "cc", "rax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -/* - * Compare @v against @expectnot. When it does _not_ match, load @v - * into @load, and store the content of *@v + voffp into @v. - */ -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "movq %[v], %%rbx\n\t" - "cmpq %%rbx, %[expectnot]\n\t" - "je %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "movq %[v], %%rbx\n\t" - "cmpq %%rbx, %[expectnot]\n\t" - "je %l[error2]\n\t" -#endif - "movq %%rbx, %[load]\n\t" - "addq %[voffp], %%rbx\n\t" - "movq (%%rbx), %%rbx\n\t" - /* final store */ - "movq %%rbx, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [expectnot] "r" (expectnot), - [voffp] "er" (voffp), - [load] "m" (*load) - : "memory", "cc", "rax", "rbx" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) -#endif - /* final store */ - "addq %[count], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [count] "er" (count) - : "memory", "cc", "rax" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -#endif -} - -#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV - -/* - * pval = *(ptr+off) - * *pval += inc; - */ -static inline __attribute__((always_inline)) -int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) -#endif - /* get p+v */ - "movq %[ptr], %%rbx\n\t" - "addq %[off], %%rbx\n\t" - /* get pv */ - "movq (%%rbx), %%rcx\n\t" - /* *pv += inc */ - "addq %[inc], (%%rcx)\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [ptr] "m" (*ptr), - [off] "er" (off), - [inc] "er" (inc) - : "memory", "cc", "rax", "rbx", "rcx" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - return 0; -abort: - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_bug("cpu_id comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpq %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "cmpq %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" -#endif - /* try store */ - "movq %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - /* final store */ - "movq %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - : "memory", "cc", "rax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -/* x86-64 is TSO. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu); -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpq %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) - "cmpq %[v2], %[expect2]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "cmpq %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" - "cmpq %[v2], %[expect2]\n\t" - "jnz %l[error3]\n\t" -#endif - /* final store */ - "movq %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* cmp2 input */ - [v2] "m" (*v2), - [expect2] "r" (expect2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - : "memory", "cc", "rax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("1st expected value comparison failed"); -error3: - rseq_after_asm_goto(); - rseq_bug("2nd expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uint64_t rseq_scratch[3]; - - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - "movq %[src], %[rseq_scratch0]\n\t" - "movq %[dst], %[rseq_scratch1]\n\t" - "movq %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpq %[v], %[expect]\n\t" - "jnz 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) - "cmpq %[v], %[expect]\n\t" - "jnz 7f\n\t" -#endif - /* try memcpy */ - "test %[len], %[len]\n\t" \ - "jz 333f\n\t" \ - "222:\n\t" \ - "movb (%[src]), %%al\n\t" \ - "movb %%al, (%[dst])\n\t" \ - "inc %[src]\n\t" \ - "inc %[dst]\n\t" \ - "dec %[len]\n\t" \ - "jnz 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - /* final store */ - "movq %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - "movq %[rseq_scratch2], %[len]\n\t" - "movq %[rseq_scratch1], %[dst]\n\t" - "movq %[rseq_scratch0], %[src]\n\t" - RSEQ_ASM_DEFINE_ABORT(4, - "movq %[rseq_scratch2], %[len]\n\t" - "movq %[rseq_scratch1], %[dst]\n\t" - "movq %[rseq_scratch0], %[src]\n\t", - abort) - RSEQ_ASM_DEFINE_CMPFAIL(5, - "movq %[rseq_scratch2], %[len]\n\t" - "movq %[rseq_scratch1], %[dst]\n\t" - "movq %[rseq_scratch0], %[src]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - "movq %[rseq_scratch2], %[len]\n\t" - "movq %[rseq_scratch1], %[dst]\n\t" - "movq %[rseq_scratch0], %[src]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - "movq %[rseq_scratch2], %[len]\n\t" - "movq %[rseq_scratch1], %[dst]\n\t" - "movq %[rseq_scratch0], %[src]\n\t", - error2) -#endif - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - : "memory", "cc", "rax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -/* x86-64 is TSO. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len, - newv, cpu); -} - -#endif /* !RSEQ_SKIP_FASTPATH */ - #elif defined(__i386__) #define RSEQ_ASM_TP_SEGMENT %%gs @@ -657,10 +139,6 @@ do { \ RSEQ_WRITE_ONCE(*p, v); \ } while (0) -#ifdef RSEQ_SKIP_FASTPATH -#include "rseq-skip.h" -#else /* !RSEQ_SKIP_FASTPATH */ - /* * Use eax as scratch register and take memory operands as input to * lessen register pressure. Especially needed when compiling in O0. @@ -721,645 +199,36 @@ do { \ "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \ ".popsection\n\t" -static inline __attribute__((always_inline)) -int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpl %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "cmpl %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" -#endif - /* final store */ - "movl %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -/* - * Compare @v against @expectnot. When it does _not_ match, load @v - * into @load, and store the content of *@v + voffp into @v. - */ -static inline __attribute__((always_inline)) -int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - long voffp, intptr_t *load, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "movl %[v], %%ebx\n\t" - "cmpl %%ebx, %[expectnot]\n\t" - "je %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "movl %[v], %%ebx\n\t" - "cmpl %%ebx, %[expectnot]\n\t" - "je %l[error2]\n\t" -#endif - "movl %%ebx, %[load]\n\t" - "addl %[voffp], %%ebx\n\t" - "movl (%%ebx), %%ebx\n\t" - /* final store */ - "movl %%ebx, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [expectnot] "r" (expectnot), - [voffp] "ir" (voffp), - [load] "m" (*load) - : "memory", "cc", "eax", "ebx" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_addv(intptr_t *v, intptr_t count, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) -#endif - /* final store */ - "addl %[count], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [count] "ir" (count) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort -#ifdef RSEQ_COMPARE_TWICE - , error1 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpl %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "cmpl %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" -#endif - /* try store */ - "movl %[newv2], %%eax\n\t" - "movl %%eax, %[v2]\n\t" - RSEQ_INJECT_ASM(5) - /* final store */ - "movl %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* try store input */ - [v2] "m" (*v2), - [newv2] "m" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "r" (newv) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t newv2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "movl %[expect], %%eax\n\t" - "cmpl %[v], %%eax\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "movl %[expect], %%eax\n\t" - "cmpl %[v], %%eax\n\t" - "jnz %l[error2]\n\t" -#endif - /* try store */ - "movl %[newv2], %[v2]\n\t" - RSEQ_INJECT_ASM(5) - "lock; addl $0,-128(%%esp)\n\t" - /* final store */ - "movl %[newv], %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* try store input */ - [v2] "m" (*v2), - [newv2] "r" (newv2), - /* final store input */ - [v] "m" (*v), - [expect] "m" (expect), - [newv] "r" (newv) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif - -} - -static inline __attribute__((always_inline)) -int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, - intptr_t *v2, intptr_t expect2, - intptr_t newv, int cpu) -{ - RSEQ_INJECT_C(9) - - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) -#endif - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "cmpl %[v], %[expect]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(4) - "cmpl %[expect2], %[v2]\n\t" - "jnz %l[cmpfail]\n\t" - RSEQ_INJECT_ASM(5) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) - "cmpl %[v], %[expect]\n\t" - "jnz %l[error2]\n\t" - "cmpl %[expect2], %[v2]\n\t" - "jnz %l[error3]\n\t" #endif - "movl %[newv], %%eax\n\t" - /* final store */ - "movl %%eax, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, "", abort) - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* cmp2 input */ - [v2] "m" (*v2), - [expect2] "r" (expect2), - /* final store input */ - [v] "m" (*v), - [expect] "r" (expect), - [newv] "m" (newv) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2, error3 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("1st expected value comparison failed"); -error3: - rseq_after_asm_goto(); - rseq_bug("2nd expected value comparison failed"); -#endif -} -/* TODO: implement a faster memcpy. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uint32_t rseq_scratch[3]; +/* Per-cpu-id indexing. */ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-x86-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - "movl %[src], %[rseq_scratch0]\n\t" - "movl %[dst], %[rseq_scratch1]\n\t" - "movl %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "movl %[expect], %%eax\n\t" - "cmpl %%eax, %[v]\n\t" - "jnz 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) - "movl %[expect], %%eax\n\t" - "cmpl %%eax, %[v]\n\t" - "jnz 7f\n\t" -#endif - /* try memcpy */ - "test %[len], %[len]\n\t" \ - "jz 333f\n\t" \ - "222:\n\t" \ - "movb (%[src]), %%al\n\t" \ - "movb %%al, (%[dst])\n\t" \ - "inc %[src]\n\t" \ - "inc %[dst]\n\t" \ - "dec %[len]\n\t" \ - "jnz 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - "movl %[newv], %%eax\n\t" - /* final store */ - "movl %%eax, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t" - RSEQ_ASM_DEFINE_ABORT(4, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - abort) - RSEQ_ASM_DEFINE_CMPFAIL(5, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - error2) -#endif - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [expect] "m" (expect), - [newv] "m" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-x86-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID -/* TODO: implement a faster memcpy. */ -static inline __attribute__((always_inline)) -int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, - void *dst, void *src, size_t len, - intptr_t newv, int cpu) -{ - uint32_t rseq_scratch[3]; +/* Per-mm-cid indexing. */ - RSEQ_INJECT_C(9) +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-x86-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED - __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) - RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) -#endif - "movl %[src], %[rseq_scratch0]\n\t" - "movl %[dst], %[rseq_scratch1]\n\t" - "movl %[len], %[rseq_scratch2]\n\t" - /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) - RSEQ_INJECT_ASM(3) - "movl %[expect], %%eax\n\t" - "cmpl %%eax, %[v]\n\t" - "jnz 5f\n\t" - RSEQ_INJECT_ASM(4) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) - "movl %[expect], %%eax\n\t" - "cmpl %%eax, %[v]\n\t" - "jnz 7f\n\t" -#endif - /* try memcpy */ - "test %[len], %[len]\n\t" \ - "jz 333f\n\t" \ - "222:\n\t" \ - "movb (%[src]), %%al\n\t" \ - "movb %%al, (%[dst])\n\t" \ - "inc %[src]\n\t" \ - "inc %[dst]\n\t" \ - "dec %[len]\n\t" \ - "jnz 222b\n\t" \ - "333:\n\t" \ - RSEQ_INJECT_ASM(5) - "lock; addl $0,-128(%%esp)\n\t" - "movl %[newv], %%eax\n\t" - /* final store */ - "movl %%eax, %[v]\n\t" - "2:\n\t" - RSEQ_INJECT_ASM(6) - /* teardown */ - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t" - RSEQ_ASM_DEFINE_ABORT(4, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - abort) - RSEQ_ASM_DEFINE_CMPFAIL(5, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - cmpfail) -#ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_DEFINE_CMPFAIL(6, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - error1) - RSEQ_ASM_DEFINE_CMPFAIL(7, - "movl %[rseq_scratch2], %[len]\n\t" - "movl %[rseq_scratch1], %[dst]\n\t" - "movl %[rseq_scratch0], %[src]\n\t", - error2) -#endif - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), - [rseq_offset] "r" (rseq_offset), - /* final store input */ - [v] "m" (*v), - [expect] "m" (expect), - [newv] "m" (newv), - /* try memcpy input */ - [dst] "r" (dst), - [src] "r" (src), - [len] "r" (len), - [rseq_scratch0] "m" (rseq_scratch[0]), - [rseq_scratch1] "m" (rseq_scratch[1]), - [rseq_scratch2] "m" (rseq_scratch[2]) - : "memory", "cc", "eax" - RSEQ_INJECT_CLOBBER - : abort, cmpfail -#ifdef RSEQ_COMPARE_TWICE - , error1, error2 -#endif - ); - rseq_after_asm_goto(); - return 0; -abort: - rseq_after_asm_goto(); - RSEQ_INJECT_FAILED - return -1; -cmpfail: - rseq_after_asm_goto(); - return 1; -#ifdef RSEQ_COMPARE_TWICE -error1: - rseq_after_asm_goto(); - rseq_bug("cpu_id comparison failed"); -error2: - rseq_after_asm_goto(); - rseq_bug("expected value comparison failed"); -#endif -} +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-x86-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID -#endif /* !RSEQ_SKIP_FASTPATH */ +/* APIs which are not based on cpu ids. */ -#endif +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-x86-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 4177f9507bbe..4e4aa006004c 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -28,6 +28,8 @@ #include <limits.h> #include <dlfcn.h> #include <stddef.h> +#include <sys/auxv.h> +#include <linux/auxvec.h> #include "../kselftest.h" #include "rseq.h" @@ -36,20 +38,38 @@ static const ptrdiff_t *libc_rseq_offset_p; static const unsigned int *libc_rseq_size_p; static const unsigned int *libc_rseq_flags_p; -/* Offset from the thread pointer to the rseq area. */ +/* Offset from the thread pointer to the rseq area. */ ptrdiff_t rseq_offset; -/* Size of the registered rseq area. 0 if the registration was - unsuccessful. */ +/* + * Size of the registered rseq area. 0 if the registration was + * unsuccessful. + */ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; +/* + * rseq feature size supported by the kernel. 0 if the registration was + * unsuccessful. + */ +unsigned int rseq_feature_size = -1U; + static int rseq_ownership; +static int rseq_reg_success; /* At least one rseq registration has succeded. */ + +/* Allocate a large area for the TLS. */ +#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 + +/* Original struct rseq feature size is 20 bytes. */ +#define ORIG_RSEQ_FEATURE_SIZE 20 + +/* Original struct rseq allocation size is 32 bytes. */ +#define ORIG_RSEQ_ALLOC_SIZE 32 static -__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, }; @@ -59,6 +79,11 @@ static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } +static int sys_getcpu(unsigned *cpu, unsigned *node) +{ + return syscall(__NR_getcpu, cpu, node, NULL); +} + int rseq_available(void) { int rc; @@ -84,10 +109,16 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); - if (rc) + rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + if (rc) { + if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* Incoherent success/failure within process. */ + abort(); + } return -1; + } assert(rseq_current_cpu_raw() >= 0); + RSEQ_WRITE_ONCE(rseq_reg_success, 1); return 0; } @@ -99,12 +130,28 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } +static +unsigned int get_rseq_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + static __attribute__((constructor)) void rseq_init(void) { @@ -117,14 +164,24 @@ void rseq_init(void) rseq_offset = *libc_rseq_offset_p; rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; + rseq_feature_size = get_rseq_feature_size(); + if (rseq_feature_size > rseq_size) + rseq_feature_size = rseq_size; return; } - if (!rseq_available()) - return; rseq_ownership = 1; + if (!rseq_available()) { + rseq_size = 0; + rseq_feature_size = 0; + return; + } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); - rseq_size = sizeof(struct rseq_abi); rseq_flags = 0; + rseq_feature_size = get_rseq_feature_size(); + if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + else + rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -134,6 +191,7 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; + rseq_feature_size = -1U; rseq_ownership = 0; } @@ -148,3 +206,16 @@ int32_t rseq_fallback_current_cpu(void) } return cpu; } + +int32_t rseq_fallback_current_node(void) +{ + uint32_t cpu_id, node_id; + int ret; + + ret = sys_getcpu(&cpu_id, &node_id); + if (ret) { + perror("sys_getcpu()"); + return ret; + } + return (int32_t) node_id; +} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 6f7513384bf5..d7364ea4d201 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -20,6 +20,15 @@ #include "rseq-abi.h" #include "compiler.h" +#ifndef rseq_sizeof_field +#define rseq_sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +#ifndef rseq_offsetofend +#define rseq_offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + rseq_sizeof_field(TYPE, MEMBER)) +#endif + /* * Empty code injection macros, override when testing. * It is important to consider that the ASM injection macros need to be @@ -47,14 +56,38 @@ #include "rseq-thread-pointer.h" -/* Offset from the thread pointer to the rseq area. */ +/* Offset from the thread pointer to the rseq area. */ extern ptrdiff_t rseq_offset; -/* Size of the registered rseq area. 0 if the registration was - unsuccessful. */ + +/* + * Size of the registered rseq area. 0 if the registration was + * unsuccessful. + */ extern unsigned int rseq_size; -/* Flags used during rseq registration. */ + +/* Flags used during rseq registration. */ extern unsigned int rseq_flags; +/* + * rseq feature size supported by the kernel. 0 if the registration was + * unsuccessful. + */ +extern unsigned int rseq_feature_size; + +enum rseq_mo { + RSEQ_MO_RELAXED = 0, + RSEQ_MO_CONSUME = 1, /* Unused */ + RSEQ_MO_ACQUIRE = 2, /* Unused */ + RSEQ_MO_RELEASE = 3, + RSEQ_MO_ACQ_REL = 4, /* Unused */ + RSEQ_MO_SEQ_CST = 5, /* Unused */ +}; + +enum rseq_percpu_mode { + RSEQ_PERCPU_CPU_ID = 0, + RSEQ_PERCPU_MM_CID = 1, +}; + static inline struct rseq_abi *rseq_get_abi(void) { return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset); @@ -119,6 +152,11 @@ int rseq_unregister_current_thread(void); int32_t rseq_fallback_current_cpu(void); /* + * Restartable sequence fallback for reading the current node number. + */ +int32_t rseq_fallback_current_node(void); + +/* * Values returned can be either the current CPU number, -1 (rseq is * uninitialized), or -2 (rseq initialization has failed). */ @@ -153,6 +191,30 @@ static inline uint32_t rseq_current_cpu(void) return cpu; } +static inline bool rseq_node_id_available(void) +{ + return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); +} + +/* + * Current NUMA node number. + */ +static inline uint32_t rseq_current_node_id(void) +{ + assert(rseq_node_id_available()); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->node_id); +} + +static inline bool rseq_mm_cid_available(void) +{ + return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); +} + +static inline uint32_t rseq_current_mm_cid(void) +{ + return RSEQ_ACCESS_ONCE(rseq_get_abi()->mm_cid); +} + static inline void rseq_clear_rseq_cs(void) { RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); @@ -174,4 +236,149 @@ static inline void rseq_prepare_unload(void) rseq_clear_rseq_cs(); } +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *v, intptr_t expect, + intptr_t newv, int cpu) +{ + if (rseq_mo != RSEQ_MO_RELAXED) + return -1; + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpeqv_storev_relaxed_cpu_id(v, expect, newv, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpeqv_storev_relaxed_mm_cid(v, expect, newv, cpu); + } + return -1; +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *v, intptr_t expectnot, long voffp, intptr_t *load, + int cpu) +{ + if (rseq_mo != RSEQ_MO_RELAXED) + return -1; + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpnev_storeoffp_load_relaxed_cpu_id(v, expectnot, voffp, load, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpnev_storeoffp_load_relaxed_mm_cid(v, expectnot, voffp, load, cpu); + } + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *v, intptr_t count, int cpu) +{ + if (rseq_mo != RSEQ_MO_RELAXED) + return -1; + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_addv_relaxed_cpu_id(v, count, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_addv_relaxed_mm_cid(v, count, cpu); + } + return -1; +} + +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int rseq_offset_deref_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *ptr, long off, intptr_t inc, int cpu) +{ + if (rseq_mo != RSEQ_MO_RELAXED) + return -1; + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_offset_deref_addv_relaxed_cpu_id(ptr, off, inc, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_offset_deref_addv_relaxed_mm_cid(ptr, off, inc, cpu); + } + return -1; +} +#endif + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + switch (rseq_mo) { + case RSEQ_MO_RELAXED: + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpeqv_trystorev_storev_relaxed_cpu_id(v, expect, v2, newv2, newv, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpeqv_trystorev_storev_relaxed_mm_cid(v, expect, v2, newv2, newv, cpu); + } + return -1; + case RSEQ_MO_RELEASE: + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpeqv_trystorev_storev_release_cpu_id(v, expect, v2, newv2, newv, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpeqv_trystorev_storev_release_mm_cid(v, expect, v2, newv2, newv, cpu); + } + return -1; + default: + return -1; + } +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + if (rseq_mo != RSEQ_MO_RELAXED) + return -1; + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpeqv_cmpeqv_storev_relaxed_cpu_id(v, expect, v2, expect2, newv, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpeqv_cmpeqv_storev_relaxed_mm_cid(v, expect, v2, expect2, newv, cpu); + } + return -1; +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode, + intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + switch (rseq_mo) { + case RSEQ_MO_RELAXED: + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpeqv_trymemcpy_storev_relaxed_cpu_id(v, expect, dst, src, len, newv, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpeqv_trymemcpy_storev_relaxed_mm_cid(v, expect, dst, src, len, newv, cpu); + } + return -1; + case RSEQ_MO_RELEASE: + switch (percpu_mode) { + case RSEQ_PERCPU_CPU_ID: + return rseq_cmpeqv_trymemcpy_storev_release_cpu_id(v, expect, dst, src, len, newv, cpu); + case RSEQ_PERCPU_MM_CID: + return rseq_cmpeqv_trymemcpy_storev_release_mm_cid(v, expect, dst, src, len, newv, cpu); + } + return -1; + default: + return -1; + } +} + #endif /* RSEQ_H_ */ diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index f51bc83c9e41..8d31426ab41f 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -42,6 +42,11 @@ function do_tests() ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1 echo "Running compare-twice test ${TEST_NAME[$i]}" ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1 + + echo "Running mm_cid test ${TEST_NAME[$i]}" + ./param_test_mm_cid ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1 + echo "Running mm_cid compare-twice test ${TEST_NAME[$i]}" + ./param_test_mm_cid_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1 let "i++" done } diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 2b9d929a24ed..63ce02d1d5cc 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -31,7 +31,6 @@ FIXTURE(rtc) { FIXTURE_SETUP(rtc) { self->fd = open(rtc_file, O_RDONLY); - ASSERT_NE(-1, self->fd); } FIXTURE_TEARDOWN(rtc) { @@ -42,6 +41,10 @@ TEST_F(rtc, date_read) { int rc; struct rtc_time rtc_tm; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + /* Read the RTC time/date */ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm); ASSERT_NE(-1, rc); @@ -85,6 +88,10 @@ TEST_F_TIMEOUT(rtc, date_read_loop, READ_LOOP_DURATION_SEC + 2) { struct rtc_time rtc_tm; time_t start_rtc_read, prev_rtc_read; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + TH_LOG("Continuously reading RTC time for %ds (with %dms breaks after every read).", READ_LOOP_DURATION_SEC, READ_LOOP_SLEEP_MS); @@ -119,6 +126,10 @@ TEST_F_TIMEOUT(rtc, uie_read, NUM_UIE + 2) { int i, rc, irq = 0; unsigned long data; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + /* Turn on update interrupts */ rc = ioctl(self->fd, RTC_UIE_ON, 0); if (rc == -1) { @@ -144,6 +155,10 @@ TEST_F(rtc, uie_select) { int i, rc, irq = 0; unsigned long data; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + /* Turn on update interrupts */ rc = ioctl(self->fd, RTC_UIE_ON, 0); if (rc == -1) { @@ -183,6 +198,10 @@ TEST_F(rtc, alarm_alm_set) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -237,6 +256,10 @@ TEST_F(rtc, alarm_wkalm_set) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -285,6 +308,10 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -339,6 +366,10 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { time_t secs, new; int rc; + if (self->fd == -1 && errno == ENOENT) + SKIP(return, "Skipping test since %s does not exist", rtc_file); + ASSERT_NE(-1, self->fd); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile index 10c72f14fea9..099ee9213557 100644 --- a/tools/testing/selftests/sched/Makefile +++ b/tools/testing/selftests/sched/Makefile @@ -4,7 +4,7 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) CLANG_FLAGS += -no-integrated-as endif -CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -Wl,-rpath=./ \ +CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \ $(CLANG_FLAGS) LDLIBS += -lpthread diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c index 8109b17dc764..25e0d95d3713 100644 --- a/tools/testing/selftests/sched/cs_prctl_test.c +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -27,6 +27,7 @@ #include <sys/prctl.h> #include <unistd.h> #include <time.h> +#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -151,12 +152,17 @@ static void create_threads(int num_threads, int thr_tids[]) static int child_func_process(void *arg) { struct child_args *ca = (struct child_args *)arg; + int ret; close(ca->pfd[0]); create_threads(ca->num_threads, ca->thr_tids); - write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads); + ret = write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads); + if (ret == -1) + printf("write failed on pfd[%d] - error (%s)\n", + ca->pfd[1], strerror(errno)); + close(ca->pfd[1]); while (1) @@ -169,7 +175,7 @@ static unsigned char child_func_process_stack[STACK_SIZE]; void create_processes(int num_processes, int num_threads, struct child_args proc[]) { pid_t cpid; - int i; + int i, ret; for (i = 0; i < num_processes; ++i) { proc[i].num_threads = num_threads; @@ -184,7 +190,10 @@ void create_processes(int num_processes, int num_threads, struct child_args proc } for (i = 0; i < num_processes; ++i) { - read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads); + ret = read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads); + if (ret == -1) + printf("read failed on proc[%d].pfd[0] error (%s)\n", + i, strerror(errno)); close(proc[i].pfd[0]); } } diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index f017c382c036..584fba487037 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/ +CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread LDLIBS += -lcap diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4ae6c8991307..61386e499b77 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -138,6 +138,8 @@ struct seccomp_data { # define __NR_seccomp 337 # elif defined(__sh__) # define __NR_seccomp 372 +# elif defined(__mc68000__) +# define __NR_seccomp 380 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff @@ -392,6 +394,8 @@ TEST(mode_filter_without_nnp) .filter = filter, }; long ret; + cap_t cap = cap_get_proc(); + cap_flag_value_t is_cap_sys_admin = 0; ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); ASSERT_LE(0, ret) { @@ -400,8 +404,8 @@ TEST(mode_filter_without_nnp) errno = 0; ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); /* Succeeds with CAP_SYS_ADMIN, fails without */ - /* TODO(wad) check caps not euid */ - if (geteuid()) { + cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); + if (!is_cap_sys_admin) { EXPECT_EQ(-1, ret); EXPECT_EQ(EACCES, errno); } else { @@ -1836,6 +1840,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).regs[3] # define SYSCALL_RET(_regs) (_regs).regs[0] +#elif defined(__mc68000__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_d0 +# define SYSCALL_RET(_regs) (_regs).d0 #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1900,7 +1908,7 @@ const bool ptrace_entry_set_syscall_ret = * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ -#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) +#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__) # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) #else diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh index 22b6c8910b18..4710e09f49fa 100755 --- a/tools/testing/selftests/splice/short_splice_read.sh +++ b/tools/testing/selftests/splice/short_splice_read.sh @@ -127,7 +127,7 @@ expect_success "proc_handler: special read splice" test_splice /proc/sys/kernel/ if ! [ -d /sys/module/test_module/sections ] ; then expect_success "test_module kernel module load" modprobe test_module fi -expect_failure "kernfs attr splice" test_splice /sys/module/test_module/coresize -expect_failure "kernfs binattr splice" test_splice /sys/module/test_module/sections/.init.text +expect_success "kernfs attr splice" test_splice /sys/module/test_module/coresize +expect_success "kernfs binattr splice" test_splice /sys/module/test_module/sections/.init.text exit $ret diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile index d0121a8a3523..df0f91bf6890 100644 --- a/tools/testing/selftests/sync/Makefile +++ b/tools/testing/selftests/sync/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra -CFLAGS += -I../../../../usr/include/ +CFLAGS += $(KHDR_INCLUDES) LDFLAGS += -pthread .PHONY: all clean diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index f50778a3d744..bfc54b422f25 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -1,16 +1,6 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org> -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or at your option any -# later version; or, when distributed separately from the Linux kernel or -# when incorporated into other software packages, subject to the following -# license: -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of copyleft-next (version 0.3.1 or later) as published -# at http://copyleft-next.org/. # This performs a series tests against the proc sysctl interface. diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json b/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json deleted file mode 100644 index bdcbaa4c5663..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json +++ /dev/null @@ -1,203 +0,0 @@ -[ - { - "id": "2141", - "name": "Add rsvp filter with tcp proto and specific IP address", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto tcp session 198.168.10.64", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*session 198.168.10.64 ipproto tcp", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "5267", - "name": "Add rsvp filter with udp proto and specific IP address", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*session 1.1.1.1 ipproto udp", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "2819", - "name": "Add rsvp filter with src ip and src port", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 sender 2.2.2.2/5021 classid 1:1", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp sender 2.2.2.2/5021", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "c967", - "name": "Add rsvp filter with tunnelid and continue action", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnelid 2 classid 1:1 action continue", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp tunnelid 2.*action order [0-9]+: gact action continue", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "5463", - "name": "Add rsvp filter with tunnel and pipe action", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnel 2 skip 1 action pipe", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*tunnel 2 skip 1 session 1.1.1.1 ipproto udp.*action order [0-9]+: gact action pipe", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "2332", - "name": "Add rsvp filter with miltiple actions", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 7 rsvp ipproto udp session 1.1.1.1 classid 1:1 action skbedit mark 7 pipe action gact drop", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp.*action order [0-9]+: skbedit mark 7 pipe.*action order [0-9]+: gact action drop", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "8879", - "name": "Add rsvp filter with tunnel and skp flag", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnel 2 skip 1 action pipe", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*tunnel 2 skip 1 session 1.1.1.1 ipproto udp.*action order [0-9]+: gact action pipe", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "8261", - "name": "List rsvp filters", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress", - "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 classid 1:1", - "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto tcp session 2.2.2.2/1234 classid 2:1" - ], - "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "8989", - "name": "Delete rsvp filter", - "category": [ - "filter", - "rsvp" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress", - "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 tunnelid 9 classid 2:1" - ], - "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 tunnelid 9 classid 2:1", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 2:1 session 1.1.1.1/1234 ipproto udp tunnelid 9", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json deleted file mode 100644 index 44901db70376..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json +++ /dev/null @@ -1,227 +0,0 @@ -[ - { - "id": "8293", - "name": "Add tcindex filter with default action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "^filter parent ffff: protocol ip pref 1 tcindex chain 0 handle 0x0001 classid 1:1", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "7281", - "name": "Add tcindex filter with hash size and pass action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 fall_through classid 1:1 action pass", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "b294", - "name": "Add tcindex filter with mask shift and reclassify action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action reclassify", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action reclassify", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "0532", - "name": "Add tcindex filter with pass_on and continue actions", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 pass_on classid 1:1 action continue", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action continue", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "d473", - "name": "Add tcindex filter with pipe action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action pipe", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action pipe", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "2940", - "name": "Add tcindex filter with miltiple actions", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 7 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action skbedit mark 7 pipe action gact drop", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 7 protocol ip tcindex", - "matchPattern": "^filter parent ffff: protocol ip pref 7 tcindex.*handle 0x0001.*action.*skbedit.*mark 7 pipe.*action.*gact action drop", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "1893", - "name": "List tcindex filters", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress", - "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1", - "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 tcindex classid 1:1" - ], - "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "handle 0x000[0-9]+ classid 1:1", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "2041", - "name": "Change tcindex filter with pass action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress", - "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop" - ], - "cmdUnderTest": "$TC filter change dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action pass", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "9203", - "name": "Replace tcindex filter with pass action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress", - "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop" - ], - "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action pass", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - }, - { - "id": "7957", - "name": "Delete tcindex filter with drop action", - "category": [ - "filter", - "tcindex" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress", - "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop" - ], - "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop", - "expExitCode": "0", - "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex", - "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action drop", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV1 ingress" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json deleted file mode 100644 index f5bc8670a67d..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json +++ /dev/null @@ -1,94 +0,0 @@ -[ - { - "id": "7628", - "name": "Create ATM with default setting", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "390a", - "name": "Delete ATM with valid handle", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root atm" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "32a0", - "name": "Show ATM class", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class atm 1: parent 1:", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6310", - "name": "Dump ATM stats", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json deleted file mode 100644 index 1ab21c83a122..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json +++ /dev/null @@ -1,184 +0,0 @@ -[ - { - "id": "3460", - "name": "Create CBQ with default setting", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "0592", - "name": "Create CBQ with mpu", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4684", - "name": "Create CBQ with valid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4345", - "name": "Create CBQ with invalid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4525", - "name": "Create CBQ with valid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6784", - "name": "Create CBQ with invalid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5468", - "name": "Delete CBQ with handle", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "492a", - "name": "Show CBQ class", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json deleted file mode 100644 index c030795f9c37..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json +++ /dev/null @@ -1,140 +0,0 @@ -[ - { - "id": "6345", - "name": "Create DSMARK with default setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "3462", - "name": "Create DSMARK with default_index setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "ca95", - "name": "Create DSMARK with set_tc_index flag", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "a950", - "name": "Create DSMARK with multiple setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4092", - "name": "Delete DSMARK with handle", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5930", - "name": "Show DSMARK class", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class dsmark 1:", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee22e3447ec7..7bd94f8e490a 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -246,6 +246,110 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): stage, output, '"{}" did not complete successfully'.format(prefix)) +def verify_by_json(procout, res, tidx, args, pm): + try: + outputJSON = json.loads(procout) + except json.JSONDecodeError: + res.set_result(ResultState.fail) + res.set_failmsg('Cannot decode verify command\'s output. Is it JSON?') + return res + + matchJSON = json.loads(json.dumps(tidx['matchJSON'])) + + if type(outputJSON) != type(matchJSON): + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {} ' + failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if len(matchJSON) > len(outputJSON): + failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}" + failmsg = failmsg.format(len(outputJSON), outputJSON, len(matchJSON), matchJSON) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + res = find_in_json(res, outputJSON, matchJSON, 0) + + return res + +def find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if res.get_result() == ResultState.fail: + return res + + if type(matchJSONVal) == list: + res = find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey) + + elif type(matchJSONVal) == dict: + res = find_in_json_dict(res, outputJSONVal, matchJSONVal) + else: + res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey) + + if res.get_result() != ResultState.fail: + res.set_result(ResultState.success) + return res + + return res + +def find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if (type(matchJSONVal) != type(outputJSONVal)): + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}' + failmsg = failmsg.format(outputJSONVal, matchJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if len(matchJSONVal) > len(outputJSONVal): + failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}" + failmsg = failmsg.format(len(outputJSONVal), outputJSONVal, len(matchJSONVal), matchJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + for matchJSONIdx, matchJSONVal in enumerate(matchJSONVal): + res = find_in_json(res, outputJSONVal[matchJSONIdx], matchJSONVal, + matchJSONKey) + return res + +def find_in_json_dict(res, outputJSONVal, matchJSONVal): + for matchJSONKey, matchJSONVal in matchJSONVal.items(): + if type(outputJSONVal) == dict: + if matchJSONKey not in outputJSONVal: + failmsg = 'Key not found in json output: {}: {}\nMatching against output: {}' + failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + else: + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}' + failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return rest + + if type(outputJSONVal) == dict and (type(outputJSONVal[matchJSONKey]) == dict or + type(outputJSONVal[matchJSONKey]) == list): + if len(matchJSONVal) > 0: + res = find_in_json(res, outputJSONVal[matchJSONKey], matchJSONVal, matchJSONKey) + # handling corner case where matchJSONVal == [] or matchJSONVal == {} + else: + res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey) + else: + res = find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey) + return res + +def find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if matchJSONKey in outputJSONVal: + if matchJSONVal != outputJSONVal[matchJSONKey]: + failmsg = 'Value doesn\'t match: {}: {} != {}\nMatching against output: {}' + failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal[matchJSONKey], outputJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + return res + def run_one_test(pm, args, index, tidx): global NAMES result = True @@ -292,16 +396,22 @@ def run_one_test(pm, args, index, tidx): else: if args.verbose > 0: print('-----> verify stage') - match_pattern = re.compile( - str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"]) if procout: - match_index = re.findall(match_pattern, procout) - if len(match_index) != int(tidx["matchCount"]): - res.set_result(ResultState.fail) - res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + if 'matchJSON' in tidx: + verify_by_json(procout, res, tidx, args, pm) + elif 'matchPattern' in tidx: + match_pattern = re.compile( + str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) else: - res.set_result(ResultState.success) + res.set_result(ResultState.fail) + res.set_failmsg('Must specify a match option: matchJSON or matchPattern\n{}'.format(procout)) elif int(tidx["matchCount"]) != 0: res.set_result(ResultState.fail) res.set_failmsg('No output generated by verify command.') @@ -365,6 +475,7 @@ def test_runner(pm, args, filtered_tests): res.set_result(ResultState.skip) res.set_errormsg(errmsg) tsr.add_resultdata(res) + index += 1 continue try: badtest = tidx # in case it goes bad diff --git a/tools/testing/selftests/tdx/Makefile b/tools/testing/selftests/tdx/Makefile new file mode 100644 index 000000000000..306e9c4d5ef7 --- /dev/null +++ b/tools/testing/selftests/tdx/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -O3 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) -static + +TEST_GEN_PROGS := tdx_guest_test + +include ../lib.mk diff --git a/tools/testing/selftests/tdx/config b/tools/testing/selftests/tdx/config new file mode 100644 index 000000000000..aa1edc829ab6 --- /dev/null +++ b/tools/testing/selftests/tdx/config @@ -0,0 +1 @@ +CONFIG_TDX_GUEST_DRIVER=y diff --git a/tools/testing/selftests/tdx/tdx_guest_test.c b/tools/testing/selftests/tdx/tdx_guest_test.c new file mode 100644 index 000000000000..81d8cb88ea1a --- /dev/null +++ b/tools/testing/selftests/tdx/tdx_guest_test.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test TDX guest features + * + * Copyright (C) 2022 Intel Corporation. + * + * Author: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> + */ + +#include <sys/ioctl.h> + +#include <errno.h> +#include <fcntl.h> + +#include <linux/tdx-guest.h> +#include "../kselftest_harness.h" + +#define TDX_GUEST_DEVNAME "/dev/tdx_guest" +#define HEX_DUMP_SIZE 8 +#define DEBUG 0 + +/** + * struct tdreport_type - Type header of TDREPORT_STRUCT. + * @type: Type of the TDREPORT (0 - SGX, 81 - TDX, rest are reserved) + * @sub_type: Subtype of the TDREPORT (Default value is 0). + * @version: TDREPORT version (Default value is 0). + * @reserved: Added for future extension. + * + * More details can be found in TDX v1.0 module specification, sec + * titled "REPORTTYPE". + */ +struct tdreport_type { + __u8 type; + __u8 sub_type; + __u8 version; + __u8 reserved; +}; + +/** + * struct reportmac - TDX guest report data, MAC and TEE hashes. + * @type: TDREPORT type header. + * @reserved1: Reserved for future extension. + * @cpu_svn: CPU security version. + * @tee_tcb_info_hash: SHA384 hash of TEE TCB INFO. + * @tee_td_info_hash: SHA384 hash of TDINFO_STRUCT. + * @reportdata: User defined unique data passed in TDG.MR.REPORT request. + * @reserved2: Reserved for future extension. + * @mac: CPU MAC ID. + * + * It is MAC-protected and contains hashes of the remainder of the + * report structure along with user provided report data. More details can + * be found in TDX v1.0 Module specification, sec titled "REPORTMACSTRUCT" + */ +struct reportmac { + struct tdreport_type type; + __u8 reserved1[12]; + __u8 cpu_svn[16]; + __u8 tee_tcb_info_hash[48]; + __u8 tee_td_info_hash[48]; + __u8 reportdata[64]; + __u8 reserved2[32]; + __u8 mac[32]; +}; + +/** + * struct td_info - TDX guest measurements and configuration. + * @attr: TDX Guest attributes (like debug, spet_disable, etc). + * @xfam: Extended features allowed mask. + * @mrtd: Build time measurement register. + * @mrconfigid: Software-defined ID for non-owner-defined configuration + * of the guest - e.g., run-time or OS configuration. + * @mrowner: Software-defined ID for the guest owner. + * @mrownerconfig: Software-defined ID for owner-defined configuration of + * the guest - e.g., specific to the workload. + * @rtmr: Run time measurement registers. + * @reserved: Added for future extension. + * + * It contains the measurements and initial configuration of the TDX guest + * that was locked at initialization and a set of measurement registers + * that are run-time extendable. More details can be found in TDX v1.0 + * Module specification, sec titled "TDINFO_STRUCT". + */ +struct td_info { + __u8 attr[8]; + __u64 xfam; + __u64 mrtd[6]; + __u64 mrconfigid[6]; + __u64 mrowner[6]; + __u64 mrownerconfig[6]; + __u64 rtmr[24]; + __u64 reserved[14]; +}; + +/* + * struct tdreport - Output of TDCALL[TDG.MR.REPORT]. + * @reportmac: Mac protected header of size 256 bytes. + * @tee_tcb_info: Additional attestable elements in the TCB are not + * reflected in the reportmac. + * @reserved: Added for future extension. + * @tdinfo: Measurements and configuration data of size 512 bytes. + * + * More details can be found in TDX v1.0 Module specification, sec + * titled "TDREPORT_STRUCT". + */ +struct tdreport { + struct reportmac reportmac; + __u8 tee_tcb_info[239]; + __u8 reserved[17]; + struct td_info tdinfo; +}; + +static void print_array_hex(const char *title, const char *prefix_str, + const void *buf, int len) +{ + int i, j, line_len, rowsize = HEX_DUMP_SIZE; + const __u8 *ptr = buf; + + printf("\t\t%s", title); + + for (j = 0; j < len; j += rowsize) { + line_len = rowsize < (len - j) ? rowsize : (len - j); + printf("%s%.8x:", prefix_str, j); + for (i = 0; i < line_len; i++) + printf(" %.2x", ptr[j + i]); + printf("\n"); + } + + printf("\n"); +} + +TEST(verify_report) +{ + struct tdx_report_req req; + struct tdreport *tdreport; + int devfd, i; + + devfd = open(TDX_GUEST_DEVNAME, O_RDWR | O_SYNC); + ASSERT_LT(0, devfd); + + /* Generate sample report data */ + for (i = 0; i < TDX_REPORTDATA_LEN; i++) + req.reportdata[i] = i; + + /* Get TDREPORT */ + ASSERT_EQ(0, ioctl(devfd, TDX_CMD_GET_REPORT0, &req)); + + if (DEBUG) { + print_array_hex("\n\t\tTDX report data\n", "", + req.reportdata, sizeof(req.reportdata)); + + print_array_hex("\n\t\tTDX tdreport data\n", "", + req.tdreport, sizeof(req.tdreport)); + } + + /* Make sure TDREPORT data includes the REPORTDATA passed */ + tdreport = (struct tdreport *)req.tdreport; + ASSERT_EQ(0, memcmp(&tdreport->reportmac.reportdata[0], + req.reportdata, sizeof(req.reportdata))); + + ASSERT_EQ(0, close(devfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index fe1eb8271b35..cae8dca0fbff 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -8,3 +8,4 @@ procfs timens timer timerfd +vfork_exec diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index 3a5936cc10ab..f0d51d4d2c87 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c new file mode 100644 index 000000000000..beb7614941fb --- /dev/null +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <string.h> +#include <pthread.h> + +#include "log.h" +#include "timens.h" + +#define OFFSET (36000) + +struct thread_args { + char *tst_name; + struct timespec *now; +}; + +static void *tcheck(void *_args) +{ + struct thread_args *args = _args; + struct timespec *now = args->now, tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) { + pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", + args->tst_name, tst.tv_sec, now->tv_sec); + return (void *)1UL; + } + } + return NULL; +} + +static int check_in_thread(char *tst_name, struct timespec *now) +{ + struct thread_args args = { + .tst_name = tst_name, + .now = now, + }; + pthread_t th; + void *retval; + + if (pthread_create(&th, NULL, tcheck, &args)) + return pr_perror("thread"); + if (pthread_join(th, &retval)) + return pr_perror("pthread_join"); + return !(retval == NULL); +} + +static int check(char *tst_name, struct timespec *now) +{ + struct timespec tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) + return pr_fail("%s: unexpected value: %ld (%ld)\n", + tst_name, tst.tv_sec, now->tv_sec); + } + if (check_in_thread(tst_name, now)) + return 1; + ksft_test_result_pass("%s\n", tst_name); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct timespec now; + int status; + pid_t pid; + + if (argc > 1) { + char *endptr; + + ksft_cnt.ksft_pass = 1; + now.tv_sec = strtoul(argv[1], &endptr, 0); + if (*endptr != 0) + return pr_perror("strtoul"); + + return check("child after exec", &now); + } + + nscheck(); + + ksft_set_plan(4); + + clock_gettime(CLOCK_MONOTONIC, &now); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, OFFSET)) + return 1; + + if (check("parent before vfork", &now)) + return 1; + + pid = vfork(); + if (pid < 0) + return pr_perror("fork"); + + if (pid == 0) { + char now_str[64]; + char *cargv[] = {"exec", now_str, NULL}; + char *cenv[] = {NULL}; + + /* Check for proper vvar offsets after execve. */ + snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET); + execve("/proc/self/exe", cargv, cenv); + pr_perror("execve"); + _exit(1); + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("waitpid"); + + if (status) + ksft_exit_fail(); + ksft_inc_pass_cnt(); + ksft_test_result_pass("wait for child\n"); + + /* Check that we are still in the source timens. */ + if (check("parent after vfork", &now)) + return 1; + + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index 1a5db1eb8ed5..a9bf9459fb25 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) include ../lib.mk -TEST_PROGS := test_smoke.sh test_space.sh +TEST_PROGS := test_smoke.sh test_space.sh test_async.sh TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py diff --git a/tools/testing/selftests/tpm2/test_async.sh b/tools/testing/selftests/tpm2/test_async.sh new file mode 100755 index 000000000000..43bf5bd772fd --- /dev/null +++ b/tools/testing/selftests/tpm2/test_async.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +[ -e /dev/tpm0 ] || exit $ksft_skip +[ -e /dev/tpmrm0 ] || exit $ksft_skip + +python3 -m unittest -v tpm2_tests.AsyncTest diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 3e5ff29ee1dd..58af963e5b55 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -7,4 +7,3 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip python3 -m unittest -v tpm2_tests.SmokeTest -python3 -m unittest -v tpm2_tests.AsyncTest diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index c7363c6764fc..bba8cb54548e 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -344,7 +344,7 @@ def get_algorithm(name): def hex_dump(d): - d = [format(ord(x), '02x') for x in d] + d = [format(x, '02x') for x in d] d = [d[i: i + 16] for i in range(0, len(d), 16)] d = [' '.join(x) for x in d] d = os.linesep.join(d) diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile index c765d8635d9a..6b512b86aec3 100644 --- a/tools/testing/selftests/user_events/Makefile +++ b/tools/testing/selftests/user_events/Makefile @@ -1,7 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include +CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDLIBS += -lrt -lpthread -lm +# Note: +# This test depends on <linux/user_events.h> exported in uapi +# The following commit removed user_events.h out of uapi: +# commit 5cfff569cab8bf544bab62c911c5d6efd5af5e05 +# tracing: Move user_events.h temporarily out of include/uapi +# This test will not compile until user_events.h is added +# back to uapi. + TEST_GEN_PROGS = ftrace_test dyn_test perf_test TEST_FILES := settings diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c index fc25ede131b8..1df5d057d79f 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c +++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c @@ -14,7 +14,11 @@ #include "../kselftest.h" #include "parse_vdso.h" +#if defined(__riscv) +const char *version = "LINUX_4.15"; +#else const char *version = "LINUX_2.6"; +#endif const char *name = "__vdso_getcpu"; struct getcpu_cache; diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index 8ccc73ed8240..e411f287a426 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -27,6 +27,9 @@ #if defined(__aarch64__) const char *version = "LINUX_2.6.39"; const char *name = "__kernel_gettimeofday"; +#elif defined(__riscv) +const char *version = "LINUX_4.15"; +const char *name = "__vdso_gettimeofday"; #else const char *version = "LINUX_2.6"; const char *name = "__vdso_gettimeofday"; diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 7b9dc2426f18..1f8c36a9fa10 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +cow hugepage-mmap hugepage-mremap hugepage-shm @@ -33,3 +34,5 @@ memfd_secret soft-dirty split_huge_page_test ksm_tests +local_config.h +local_config.mk diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 163c2fde3cb3..ac9366065fd2 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests -LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h +LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h + +include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') @@ -23,9 +25,10 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/p # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES) +CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_FILES = compaction_test +TEST_GEN_FILES = cow +TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugetlb-madvise @@ -52,6 +55,7 @@ TEST_GEN_FILES += userfaultfd TEST_GEN_PROGS += soft-dirty TEST_GEN_PROGS += split_huge_page_test TEST_GEN_FILES += ksm_tests +TEST_GEN_PROGS += ksm_functional_tests ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) @@ -95,7 +99,9 @@ TEST_FILES += va_128TBswitch.sh include ../lib.mk +$(OUTPUT)/cow: vm_util.c $(OUTPUT)/khugepaged: vm_util.c +$(OUTPUT)/ksm_functional_tests: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c @@ -150,8 +156,25 @@ warn_32bit_failure: endif endif +# cow_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. +$(OUTPUT)/cow: LDLIBS += $(COW_EXTRA_LIBS) + $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma + +local_config.mk local_config.h: check_config.sh + /bin/sh ./check_config.sh $(CC) + +EXTRA_CLEAN += local_config.mk local_config.h + +ifeq ($(COW_EXTRA_LIBS),) +all: warn_missing_liburing + +warn_missing_liburing: + @echo ; \ + echo "Warning: missing liburing support. Some COW tests will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh new file mode 100644 index 000000000000..bcba3af0acea --- /dev/null +++ b/tools/testing/selftests/vm/check_config.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Probe for libraries and create header files to record the results. Both C +# header files and Makefile include fragments are created. + +OUTPUT_H_FILE=local_config.h +OUTPUT_MKFILE=local_config.mk + +tmpname=$(mktemp) +tmpfile_c=${tmpname}.c +tmpfile_o=${tmpname}.o + +# liburing +echo "#include <sys/types.h>" > $tmpfile_c +echo "#include <liburing.h>" >> $tmpfile_c +echo "int func(void) { return 0; }" >> $tmpfile_c + +CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"} +$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 + +if [ -f $tmpfile_o ]; then + echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE + echo "COW_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE +else + echo "// No liburing support found" > $OUTPUT_H_FILE + echo "# No liburing support found, so:" > $OUTPUT_MKFILE + echo "COW_EXTRA_LIBS = " >> $OUTPUT_MKFILE +fi + +rm ${tmpname}.* diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c new file mode 100644 index 000000000000..26f6ea3079e2 --- /dev/null +++ b/tools/testing/selftests/vm/cow.c @@ -0,0 +1,1536 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * COW (Copy On Write) tests. + * + * Copyright 2022, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <dirent.h> +#include <assert.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/wait.h> +#include <linux/memfd.h> + +#include "local_config.h" +#ifdef LOCAL_CONFIG_HAVE_LIBURING +#include <liburing.h> +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + +#include "../../../../mm/gup_test.h" +#include "../kselftest.h" +#include "vm_util.h" + +static size_t pagesize; +static int pagemap_fd; +static size_t thpsize; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; +static int gup_fd; +static bool has_huge_zeropage; + +static void detect_thpsize(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", + O_RDONLY); + size_t size = 0; + char buf[15]; + int ret; + + if (fd < 0) + return; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + size = strtoul(buf, NULL, 10); + if (size < pagesize) + size = 0; + if (size > 0) { + thpsize = size; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", + thpsize / 1024); + } + } + + close(fd); +} + +static void detect_huge_zeropage(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + size_t enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + enabled = strtoul(buf, NULL, 10); + if (enabled == 1) { + has_huge_zeropage = true; + ksft_print_msg("[INFO] huge zeropage is enabled\n"); + } + } + + close(fd); +} + +static void detect_hugetlbsizes(void) +{ + DIR *dir = opendir("/sys/kernel/mm/hugepages/"); + + if (!dir) + return; + + while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) { + struct dirent *entry = readdir(dir); + size_t kb; + + if (!entry) + break; + if (entry->d_type != DT_DIR) + continue; + if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) + continue; + hugetlbsizes[nr_hugetlbsizes] = kb * 1024; + nr_hugetlbsizes++; + ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n", + kb); + } + closedir(dir); +} + +static bool range_is_swapped(void *addr, size_t size) +{ + for (; size; addr += pagesize, size -= pagesize) + if (!pagemap_is_swapped(pagemap_fd, addr)) + return false; + return true; +} + +struct comm_pipes { + int child_ready[2]; + int parent_ready[2]; +}; + +static int setup_comm_pipes(struct comm_pipes *comm_pipes) +{ + if (pipe(comm_pipes->child_ready) < 0) + return -errno; + if (pipe(comm_pipes->parent_ready) < 0) { + close(comm_pipes->child_ready[0]); + close(comm_pipes->child_ready[1]); + return -errno; + } + + return 0; +} + +static void close_comm_pipes(struct comm_pipes *comm_pipes) +{ + close(comm_pipes->child_ready[0]); + close(comm_pipes->child_ready[1]); + close(comm_pipes->parent_ready[0]); + close(comm_pipes->parent_ready[1]); +} + +static int child_memcmp_fn(char *mem, size_t size, + struct comm_pipes *comm_pipes) +{ + char *old = malloc(size); + char buf; + + /* Backup the original content. */ + memcpy(old, mem, size); + + /* Wait until the parent modified the page. */ + write(comm_pipes->child_ready[1], "0", 1); + while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) + ; + + /* See if we still read the old values. */ + return memcmp(old, mem, size); +} + +static int child_vmsplice_memcmp_fn(char *mem, size_t size, + struct comm_pipes *comm_pipes) +{ + struct iovec iov = { + .iov_base = mem, + .iov_len = size, + }; + ssize_t cur, total, transferred; + char *old, *new; + int fds[2]; + char buf; + + old = malloc(size); + new = malloc(size); + + /* Backup the original content. */ + memcpy(old, mem, size); + + if (pipe(fds) < 0) + return -errno; + + /* Trigger a read-only pin. */ + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred < 0) + return -errno; + if (transferred == 0) + return -EINVAL; + + /* Unmap it from our page tables. */ + if (munmap(mem, size) < 0) + return -errno; + + /* Wait until the parent modified it. */ + write(comm_pipes->child_ready[1], "0", 1); + while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) + ; + + /* See if we still read the old values via the pipe. */ + for (total = 0; total < transferred; total += cur) { + cur = read(fds[0], new + total, transferred - total); + if (cur < 0) + return -errno; + } + + return memcmp(old, new, transferred); +} + +typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); + +static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, + child_fn fn) +{ + struct comm_pipes comm_pipes; + char buf; + int ret; + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + return; + } + + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_comm_pipes; + } else if (!ret) { + exit(fn(mem, size, &comm_pipes)); + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + + if (do_mprotect) { + /* + * mprotect() optimizations might try avoiding + * write-faults by directly mapping pages writable. + */ + ret = mprotect(mem, size, PROT_READ); + ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + } + + /* Modify the page. */ + memset(mem, 0xff, size); + write(comm_pipes.parent_ready[1], "0", 1); + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + + ksft_test_result(!ret, "No leak from parent into child\n"); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +} + +static void test_cow_in_parent(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, false, child_memcmp_fn); +} + +static void test_cow_in_parent_mprotect(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, true, child_memcmp_fn); +} + +static void test_vmsplice_in_child(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn); +} + +static void test_vmsplice_in_child_mprotect(char *mem, size_t size) +{ + do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn); +} + +static void do_test_vmsplice_in_parent(char *mem, size_t size, + bool before_fork) +{ + struct iovec iov = { + .iov_base = mem, + .iov_len = size, + }; + ssize_t cur, total, transferred; + struct comm_pipes comm_pipes; + char *old, *new; + int ret, fds[2]; + char buf; + + old = malloc(size); + new = malloc(size); + + memcpy(old, mem, size); + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + goto free; + } + + if (pipe(fds) < 0) { + ksft_test_result_fail("pipe() failed\n"); + goto close_comm_pipes; + } + + if (before_fork) { + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred <= 0) { + ksft_test_result_fail("vmsplice() failed\n"); + goto close_pipe; + } + } + + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_pipe; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + /* Modify page content in the child. */ + memset(mem, 0xff, size); + exit(0); + } + + if (!before_fork) { + transferred = vmsplice(fds[1], &iov, 1, 0); + if (transferred <= 0) { + ksft_test_result_fail("vmsplice() failed\n"); + wait(&ret); + goto close_pipe; + } + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + if (munmap(mem, size) < 0) { + ksft_test_result_fail("munmap() failed\n"); + goto close_pipe; + } + write(comm_pipes.parent_ready[1], "0", 1); + + /* Wait until the child is done writing. */ + wait(&ret); + if (!WIFEXITED(ret)) { + ksft_test_result_fail("wait() failed\n"); + goto close_pipe; + } + + /* See if we still read the old values. */ + for (total = 0; total < transferred; total += cur) { + cur = read(fds[0], new + total, transferred - total); + if (cur < 0) { + ksft_test_result_fail("read() failed\n"); + goto close_pipe; + } + } + + ksft_test_result(!memcmp(old, new, transferred), + "No leak from child into parent\n"); +close_pipe: + close(fds[0]); + close(fds[1]); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +free: + free(old); + free(new); +} + +static void test_vmsplice_before_fork(char *mem, size_t size) +{ + do_test_vmsplice_in_parent(mem, size, true); +} + +static void test_vmsplice_after_fork(char *mem, size_t size) +{ + do_test_vmsplice_in_parent(mem, size, false); +} + +#ifdef LOCAL_CONFIG_HAVE_LIBURING +static void do_test_iouring(char *mem, size_t size, bool use_fork) +{ + struct comm_pipes comm_pipes; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + ssize_t cur, total; + struct iovec iov; + char *buf, *tmp; + int ret, fd; + FILE *file; + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + return; + } + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + goto close_comm_pipes; + } + fd = fileno(file); + assert(fd); + + tmp = malloc(size); + if (!tmp) { + ksft_test_result_fail("malloc() failed\n"); + goto close_file; + } + + /* Skip on errors, as we might just lack kernel support. */ + ret = io_uring_queue_init(1, &ring, 0); + if (ret < 0) { + ksft_test_result_skip("io_uring_queue_init() failed\n"); + goto free_tmp; + } + + /* + * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN + * | FOLL_LONGTERM the range. + * + * Skip on errors, as we might just lack kernel support or might not + * have sufficient MEMLOCK permissions. + */ + iov.iov_base = mem; + iov.iov_len = size; + ret = io_uring_register_buffers(&ring, &iov, 1); + if (ret) { + ksft_test_result_skip("io_uring_register_buffers() failed\n"); + goto queue_exit; + } + + if (use_fork) { + /* + * fork() and keep the child alive until we're done. Note that + * we expect the pinned page to not get shared with the child. + */ + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto unregister_buffers; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + exit(0); + } + + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + } else { + /* + * Map the page R/O into the page table. Enable softdirty + * tracking to stop the page from getting mapped R/W immediately + * again by mprotect() optimizations. Note that we don't have an + * easy way to test if that worked (the pagemap does not export + * if the page is mapped R/O vs. R/W). + */ + ret = mprotect(mem, size, PROT_READ); + clear_softdirty(); + ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto unregister_buffers; + } + } + + /* + * Modify the page and write page content as observed by the fixed + * buffer pin to the file so we can verify it. + */ + memset(mem, 0xff, size); + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + ksft_test_result_fail("io_uring_get_sqe() failed\n"); + goto quit_child; + } + io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); + + ret = io_uring_submit(&ring); + if (ret < 0) { + ksft_test_result_fail("io_uring_submit() failed\n"); + goto quit_child; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + goto quit_child; + } + + if (cqe->res != size) { + ksft_test_result_fail("write_fixed failed\n"); + goto quit_child; + } + io_uring_cqe_seen(&ring, cqe); + + /* Read back the file content to the temporary buffer. */ + total = 0; + while (total < size) { + cur = pread(fd, tmp + total, size - total, total); + if (cur < 0) { + ksft_test_result_fail("pread() failed\n"); + goto quit_child; + } + total += cur; + } + + /* Finally, check if we read what we expected. */ + ksft_test_result(!memcmp(mem, tmp, size), + "Longterm R/W pin is reliable\n"); + +quit_child: + if (use_fork) { + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + } +unregister_buffers: + io_uring_unregister_buffers(&ring); +queue_exit: + io_uring_queue_exit(&ring); +free_tmp: + free(tmp); +close_file: + fclose(file); +close_comm_pipes: + close_comm_pipes(&comm_pipes); +} + +static void test_iouring_ro(char *mem, size_t size) +{ + do_test_iouring(mem, size, false); +} + +static void test_iouring_fork(char *mem, size_t size) +{ + do_test_iouring(mem, size, true); +} + +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + +enum ro_pin_test { + RO_PIN_TEST, + RO_PIN_TEST_SHARED, + RO_PIN_TEST_PREVIOUSLY_SHARED, + RO_PIN_TEST_RO_EXCLUSIVE, +}; + +static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, + bool fast) +{ + struct pin_longterm_test args; + struct comm_pipes comm_pipes; + char *tmp, buf; + __u64 tmp_val; + int ret; + + if (gup_fd < 0) { + ksft_test_result_skip("gup_test not available\n"); + return; + } + + tmp = malloc(size); + if (!tmp) { + ksft_test_result_fail("malloc() failed\n"); + return; + } + + ret = setup_comm_pipes(&comm_pipes); + if (ret) { + ksft_test_result_fail("pipe() failed\n"); + goto free_tmp; + } + + switch (test) { + case RO_PIN_TEST: + break; + case RO_PIN_TEST_SHARED: + case RO_PIN_TEST_PREVIOUSLY_SHARED: + /* + * Share the pages with our child. As the pages are not pinned, + * this should just work. + */ + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto close_comm_pipes; + } else if (!ret) { + write(comm_pipes.child_ready[1], "0", 1); + while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) + ; + exit(0); + } + + /* Wait until our child is ready. */ + while (read(comm_pipes.child_ready[0], &buf, 1) != 1) + ; + + if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { + /* + * Tell the child to quit now and wait until it quit. + * The pages should now be mapped R/O into our page + * tables, but they are no longer shared. + */ + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + if (!WIFEXITED(ret)) + ksft_print_msg("[INFO] wait() failed\n"); + } + break; + case RO_PIN_TEST_RO_EXCLUSIVE: + /* + * Map the page R/O into the page table. Enable softdirty + * tracking to stop the page from getting mapped R/W immediately + * again by mprotect() optimizations. Note that we don't have an + * easy way to test if that worked (the pagemap does not export + * if the page is mapped R/O vs. R/W). + */ + ret = mprotect(mem, size, PROT_READ); + clear_softdirty(); + ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto close_comm_pipes; + } + break; + default: + assert(false); + } + + /* Take a R/O pin. This should trigger unsharing. */ + args.addr = (__u64)(uintptr_t)mem; + args.size = size; + args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); + if (ret) { + if (errno == EINVAL) + ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + else + ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + goto wait; + } + + /* Modify the page. */ + memset(mem, 0xff, size); + + /* + * Read back the content via the pin to the temporary buffer and + * test if we observed the modification. + */ + tmp_val = (__u64)(uintptr_t)tmp; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); + if (ret) + ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); + else + ksft_test_result(!memcmp(mem, tmp, size), + "Longterm R/O pin is reliable\n"); + + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); + if (ret) + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); +wait: + switch (test) { + case RO_PIN_TEST_SHARED: + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + if (!WIFEXITED(ret)) + ksft_print_msg("[INFO] wait() failed\n"); + break; + default: + break; + } +close_comm_pipes: + close_comm_pipes(&comm_pipes); +free_tmp: + free(tmp); +} + +static void test_ro_pin_on_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); +} + +static void test_ro_fast_pin_on_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); +} + +static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); +} + +static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); +} + +static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); +} + +static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); +} + +typedef void (*test_fn)(char *mem, size_t size); + +static void do_run_with_base_page(test_fn fn, bool swapout) +{ + char *mem; + int ret; + + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); + /* Ignore if not around on a kernel. */ + if (ret && errno != EINVAL) { + ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + goto munmap; + } + + /* Populate a base page. */ + memset(mem, 0, pagesize); + + if (swapout) { + madvise(mem, pagesize, MADV_PAGEOUT); + if (!pagemap_is_swapped(pagemap_fd, mem)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto munmap; + } + } + + fn(mem, pagesize); +munmap: + munmap(mem, pagesize); +} + +static void run_with_base_page(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with base page\n", desc); + do_run_with_base_page(fn, false); +} + +static void run_with_base_page_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + do_run_with_base_page(fn, true); +} + +enum thp_run { + THP_RUN_PMD, + THP_RUN_PMD_SWAPOUT, + THP_RUN_PTE, + THP_RUN_PTE_SWAPOUT, + THP_RUN_SINGLE_PTE, + THP_RUN_SINGLE_PTE_SWAPOUT, + THP_RUN_PARTIAL_MREMAP, + THP_RUN_PARTIAL_SHARED, +}; + +static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +{ + char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; + size_t size, mmap_size, mremap_size; + int ret; + + /* For alignment purposes, we need twice the thp size. */ + mmap_size = 2 * thpsize; + mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); + + ret = madvise(mem, thpsize, MADV_HUGEPAGE); + if (ret) { + ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + goto munmap; + } + + /* + * Try to populate a THP. Touch the first sub-page and test if we get + * another sub-page populated automatically. + */ + mem[0] = 0; + if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + ksft_test_result_skip("Did not get a THP populated\n"); + goto munmap; + } + memset(mem, 0, thpsize); + + size = thpsize; + switch (thp_run) { + case THP_RUN_PMD: + case THP_RUN_PMD_SWAPOUT: + break; + case THP_RUN_PTE: + case THP_RUN_PTE_SWAPOUT: + /* + * Trigger PTE-mapping the THP by temporarily mapping a single + * subpage R/O. + */ + ret = mprotect(mem + pagesize, pagesize, PROT_READ); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + break; + case THP_RUN_SINGLE_PTE: + case THP_RUN_SINGLE_PTE_SWAPOUT: + /* + * Discard all but a single subpage of that PTE-mapped THP. What + * remains is a single PTE mapping a single subpage. + */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); + if (ret) { + ksft_test_result_fail("MADV_DONTNEED failed\n"); + goto munmap; + } + size = pagesize; + break; + case THP_RUN_PARTIAL_MREMAP: + /* + * Remap half of the THP. We need some new memory location + * for that. + */ + mremap_size = thpsize / 2; + mremap_mem = mmap(NULL, mremap_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + tmp = mremap(mem + mremap_size, mremap_size, mremap_size, + MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); + if (tmp != mremap_mem) { + ksft_test_result_fail("mremap() failed\n"); + goto munmap; + } + size = mremap_size; + break; + case THP_RUN_PARTIAL_SHARED: + /* + * Share the first page of the THP with a child and quit the + * child. This will result in some parts of the THP never + * have been shared. + */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); + if (ret) { + ksft_test_result_fail("MADV_DONTFORK failed\n"); + goto munmap; + } + ret = fork(); + if (ret < 0) { + ksft_test_result_fail("fork() failed\n"); + goto munmap; + } else if (!ret) { + exit(0); + } + wait(&ret); + /* Allow for sharing all pages again. */ + ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); + if (ret) { + ksft_test_result_fail("MADV_DOFORK failed\n"); + goto munmap; + } + break; + default: + assert(false); + } + + switch (thp_run) { + case THP_RUN_PMD_SWAPOUT: + case THP_RUN_PTE_SWAPOUT: + case THP_RUN_SINGLE_PTE_SWAPOUT: + madvise(mem, size, MADV_PAGEOUT); + if (!range_is_swapped(mem, size)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto munmap; + } + break; + default: + break; + } + + fn(mem, size); +munmap: + munmap(mmap_mem, mmap_size); + if (mremap_mem != MAP_FAILED) + munmap(mremap_mem, mremap_size); +} + +static void run_with_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with THP\n", desc); + do_run_with_thp(fn, THP_RUN_PMD); +} + +static void run_with_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); +} + +static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); + do_run_with_thp(fn, THP_RUN_PTE); +} + +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); +} + +static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE); +} + +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); +} + +static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); +} + +static void run_with_partial_shared_thp(test_fn fn, const char *desc) +{ + ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); +} + +static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) +{ + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; + char *mem, *dummy; + + ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; + + mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + return; + } + + /* Populate an huge page. */ + memset(mem, 0, hugetlbsize); + + /* + * We need a total of two hugetlb pages to handle COW/unsharing + * properly, otherwise we might get zapped by a SIGBUS. + */ + dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); + if (dummy == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + goto munmap; + } + munmap(dummy, hugetlbsize); + + fn(mem, hugetlbsize); +munmap: + munmap(mem, hugetlbsize); +} + +struct test_case { + const char *desc; + test_fn fn; +}; + +/* + * Test cases that are specific to anonymous pages: pages in private mappings + * that may get shared via COW during fork(). + */ +static const struct test_case anon_test_cases[] = { + /* + * Basic COW tests for fork() without any GUP. If we miss to break COW, + * either the child can observe modifications by the parent or the + * other way around. + */ + { + "Basic COW after fork()", + test_cow_in_parent, + }, + /* + * Basic test, but do an additional mprotect(PROT_READ)+ + * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. + */ + { + "Basic COW after fork() with mprotect() optimization", + test_cow_in_parent_mprotect, + }, + /* + * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If + * we miss to break COW, the child observes modifications by the parent. + * This is CVE-2020-29374 reported by Jann Horn. + */ + { + "vmsplice() + unmap in child", + test_vmsplice_in_child + }, + /* + * vmsplice() test, but do an additional mprotect(PROT_READ)+ + * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. + */ + { + "vmsplice() + unmap in child with mprotect() optimization", + test_vmsplice_in_child_mprotect + }, + /* + * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after + * fork(); modify in the child. If we miss to break COW, the parent + * observes modifications by the child. + */ + { + "vmsplice() before fork(), unmap in parent after fork()", + test_vmsplice_before_fork, + }, + /* + * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the + * child. If we miss to break COW, the parent observes modifications by + * the child. + */ + { + "vmsplice() + unmap in parent after fork()", + test_vmsplice_after_fork, + }, +#ifdef LOCAL_CONFIG_HAVE_LIBURING + /* + * Take a R/W longterm pin and then map the page R/O into the page + * table to trigger a write fault on next access. When modifying the + * page, the page content must be visible via the pin. + */ + { + "R/O-mapping a page registered as iouring fixed buffer", + test_iouring_ro, + }, + /* + * Take a R/W longterm pin and then fork() a child. When modifying the + * page, the page content must be visible via the pin. We expect the + * pinned page to not get shared with the child. + */ + { + "fork() with an iouring fixed buffer", + test_iouring_fork, + }, + +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + /* + * Take a R/O longterm pin on a R/O-mapped shared anonymous page. + * When modifying the page via the page table, the page content change + * must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped shared page", + test_ro_pin_on_shared, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped shared page", + test_ro_fast_pin_on_shared, + }, + /* + * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that + * was previously shared. When modifying the page via the page table, + * the page content change must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped previously-shared page", + test_ro_pin_on_ro_previously_shared, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped previously-shared page", + test_ro_fast_pin_on_ro_previously_shared, + }, + /* + * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. + * When modifying the page via the page table, the page content change + * must be visible via the pin. + */ + { + "R/O GUP pin on R/O-mapped exclusive page", + test_ro_pin_on_ro_exclusive, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O GUP-fast pin on R/O-mapped exclusive page", + test_ro_fast_pin_on_ro_exclusive, + }, +}; + +static void run_anon_test_case(struct test_case const *test_case) +{ + int i; + + run_with_base_page(test_case->fn, test_case->desc); + run_with_base_page_swap(test_case->fn, test_case->desc); + if (thpsize) { + run_with_thp(test_case->fn, test_case->desc); + run_with_thp_swap(test_case->fn, test_case->desc); + run_with_pte_mapped_thp(test_case->fn, test_case->desc); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); + run_with_single_pte_of_thp(test_case->fn, test_case->desc); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); + run_with_partial_mremap_thp(test_case->fn, test_case->desc); + run_with_partial_shared_thp(test_case->fn, test_case->desc); + } + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); +} + +static void run_anon_test_cases(void) +{ + int i; + + ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); + + for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) + run_anon_test_case(&anon_test_cases[i]); +} + +static int tests_per_anon_test_case(void) +{ + int tests = 2 + nr_hugetlbsizes; + + if (thpsize) + tests += 8; + return tests; +} + +typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); + +static void test_cow(char *mem, const char *smem, size_t size) +{ + char *old = malloc(size); + + /* Backup the original content. */ + memcpy(old, smem, size); + + /* Modify the page. */ + memset(mem, 0xff, size); + + /* See if we still read the old values via the other mapping. */ + ksft_test_result(!memcmp(smem, old, size), + "Other mapping not modified\n"); + free(old); +} + +static void test_ro_pin(char *mem, const char *smem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST, false); +} + +static void test_ro_fast_pin(char *mem, const char *smem, size_t size) +{ + do_test_ro_pin(mem, size, RO_PIN_TEST, true); +} + +static void run_with_zeropage(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + + ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + + smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Read from the page to populate the shared zeropage. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +} + +static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, *mmap_mem, *mmap_smem, tmp; + size_t mmap_size; + int ret; + + ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + + if (!has_huge_zeropage) { + ksft_test_result_skip("Huge zeropage not enabled\n"); + return; + } + + /* For alignment purposes, we need twice the thp size. */ + mmap_size = 2 * thpsize; + mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return; + } + mmap_smem = mmap(NULL, mmap_size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_smem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + + ret = madvise(mem, thpsize, MADV_HUGEPAGE); + ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + if (ret) { + ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + goto munmap; + } + + /* + * Read from the memory to populate the huge shared zeropage. Read from + * the first sub-page and test if we get another sub-page populated + * automatically. + */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || + !pagemap_is_populated(pagemap_fd, smem + pagesize)) { + ksft_test_result_skip("Did not get THPs populated\n"); + goto munmap; + } + + fn(mem, smem, thpsize); +munmap: + munmap(mmap_mem, mmap_size); + if (mmap_smem != MAP_FAILED) + munmap(mmap_smem, mmap_size); +} + +static void run_with_memfd(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd\n", desc); + + fd = memfd_create("test", 0); + if (fd < 0) { + ksft_test_result_fail("memfd_create() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, pagesize)) { + ksft_test_result_fail("fallocate() failed\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto close; + } + smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +close: + close(fd); +} + +static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) +{ + char *mem, *smem, tmp; + FILE *file; + int fd; + + ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + return; + } + + fd = fileno(file); + if (fd < 0) { + ksft_test_result_skip("fileno() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, pagesize)) { + ksft_test_result_fail("fallocate() failed\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto close; + } + smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, pagesize); +munmap: + munmap(mem, pagesize); + if (smem != MAP_FAILED) + munmap(smem, pagesize); +close: + fclose(file); +} + +static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, + size_t hugetlbsize) +{ + int flags = MFD_HUGETLB; + char *mem, *smem, tmp; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; + + fd = memfd_create("test", flags); + if (fd < 0) { + ksft_test_result_skip("memfd_create() failed\n"); + return; + } + + /* File consists of a single page filled with zeroes. */ + if (fallocate(fd, 0, 0, hugetlbsize)) { + ksft_test_result_skip("need more free huge pages\n"); + goto close; + } + + /* Create a private mapping of the memfd. */ + mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, + 0); + if (mem == MAP_FAILED) { + ksft_test_result_skip("need more free huge pages\n"); + goto close; + } + smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); + if (mem == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + goto munmap; + } + + /* Fault the page in. */ + tmp = *mem + *smem; + asm volatile("" : "+r" (tmp)); + + fn(mem, smem, hugetlbsize); +munmap: + munmap(mem, hugetlbsize); + if (mem != MAP_FAILED) + munmap(smem, hugetlbsize); +close: + close(fd); +} + +struct non_anon_test_case { + const char *desc; + non_anon_test_fn fn; +}; + +/* + * Test cases that target any pages in private mappings that are not anonymous: + * pages that may get shared via COW ndependent of fork(). This includes + * the shared zeropage(s), pagecache pages, ... + */ +static const struct non_anon_test_case non_anon_test_cases[] = { + /* + * Basic COW test without any GUP. If we miss to break COW, changes are + * visible via other private/shared mappings. + */ + { + "Basic COW", + test_cow, + }, + /* + * Take a R/O longterm pin. When modifying the page via the page table, + * the page content change must be visible via the pin. + */ + { + "R/O longterm GUP pin", + test_ro_pin, + }, + /* Same as above, but using GUP-fast. */ + { + "R/O longterm GUP-fast pin", + test_ro_fast_pin, + }, +}; + +static void run_non_anon_test_case(struct non_anon_test_case const *test_case) +{ + int i; + + run_with_zeropage(test_case->fn, test_case->desc); + run_with_memfd(test_case->fn, test_case->desc); + run_with_tmpfile(test_case->fn, test_case->desc); + if (thpsize) + run_with_huge_zeropage(test_case->fn, test_case->desc); + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_memfd_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); +} + +static void run_non_anon_test_cases(void) +{ + int i; + + ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); + + for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) + run_non_anon_test_case(&non_anon_test_cases[i]); +} + +static int tests_per_non_anon_test_case(void) +{ + int tests = 3 + nr_hugetlbsizes; + + if (thpsize) + tests += 1; + return tests; +} + +int main(int argc, char **argv) +{ + int err; + + pagesize = getpagesize(); + detect_thpsize(); + detect_hugetlbsizes(); + detect_huge_zeropage(); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); + + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + + run_anon_test_cases(); + run_non_anon_test_cases(); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c index 93f9e7b81331..955ef87f382c 100644 --- a/tools/testing/selftests/vm/hugepage-mmap.c +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -16,14 +16,13 @@ * range. * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ - +#define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> -#define FILE_NAME "huge/hugepagefile" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -67,16 +66,16 @@ int main(void) void *addr; int fd, ret; - fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + fd = memfd_create("hugepage-mmap", MFD_HUGETLB); if (fd < 0) { - perror("Open failed"); + perror("memfd_create() failed"); exit(1); } addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); - unlink(FILE_NAME); + close(fd); exit(1); } @@ -87,7 +86,6 @@ int main(void) munmap(addr, LENGTH); close(fd); - unlink(FILE_NAME); return ret; } diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index e63a0214f639..e53b5eaa8fce 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -22,6 +22,7 @@ #include <sys/syscall.h> /* Definition of SYS_* constants */ #include <linux/userfaultfd.h> #include <sys/ioctl.h> +#include <string.h> #define DEFAULT_LENGTH_MB 10UL #define MB_TO_BYTES(x) (x * 1024 * 1024) @@ -108,26 +109,23 @@ static void register_region_with_uffd(char *addr, size_t len) int main(int argc, char *argv[]) { size_t length = 0; + int ret = 0, fd; - if (argc != 2 && argc != 3) { - printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]); + if (argc >= 2 && !strcmp(argv[1], "-h")) { + printf("Usage: %s [length_in_MB]\n", argv[0]); exit(1); } /* Read memory length as the first arg if valid, otherwise fallback to * the default length. */ - if (argc == 3) - length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL; + if (argc >= 2) + length = (size_t)atoi(argv[1]); + else + length = DEFAULT_LENGTH_MB; - length = length > 0 ? length : DEFAULT_LENGTH_MB; length = MB_TO_BYTES(length); - - int ret = 0; - - /* last arg is the hugetlb file name */ - int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755); - + fd = memfd_create(argv[0], MFD_HUGETLB); if (fd < 0) { perror("Open failed"); exit(1); @@ -185,7 +183,6 @@ int main(int argc, char *argv[]) } close(fd); - unlink(argv[argc-1]); return ret; } diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index 3c9943131881..9a127a8fe176 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -12,14 +12,13 @@ * directory. */ +#define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <sys/mman.h> -#define __USE_GNU #include <fcntl.h> -#define USAGE "USAGE: %s <hugepagefile_name>\n" #define MIN_FREE_PAGES 20 #define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */ @@ -103,11 +102,6 @@ int main(int argc, char **argv) int fd; int ret; - if (argc != 2) { - printf(USAGE, argv[0]); - exit(1); - } - huge_page_size = default_huge_page_size(); if (!huge_page_size) { printf("Unable to determine huge page size, exiting!\n"); @@ -125,9 +119,9 @@ int main(int argc, char **argv) exit(1); } - fd = open(argv[1], O_CREAT | O_RDWR, 0755); + fd = memfd_create(argv[0], MFD_HUGETLB); if (fd < 0) { - perror("Open failed"); + perror("memfd_create() failed"); exit(1); } @@ -200,7 +194,7 @@ int main(int argc, char **argv) exit(1); } - /* addr + length should be aligned up to huge page size */ + /* addr + length should be aligned down to huge page size */ if (madvise(addr, ((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size, MADV_DONTNEED)) { @@ -208,10 +202,11 @@ int main(int argc, char **argv) exit(1); } - /* should free all pages in mapping */ - validate_free_pages(free_hugepages); + /* should free all but last page in mapping */ + validate_free_pages(free_hugepages - 1); (void)munmap(addr, NR_HUGE_PAGES * huge_page_size); + validate_free_pages(free_hugepages); /* * Test MADV_DONTNEED on anonymous private mapping @@ -406,6 +401,5 @@ int main(int argc, char **argv) (void)munmap(addr2, NR_HUGE_PAGES * huge_page_size); close(fd); - unlink(argv[1]); return 0; } diff --git a/tools/testing/selftests/vm/ksm_functional_tests.c b/tools/testing/selftests/vm/ksm_functional_tests.c new file mode 100644 index 000000000000..b11b7e5115dc --- /dev/null +++ b/tools/testing/selftests/vm/ksm_functional_tests.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KSM functional tests + * + * Copyright 2022, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> + +#include "../kselftest.h" +#include "vm_util.h" + +#define KiB 1024u +#define MiB (1024 * KiB) + +static int ksm_fd; +static int ksm_full_scans_fd; +static int pagemap_fd; +static size_t pagesize; + +static bool range_maps_duplicates(char *addr, unsigned long size) +{ + unsigned long offs_a, offs_b, pfn_a, pfn_b; + + /* + * There is no easy way to check if there are KSM pages mapped into + * this range. We only check that the range does not map the same PFN + * twice by comaring each pair of mapped pages. + */ + for (offs_a = 0; offs_a < size; offs_a += pagesize) { + pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a); + /* Page not present or PFN not exposed by the kernel. */ + if (pfn_a == -1ul || !pfn_a) + continue; + + for (offs_b = offs_a + pagesize; offs_b < size; + offs_b += pagesize) { + pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b); + if (pfn_b == -1ul || !pfn_b) + continue; + if (pfn_a == pfn_b) + return true; + } + } + return false; +} + +static long ksm_get_full_scans(void) +{ + char buf[10]; + ssize_t ret; + + ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +static int ksm_merge(void) +{ + long start_scans, end_scans; + + /* Wait for two full scans such that any possible merging happened. */ + start_scans = ksm_get_full_scans(); + if (start_scans < 0) + return start_scans; + if (write(ksm_fd, "1", 1) != 1) + return -errno; + do { + end_scans = ksm_get_full_scans(); + if (end_scans < 0) + return end_scans; + } while (end_scans < start_scans + 2); + + return 0; +} + +static char *mmap_and_merge_range(char val, unsigned long size) +{ + char *map; + + map = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (map == MAP_FAILED) { + ksft_test_result_fail("mmap() failed\n"); + return MAP_FAILED; + } + + /* Don't use THP. Ignore if THP are not around on a kernel. */ + if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) { + ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + goto unmap; + } + + /* Make sure each page contains the same values to merge them. */ + memset(map, val, size); + if (madvise(map, size, MADV_MERGEABLE)) { + ksft_test_result_fail("MADV_MERGEABLE failed\n"); + goto unmap; + } + + /* Run KSM to trigger merging and wait. */ + if (ksm_merge()) { + ksft_test_result_fail("Running KSM failed\n"); + goto unmap; + } + return map; +unmap: + munmap(map, size); + return MAP_FAILED; +} + +static void test_unmerge(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + +static void test_unmerge_discarded(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + /* Discard half of all mapped pages so we have pte_none() entries. */ + if (madvise(map, size / 2, MADV_DONTNEED)) { + ksft_test_result_fail("MADV_DONTNEED failed\n"); + goto unmap; + } + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + +#ifdef __NR_userfaultfd +static void test_unmerge_uffd_wp(void) +{ + struct uffdio_writeprotect uffd_writeprotect; + struct uffdio_register uffdio_register; + const unsigned int size = 2 * MiB; + struct uffdio_api uffdio_api; + char *map; + int uffd; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size); + if (map == MAP_FAILED) + return; + + /* See if UFFD is around. */ + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + ksft_test_result_skip("__NR_userfaultfd failed\n"); + goto unmap; + } + + /* See if UFFD-WP is around. */ + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { + ksft_test_result_fail("UFFDIO_API failed\n"); + goto close_uffd; + } + if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { + ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n"); + goto close_uffd; + } + + /* Register UFFD-WP, no need for an actual handler. */ + uffdio_register.range.start = (unsigned long) map; + uffdio_register.range.len = size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) { + ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n"); + goto close_uffd; + } + + /* Write-protect the range using UFFD-WP. */ + uffd_writeprotect.range.start = (unsigned long) map; + uffd_writeprotect.range.len = size; + uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP; + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { + ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n"); + goto close_uffd; + } + + if (madvise(map, size, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto close_uffd; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +close_uffd: + close(uffd); +unmap: + munmap(map, size); +} +#endif + +int main(int argc, char **argv) +{ + unsigned int tests = 2; + int err; + +#ifdef __NR_userfaultfd + tests++; +#endif + + ksft_print_header(); + ksft_set_plan(tests); + + pagesize = getpagesize(); + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + + test_unmerge(); + test_unmerge_discarded(); +#ifdef __NR_userfaultfd + test_unmerge_uffd_wp(); +#endif + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be2350fa..f9eb4d67e0dd 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -40,6 +40,7 @@ enum ksm_test_name { CHECK_KSM_NUMA_MERGE, KSM_MERGE_TIME, KSM_MERGE_TIME_HUGE_PAGES, + KSM_UNMERGE_TIME, KSM_COW_TIME }; @@ -108,7 +109,10 @@ static void print_help(void) " -P evaluate merging time and speed.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" - " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " For this test, the size of duplicated memory area (in MiB)\n" + " must be provided using -s option\n" + " -D evaluate unmerging time and speed when disabling KSM.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" " -C evaluate the time required to break COW of merged pages.\n\n"); @@ -188,6 +192,16 @@ static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, return 0; } +static int ksm_unmerge_pages(void *addr, size_t size, + struct timespec start_time, int timeout) +{ + if (madvise(addr, size, MADV_UNMERGEABLE)) { + perror("madvise"); + return 1; + } + return 0; +} + static bool assert_ksm_pages_count(long dupl_page_count) { unsigned long max_page_sharing, pages_sharing, pages_shared; @@ -560,6 +574,53 @@ err_out: return KSFT_FAIL; } +static int ksm_unmerge_time(int mapping, int prot, int timeout, size_t map_size) +{ + void *map_ptr; + struct timespec start_time, end_time; + unsigned long scan_time_ns; + + map_size *= MB; + + map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size); + if (!map_ptr) + return KSFT_FAIL; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n", map_size / MB); + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC, + scan_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", (map_size / MB) / + ((double)scan_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr, map_size); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, map_size); + return KSFT_FAIL; +} + static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size) { void *map_ptr; @@ -644,7 +705,7 @@ int main(int argc, char *argv[]) bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCHD")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -701,6 +762,9 @@ int main(int argc, char *argv[]) case 'H': test_name = KSM_MERGE_TIME_HUGE_PAGES; break; + case 'D': + test_name = KSM_UNMERGE_TIME; + break; case 'C': test_name = KSM_COW_TIME; break; @@ -762,6 +826,14 @@ int main(int argc, char *argv[]) ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, size_MB); break; + case KSM_UNMERGE_TIME: + if (size_MB == 0) { + printf("Option '-s' is required.\n"); + return KSFT_FAIL; + } + ret = ksm_unmerge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, + ksm_scan_limit_sec, size_MB); + break; case KSM_COW_TIME: ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index 715a42e8e2cd..262eae6b58f2 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -20,6 +20,13 @@ #include "../kselftest.h" #include "vm_util.h" +#ifndef MADV_POPULATE_READ +#define MADV_POPULATE_READ 22 +#endif /* MADV_POPULATE_READ */ +#ifndef MADV_POPULATE_WRITE +#define MADV_POPULATE_WRITE 23 +#endif /* MADV_POPULATE_WRITE */ + /* * For now, we're using 2 MiB of private anonymous memory for all tests. */ @@ -27,14 +34,6 @@ static size_t pagesize; -static bool pagemap_is_populated(int fd, char *start) -{ - uint64_t entry = pagemap_get_entry(fd, start); - - /* Present or swapped. */ - return entry & 0xc000000000000000ull; -} - static void sense_support(void) { char *addr; diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index b078ce9c6d2a..72c14cd3ddc7 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void) return 1; } +static inline int cpu_max_xsave_size(void) +{ + unsigned long XSTATE_CPUID = 0xd; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); + return ecx; +} + static inline u32 pkey_bit_position(int pkey) { return pkey * PKEY_BITS_PER_PKEY; diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 291bc1e07842..95f403a0c46d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -18,12 +18,13 @@ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include <errno.h> +#include <linux/elf.h> #include <linux/futex.h> #include <time.h> #include <sys/time.h> @@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); } +#if defined(__i386__) || defined(__x86_64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + u32 new_pkru; + pid_t child; + int status, ret; + int pkey_offset = pkey_reg_xstate_offset(); + size_t xsave_size = cpu_max_xsave_size(); + void *xsave; + u32 *pkey_register; + u64 *xstate_bv; + struct iovec iov; + + new_pkru = ~read_pkey_reg(); + /* Don't make PROT_EXEC mappings inaccessible */ + new_pkru &= ~3; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkru) + exit(1); + + /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ + raise(SIGSTOP); + + if (__read_pkey_reg() != 0) + exit(1); + + /* Stop and allow the tracer to examine PKRU */ + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + xsave = (void *)malloc(xsave_size); + pkey_assert(xsave > 0); + + /* Modify the PKRU register directly */ + iov.iov_base = xsave; + iov.iov_len = xsave_size; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + pkey_register = (u32 *)(xsave + pkey_offset); + pkey_assert(*pkey_register == read_pkey_reg()); + + *pkey_register = new_pkru; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Clear the PKRU bit from XSTATE_BV */ + xstate_bv = (u64 *)(xsave + 512); + *xstate_bv &= ~(1 << 9); + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value go to 0 */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); + free(xsave); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, +#if defined(__i386__) || defined(__x86_64__) + test_ptrace_modifies_pkru, +#endif }; void run_tests_once(void) diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index e780e76c26b8..8984e0bb58c7 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -1,14 +1,88 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -#please run as root +# Please run as root # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -mnt=./huge exitcode=0 -#get huge pagesize and freepages from /proc/meminfo +usage() { + cat <<EOF +usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"] + -t: specify specific categories to tests to run + -h: display this message + +The default behavior is to run all tests. + +Alternatively, specific groups tests can be run by passing a string +to the -t argument containing one or more of the following categories +separated by spaces: +- mmap + tests for mmap(2) +- gup_test + tests for gup using gup_test interface +- userfaultfd + tests for userfaultfd(2) +- compaction + a test for the patch "Allow compaction of unevictable pages" +- mlock + tests for mlock(2) +- mremap + tests for mremap(2) +- hugevm + tests for very large virtual address space +- vmalloc + vmalloc smoke tests +- hmm + hmm smoke tests +- madv_populate + test memadvise(2) MADV_POPULATE_{READ,WRITE} options +- memfd_secret + test memfd_secret(2) +- process_mrelease + test process_mrelease(2) +- ksm + ksm tests that do not require >=2 NUMA nodes +- ksm_numa + ksm tests that require >=2 NUMA nodes +- pkey + memory protection key tests +- soft_dirty + test soft dirty page bit semantics +- cow + test copy-on-write semantics +example: ./run_vmtests.sh -t "hmm mmap ksm" +EOF + exit 0 +} + + +while getopts "ht:" OPT; do + case ${OPT} in + "h") usage ;; + "t") VM_SELFTEST_ITEMS=${OPTARG} ;; + esac +done +shift $((OPTIND -1)) + +# default behavior: run all tests +VM_SELFTEST_ITEMS=${VM_SELFTEST_ITEMS:-default} + +test_selected() { + if [ "$VM_SELFTEST_ITEMS" == "default" ]; then + # If no VM_SELFTEST_ITEMS are specified, run all tests + return 0 + fi + # If test selected argument is one of the test items + if [[ " ${VM_SELFTEST_ITEMS[*]} " =~ " ${1} " ]]; then + return 0 + else + return 1 + fi +} + +# get huge pagesize and freepages from /proc/meminfo while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs="$size" @@ -28,7 +102,7 @@ hpgsize_MB=$((hpgsize_KB / 1024)) half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) needmem_KB=$((half_ufd_size_MB * 2 * 1024)) -#set proper nr_hugepages +# set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) @@ -57,144 +131,144 @@ else exit 1 fi -#filter 64bit architectures +# filter 64bit architectures ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" if [ -z "$ARCH" ]; then ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi VADDR64=0 -echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1 +echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1 # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { - local title="running $*" - local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) - printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" - - "$@" - local ret=$? - if [ $ret -eq 0 ]; then - echo "[PASS]" - elif [ $ret -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip - else - echo "[FAIL]" - exitcode=1 - fi + if test_selected ${CATEGORY}; then + local title="running $*" + local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + + "$@" + local ret=$? + if [ $ret -eq 0 ]; then + echo "[PASS]" + elif [ $ret -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip + else + echo "[FAIL]" + exitcode=1 + fi + fi # test_selected } -mkdir "$mnt" -mount -t hugetlbfs none "$mnt" - -run_test ./hugepage-mmap +CATEGORY="hugetlb" run_test ./hugepage-mmap shmmax=$(cat /proc/sys/kernel/shmmax) shmall=$(cat /proc/sys/kernel/shmall) echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall -run_test ./hugepage-shm +CATEGORY="hugetlb" run_test ./hugepage-shm echo "$shmmax" > /proc/sys/kernel/shmmax echo "$shmall" > /proc/sys/kernel/shmall -run_test ./map_hugetlb - -run_test ./hugepage-mremap "$mnt"/huge_mremap -rm -f "$mnt"/huge_mremap - -run_test ./hugepage-vmemmap +CATEGORY="hugetlb" run_test ./map_hugetlb +CATEGORY="hugetlb" run_test ./hugepage-mremap +CATEGORY="hugetlb" run_test ./hugepage-vmemmap +CATEGORY="hugetlb" run_test ./hugetlb-madvise -run_test ./hugetlb-madvise "$mnt"/madvise-test -rm -f "$mnt"/madvise-test - -echo "NOTE: The above hugetlb tests provide minimal coverage. Use" -echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" -echo " hugetlb regression testing." +if test_selected "hugetlb"; then + echo "NOTE: These hugetlb tests provide minimal coverage. Use" + echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" + echo " hugetlb regression testing." +fi -run_test ./map_fixed_noreplace +CATEGORY="mmap" run_test ./map_fixed_noreplace # get_user_pages_fast() benchmark -run_test ./gup_test -u +CATEGORY="gup_test" run_test ./gup_test -u # pin_user_pages_fast() benchmark -run_test ./gup_test -a +CATEGORY="gup_test" run_test ./gup_test -a # Dump pages 0, 19, and 4096, using pin_user_pages: -run_test ./gup_test -ct -F 0x1 0 19 0x1000 +CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 uffd_mods=("" ":dev") for mod in "${uffd_mods[@]}"; do - run_test ./userfaultfd anon${mod} 20 16 + CATEGORY="userfaultfd" run_test ./userfaultfd anon${mod} 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size ($half_ufd_size_MB), which is used for *each*. - run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 - run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test - rm -f "$mnt"/uffd-test - run_test ./userfaultfd shmem${mod} 20 16 + CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 + CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 + CATEGORY="userfaultfd" run_test ./userfaultfd shmem${mod} 20 16 done #cleanup -umount "$mnt" -rm -rf "$mnt" echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages -run_test ./compaction_test +CATEGORY="compaction" run_test ./compaction_test -run_test sudo -u nobody ./on-fault-limit +CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit -run_test ./map_populate +CATEGORY="mmap" run_test ./map_populate -run_test ./mlock-random-test +CATEGORY="mlock" run_test ./mlock-random-test -run_test ./mlock2-tests +CATEGORY="mlock" run_test ./mlock2-tests -run_test ./mrelease_test +CATEGORY="process_mrelease" run_test ./mrelease_test -run_test ./mremap_test +CATEGORY="mremap" run_test ./mremap_test -run_test ./thuge-gen +CATEGORY="hugetlb" run_test ./thuge-gen if [ $VADDR64 -ne 0 ]; then - run_test ./virtual_address_range + CATEGORY="hugevm" run_test ./virtual_address_range # virtual address 128TB switch test - run_test ./va_128TBswitch.sh + CATEGORY="hugevm" run_test ./va_128TBswitch.sh fi # VADDR64 # vmalloc stability smoke test -run_test ./test_vmalloc.sh smoke +CATEGORY="vmalloc" run_test ./test_vmalloc.sh smoke -run_test ./mremap_dontunmap +CATEGORY="mremap" run_test ./mremap_dontunmap -run_test ./test_hmm.sh smoke +CATEGORY="hmm" run_test ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests -run_test ./madv_populate +CATEGORY="madv_populate" run_test ./madv_populate -run_test ./memfd_secret +CATEGORY="memfd_secret" run_test ./memfd_secret # KSM MADV_MERGEABLE test with 10 identical pages -run_test ./ksm_tests -M -p 10 +CATEGORY="ksm" run_test ./ksm_tests -M -p 10 # KSM unmerge test -run_test ./ksm_tests -U +CATEGORY="ksm" run_test ./ksm_tests -U # KSM test with 10 zero pages and use_zero_pages = 0 -run_test ./ksm_tests -Z -p 10 -z 0 +CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 0 # KSM test with 10 zero pages and use_zero_pages = 1 -run_test ./ksm_tests -Z -p 10 -z 1 +CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 1 -run_test ./ksm_tests -N -m 1 +CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 0 -run_test ./ksm_tests -N -m 0 +CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 + +CATEGORY="ksm" run_test ./ksm_functional_tests + +run_test ./ksm_functional_tests # protection_keys tests if [ -x ./protection_keys_32 ] then - run_test ./protection_keys_32 + CATEGORY="pkey" run_test ./protection_keys_32 fi if [ -x ./protection_keys_64 ] then - run_test ./protection_keys_64 + CATEGORY="pkey" run_test ./protection_keys_64 fi -run_test ./soft-dirty +CATEGORY="soft_dirty" run_test ./soft-dirty + +# COW tests +CATEGORY="cow" run_test ./cow exit $exitcode diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 297f250c1d95..7f22844ed704 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -93,10 +93,8 @@ static volatile bool test_uffdio_zeropage_eexist = true; static bool test_uffdio_wp = true; /* Whether to test uffd minor faults */ static bool test_uffdio_minor = false; - static bool map_shared; -static int shm_fd; -static int huge_fd; +static int mem_fd; static unsigned long long *count_verify; static int uffd = -1; static int uffd_flags, finished, *pipefd; @@ -143,7 +141,7 @@ const char *examples = "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" "./userfaultfd hugetlb 256 50\n\n" "# Run the same hugetlb test but using shared file:\n" - "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n" + "./userfaultfd hugetlb_shared 256 50\n\n" "# 10MiB-~6GiB 999 bounces anonymous test, " "continue forever unless an error triggers\n" "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; @@ -260,35 +258,21 @@ static void hugetlb_release_pages(char *rel_area) static void hugetlb_allocate_area(void **alloc_area, bool is_src) { + off_t size = nr_pages * page_size; + off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; - if (!map_shared) - *alloc_area = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | - (is_src ? 0 : MAP_NORESERVE), - -1, - 0); - else - *alloc_area = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_SHARED | - (is_src ? 0 : MAP_NORESERVE), - huge_fd, - is_src ? 0 : nr_pages * page_size); + *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (is_src ? 0 : MAP_NORESERVE), + mem_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { - area_alias = mmap(NULL, - nr_pages * page_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - huge_fd, - is_src ? 0 : nr_pages * page_size); + area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) err("mmap of hugetlb file alias failed"); } @@ -334,14 +318,14 @@ static void shmem_allocate_area(void **alloc_area, bool is_src) } *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, offset); + mem_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); if (test_collapse && *alloc_area != p) err("mmap of memfd failed at %p", p); area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, offset); + mem_fd, offset); if (area_alias == MAP_FAILED) err("mmap of memfd alias failed"); if (test_collapse && area_alias != p_alias) @@ -1841,21 +1825,17 @@ int main(int argc, char **argv) } nr_pages = nr_pages_per_cpu * nr_cpus; - if (test_type == TEST_HUGETLB && map_shared) { - if (argc < 5) - usage(); - huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); - if (huge_fd < 0) - err("Open of %s failed", argv[4]); - if (ftruncate(huge_fd, 0)) - err("ftruncate %s to size 0 failed", argv[4]); - } else if (test_type == TEST_SHMEM) { - shm_fd = memfd_create(argv[0], 0); - if (shm_fd < 0) + if (test_type == TEST_SHMEM || test_type == TEST_HUGETLB) { + unsigned int memfd_flags = 0; + + if (test_type == TEST_HUGETLB) + memfd_flags = MFD_HUGETLB; + mem_fd = memfd_create(argv[0], memfd_flags); + if (mem_fd < 0) err("memfd_create"); - if (ftruncate(shm_fd, nr_pages * page_size * 2)) + if (ftruncate(mem_fd, nr_pages * page_size * 2)) err("ftruncate"); - if (fallocate(shm_fd, + if (fallocate(mem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, nr_pages * page_size * 2)) err("fallocate"); diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index f11f8adda521..40e795624ff3 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -28,6 +28,31 @@ bool pagemap_is_softdirty(int fd, char *start) return entry & 0x0080000000000000ull; } +bool pagemap_is_swapped(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + return entry & 0x4000000000000000ull; +} + +bool pagemap_is_populated(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* Present or swapped. */ + return entry & 0xc000000000000000ull; +} + +unsigned long pagemap_get_pfn(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* If present (63th bit), PFN is at bit 0 -- 54. */ + if (entry & 0x8000000000000000ull) + return entry & 0x007fffffffffffffull; + return -1ul; +} + void clear_softdirty(void) { int ret; diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 5c35de454e08..1995ee911ef2 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -4,6 +4,9 @@ uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); +bool pagemap_is_swapped(int fd, char *start); +bool pagemap_is_populated(int fd, char *start); +unsigned long pagemap_get_pfn(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index f45e510500c0..bc71cbca0dde 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Watchdog Driver Test Program +* Watchdog Driver Test Program +* - Tests all ioctls +* - Tests Magic Close - CONFIG_WATCHDOG_NOWAYOUT +* - Could be tested against softdog driver on systems that +* don't have watchdog hardware. +* - TODO: +* - Enhance test to add coverage for WDIOC_GETTEMP. +* +* Reference: Documentation/watchdog/watchdog-api.rst */ #include <errno.h> @@ -19,13 +27,14 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NLf:i"; +static const char sopts[] = "bdehp:st:Tn:NLf:i"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, {"enable", no_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, {"pingrate", required_argument, NULL, 'p'}, + {"status", no_argument, NULL, 's'}, {"timeout", required_argument, NULL, 't'}, {"gettimeout", no_argument, NULL, 'T'}, {"pretimeout", required_argument, NULL, 'n'}, @@ -74,6 +83,7 @@ static void usage(char *progname) printf(" -f, --file\t\tOpen watchdog device file\n"); printf("\t\t\tDefault is /dev/watchdog\n"); printf(" -i, --info\t\tShow watchdog_info\n"); + printf(" -s, --status\t\tGet status & supported features\n"); printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); printf(" -d, --disable\t\tTurn off the watchdog timer\n"); printf(" -e, --enable\t\tTurn on the watchdog timer\n"); @@ -91,6 +101,73 @@ static void usage(char *progname) printf("Example: %s -t 12 -T -n 7 -N\n", progname); } +struct wdiof_status { + int flag; + const char *status_str; +}; + +#define WDIOF_NUM_STATUS 8 + +static const struct wdiof_status wdiof_status[WDIOF_NUM_STATUS] = { + {WDIOF_SETTIMEOUT, "Set timeout (in seconds)"}, + {WDIOF_MAGICCLOSE, "Supports magic close char"}, + {WDIOF_PRETIMEOUT, "Pretimeout (in seconds), get/set"}, + {WDIOF_ALARMONLY, "Watchdog triggers a management or other external alarm not a reboot"}, + {WDIOF_KEEPALIVEPING, "Keep alive ping reply"}, + {WDIOS_DISABLECARD, "Turn off the watchdog timer"}, + {WDIOS_ENABLECARD, "Turn on the watchdog timer"}, + {WDIOS_TEMPPANIC, "Kernel panic on temperature trip"}, +}; + +static void print_status(int flags) +{ + int wdiof = 0; + + if (flags == WDIOS_UNKNOWN) { + printf("Unknown status error from WDIOC_GETSTATUS\n"); + return; + } + + for (wdiof = 0; wdiof < WDIOF_NUM_STATUS; wdiof++) { + if (flags & wdiof_status[wdiof].flag) + printf("Support/Status: %s\n", + wdiof_status[wdiof].status_str); + } +} + +#define WDIOF_NUM_BOOTSTATUS 7 + +static const struct wdiof_status wdiof_bootstatus[WDIOF_NUM_BOOTSTATUS] = { + {WDIOF_OVERHEAT, "Reset due to CPU overheat"}, + {WDIOF_FANFAULT, "Fan failed"}, + {WDIOF_EXTERN1, "External relay 1"}, + {WDIOF_EXTERN2, "External relay 2"}, + {WDIOF_POWERUNDER, "Power bad/power fault"}, + {WDIOF_CARDRESET, "Card previously reset the CPU"}, + {WDIOF_POWEROVER, "Power over voltage"}, +}; + +static void print_boot_status(int flags) +{ + int wdiof = 0; + + if (flags == WDIOF_UNKNOWN) { + printf("Unknown flag error from WDIOC_GETBOOTSTATUS\n"); + return; + } + + if (flags == 0) { + printf("Last boot is caused by: Power-On-Reset\n"); + return; + } + + for (wdiof = 0; wdiof < WDIOF_NUM_BOOTSTATUS; wdiof++) { + if (flags & wdiof_bootstatus[wdiof].flag) + printf("Last boot is caused by: %s\n", + wdiof_bootstatus[wdiof].status_str); + } +} + int main(int argc, char *argv[]) { int flags; @@ -100,6 +177,7 @@ int main(int argc, char *argv[]) int oneshot = 0; char *file = "/dev/watchdog"; struct watchdog_info info; + int temperature; setbuf(stdout, NULL); @@ -140,8 +218,7 @@ int main(int argc, char *argv[]) oneshot = 1; ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags); if (!ret) - printf("Last boot is caused by: %s.\n", (flags != 0) ? - "Watchdog" : "Power-On-Reset"); + print_boot_status(flags); else printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno)); break; @@ -171,6 +248,21 @@ int main(int argc, char *argv[]) ping_rate = DEFAULT_PING_RATE; printf("Watchdog ping rate set to %u seconds.\n", ping_rate); break; + case 's': + flags = 0; + oneshot = 1; + ret = ioctl(fd, WDIOC_GETSTATUS, &flags); + if (!ret) + print_status(flags); + else + printf("WDIOC_GETSTATUS error '%s'\n", strerror(errno)); + ret = ioctl(fd, WDIOC_GETTEMP, &temperature); + if (ret) + printf("WDIOC_GETTEMP: '%s'\n", strerror(errno)); + else + printf("Temperature %d\n", temperature); + + break; case 't': flags = strtoul(optarg, NULL, 0); ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); @@ -228,7 +320,7 @@ int main(int argc, char *argv[]) printf(" identity:\t\t%s\n", info.identity); printf(" firmware_version:\t%u\n", info.firmware_version); - printf(" options:\t\t%08x\n", info.options); + print_status(info.options); break; default: @@ -249,6 +341,10 @@ int main(int argc, char *argv[]) sleep(ping_rate); } end: + /* + * Send specific magic character 'V' just in case Magic Close is + * enabled to ensure watchdog gets disabled on close. + */ ret = write(fd, &v, 1); if (ret < 0) printf("Stopping watchdog ticks failed (%d)...\n", errno); diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index ce2a04717300..6327c9c400e0 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -64,8 +64,6 @@ CONFIG_PROC_FS=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y -CONFIG_RANDOM_TRUST_CPU=y -CONFIG_RANDOM_TRUST_BOOTLOADER=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 0388c4d60af0..ca9374b56ead 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -34,7 +34,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES) # call32_from_64 in thunks.S uses absolute addresses. ifeq ($(CAN_BUILD_WITH_NOPIE),1) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 5b45e6986aea..47cab972807c 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -92,11 +92,8 @@ static void init_vdso(void) printf("[WARN]\tfailed to find time in vDSO\n"); vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); - if (!vdso_getcpu) { - /* getcpu() was never wired up in the 32-bit vDSO. */ - printf("[%s]\tfailed to find getcpu in vDSO\n", - sizeof(long) == 8 ? "WARN" : "NOTE"); - } + if (!vdso_getcpu) + printf("[WARN]\tfailed to find getcpu in vDSO\n"); } static int init_vsys(void) diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index f8293c6910c9..43a254f0e14d 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -all: test +all: test vsock_perf test: vsock_test vsock_diag_test vsock_test: vsock_test.o timeout.o control.o util.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o +vsock_perf: vsock_perf.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 4d5045e7d2c3..84ee217ba8ee 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -35,3 +35,37 @@ Invoke test binaries in both directions as follows: --control-port=$GUEST_IP \ --control-port=1234 \ --peer-cid=3 + +vsock_perf utility +------------------- +'vsock_perf' is a simple tool to measure vsock performance. It works in +sender/receiver modes: sender connect to peer at the specified port and +starts data transmission to the receiver. After data processing is done, +it prints several metrics(see below). + +Usage: +# run as sender +# connect to CID 2, port 1234, send 1G of data, tx buf size is 1M +./vsock_perf --sender 2 --port 1234 --bytes 1G --buf-size 1M + +Output: +tx performance: A Gbits/s + +Output explanation: +A is calculated as "number of bits to send" / "time in tx loop" + +# run as receiver +# listen port 1234, rx buf size is 1M, socket buf size is 1G, SO_RCVLOWAT is 64K +./vsock_perf --port 1234 --buf-size 1M --vsk-size 1G --rcvlowat 64K + +Output: +rx performance: A Gbits/s +total in 'read()': B sec +POLLIN wakeups: C +average in 'read()': D ns + +Output explanation: +A is calculated as "number of received bits" / "time in rx loop". +B is time, spent in 'read()' system call(excluding 'poll()') +C is number of 'poll()' wake ups with POLLIN bit set. +D is B / C, e.g. average amount of time, spent in single 'read()'. diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index 4874872fc5a3..d2deb4b15b94 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -141,6 +141,34 @@ void control_writeln(const char *str) timeout_end(); } +void control_writeulong(unsigned long value) +{ + char str[32]; + + if (snprintf(str, sizeof(str), "%lu", value) >= sizeof(str)) { + perror("snprintf"); + exit(EXIT_FAILURE); + } + + control_writeln(str); +} + +unsigned long control_readulong(void) +{ + unsigned long value; + char *str; + + str = control_readln(); + + if (!str) + exit(EXIT_FAILURE); + + value = strtoul(str, NULL, 10); + free(str); + + return value; +} + /* Return the next line from the control socket (without the trailing newline). * * The program terminates if a timeout occurs. diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h index 51814b4f9ac1..c1f77fdb2c7a 100644 --- a/tools/testing/vsock/control.h +++ b/tools/testing/vsock/control.h @@ -9,7 +9,9 @@ void control_init(const char *control_host, const char *control_port, void control_cleanup(void); void control_writeln(const char *str); char *control_readln(void); +unsigned long control_readulong(void); void control_expectln(const char *str); bool control_cmpln(char *line, const char *str, bool fail); +void control_writeulong(unsigned long value); #endif /* CONTROL_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 2acbb7703c6a..01b636d3039a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -395,3 +395,16 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, test_cases[test_id].skip = true; } + +unsigned long hash_djb2(const void *data, size_t len) +{ + unsigned long hash = 5381; + int i = 0; + + while (i < len) { + hash = ((hash << 5) + hash) + ((unsigned char *)data)[i]; + i++; + } + + return hash; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index a3375ad2fb7f..fb99208a95ea 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -49,4 +49,5 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +unsigned long hash_djb2(const void *data, size_t len); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c new file mode 100644 index 000000000000..a72520338f84 --- /dev/null +++ b/tools/testing/vsock/vsock_perf.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock_perf - benchmark utility for vsock. + * + * Copyright (C) 2022 SberDevices. + * + * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru> + */ +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <stdint.h> +#include <poll.h> +#include <sys/socket.h> +#include <linux/vm_sockets.h> + +#define DEFAULT_BUF_SIZE_BYTES (128 * 1024) +#define DEFAULT_TO_SEND_BYTES (64 * 1024) +#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024) +#define DEFAULT_RCVLOWAT_BYTES 1 +#define DEFAULT_PORT 1234 + +#define BYTES_PER_GB (1024 * 1024 * 1024ULL) +#define NSEC_PER_SEC (1000000000ULL) + +static unsigned int port = DEFAULT_PORT; +static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; +static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; + +static void error(const char *s) +{ + perror(s); + exit(EXIT_FAILURE); +} + +static time_t current_nsec(void) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + error("clock_gettime"); + + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +/* From lib/cmdline.c. */ +static unsigned long memparse(const char *ptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + return ret; +} + +static void vsock_increase_buf_size(int fd) +{ + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); +} + +static int vsock_connect(unsigned int cid, unsigned int port) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = cid, + }, + }; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) { + perror("socket"); + return -1; + } + + if (connect(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("connect"); + close(fd); + return -1; + } + + return fd; +} + +static float get_gbps(unsigned long bits, time_t ns_delta) +{ + return ((float)bits / 1000000000ULL) / + ((float)ns_delta / NSEC_PER_SEC); +} + +static void run_receiver(unsigned long rcvlowat_bytes) +{ + unsigned int read_cnt; + time_t rx_begin_ns; + time_t in_read_ns; + size_t total_recv; + int client_fd; + char *data; + int fd; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = VMADDR_CID_ANY, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + + socklen_t clientaddr_len = sizeof(clientaddr.svm); + + printf("Run as receiver\n"); + printf("Listen port %u\n", port); + printf("RX buffer %lu bytes\n", buf_size_bytes); + printf("vsock buffer %lu bytes\n", vsock_buf_bytes); + printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) + error("socket"); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) + error("bind"); + + if (listen(fd, 1) < 0) + error("listen"); + + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + + if (client_fd < 0) + error("accept"); + + vsock_increase_buf_size(client_fd); + + if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT, + &rcvlowat_bytes, + sizeof(rcvlowat_bytes))) + error("setsockopt(SO_RCVLOWAT)"); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + read_cnt = 0; + in_read_ns = 0; + total_recv = 0; + rx_begin_ns = current_nsec(); + + while (1) { + struct pollfd fds = { 0 }; + + fds.fd = client_fd; + fds.events = POLLIN | POLLERR | + POLLHUP | POLLRDHUP; + + if (poll(&fds, 1, -1) < 0) + error("poll"); + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLIN) { + ssize_t bytes_read; + time_t t; + + t = current_nsec(); + bytes_read = read(fds.fd, data, buf_size_bytes); + in_read_ns += (current_nsec() - t); + read_cnt++; + + if (!bytes_read) + break; + + if (bytes_read < 0) { + perror("read"); + exit(EXIT_FAILURE); + } + + total_recv += bytes_read; + } + + if (fds.revents & (POLLHUP | POLLRDHUP)) + break; + } + + printf("total bytes received: %zu\n", total_recv); + printf("rx performance: %f Gbits/s\n", + get_gbps(total_recv * 8, current_nsec() - rx_begin_ns)); + printf("total time in 'read()': %f sec\n", (float)in_read_ns / NSEC_PER_SEC); + printf("average time in 'read()': %f ns\n", (float)in_read_ns / read_cnt); + printf("POLLIN wakeups: %i\n", read_cnt); + + free(data); + close(client_fd); + close(fd); +} + +static void run_sender(int peer_cid, unsigned long to_send_bytes) +{ + time_t tx_begin_ns; + time_t tx_total_ns; + size_t total_send; + void *data; + int fd; + + printf("Run as sender\n"); + printf("Connect to %i:%u\n", peer_cid, port); + printf("Send %lu bytes\n", to_send_bytes); + printf("TX buffer %lu bytes\n", buf_size_bytes); + + fd = vsock_connect(peer_cid, port); + + if (fd < 0) + exit(EXIT_FAILURE); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + memset(data, 0, buf_size_bytes); + total_send = 0; + tx_begin_ns = current_nsec(); + + while (total_send < to_send_bytes) { + ssize_t sent; + + sent = write(fd, data, buf_size_bytes); + + if (sent <= 0) + error("write"); + + total_send += sent; + } + + tx_total_ns = current_nsec() - tx_begin_ns; + + printf("total bytes sent: %zu\n", total_send); + printf("tx performance: %f Gbits/s\n", + get_gbps(total_send * 8, tx_total_ns)); + printf("total time in 'write()': %f sec\n", + (float)tx_total_ns / NSEC_PER_SEC); + + close(fd); + free(data); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'H', + }, + { + .name = "sender", + .has_arg = required_argument, + .val = 'S', + }, + { + .name = "port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "bytes", + .has_arg = required_argument, + .val = 'M', + }, + { + .name = "buf-size", + .has_arg = required_argument, + .val = 'B', + }, + { + .name = "vsk-size", + .has_arg = required_argument, + .val = 'V', + }, + { + .name = "rcvlowat", + .has_arg = required_argument, + .val = 'R', + }, + {}, +}; + +static void usage(void) +{ + printf("Usage: ./vsock_perf [--help] [options]\n" + "\n" + "This is benchmarking utility, to test vsock performance.\n" + "It runs in two modes: sender or receiver. In sender mode, it\n" + "connects to the specified CID and starts data transmission.\n" + "\n" + "Options:\n" + " --help This message\n" + " --sender <cid> Sender mode (receiver default)\n" + " <cid> of the receiver to connect to\n" + " --port <port> Port (default %d)\n" + " --bytes <bytes>KMG Bytes to send (default %d)\n" + " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" + " it is the buffer size, passed to 'write()'. In\n" + " receiver mode it is the buffer size passed to 'read()'.\n" + " --vsk-size <bytes>KMG Socket buffer size (default %d)\n" + " --rcvlowat <bytes>KMG SO_RCVLOWAT value (default %d)\n" + "\n", DEFAULT_PORT, DEFAULT_TO_SEND_BYTES, + DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES, + DEFAULT_RCVLOWAT_BYTES); + exit(EXIT_FAILURE); +} + +static long strtolx(const char *arg) +{ + long value; + char *end; + + value = strtol(arg, &end, 10); + + if (end != arg + strlen(arg)) + usage(); + + return value; +} + +int main(int argc, char **argv) +{ + unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; + unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int peer_cid = -1; + bool sender = false; + + while (1) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'V': /* Peer buffer size. */ + vsock_buf_bytes = memparse(optarg); + break; + case 'R': /* SO_RCVLOWAT value. */ + rcvlowat_bytes = memparse(optarg); + break; + case 'P': /* Port to connect to. */ + port = strtolx(optarg); + break; + case 'M': /* Bytes to send. */ + to_send_bytes = memparse(optarg); + break; + case 'B': /* Size of rx/tx buffer. */ + buf_size_bytes = memparse(optarg); + break; + case 'S': /* Sender mode. CID to connect to. */ + peer_cid = strtolx(optarg); + sender = true; + break; + case 'H': /* Help. */ + usage(); + break; + default: + usage(); + } + } + + if (!sender) + run_receiver(rcvlowat_bytes); + else + run_sender(peer_cid, to_send_bytes); + + return 0; +} diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index bb6d691cb30d..67e9f9df3a8c 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -284,10 +284,14 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) close(fd); } -#define MESSAGES_CNT 7 -#define MSG_EOR_IDX (MESSAGES_CNT / 2) +#define SOCK_BUF_SIZE (2 * 1024 * 1024) +#define MAX_MSG_SIZE (32 * 1024) + static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { + unsigned long curr_hash; + int page_size; + int msg_count; int fd; fd = vsock_seqpacket_connect(opts->peer_cid, 1234); @@ -296,18 +300,79 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - /* Send several messages, one with MSG_EOR flag */ - for (int i = 0; i < MESSAGES_CNT; i++) - send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0); + /* Wait, until receiver sets buffer size. */ + control_expectln("SRVREADY"); + + curr_hash = 0; + page_size = getpagesize(); + msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + + for (int i = 0; i < msg_count; i++) { + ssize_t send_size; + size_t buf_size; + int flags; + void *buf; + + /* Use "small" buffers and "big" buffers. */ + if (i & 1) + buf_size = page_size + + (rand() % (MAX_MSG_SIZE - page_size)); + else + buf_size = 1 + (rand() % page_size); + + buf = malloc(buf_size); + + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + memset(buf, rand() & 0xff, buf_size); + /* Set at least one MSG_EOR + some random. */ + if (i == (msg_count / 2) || (rand() & 1)) { + flags = MSG_EOR; + curr_hash++; + } else { + flags = 0; + } + + send_size = send(fd, buf, buf_size, flags); + + if (send_size < 0) { + perror("send"); + exit(EXIT_FAILURE); + } + + if (send_size != buf_size) { + fprintf(stderr, "Invalid send size\n"); + exit(EXIT_FAILURE); + } + + /* + * Hash sum is computed at both client and server in + * the same way: + * H += hash('message data') + * Such hash "controls" both data integrity and message + * bounds. After data exchange, both sums are compared + * using control socket, and if message bounds wasn't + * broken - two values must be equal. + */ + curr_hash += hash_djb2(buf, buf_size); + free(buf); + } control_writeln("SENDDONE"); + control_writeulong(curr_hash); close(fd); } static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) { + unsigned long sock_buf_size; + unsigned long remote_hash; + unsigned long curr_hash; int fd; - char buf[16]; + char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -317,25 +382,57 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) exit(EXIT_FAILURE); } + sock_buf_size = SOCK_BUF_SIZE; + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &sock_buf_size, sizeof(sock_buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, sizeof(sock_buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + exit(EXIT_FAILURE); + } + + /* Ready to receive data. */ + control_writeln("SRVREADY"); + /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); iov.iov_base = buf; iov.iov_len = sizeof(buf); msg.msg_iov = &iov; msg.msg_iovlen = 1; - for (int i = 0; i < MESSAGES_CNT; i++) { - if (recvmsg(fd, &msg, 0) != 1) { - perror("message bound violated"); - exit(EXIT_FAILURE); - } + curr_hash = 0; + + while (1) { + ssize_t recv_size; - if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) { - perror("MSG_EOR"); + recv_size = recvmsg(fd, &msg, 0); + + if (!recv_size) + break; + + if (recv_size < 0) { + perror("recvmsg"); exit(EXIT_FAILURE); } + + if (msg.msg_flags & MSG_EOR) + curr_hash++; + + curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } close(fd); + remote_hash = control_readulong(); + + if (curr_hash != remote_hash) { + fprintf(stderr, "Message bounds broken\n"); + exit(EXIT_FAILURE); + } } #define MESSAGE_TRUNC_SZ 32 @@ -427,7 +524,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) tv.tv_usec = 0; if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) { - perror("setsockopt 'SO_RCVTIMEO'"); + perror("setsockopt(SO_RCVTIMEO)"); exit(EXIT_FAILURE); } @@ -472,6 +569,70 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) close(fd); } +static void test_seqpacket_bigmsg_client(const struct test_opts *opts) +{ + unsigned long sock_buf_size; + ssize_t send_size; + socklen_t len; + void *data; + int fd; + + len = sizeof(sock_buf_size); + + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, &len)) { + perror("getsockopt"); + exit(EXIT_FAILURE); + } + + sock_buf_size++; + + data = malloc(sock_buf_size); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + send_size = send(fd, data, sock_buf_size, 0); + if (send_size != -1) { + fprintf(stderr, "expected 'send(2)' failure, got %zi\n", + send_size); + exit(EXIT_FAILURE); + } + + if (errno != EMSGSIZE) { + fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n", + errno); + exit(EXIT_FAILURE); + } + + control_writeln("CLISENT"); + + free(data); + close(fd); +} + +static void test_seqpacket_bigmsg_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLISENT"); + + close(fd); +} + #define BUF_PATTERN_1 'a' #define BUF_PATTERN_2 'b' @@ -644,7 +805,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat_val, sizeof(lowat_val))) { - perror("setsockopt"); + perror("setsockopt(SO_RCVLOWAT)"); exit(EXIT_FAILURE); } @@ -754,6 +915,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_poll_rcvlowat_client, .run_server = test_stream_poll_rcvlowat_server, }, + { + .name = "SOCK_SEQPACKET big message", + .run_client = test_seqpacket_bigmsg_client, + .run_server = test_seqpacket_bigmsg_server, + }, {}, }; @@ -837,6 +1003,7 @@ int main(int argc, char **argv) .peer_cid = VMADDR_CID_ANY, }; + srand(time(NULL)); init_signals(); for (;;) { |