From a5facc4cac4dc504397428c936ef1492ce4edd19 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Oct 2019 11:49:26 +0200 Subject: netdevsim: change fib accounting and limitations to be per-device Currently, the accounting is done per-namespace. However, devlink instance is always in init_net namespace for now, so only the accounting related to init_net is used. Limitations set using devlink resources are only considered for init_net. nsim_devlink_net() always returns init_net. Make the accounting per-device. This brings no functional change. Per-device accounting has the same values as per-net. For a single netdevsim instance, the behaviour is exactly the same as before. When multiple netdevsim instances are created, each can have different limits. This is in preparation for implementing proper devlink netns support. After that, the devlink instance which would exist in a particular netns would account and limit that netns. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/netdevsim/dev.c | 79 ++++++++++----------------------------------- 1 file changed, 17 insertions(+), 62 deletions(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 56576d4f34a5..6087f5b99e47 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -123,39 +123,6 @@ static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port) debugfs_remove_recursive(nsim_dev_port->ddir); } -static struct net *nsim_devlink_net(struct devlink *devlink) -{ - return &init_net; -} - -static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); -} - -static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); -} - -static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); -} - -static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) -{ - struct net *net = priv; - - return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); -} - static int nsim_dev_resources_register(struct devlink *devlink) { struct devlink_resource_size_params params = { @@ -163,9 +130,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; - struct net *net = nsim_devlink_net(devlink); int err; - u64 n; /* Resources for IPv4 */ err = devlink_resource_register(devlink, "IPv4", (u64)-1, @@ -177,8 +142,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); - err = devlink_resource_register(devlink, "fib", n, + err = devlink_resource_register(devlink, "fib", (u64)-1, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, &params); if (err) { @@ -186,8 +150,7 @@ 
static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, + err = devlink_resource_register(devlink, "fib-rules", (u64)-1, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, &params); if (err) { @@ -205,8 +168,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); - err = devlink_resource_register(devlink, "fib", n, + err = devlink_resource_register(devlink, "fib", (u64)-1, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, &params); if (err) { @@ -214,8 +176,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); - err = devlink_resource_register(devlink, "fib-rules", n, + err = devlink_resource_register(devlink, "fib-rules", (u64)-1, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, &params); if (err) { @@ -223,22 +184,6 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB, - nsim_dev_ipv4_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV4_FIB_RULES, - nsim_dev_ipv4_fib_rules_res_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB, - nsim_dev_ipv6_fib_resource_occ_get, - net); - devlink_resource_occ_get_register(devlink, - NSIM_RESOURCE_IPV6_FIB_RULES, - nsim_dev_ipv6_fib_rules_res_occ_get, - net); out: return err; } @@ -533,11 +478,11 @@ static int nsim_dev_reload_down(struct devlink *devlink, static int nsim_dev_reload_up(struct devlink *devlink, struct netlink_ext_ack *extack) { + struct nsim_dev *nsim_dev = devlink_priv(devlink); enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES }; - struct net 
*net = nsim_devlink_net(devlink); int i; for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { @@ -546,7 +491,8 @@ static int nsim_dev_reload_up(struct devlink *devlink, err = devlink_resource_size_get(devlink, res_ids[i], &val); if (!err) { - err = nsim_fib_set_max(net, res_ids[i], val, extack); + err = nsim_fib_set_max(nsim_dev->fib_data, + res_ids[i], val, extack); if (err) return err; } @@ -681,9 +627,15 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) if (err) goto err_devlink_free; + nsim_dev->fib_data = nsim_fib_create(devlink); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_resources_unregister; + } + err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) - goto err_resources_unregister; + goto err_fib_destroy; err = devlink_params_register(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); @@ -721,6 +673,8 @@ err_params_unregister: ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); err_resources_unregister: devlink_resources_unregister(devlink, NULL); err_devlink_free: @@ -739,6 +693,7 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) devlink_params_unregister(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); devlink_unregister(devlink); + nsim_fib_destroy(devlink, nsim_dev->fib_data); devlink_resources_unregister(devlink, NULL); mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); -- cgit From 7f36a77ade6eefc243c64c64b8f8252fa43ea55e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Oct 2019 11:49:35 +0200 Subject: netdevsim: add all ports in nsim_dev_create() and del them in destroy() Currently the probe/remove function does this separately. Put the addition an deletion of ports into nsim_dev_create() and nsim_dev_destroy(). Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/netdevsim/dev.c | 175 +++++++++++++++++++++++--------------------- 1 file changed, 93 insertions(+), 82 deletions(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 6087f5b99e47..3cc101aee991 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -603,8 +603,92 @@ static const struct devlink_ops nsim_dev_devlink_ops = { #define NSIM_DEV_MAX_MACS_DEFAULT 32 #define NSIM_DEV_TEST1_DEFAULT true +static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, + unsigned int port_index) +{ + struct nsim_dev_port *nsim_dev_port; + struct devlink_port *devlink_port; + int err; + + nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); + if (!nsim_dev_port) + return -ENOMEM; + nsim_dev_port->port_index = port_index; + + devlink_port = &nsim_dev_port->devlink_port; + devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_index + 1, 0, 0, + nsim_dev->switch_id.id, + nsim_dev->switch_id.id_len); + err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, + port_index); + if (err) + goto err_port_free; + + err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); + if (err) + goto err_dl_port_unregister; + + nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port); + if (IS_ERR(nsim_dev_port->ns)) { + err = PTR_ERR(nsim_dev_port->ns); + goto err_port_debugfs_exit; + } + + devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); + list_add(&nsim_dev_port->list, &nsim_dev->port_list); + + return 0; + +err_port_debugfs_exit: + nsim_dev_port_debugfs_exit(nsim_dev_port); +err_dl_port_unregister: + devlink_port_unregister(devlink_port); +err_port_free: + kfree(nsim_dev_port); + return err; +} + +static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) +{ + struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; + + list_del(&nsim_dev_port->list); + devlink_port_type_clear(devlink_port); + 
nsim_destroy(nsim_dev_port->ns); + nsim_dev_port_debugfs_exit(nsim_dev_port); + devlink_port_unregister(devlink_port); + kfree(nsim_dev_port); +} + +static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_port *nsim_dev_port, *tmp; + + list_for_each_entry_safe(nsim_dev_port, tmp, + &nsim_dev->port_list, list) + __nsim_dev_port_del(nsim_dev_port); +} + +static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev, + unsigned int port_count) +{ + int i, err; + + for (i = 0; i < port_count; i++) { + err = __nsim_dev_port_add(nsim_dev, i); + if (err) + goto err_port_del_all; + } + return 0; + +err_port_del_all: + nsim_dev_port_del_all(nsim_dev); + return err; +} + static struct nsim_dev * -nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) +nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; struct devlink *devlink; @@ -659,9 +743,15 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) if (err) goto err_debugfs_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_bpf_dev_exit; + devlink_params_publish(devlink); return nsim_dev; +err_bpf_dev_exit: + nsim_bpf_dev_exit(nsim_dev); err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); err_traps_exit: @@ -686,6 +776,7 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) { struct devlink *devlink = priv_to_devlink(nsim_dev); + nsim_dev_port_del_all(nsim_dev); nsim_bpf_dev_exit(nsim_dev); nsim_dev_debugfs_exit(nsim_dev); nsim_dev_traps_exit(devlink); @@ -699,102 +790,22 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) devlink_free(devlink); } -static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, - unsigned int port_index) -{ - struct nsim_dev_port *nsim_dev_port; - struct devlink_port *devlink_port; - int err; - - nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL); - if (!nsim_dev_port) - return -ENOMEM; - nsim_dev_port->port_index = port_index; - - 
devlink_port = &nsim_dev_port->devlink_port; - devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_index + 1, 0, 0, - nsim_dev->switch_id.id, - nsim_dev->switch_id.id_len); - err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port, - port_index); - if (err) - goto err_port_free; - - err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); - if (err) - goto err_dl_port_unregister; - - nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port); - if (IS_ERR(nsim_dev_port->ns)) { - err = PTR_ERR(nsim_dev_port->ns); - goto err_port_debugfs_exit; - } - - devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev); - list_add(&nsim_dev_port->list, &nsim_dev->port_list); - - return 0; - -err_port_debugfs_exit: - nsim_dev_port_debugfs_exit(nsim_dev_port); -err_dl_port_unregister: - devlink_port_unregister(devlink_port); -err_port_free: - kfree(nsim_dev_port); - return err; -} - -static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) -{ - struct devlink_port *devlink_port = &nsim_dev_port->devlink_port; - - list_del(&nsim_dev_port->list); - devlink_port_type_clear(devlink_port); - nsim_destroy(nsim_dev_port->ns); - nsim_dev_port_debugfs_exit(nsim_dev_port); - devlink_port_unregister(devlink_port); - kfree(nsim_dev_port); -} - -static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) -{ - struct nsim_dev_port *nsim_dev_port, *tmp; - - list_for_each_entry_safe(nsim_dev_port, tmp, - &nsim_dev->port_list, list) - __nsim_dev_port_del(nsim_dev_port); -} - int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; - int i; - int err; - nsim_dev = nsim_dev_create(nsim_bus_dev, nsim_bus_dev->port_count); + nsim_dev = nsim_dev_create(nsim_bus_dev); if (IS_ERR(nsim_dev)) return PTR_ERR(nsim_dev); dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); - for (i = 0; i < nsim_bus_dev->port_count; i++) { - err = __nsim_dev_port_add(nsim_dev, i); - if (err) - goto err_port_del_all; - } return 0; - 
-err_port_del_all: - nsim_dev_port_del_all(nsim_dev); - nsim_dev_destroy(nsim_dev); - return err; } void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); - nsim_dev_port_del_all(nsim_dev); nsim_dev_destroy(nsim_dev); } -- cgit From 75ba029f3c07f4755b88ee3a9c441e9ffb468e6a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Oct 2019 11:49:36 +0200 Subject: netdevsim: implement proper devlink reload During devlink reload, all driver objects should be reinstantiated with the exception of devlink instance and devlink resources and params. Move existing devlink_resource_size_get() calls into fib_create() just before fib notifier is registered. Also, make sure that extack is propagated down to fib_notifier_register() call. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 95 +++++++++++++++++++++++++++------------ drivers/net/netdevsim/fib.c | 53 ++++++++++++---------- drivers/net/netdevsim/netdevsim.h | 8 ++-- 3 files changed, 99 insertions(+), 57 deletions(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 3cc101aee991..7de80faab047 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -469,9 +469,16 @@ static void nsim_dev_traps_exit(struct devlink *devlink) kfree(nsim_dev->trap_data); } +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack); +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev); + static int nsim_dev_reload_down(struct devlink *devlink, struct netlink_ext_ack *extack) { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + nsim_dev_reload_destroy(nsim_dev); return 0; } @@ -479,27 +486,8 @@ static int nsim_dev_reload_up(struct devlink *devlink, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); - enum nsim_resource_id res_ids[] = { - 
NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES - }; - int i; - - for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { - int err; - u64 val; - - err = devlink_resource_size_get(devlink, res_ids[i], &val); - if (!err) { - err = nsim_fib_set_max(nsim_dev->fib_data, - res_ids[i], val, extack); - if (err) - return err; - } - } - nsim_devlink_param_load_driverinit_values(devlink); - return 0; + return nsim_dev_reload_create(nsim_dev, extack); } #define NSIM_DEV_FLASH_SIZE 500000 @@ -687,8 +675,49 @@ err_port_del_all: return err; } -static struct nsim_dev * -nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) +static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, + struct netlink_ext_ack *extack) +{ + struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev; + struct devlink *devlink; + int err; + + devlink = priv_to_devlink(nsim_dev); + nsim_dev = devlink_priv(devlink); + INIT_LIST_HEAD(&nsim_dev->port_list); + mutex_init(&nsim_dev->port_list_lock); + nsim_dev->fw_update_status = true; + + nsim_dev->fib_data = nsim_fib_create(devlink, extack); + if (IS_ERR(nsim_dev->fib_data)) + return PTR_ERR(nsim_dev->fib_data); + + nsim_devlink_param_load_driverinit_values(devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_fib_destroy; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_traps_exit; + + return 0; + +err_traps_exit: + nsim_dev_traps_exit(devlink); +err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); + return err; +} + +static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; struct devlink *devlink; @@ -711,7 +740,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_devlink_free; - nsim_dev->fib_data = 
nsim_fib_create(devlink); + nsim_dev->fib_data = nsim_fib_create(devlink, NULL); if (IS_ERR(nsim_dev->fib_data)) { err = PTR_ERR(nsim_dev->fib_data); goto err_resources_unregister; @@ -772,21 +801,31 @@ err_devlink_free: return ERR_PTR(err); } -static void nsim_dev_destroy(struct nsim_dev *nsim_dev) +static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) { struct devlink *devlink = priv_to_devlink(nsim_dev); + if (devlink_is_reload_failed(devlink)) + return; nsim_dev_port_del_all(nsim_dev); - nsim_bpf_dev_exit(nsim_dev); - nsim_dev_debugfs_exit(nsim_dev); nsim_dev_traps_exit(devlink); nsim_dev_dummy_region_exit(nsim_dev); + mutex_destroy(&nsim_dev->port_list_lock); + nsim_fib_destroy(devlink, nsim_dev->fib_data); +} + +static void nsim_dev_destroy(struct nsim_dev *nsim_dev) +{ + struct devlink *devlink = priv_to_devlink(nsim_dev); + + nsim_dev_reload_destroy(nsim_dev); + + nsim_bpf_dev_exit(nsim_dev); + nsim_dev_debugfs_exit(nsim_dev); devlink_params_unregister(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); devlink_unregister(devlink); - nsim_fib_destroy(devlink, nsim_dev->fib_data); devlink_resources_unregister(devlink, NULL); - mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index d2aeac0f4c2c..fdc682f3a09a 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -63,12 +63,10 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, return max ? 
entry->max : entry->num; } -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack) +static void nsim_fib_set_max(struct nsim_fib_data *fib_data, + enum nsim_resource_id res_id, u64 val) { struct nsim_fib_entry *entry; - int err = 0; switch (res_id) { case NSIM_RESOURCE_IPV4_FIB: @@ -84,20 +82,10 @@ int nsim_fib_set_max(struct nsim_fib_data *fib_data, entry = &fib_data->ipv6.rules; break; default: - return 0; - } - - /* not allowing a new max to be less than curren occupancy - * --> no means of evicting entries - */ - if (val < entry->num) { - NL_SET_ERR_MSG_MOD(extack, "New size is less than current occupancy"); - err = -EINVAL; - } else { - entry->max = val; + WARN_ON(1); + return; } - - return err; + entry->max = val; } static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, @@ -239,7 +227,28 @@ static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); } -struct nsim_fib_data *nsim_fib_create(struct devlink *devlink) +static void nsim_fib_set_max_all(struct nsim_fib_data *data, + struct devlink *devlink) +{ + enum nsim_resource_id res_ids[] = { + NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES + }; + int i; + + for (i = 0; i < ARRAY_SIZE(res_ids); i++) { + int err; + u64 val; + + err = devlink_resource_size_get(devlink, res_ids[i], &val); + if (err) + val = (u64) -1; + nsim_fib_set_max(data, res_ids[i], val); + } +} + +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack) { struct nsim_fib_data *data; int err; @@ -248,15 +257,11 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink) if (!data) return ERR_PTR(-ENOMEM); - data->ipv4.fib.max = (u64)-1; - data->ipv4.rules.max = (u64)-1; - - data->ipv6.fib.max = (u64)-1; - data->ipv6.rules.max = (u64)-1; + nsim_fib_set_max_all(data, devlink); 
data->fib_nb.notifier_call = nsim_fib_event_nb; err = register_fib_notifier(&init_net, &data->fib_nb, - nsim_fib_dump_inconsistent, NULL); + nsim_fib_dump_inconsistent, extack); if (err) { pr_err("Failed to register fib notifier\n"); goto err_out; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index ac506cf253b6..702d951fe160 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -173,13 +173,11 @@ int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index); -struct nsim_fib_data *nsim_fib_create(struct devlink *devlink); -void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data); +struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, + struct netlink_ext_ack *extack); +void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data); u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, - struct netlink_ext_ack *extack); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) void nsim_ipsec_init(struct netdevsim *ns); -- cgit From 070c63f20f6c739a3c534555f56c7327536bfcc2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Oct 2019 11:49:39 +0200 Subject: net: devlink: allow to change namespaces during reload All devlink instances are created in init_net and stay there for a lifetime. Allow user to be able to move devlink instances into namespaces during devlink reload operation. That ensures proper re-instantiation of driver objects, including netdevices. Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- drivers/net/ethernet/mellanox/mlxsw/core.c | 1 + drivers/net/netdevsim/dev.c | 2 +- include/net/devlink.h | 2 +- include/uapi/linux/devlink.h | 4 + net/core/devlink.c | 154 +++++++++++++++++++++++++++-- 6 files changed, 158 insertions(+), 11 deletions(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index fce9b3a24347..22c72fb7206a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3935,13 +3935,17 @@ static void mlx4_restart_one_down(struct pci_dev *pdev); static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, struct devlink *devlink); -static int mlx4_devlink_reload_down(struct devlink *devlink, +static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); struct mlx4_dev *dev = &priv->dev; struct mlx4_dev_persistent *persist = dev->persist; + if (netns_change) { + NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported"); + return -EOPNOTSUPP; + } if (persist->num_vfs) mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); mlx4_restart_one_down(persist->pdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 1e61a012ca43..1c29522a2af3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -985,6 +985,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, static int mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, + bool netns_change, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 7de80faab047..3f3c7cc21077 100644 --- 
a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -473,7 +473,7 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack); static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev); -static int nsim_dev_reload_down(struct devlink *devlink, +static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); diff --git a/include/net/devlink.h b/include/net/devlink.h index 5ac2be0f0857..3c9d4a063c98 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -643,7 +643,7 @@ enum devlink_trap_group_generic_id { } struct devlink_ops { - int (*reload_down)(struct devlink *devlink, + int (*reload_down)(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack); int (*reload_up)(struct devlink *devlink, struct netlink_ext_ack *extack); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 580b7a2e40e1..b558ea88b766 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -421,6 +421,10 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */ + DEVLINK_ATTR_NETNS_FD, /* u32 */ + DEVLINK_ATTR_NETNS_PID, /* u32 */ + DEVLINK_ATTR_NETNS_ID, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 362cbbcca225..c4d8c4ab0fb5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -435,8 +435,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, { struct devlink *devlink; - devlink = devlink_get_from_info(info); - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + /* When devlink changes netns, it would not be found + * by devlink_get_from_info(). So try if it is stored first. 
+ */ + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { + devlink = info->user_ptr[0]; + } else { + devlink = devlink_get_from_info(info); + WARN_ON(IS_ERR(devlink)); + } + if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); } @@ -2675,6 +2683,72 @@ devlink_resources_validate(struct devlink *devlink, return err; } +static struct net *devlink_netns_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID]; + struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD]; + struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID]; + struct net *net; + + if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { + NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); + return ERR_PTR(-EINVAL); + } + + if (netns_pid_attr) { + net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr)); + } else if (netns_fd_attr) { + net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr)); + } else if (netns_id_attr) { + net = get_net_ns_by_id(sock_net(skb->sk), + nla_get_u32(netns_id_attr)); + if (!net) + net = ERR_PTR(-EINVAL); + } else { + WARN_ON(1); + net = ERR_PTR(-EINVAL); + } + if (IS_ERR(net)) { + NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); + return ERR_PTR(-EINVAL); + } + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EPERM); + } + return net; +} + +static void devlink_param_notify(struct devlink *devlink, + unsigned int port_index, + struct devlink_param_item *param_item, + enum devlink_command cmd); + +static void devlink_reload_netns_change(struct devlink *devlink, + struct net *dest_net) +{ + struct devlink_param_item *param_item; + + /* Userspace needs to be notified about devlink objects + * removed from original and entering new network namespace. 
+ * The rest of the devlink objects are re-created during + * reload process so the notifications are generated separatelly. + */ + + list_for_each_entry(param_item, &devlink->param_list, list) + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + devlink_notify(devlink, DEVLINK_CMD_DEL); + + devlink_net_set(devlink, dest_net); + + devlink_notify(devlink, DEVLINK_CMD_NEW); + list_for_each_entry(param_item, &devlink->param_list, list) + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); +} + static bool devlink_reload_supported(struct devlink *devlink) { return devlink->ops->reload_down && devlink->ops->reload_up; @@ -2695,9 +2769,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_is_reload_failed); +static int devlink_reload(struct devlink *devlink, struct net *dest_net, + struct netlink_ext_ack *extack) +{ + int err; + + err = devlink->ops->reload_down(devlink, !!dest_net, extack); + if (err) + return err; + + if (dest_net && !net_eq(dest_net, devlink_net(devlink))) + devlink_reload_netns_change(devlink, dest_net); + + err = devlink->ops->reload_up(devlink, extack); + devlink_reload_failed_set(devlink, !!err); + return err; +} + static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct net *dest_net = NULL; int err; if (!devlink_reload_supported(devlink)) @@ -2708,11 +2800,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); return err; } - err = devlink->ops->reload_down(devlink, info->extack); - if (err) - return err; - err = devlink->ops->reload_up(devlink, info->extack); - devlink_reload_failed_set(devlink, !!err); + + if (info->attrs[DEVLINK_ATTR_NETNS_PID] || + info->attrs[DEVLINK_ATTR_NETNS_FD] || + info->attrs[DEVLINK_ATTR_NETNS_ID]) { + dest_net = devlink_netns_get(skb, info); + if 
(IS_ERR(dest_net)) + return PTR_ERR(dest_net); + } + + err = devlink_reload(devlink, dest_net, info->extack); + + if (dest_net) + put_net(dest_net); + return err; } @@ -5794,6 +5895,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -8061,9 +8165,43 @@ int devlink_compat_switch_id_get(struct net_device *dev, return 0; } +static void __net_exit devlink_pernet_pre_exit(struct net *net) +{ + struct devlink *devlink; + int err; + + /* In case network namespace is getting destroyed, reload + * all devlink instances from this namespace into init_net. + */ + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (net_eq(devlink_net(devlink), net)) { + if (WARN_ON(!devlink_reload_supported(devlink))) + continue; + err = devlink_reload(devlink, &init_net, NULL); + if (err) + pr_warn("Failed to reload devlink instance into init_net\n"); + } + } + mutex_unlock(&devlink_mutex); +} + +static struct pernet_operations devlink_pernet_ops __net_initdata = { + .pre_exit = devlink_pernet_pre_exit, +}; + static int __init devlink_init(void) { - return genl_register_family(&devlink_nl_family); + int err; + + err = genl_register_family(&devlink_nl_family); + if (err) + goto out; + err = register_pernet_subsys(&devlink_pernet_ops); + +out: + WARN_ON(err); + return err; } subsys_initcall(devlink_init); -- cgit From 7b60027bbc6738b067bb9ed732a8c56b0ac430b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 5 Oct 2019 08:10:32 +0200 Subject: netdevsim: create devlink and netdev instances in namespace When user does create new netdevsim instance using sysfs bus file, 
create the devlink instance and related netdev instance in the namespace of the caller. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/bus.c | 1 + drivers/net/netdevsim/dev.c | 1 + drivers/net/netdevsim/netdevsim.h | 3 +++ 3 files changed, 5 insertions(+) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 1a0ff3d7747b..6aeed0c600f8 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -283,6 +283,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; + nsim_bus_dev->initial_net = current->nsproxy->net_ns; err = device_register(&nsim_bus_dev->dev); if (err) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 3f3c7cc21077..fbc4cdcfe551 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -726,6 +726,7 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev)); if (!devlink) return ERR_PTR(-ENOMEM); + devlink_net_set(devlink, nsim_bus_dev->initial_net); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 198ca31cec94..8168a5475fe7 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -220,6 +220,9 @@ struct nsim_bus_dev { struct device dev; struct list_head list; unsigned int port_count; + struct net *initial_net; /* Purpose of this is to carry net pointer + * during the probe time only. 
+ */ unsigned int num_vfs; struct nsim_vf_config *vfconfigs; }; -- cgit From 155ddfc5e54a68f0e8d20f31f2b4b6b25e1071b5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 6 Oct 2019 08:30:01 +0200 Subject: netdevsim: add couple of debugfs bools to debug devlink reload Add flag to disallow reload and another one that causes reload to always fail. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 20 ++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 2 ++ 2 files changed, 22 insertions(+) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index fbc4cdcfe551..31d1752c703a 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -90,6 +90,10 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) &nsim_dev->test1); debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, &nsim_dev_take_snapshot_fops); + debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir, + &nsim_dev->dont_allow_reload); + debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir, + &nsim_dev->fail_reload); return 0; } @@ -478,6 +482,14 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, { struct nsim_dev *nsim_dev = devlink_priv(devlink); + if (nsim_dev->dont_allow_reload) { + /* For testing purposes, user set debugfs dont_allow_reload + * value to true. So forbid it. + */ + NL_SET_ERR_MSG_MOD(extack, "User forbidded reload for testing purposes"); + return -EOPNOTSUPP; + } + nsim_dev_reload_destroy(nsim_dev); return 0; } @@ -487,6 +499,14 @@ static int nsim_dev_reload_up(struct devlink *devlink, { struct nsim_dev *nsim_dev = devlink_priv(devlink); + if (nsim_dev->fail_reload) { + /* For testing purposes, user set debugfs fail_reload + * value to true. Fail right away. 
+ */ + NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + return -EINVAL; + } + return nsim_dev_reload_create(nsim_dev, extack); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 8168a5475fe7..24358385d869 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -161,6 +161,8 @@ struct nsim_dev { bool fw_update_status; u32 max_macs; bool test1; + bool dont_allow_reload; + bool fail_reload; struct devlink_region *dummy_region; }; -- cgit From 8e23cc0319b185de49e35ed4fec174552bdf2f9a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Oct 2019 10:27:08 +0200 Subject: netdevsim: implement devlink dev_info op Do simple dev_info devlink operation implementation which only fills up the driver name. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 31d1752c703a..a3d7d39f231a 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -510,6 +510,13 @@ static int nsim_dev_reload_up(struct devlink *devlink, return nsim_dev_reload_create(nsim_dev, extack); } +static int nsim_dev_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + return devlink_info_driver_name_put(req, DRV_NAME); +} + #define NSIM_DEV_FLASH_SIZE 500000 #define NSIM_DEV_FLASH_CHUNK_SIZE 1000 #define NSIM_DEV_FLASH_CHUNK_TIME_MS 10 @@ -603,6 +610,7 @@ nsim_dev_devlink_trap_action_set(struct devlink *devlink, static const struct devlink_ops nsim_dev_devlink_ops = { .reload_down = nsim_dev_reload_down, .reload_up = nsim_dev_reload_up, + .info_get = nsim_dev_info_get, .flash_update = nsim_dev_flash_update, .trap_init = nsim_dev_devlink_trap_init, .trap_action_set = nsim_dev_devlink_trap_action_set, -- cgit From 
f9867b51d268d6fabcc4477d877f04aaad9299ae Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Oct 2019 09:17:47 +0100 Subject: netdevsim: fix spelling mistake "forbidded" -> "forbid" There is a spelling mistake in a NL_SET_ERR_MSG_MOD message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index a3d7d39f231a..e47fa7b6ca7c 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -486,7 +486,7 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it. */ - NL_SET_ERR_MSG_MOD(extack, "User forbidded reload for testing purposes"); + NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); return -EOPNOTSUPP; } -- cgit From 82c93a87bf8bc0cdb5ec2ab99da7d87715ff889f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Oct 2019 15:18:50 +0200 Subject: netdevsim: implement couple of testing devlink health reporters Implement "empty" and "dummy" reporters. The first one is really simple and does nothing. The other one has debugfs files to trigger breakage and it is able to do recovery. The ops also implement dummy fmsg content. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/netdevsim/Makefile | 2 +- drivers/net/netdevsim/dev.c | 17 +- drivers/net/netdevsim/health.c | 325 ++++++++++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 13 ++ 4 files changed, 354 insertions(+), 3 deletions(-) create mode 100644 drivers/net/netdevsim/health.c (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 09f1315d2f2a..f4d8f62f28c2 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o dev.o fib.o bus.o + netdev.o dev.o fib.o bus.o health.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e47fa7b6ca7c..468e157a7cb1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -730,12 +730,18 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, if (err) goto err_dummy_region_exit; - err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + err = nsim_dev_health_init(nsim_dev, devlink); if (err) goto err_traps_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_health_exit; + return 0; +err_health_exit: + nsim_dev_health_exit(nsim_dev); err_traps_exit: nsim_dev_traps_exit(devlink); err_dummy_region_exit: @@ -797,10 +803,14 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_traps_exit; - err = nsim_bpf_dev_init(nsim_dev); + err = nsim_dev_health_init(nsim_dev, devlink); if (err) goto err_debugfs_exit; + err = nsim_bpf_dev_init(nsim_dev); + if (err) + goto err_health_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); if (err) goto err_bpf_dev_exit; @@ -810,6 +820,8 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) err_bpf_dev_exit: nsim_bpf_dev_exit(nsim_dev); +err_health_exit: + 
nsim_dev_health_exit(nsim_dev); err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); err_traps_exit: @@ -837,6 +849,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) if (devlink_is_reload_failed(devlink)) return; nsim_dev_port_del_all(nsim_dev); + nsim_dev_health_exit(nsim_dev); nsim_dev_traps_exit(devlink); nsim_dev_dummy_region_exit(nsim_dev); mutex_destroy(&nsim_dev->port_list_lock); diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c new file mode 100644 index 000000000000..2716235a0336 --- /dev/null +++ b/drivers/net/netdevsim/health.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include +#include +#include +#include + +#include "netdevsim.h" + +static int +nsim_dev_empty_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static int +nsim_dev_empty_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static const +struct devlink_health_reporter_ops nsim_dev_empty_reporter_ops = { + .name = "empty", + .dump = nsim_dev_empty_reporter_dump, + .diagnose = nsim_dev_empty_reporter_diagnose, +}; + +struct nsim_dev_dummy_reporter_ctx { + char *break_msg; +}; + +static int +nsim_dev_dummy_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + + if (health->fail_recover) { + /* For testing purposes, user set debugfs fail_recover + * value to true. Fail right away. 
+ */ + NL_SET_ERR_MSG_MOD(extack, "User setup the recover to fail for testing purposes"); + return -EINVAL; + } + if (ctx) { + kfree(health->recovered_break_msg); + health->recovered_break_msg = kstrdup(ctx->break_msg, + GFP_KERNEL); + if (!health->recovered_break_msg) + return -ENOMEM; + } + return 0; +} + +static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 binary_len) +{ + char *binary; + int err; + int i; + + err = devlink_fmsg_bool_pair_put(fmsg, "test_bool", true); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "test_u8", 1); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "test_u32", 3); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "test_u64", 4); + if (err) + return err; + err = devlink_fmsg_string_pair_put(fmsg, "test_string", "somestring"); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_binary"); + if (err) + return err; + binary = kmalloc(binary_len, GFP_KERNEL); + if (!binary) + return -ENOMEM; + get_random_bytes(binary, binary_len); + err = devlink_fmsg_binary_put(fmsg, binary, binary_len); + kfree(binary); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "test_nest"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_bool_pair_put(fmsg, "nested_test_bool", false); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "nested_test_u8", false); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_bool_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_bool_put(fmsg, true); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = 
devlink_fmsg_arr_pair_nest_start(fmsg, "test_u8_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u8_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u32_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u32_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_u64_array"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_u64_put(fmsg, i); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "test_array_of_objects"); + if (err) + return err; + for (i = 0; i < 10; i++) { + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_bool_pair_put(fmsg, + "in_array_nested_test_bool", + false); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, + "in_array_nested_test_u8", + i); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + } + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int +nsim_dev_dummy_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = devlink_health_reporter_priv(reporter); + struct nsim_dev_dummy_reporter_ctx *ctx = priv_ctx; + int err; + + if (ctx) { + err = devlink_fmsg_string_pair_put(fmsg, "break_message", + ctx->break_msg); + if (err) + return err; + } + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static int +nsim_dev_dummy_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct nsim_dev_health *health = 
devlink_health_reporter_priv(reporter); + int err; + + if (health->recovered_break_msg) { + err = devlink_fmsg_string_pair_put(fmsg, + "recovered_break_message", + health->recovered_break_msg); + if (err) + return err; + } + return nsim_dev_dummy_fmsg_put(fmsg, health->binary_len); +} + +static const +struct devlink_health_reporter_ops nsim_dev_dummy_reporter_ops = { + .name = "dummy", + .recover = nsim_dev_dummy_reporter_recover, + .dump = nsim_dev_dummy_reporter_dump, + .diagnose = nsim_dev_dummy_reporter_diagnose, +}; + +static ssize_t nsim_dev_health_break_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_health *health = file->private_data; + struct nsim_dev_dummy_reporter_ctx ctx; + char *break_msg; + int err; + + break_msg = kmalloc(count + 1, GFP_KERNEL); + if (!break_msg) + return -ENOMEM; + + if (copy_from_user(break_msg, data, count)) { + err = -EFAULT; + goto out; + } + break_msg[count] = '\0'; + if (break_msg[count - 1] == '\n') + break_msg[count - 1] = '\0'; + + ctx.break_msg = break_msg; + err = devlink_health_report(health->dummy_reporter, break_msg, &ctx); + if (err) + goto out; + +out: + kfree(break_msg); + return err ?: count; +} + +static const struct file_operations nsim_dev_health_break_fops = { + .open = simple_open, + .write = nsim_dev_health_break_write, + .llseek = generic_file_llseek, +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) +{ + struct nsim_dev_health *health = &nsim_dev->health; + int err; + + health->empty_reporter = + devlink_health_reporter_create(devlink, + &nsim_dev_empty_reporter_ops, + 0, false, health); + if (IS_ERR(health->empty_reporter)) + return PTR_ERR(health->empty_reporter); + + health->dummy_reporter = + devlink_health_reporter_create(devlink, + &nsim_dev_dummy_reporter_ops, + 0, false, health); + if (IS_ERR(health->dummy_reporter)) { + err = PTR_ERR(health->dummy_reporter); + goto err_empty_reporter_destroy; + } + + 
health->ddir = debugfs_create_dir("health", nsim_dev->ddir); + if (IS_ERR_OR_NULL(health->ddir)) { + err = PTR_ERR_OR_ZERO(health->ddir) ?: -EINVAL; + goto err_dummy_reporter_destroy; + } + + health->recovered_break_msg = NULL; + debugfs_create_file("break_health", 0200, health->ddir, health, + &nsim_dev_health_break_fops); + health->binary_len = 16; + debugfs_create_u32("binary_len", 0600, health->ddir, + &health->binary_len); + health->fail_recover = false; + debugfs_create_bool("fail_recover", 0600, health->ddir, + &health->fail_recover); + return 0; + +err_dummy_reporter_destroy: + devlink_health_reporter_destroy(health->dummy_reporter); +err_empty_reporter_destroy: + devlink_health_reporter_destroy(health->empty_reporter); + return err; +} + +void nsim_dev_health_exit(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_health *health = &nsim_dev->health; + + debugfs_remove_recursive(health->ddir); + kfree(health->recovered_break_msg); + devlink_health_reporter_destroy(health->dummy_reporter); + devlink_health_reporter_destroy(health->empty_reporter); +} diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 24358385d869..94df795ef4d3 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -134,6 +134,18 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6_FIB_RULES, }; +struct nsim_dev_health { + struct devlink_health_reporter *empty_reporter; + struct devlink_health_reporter *dummy_reporter; + struct dentry *ddir; + char *recovered_break_msg; + u32 binary_len; + bool fail_recover; +}; + +int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink); +void nsim_dev_health_exit(struct nsim_dev *nsim_dev); + struct nsim_dev_port { struct list_head list; struct devlink_port devlink_port; @@ -164,6 +176,7 @@ struct nsim_dev { bool dont_allow_reload; bool fail_reload; struct devlink_region *dummy_region; + struct nsim_dev_health health; }; static inline struct net *nsim_dev_net(struct nsim_dev 
*nsim_dev) -- cgit From 6d6f0383b697f004c65823c2b64240912f18515d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 31 Oct 2019 18:20:30 +0200 Subject: netdevsim: Fix use-after-free during device dismantle Commit da58f90f11f5 ("netdevsim: Add devlink-trap support") added delayed work to netdevsim that periodically iterates over the registered netdevsim ports and reports various packet traps via devlink. While the delayed work takes the 'port_list_lock' mutex to protect against concurrent addition / deletion of ports, during device creation / dismantle ports are added / deleted without this lock, which can result in a use-after-free [1]. Fix this by making sure that the ports list is always modified under the lock. [1] [ 59.205543] ================================================================== [ 59.207748] BUG: KASAN: use-after-free in nsim_dev_trap_report_work+0xa67/0xad0 [ 59.210247] Read of size 8 at addr ffff8883cbdd3398 by task kworker/3:1/38 [ 59.212584] [ 59.213148] CPU: 3 PID: 38 Comm: kworker/3:1 Not tainted 5.4.0-rc3-custom-16119-ge6abb5f0261e #2013 [ 59.215896] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180724_192412-buildhw-07.phx2.fedoraproject.org-1.fc29 04/01/2014 [ 59.218384] Workqueue: events nsim_dev_trap_report_work [ 59.219428] Call Trace: [ 59.219924] dump_stack+0xa9/0x10e [ 59.220623] print_address_description.constprop.4+0x21/0x340 [ 59.221976] ? vprintk_func+0x66/0x240 [ 59.222752] __kasan_report.cold.8+0x78/0x91 [ 59.223602] ? nsim_dev_trap_report_work+0xa67/0xad0 [ 59.224603] kasan_report+0xe/0x20 [ 59.225296] nsim_dev_trap_report_work+0xa67/0xad0 [ 59.226435] ? rcu_read_lock_sched_held+0xaf/0xe0 [ 59.227512] ? trace_event_raw_event_rcu_quiescent_state_report+0x360/0x360 [ 59.228851] process_one_work+0x98f/0x1760 [ 59.229684] ? pwq_dec_nr_in_flight+0x330/0x330 [ 59.230656] worker_thread+0x91/0xc40 [ 59.231587] ? process_one_work+0x1760/0x1760 [ 59.232451] kthread+0x34a/0x410 [ 59.233104] ? 
__kthread_queue_delayed_work+0x240/0x240 [ 59.234141] ret_from_fork+0x3a/0x50 [ 59.234982] [ 59.235371] Allocated by task 187: [ 59.236189] save_stack+0x19/0x80 [ 59.236853] __kasan_kmalloc.constprop.5+0xc1/0xd0 [ 59.237822] kmem_cache_alloc_trace+0x14c/0x380 [ 59.238769] __nsim_dev_port_add+0xaf/0x5c0 [ 59.239627] nsim_dev_probe+0x4fc/0x1140 [ 59.240550] really_probe+0x264/0xc00 [ 59.241418] driver_probe_device+0x208/0x2e0 [ 59.242255] __device_attach_driver+0x215/0x2d0 [ 59.243150] bus_for_each_drv+0x154/0x1d0 [ 59.243944] __device_attach+0x1ba/0x2b0 [ 59.244923] bus_probe_device+0x1dd/0x290 [ 59.245805] device_add+0xbac/0x1550 [ 59.246528] new_device_store+0x1f4/0x400 [ 59.247306] bus_attr_store+0x7b/0xa0 [ 59.248047] sysfs_kf_write+0x10f/0x170 [ 59.248941] kernfs_fop_write+0x283/0x430 [ 59.249843] __vfs_write+0x81/0x100 [ 59.250546] vfs_write+0x1ce/0x510 [ 59.251190] ksys_write+0x104/0x200 [ 59.251873] do_syscall_64+0xa4/0x4e0 [ 59.252642] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 59.253837] [ 59.254203] Freed by task 187: [ 59.254811] save_stack+0x19/0x80 [ 59.255463] __kasan_slab_free+0x125/0x170 [ 59.256265] kfree+0x100/0x440 [ 59.256870] nsim_dev_remove+0x98/0x100 [ 59.257651] nsim_bus_remove+0x16/0x20 [ 59.258382] device_release_driver_internal+0x20b/0x4d0 [ 59.259588] bus_remove_device+0x2e9/0x5a0 [ 59.260551] device_del+0x410/0xad0 [ 59.263777] device_unregister+0x26/0xc0 [ 59.264616] nsim_bus_dev_del+0x16/0x60 [ 59.265381] del_device_store+0x2d6/0x3c0 [ 59.266295] bus_attr_store+0x7b/0xa0 [ 59.267192] sysfs_kf_write+0x10f/0x170 [ 59.267960] kernfs_fop_write+0x283/0x430 [ 59.268800] __vfs_write+0x81/0x100 [ 59.269551] vfs_write+0x1ce/0x510 [ 59.270252] ksys_write+0x104/0x200 [ 59.270910] do_syscall_64+0xa4/0x4e0 [ 59.271680] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 59.272812] [ 59.273211] The buggy address belongs to the object at ffff8883cbdd3200 [ 59.273211] which belongs to the cache kmalloc-512 of size 512 [ 59.275838] The buggy address is located 
408 bytes inside of [ 59.275838] 512-byte region [ffff8883cbdd3200, ffff8883cbdd3400) [ 59.278151] The buggy address belongs to the page: [ 59.279215] page:ffffea000f2f7400 refcount:1 mapcount:0 mapping:ffff8883ecc0ce00 index:0x0 compound_mapcount: 0 [ 59.281449] flags: 0x200000000010200(slab|head) [ 59.282356] raw: 0200000000010200 ffffea000f2f3a08 ffffea000f2fd608 ffff8883ecc0ce00 [ 59.283949] raw: 0000000000000000 0000000000150015 00000001ffffffff 0000000000000000 [ 59.285608] page dumped because: kasan: bad access detected [ 59.286981] [ 59.287337] Memory state around the buggy address: [ 59.288310] ffff8883cbdd3280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 59.289763] ffff8883cbdd3300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 59.291452] >ffff8883cbdd3380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 59.292945] ^ [ 59.293815] ffff8883cbdd3400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 59.295220] ffff8883cbdd3480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 59.296872] ================================================================== Fixes: da58f90f11f5 ("netdevsim: Add devlink-trap support") Signed-off-by: Ido Schimmel Reported-by: syzbot+9ed8f68ab30761f3678e@syzkaller.appspotmail.com Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/netdevsim/dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 56576d4f34a5..54ca6681ba31 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -806,9 +806,11 @@ static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev) { struct nsim_dev_port *nsim_dev_port, *tmp; + mutex_lock(&nsim_dev->port_list_lock); list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list) __nsim_dev_port_del(nsim_dev_port); + mutex_unlock(&nsim_dev->port_list_lock); } int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) @@ -822,14 +824,17 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) return PTR_ERR(nsim_dev); dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); + mutex_lock(&nsim_dev->port_list_lock); for (i = 0; i < nsim_bus_dev->port_count; i++) { err = __nsim_dev_port_add(nsim_dev, i); if (err) goto err_port_del_all; } + mutex_unlock(&nsim_dev->port_list_lock); return 0; err_port_del_all: + mutex_unlock(&nsim_dev->port_list_lock); nsim_dev_port_del_all(nsim_dev); nsim_dev_destroy(nsim_dev); return err; -- cgit From bfcccfe78b361f5f6ef48554aed5bcd30c72f67f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 13:26:11 -0800 Subject: netdevsim: drop code duplicated by a merge Looks like the port adding loop makes a re-appearance on net-next after net was merged back into it (even though it doesn't feature in the merge diff). The ports are already added in nsim_dev_create() so when we try to add them again we get EEXIST, and see: netdevsim: probe of netdevsim0 failed with error -17 in the logs. When we remove the loop again the nsim_dev_probe() and nsim_dev_remove() become wrappers of nsim_dev_create() and nsim_dev_destroy(). Remove this layer of indirection.
Fixes: d31e95585ca6 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 47 ++++++++------------------------------------- 1 file changed, 8 insertions(+), 39 deletions(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e59a8826f36d..3da96c7e8265 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -753,7 +753,7 @@ err_fib_destroy: return err; } -static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) +int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) { struct nsim_dev *nsim_dev; struct devlink *devlink; @@ -761,7 +761,7 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev)); if (!devlink) - return ERR_PTR(-ENOMEM); + return -ENOMEM; devlink_net_set(devlink, nsim_bus_dev->initial_net); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; @@ -773,6 +773,8 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT; + dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); + err = nsim_dev_resources_register(devlink); if (err) goto err_devlink_free; @@ -818,7 +820,7 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev) goto err_bpf_dev_exit; devlink_params_publish(devlink); - return nsim_dev; + return 0; err_bpf_dev_exit: nsim_bpf_dev_exit(nsim_dev); @@ -841,7 +843,7 @@ err_resources_unregister: devlink_resources_unregister(devlink, NULL); err_devlink_free: devlink_free(devlink); - return ERR_PTR(err); + return err; } static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) @@ -858,8 +860,9 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) nsim_fib_destroy(devlink, 
nsim_dev->fib_data); } -static void nsim_dev_destroy(struct nsim_dev *nsim_dev) +void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) { + struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); struct devlink *devlink = priv_to_devlink(nsim_dev); nsim_dev_reload_destroy(nsim_dev); @@ -873,40 +876,6 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) devlink_free(devlink); } -int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) -{ - struct nsim_dev *nsim_dev; - int i; - int err; - - nsim_dev = nsim_dev_create(nsim_bus_dev); - if (IS_ERR(nsim_dev)) - return PTR_ERR(nsim_dev); - dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); - - mutex_lock(&nsim_dev->port_list_lock); - for (i = 0; i < nsim_bus_dev->port_count; i++) { - err = __nsim_dev_port_add(nsim_dev, i); - if (err) - goto err_port_del_all; - } - mutex_unlock(&nsim_dev->port_list_lock); - return 0; - -err_port_del_all: - mutex_unlock(&nsim_dev->port_list_lock); - nsim_dev_port_del_all(nsim_dev); - nsim_dev_destroy(nsim_dev); - return err; -} - -void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) -{ - struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); - - nsim_dev_destroy(nsim_dev); -} - static struct nsim_dev_port * __nsim_dev_port_lookup(struct nsim_dev *nsim_dev, unsigned int port_index) { -- cgit From a0c76345e3d3dbc40c39de2e00d15a3b7eef7885 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 8 Nov 2019 21:42:43 +0100 Subject: devlink: disallow reload operation during device cleanup There is a race between driver code that does setup/cleanup of device and devlink reload operation that in some drivers works with the same code. Use after free could be easily obtained by running: while true; do echo 10 > /sys/bus/netdevsim/new_device devlink dev reload netdevsim/netdevsim10 & echo 10 > /sys/bus/netdevsim/del_device done Fix this by enabling reload only after setup of device is complete and disabling it at the beginning of the cleanup process.
Reported-by: Ido Schimmel Fixes: 2d8dc5bbf4e7 ("devlink: Add support for reload") Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 +++ drivers/net/ethernet/mellanox/mlxsw/core.c | 6 ++++- drivers/net/netdevsim/dev.c | 3 +++ include/net/devlink.h | 7 +++-- net/core/devlink.c | 42 +++++++++++++++++++++++++++++- 5 files changed, 57 insertions(+), 4 deletions(-) (limited to 'drivers/net/netdevsim/dev.c') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 22c72fb7206a..77f056b0895e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4015,6 +4015,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto err_params_unregister; devlink_params_publish(devlink); + devlink_reload_enable(devlink); pci_save_state(pdev); return 0; @@ -4126,6 +4127,8 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + devlink_reload_disable(devlink); + if (mlx4_is_slave(dev)) persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e1a90f5bddd0..da436a6aad2f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1198,8 +1198,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register) + if (mlxsw_driver->params_register) { devlink_params_publish(devlink); + devlink_reload_enable(devlink); + } return 0; @@ -1263,6 +1265,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); + if (!reload) + devlink_reload_disable(devlink); if (devlink_is_reload_failed(devlink)) { if (!reload) /* Only 
the parts that were not de-initialized in the diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 3da96c7e8265..059711edfc61 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -820,6 +820,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) goto err_bpf_dev_exit; devlink_params_publish(devlink); + devlink_reload_enable(devlink); return 0; err_bpf_dev_exit: @@ -865,6 +866,8 @@ void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); struct devlink *devlink = priv_to_devlink(nsim_dev); + devlink_reload_disable(devlink); + nsim_dev_reload_destroy(nsim_dev); nsim_bpf_dev_exit(nsim_dev); diff --git a/include/net/devlink.h b/include/net/devlink.h index 8d6b5846822c..7891611868e4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -38,8 +38,9 @@ struct devlink { struct device *dev; possible_net_t _net; struct mutex lock; - bool reload_failed; - bool registered; + u8 reload_failed:1, + reload_enabled:1, + registered:1; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -824,6 +825,8 @@ void devlink_net_set(struct devlink *devlink, struct net *net); struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); +void devlink_reload_enable(struct devlink *devlink); +void devlink_reload_disable(struct devlink *devlink); void devlink_free(struct devlink *devlink); int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, diff --git a/net/core/devlink.c b/net/core/devlink.c index ff53f7d29dea..2e027c9436e0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2791,6 +2791,9 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, { int err; + if (!devlink->reload_enabled) + return -EOPNOTSUPP; + err = devlink->ops->reload_down(devlink, !!dest_net, extack); if (err) 
return err; @@ -6308,12 +6311,49 @@ EXPORT_SYMBOL_GPL(devlink_register); void devlink_unregister(struct devlink *devlink) { mutex_lock(&devlink_mutex); + WARN_ON(devlink_reload_supported(devlink) && + devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); list_del(&devlink->list); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); +/** + * devlink_reload_enable - Enable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at end of device initialization + * process when reload operation is supported. + */ +void devlink_reload_enable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->reload_enabled = true; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_enable); + +/** + * devlink_reload_disable - Disable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at the beginning of device cleanup + * process when reload operation is supported. + */ +void devlink_reload_disable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + /* Mutex is taken which ensures that no reload operation is in + * progress while setting up forbidded flag. + */ + devlink->reload_enabled = false; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_disable); + /** * devlink_free - Free devlink instance resources * @@ -8201,7 +8241,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) if (WARN_ON(!devlink_reload_supported(devlink))) continue; err = devlink_reload(devlink, &init_net, NULL); - if (err) + if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); } } -- cgit