diff options
Diffstat (limited to 'net/sched')
78 files changed, 1980 insertions, 1244 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1b9afdee5ba9..2c72d95c3050 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Traffic control configuration. # @@ -358,8 +359,7 @@ config NET_SCH_PIE help Say Y here if you want to use the Proportional Integral controller Enhanced scheduler packet scheduling algorithm. - For more information, please see - http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + For more information, please see https://tools.ietf.org/html/rfc8033 To compile this driver as a module, choose M here: the module will be called sch_pie. diff --git a/net/sched/act_api.c b/net/sched/act_api.c index aecf1bf233c8..4e5d2e9ace5d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1,14 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_api.c Packet action API. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Author: Jamal Hadi Salim - * - * */ #include <linux/types.h> @@ -28,27 +22,10 @@ #include <net/act_api.h> #include <net/netlink.h> -static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) -{ - u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK; - - if (!tp) - return -EINVAL; - a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index); - if (!a->goto_chain) - return -ENOMEM; - return 0; -} - -static void tcf_action_goto_chain_fini(struct tc_action *a) -{ - tcf_chain_put_by_act(a->goto_chain); -} - static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { - const struct tcf_chain *chain = a->goto_chain; + const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } @@ -71,6 +48,51 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, call_rcu(&old->rcu, tcf_free_cookie_rcu); } +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **newchain, + struct netlink_ext_ack *extack) +{ + int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; + u32 chain_index; + + if (!opcode) + ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; + else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) + ret = 0; + if (ret) { + NL_SET_ERR_MSG(extack, "invalid control action"); + goto end; + } + + if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { + chain_index = action & TC_ACT_EXT_VAL_MASK; + if (!tp || !newchain) { + ret = -EINVAL; + NL_SET_ERR_MSG(extack, + "can't goto NULL proto/chain"); + goto end; + } + *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); + if (!*newchain) { + ret = -ENOMEM; + NL_SET_ERR_MSG(extack, + "can't allocate goto_chain"); + } + } +end: + return ret; +} +EXPORT_SYMBOL(tcf_action_check_ctrlact); + +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *goto_chain) +{ + a->tcfa_action = action; + rcu_swap_protected(a->goto_chain, goto_chain, 1); + return goto_chain; +} +EXPORT_SYMBOL(tcf_action_set_ctrlact); + /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected @@ -78,13 +100,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, */ static void free_tcf(struct tc_action *p) { + struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); + free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->act_cookie, NULL); - if (p->goto_chain) - tcf_action_goto_chain_fini(p); + if (chain) + tcf_chain_put_by_act(chain); kfree(p); } @@ -212,7 +236,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, (unsigned long)p->tcfa_tm.lastuse)) continue; - nest = nla_nest_start(skb, n_i); + nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; goto nla_put_failure; @@ -269,7 +293,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct tc_action *p; unsigned long id = 1; - nest = nla_nest_start(skb, 0); + nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) @@ -654,6 +678,10 @@ repeat: return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { + if (unlikely(!rcu_access_pointer(a->goto_chain))) { + net_warn_ratelimited("can't go to NULL chain!\n"); + return TC_ACT_SHOT; + } tcf_action_goto_chain_exec(a, res); } @@ -742,7 +770,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } rcu_read_unlock(); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); @@ -766,7 +794,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { a = actions[i]; - nest = nla_nest_start(skb, a->order); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_1(skb, a, bind, ref); @@ -800,15 +828,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -static bool tcf_action_valid(int action) -{ - int opcode = TC_ACT_EXT_OPCODE(action); - - if (!opcode) - return action <= TC_ACT_VALUE_MAX; - return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC; -} - struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -824,7 +843,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, + extack); if (err < 0) goto err_out; err = -EINVAL; @@ -890,10 +910,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, /* backward compatibility for policer */ if (name == NULL) err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, extack); + rtnl_held, tp, extack); else err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - extack); + tp, extack); if (err < 0) goto err_mod; @@ -907,18 +927,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { - err = tcf_action_goto_chain_init(a, tp); - if (err) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); - return ERR_PTR(err); - } - } - - if (!tcf_action_valid(a->tcfa_action)) { + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && + !rcu_access_pointer(a->goto_chain)) { tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Invalid control action value"); + NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); return ERR_PTR(-EINVAL); } @@ -947,7 +959,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + extack); if (err < 0) return err; @@ -1035,7 +1048,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], t->tca__pad1 = 0; t->tca__pad2 = 0; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; @@ -1082,7 +1095,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; @@ -1136,7 +1149,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; @@ -1159,7 +1172,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, t->tca__pad1 = 0; t->tca__pad2 = 0; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) { NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; @@ -1265,7 +1278,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, size_t attr_size = 0; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); + ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + extack); if (ret < 0) return ret; @@ -1283,7 +1297,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, ret = PTR_ERR(act); goto err; } - act->order = i; attr_size += tcf_action_fill_size(act); actions[i - 1] = act; } @@ -1367,8 +1380,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, - extack); + ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca, + TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1419,13 +1432,12 @@ static struct nlattr *find_dump_kind(struct nlattr **nla) if (tb1 == NULL) return NULL; - if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), - NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) + if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; - if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; @@ -1449,8 +1461,8 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) u32 msecs_since = 0; u32 act_count = 0; - ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, - tcaa_policy, cb->extack); + ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb, + TCA_ROOT_MAX, tcaa_policy, cb->extack); if (ret < 0) return ret; @@ -1491,7 +1503,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (!count_attr) goto out_module_put; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (nest == NULL) goto out_module_put; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index aa5c38d11a30..8126b26f125e 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/module.h> @@ -17,6 +13,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> @@ -278,10 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int replace, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; @@ -291,7 +289,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); + ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla, + act_bpf_policy, NULL); if (ret < 0) return ret; @@ -323,12 +322,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return ret; } + ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (ret < 0) + goto release_idr; + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { ret = -EINVAL; - goto out; + goto put_chain; } memset(&cfg, 0, sizeof(cfg)); @@ -336,7 +339,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : tcf_bpf_init_from_efd(tb, &cfg); if (ret < 0) - goto out; + goto put_chain; prog = to_bpf(*act); @@ -350,10 +353,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (cfg.bpf_num_ops) prog->bpf_num_ops = cfg.bpf_num_ops; - prog->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch); rcu_assign_pointer(prog->filter, cfg.filter); spin_unlock_bh(&prog->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (res == ACT_P_CREATED) { tcf_idr_insert(tn, *act); } else { @@ -363,9 +369,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } return res; -out: - tcf_idr_release(*act, bind); +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + +release_idr: + tcf_idr_release(*act, bind); return ret; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5d24993cccfe..ce36b0f7e1dc 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_connmark.c netfilter connmark retriever action * skb mark is over-written * * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/module.h> @@ -21,6 +17,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/act_api.h> +#include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_connmark.h> #include <net/tc_act/tc_connmark.h> @@ -97,19 +94,21 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; - int ret = 0; + int ret = 0, err; if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, TCA_CONNMARK_MAX, nla, + connmark_policy, NULL); if (ret < 0) return ret; @@ -128,7 +127,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } ci = to_connmark(*a); - ci->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->net = net; ci->zone = parm->zone; @@ -142,15 +145,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; /* replacing action and zone */ spin_lock_bh(&ci->tcf_lock); - ci->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->zone = parm->zone; spin_unlock_bh(&ci->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); ret = 0; } return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index c79aca29505e..621fb22ce2a9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Checksum updating actions * * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include <linux/types.h> @@ -33,6 +28,7 @@ #include <net/sctp/checksum.h> #include <net/act_api.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> @@ -46,12 +42,13 @@ static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; @@ -59,7 +56,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CSUM_MAX, nla, csum_policy, + NULL); if (err < 0) return err; @@ -87,21 +85,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p = to_tcf_csum(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->update_flags = parm->update_flags; spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(p->params, params_new, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (params_new) kfree_rcu(params_new, rcu); @@ -109,6 +113,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } /** diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 93da0004e9f4..b2380c5284e6 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_gact.c Generic actions * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * copyright Jamal Hadi Salim (2002-4) - * */ #include <linux/types.h> @@ -20,6 +15,7 @@ #include <linux/init.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> @@ -57,10 +53,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; struct tcf_gact *gact; int ret = 0; @@ -72,7 +69,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_GACT_MAX, nla, gact_policy, + NULL); if (err < 0) return err; @@ -116,10 +114,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; gact = to_gact(*a); spin_lock_bh(&gact->tcf_lock); - gact->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; @@ -133,9 +134,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif spin_unlock_bh(&gact->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9b1f2b3990ee..41d5398dd2f2 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/ife.c Inter-FE action based on ForCES WG InterFE LFB * @@ -9,13 +10,7 @@ * Subsystem" * Authors: Jamal Hadi Salim and Damascene M. Joachimpillai * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * copyright Jamal Hadi Salim (2015) - * */ #include <linux/types.h> @@ -29,6 +24,7 @@ #include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_ife.h> #include <net/tc_act/tc_ife.h> #include <linux/etherdevice.h> @@ -386,7 +382,7 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) if (list_empty(&ife->metalist)) return 0; - nest = nla_nest_start(skb, TCA_IFE_METALST); + nest = nla_nest_start_noflag(skb, TCA_IFE_METALST); if (!nest) goto out_nlmsg_trim; @@ -469,11 +465,12 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_ife_params *p; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; @@ -484,7 +481,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; - err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy, + NULL); if (err < 0) return err; @@ -531,6 +529,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { @@ -561,15 +563,11 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, INIT_LIST_HEAD(&ife->metalist); if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], - NULL, NULL); - if (err) { -metadata_parse_err: - tcf_idr_release(*a, bind); - kfree(p); - return err; - } - + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); + if (err) + goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; @@ -581,21 +579,20 @@ metadata_parse_err: * going to bail out */ err = use_all_metadata(ife, exists); - if (err) { - tcf_idr_release(*a, bind); - kfree(p); - return err; - } + if (err) + goto metadata_parse_err; } if (exists) spin_lock_bh(&ife->tcf_lock); - ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(ife->params, p, 1); if (exists) spin_unlock_bh(&ife->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); @@ -603,6 +600,13 @@ metadata_parse_err: tcf_idr_insert(tn, *a); return ret; +metadata_parse_err: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + kfree(p); + tcf_idr_release(*a, bind); + return err; } static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 98f5b6ea77b4..ce2c30a591d2 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_ipt.c iptables target interface * *TODO: Add other tables. For now we only support the ipv4 table targets * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Copyright: Jamal Hadi Salim (2002-13) */ @@ -97,7 +93,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind) + const struct tc_action_ops *ops, int ovr, int bind, + struct tcf_proto *tp) { struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -112,7 +109,8 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, + NULL); if (err < 0) return err; @@ -205,20 +203,20 @@ err1: static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind); + bind, tp); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, + int bind, bool unlocked, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind); + bind, tp); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 6445184b2759..ea0573cb8b2d 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_meta_mark.c IFE skb->mark metadata module * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * copyright Jamal Hadi Salim (2015) - * */ #include <linux/types.h> diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c index 4033f9fc4d4a..2df3133ce5ad 100644 --- a/net/sched/act_meta_skbprio.c +++ b/net/sched/act_meta_skbprio.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_meta_prio.c IFE skb->priority metadata module * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * copyright Jamal Hadi Salim (2015) - * */ #include <linux/types.h> diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 7221437ca3a6..44547caead46 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * copyright Jamal Hadi Salim (2016) - * */ #include <linux/types.h> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6692fd054617..58e7573dded4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -1,15 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_mirred.c packet mirroring and redirect actions * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Jamal Hadi Salim (2002-4) * * TODO: Add ingress support (and socket redirect support) - * */ #include <linux/types.h> @@ -94,10 +89,12 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; @@ -109,7 +106,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; } - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); + ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla, + mirred_policy, extack); if (ret < 0) return ret; if (!tb[TCA_MIRRED_PARMS]) { @@ -157,18 +155,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + m = to_mirred(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&m->tcfm_list); + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; spin_lock_bh(&m->tcf_lock); - m->tcf_action = parm->action; - m->tcfm_eaction = parm->eaction; if (parm->ifindex) { dev = dev_get_by_index(net, parm->ifindex); if (!dev) { spin_unlock_bh(&m->tcf_lock); - tcf_idr_release(*a, bind); - return -ENODEV; + err = -ENODEV; + goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(dev); rcu_swap_protected(m->tcfm_dev, dev, @@ -177,7 +180,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); @@ -188,6 +195,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 543eab9193f1..45923ebb7a4f 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Stateless NAT actions * * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. */ #include <linux/errno.h> @@ -21,6 +17,7 @@ #include <linux/string.h> #include <linux/tc_act/tc_nat.h> #include <net/act_api.h> +#include <net/pkt_cls.h> #include <net/icmp.h> #include <net/ip.h> #include <net/netlink.h> @@ -38,10 +35,12 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct netlink_ext_ack *extack) + bool rtnl_held, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; int ret = 0, err; struct tcf_nat *p; @@ -49,7 +48,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy, + NULL); if (err < 0) return err; @@ -76,6 +76,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else { return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; p = to_tcf_nat(*a); spin_lock_bh(&p->tcf_lock); @@ -84,13 +87,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, p->mask = parm->mask; p->flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index a80373878df7..45e9d6bfddb3 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_pedit.c Generic packet editor * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Jamal Hadi Salim (2002-4) */ @@ -23,6 +19,7 @@ #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> #include <uapi/linux/tc_act/tc_pedit.h> +#include <net/pkt_cls.h> static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -69,8 +66,9 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, goto err_out; } - err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, - pedit_key_ex_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PEDIT_KEY_EX_MAX, + ka, pedit_key_ex_policy, + NULL); if (err) goto err_out; @@ -107,14 +105,15 @@ err_out: static int tcf_pedit_key_ex_dump(struct sk_buff *skb, struct tcf_pedit_key_ex *keys_ex, int n) { - struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + struct nlattr *keys_start = nla_nest_start_noflag(skb, + TCA_PEDIT_KEYS_EX); if (!keys_start) goto nla_failure; for (; n > 0; n--) { struct nlattr *key_start; - key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + key_start = nla_nest_start_noflag(skb, TCA_PEDIT_KEY_EX); if (!key_start) goto nla_failure; @@ -138,10 +137,11 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; struct tcf_pedit_key_ex *keys_ex; struct tc_pedit *parm; @@ -155,7 +155,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PEDIT_MAX, nla, + pedit_policy, NULL); if (err < 0) return err; @@ -205,6 +206,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_free; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + goto out_release; + } p = to_pedit(*a); spin_lock_bh(&p->tcf_lock); @@ -214,7 +220,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!keys) { spin_unlock_bh(&p->tcf_lock); ret = -ENOMEM; - goto out_release; + goto put_chain; } kfree(p->tcfp_keys); p->tcfp_keys = keys; @@ -223,16 +229,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, memcpy(p->tcfp_keys, parm->keys, ksize); p->tcfp_flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); kfree(p->tcfp_keys_ex); p->tcfp_keys_ex = keys_ex; spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); out_release: tcf_idr_release(*a, bind); out_free: diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8271a6263824..a065f62fa79c 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_police.c Input police filter * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * J Hadi Salim (action changes) */ @@ -21,42 +17,8 @@ #include <linux/slab.h> #include <net/act_api.h> #include <net/netlink.h> - -struct tcf_police_params { - int tcfp_result; - u32 tcfp_ewma_rate; - s64 tcfp_burst; - u32 tcfp_mtu; - s64 tcfp_mtu_ptoks; - struct psched_ratecfg rate; - bool rate_present; - struct psched_ratecfg peak; - bool peak_present; - struct rcu_head rcu; -}; - -struct tcf_police { - struct tc_action common; - struct tcf_police_params __rcu *params; - - spinlock_t tcfp_lock ____cacheline_aligned_in_smp; - s64 tcfp_toks; - s64 tcfp_ptoks; - s64 tcfp_t_c; -}; - -#define to_police(pc) ((struct tcf_police *)pc) - -/* old policer structure from before tc actions */ -struct tc_police_compat { - u32 index; - int action; - u32 limit; - u32 burst; - u32 mtu; - struct tc_ratespec rate; - struct tc_ratespec peakrate; -}; +#include <net/pkt_cls.h> +#include <net/tc_act/tc_police.h> /* Each policer is serialized by its individual spinlock */ @@ -83,10 +45,12 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; struct nlattr *tb[TCA_POLICE_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; @@ -97,7 +61,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_POLICE_MAX, nla, + police_policy, NULL); if (err < 0) return err; @@ -128,6 +93,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; police = to_police(*a); if (parm->rate.rate) { @@ -213,12 +181,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new->peak_present) police->tcfp_ptoks = new->tcfp_mtu_ptoks; spin_unlock_bh(&police->tcfp_lock); - police->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(police->params, new, lockdep_is_held(&police->tcf_lock)); spin_unlock_bh(&police->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (new) kfree_rcu(new, rcu); @@ -229,6 +199,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: tcf_idr_release(*a, bind); return err; } @@ -305,6 +278,20 @@ static void tcf_police_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static void tcf_police_stats_update(struct tc_action *a, + u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_police *police = to_police(a); + struct tcf_t *tm = &police->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -368,6 +355,7 @@ static struct tc_action_ops act_police_ops = { .kind = "police", .id = TCA_ID_POLICE, .owner = THIS_MODULE, + .stats_update = tcf_police_stats_update, .act = tcf_police_act, .dump = tcf_police_dump, .init = tcf_police_init, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 203e399e5c85..b2faa43c1ac7 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -22,6 +22,7 @@ #include <linux/tc_act/tc_sample.h> #include <net/tc_act/tc_sample.h> #include <net/psample.h> +#include <net/pkt_cls.h> #include <linux/if_arp.h> @@ -37,21 +38,23 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; + struct tcf_chain *goto_ch = NULL; + u32 psample_group_num, rate; struct tc_sample *parm; - u32 psample_group_num; struct tcf_sample *s; bool exists = false; int ret, err; if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL); + ret = nla_parse_nested_deprecated(tb, TCA_SAMPLE_MAX, nla, + sample_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || @@ -79,19 +82,28 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + if (!rate) { + NL_SET_ERR_MSG(extack, "invalid sample rate"); + err = -EINVAL; + goto put_chain; + } psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } s = to_sample(*a); spin_lock_bh(&s->tcf_lock); - s->tcf_action = parm->action; - s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + s->rate = rate; s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); @@ -100,10 +112,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } spin_unlock_bh(&s->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_sample_cleanup(struct tc_action *a) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index d54cb608dbaf..f28ddbabff76 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_simple.c Simple example of an action * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Jamal Hadi Salim (2005-8) - * */ #include <linux/module.h> @@ -18,6 +13,7 @@ #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> @@ -60,14 +56,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata) return 0; } -static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata, - struct tc_defact *p) +static int reset_policy(struct tc_action *a, const struct nlattr *defdata, + struct tc_defact *p, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { + struct tcf_chain *goto_ch = NULL; + struct tcf_defact *d; + int err; + + err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack); + if (err < 0) + return err; + d = to_defact(a); spin_lock_bh(&d->tcf_lock); - d->tcf_action = p->action; + goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + return 0; } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { @@ -78,10 +86,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; struct tcf_defact *d; bool exists = false; @@ -90,7 +99,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla, simple_policy, + NULL); if (err < 0) return err; @@ -122,27 +132,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } d = to_defact(*a); - ret = alloc_defdata(d, tb[TCA_DEF_DATA]); - if (ret < 0) { - tcf_idr_release(*a, bind); - return ret; - } - d->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + + err = alloc_defdata(d, tb[TCA_DEF_DATA]); + if (err < 0) + goto put_chain; + + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - d = to_defact(*a); - if (!ovr) { - tcf_idr_release(*a, bind); - return -EEXIST; + err = -EEXIST; + goto release_idr; } - reset_policy(d, tb[TCA_DEF_DATA], parm); + err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack); + if (err) + goto release_idr; } if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 65879500b688..215a06705cef 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ @@ -26,6 +15,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/dsfield.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> @@ -96,11 +86,13 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; @@ -111,7 +103,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla, + skbedit_policy, NULL); if (err < 0) return err; @@ -186,11 +179,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return -EEXIST; } } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->flags = flags; @@ -208,16 +204,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->mask = *mask; spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(d->params, params_new, lockdep_is_held(&d->tcf_lock)); spin_unlock_bh(&d->tcf_lock); if (params_new) kfree_rcu(params_new, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 7bac1d78e7a3..4f07706eff07 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_skbmod.c skb data modifier * * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/module.h> @@ -16,6 +12,7 @@ #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> @@ -82,11 +79,13 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; + struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; struct tcf_skbmod *d; bool exists = false; @@ -99,7 +98,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SKBMOD_MAX, nla, + skbmod_policy, NULL); if (err < 0) return err; @@ -153,21 +153,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; d = to_skbmod(*a); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->flags = lflags; - d->tcf_action = parm->action; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) @@ -183,10 +186,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (p_old) kfree_rcu(p_old, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_skbmod_cleanup(struct tc_action *a) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 7c6591b991d5..10dffda1d5cc 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016, Amir Vadai <amir@vadai.me> * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/module.h> @@ -17,6 +13,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> @@ -75,8 +72,9 @@ tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, int err, data_len, opt_len; u8 *data; - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, - nla, geneve_opt_policy, extack); + err = nla_parse_nested_deprecated(tb, + TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); if (err < 0) return err; @@ -124,8 +122,8 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, int err, rem, opt_len, len = nla_len(nla), opts_len = 0; const struct nlattr *attr, *head = nla_data(nla); - err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; @@ -210,12 +208,14 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; + struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; @@ -232,8 +232,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, - extack); + err = nla_parse_nested_deprecated(tb, TCA_TUNNEL_KEY_MAX, nla, + tunnel_key_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes"); return err; @@ -359,6 +359,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, goto release_tun_meta; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + exists = true; + goto release_tun_meta; + } t = to_tunnel_key(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); @@ -366,23 +372,29 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; - goto release_tun_meta; + goto put_chain; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; spin_lock_bh(&t->tcf_lock); - t->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); tunnel_key_release_params(params_new); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + release_tun_meta: if (metadata) dst_release(&metadata->dst); @@ -411,7 +423,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, u8 *src = (u8 *)(info + 1); struct nlattr *start; - start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); if (!start) return -EMSGSIZE; @@ -445,7 +457,7 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (!info->options_len) return 0; - start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS); + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS); if (!start) return -EMSGSIZE; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ac0061599225..9269d350fb8a 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/module.h> @@ -15,6 +11,7 @@ #include <linux/if_vlan.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> @@ -105,10 +102,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_vlan_params *p; struct tc_vlan *parm; struct tcf_vlan *v; @@ -122,7 +120,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_VLAN_MAX, nla, vlan_policy, + NULL); if (err < 0) return err; @@ -200,12 +199,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + v = to_vlan(*a); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->tcfv_action = action; @@ -214,16 +217,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, p->tcfv_push_proto = push_proto; spin_lock_bh(&v->tcf_lock); - v->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); spin_unlock_bh(&v->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_vlan_cleanup(struct tc_action *a) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc10525e90e7..ad36bbcc583e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1,17 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_api.c Packet classifier API. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: * * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support - * */ #include <linux/module.h> @@ -37,6 +32,8 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_police.h> +#include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -367,7 +364,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) struct tcf_block *block = chain->block; mutex_destroy(&chain->filter_chain_lock); - kfree(chain); + kfree_rcu(chain, rcu); if (free_block) tcf_block_destroy(block); } @@ -2006,7 +2003,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2217,7 +2215,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2366,7 +2365,8 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; bool rtnl_held = false; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2558,8 +2558,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, + NULL, cb->extack); if (err) return err; @@ -2806,7 +2806,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2937,8 +2938,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err) return err; @@ -3111,7 +3112,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { - nest = nla_nest_start(skb, exts->action); + nest = nla_nest_start_noflag(skb, exts->action); if (nest == NULL) goto nla_put_failure; @@ -3120,7 +3121,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) nla_nest_end(skb, nest); } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); - nest = nla_nest_start(skb, exts->police); + nest = nla_nest_start_noflag(skb, exts->police); if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) @@ -3229,7 +3230,6 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->tunnel = tcf_tunnel_info(act); } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; - entry->tunnel = tcf_tunnel_info(act); } else if (is_tcf_pedit(act)) { for (k = 0; k < tcf_pedit_nkeys(act); k++) { switch (tcf_pedit_cmd(act, k)) { @@ -3254,6 +3254,18 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_skbedit_mark(act)) { entry->id = FLOW_ACTION_MARK; entry->mark = tcf_skbedit_mark(act); + } else if (is_tcf_sample(act)) { + entry->id = FLOW_ACTION_SAMPLE; + entry->sample.psample_group = + tcf_sample_psample_group(act); + entry->sample.trunc_size = tcf_sample_trunc_size(act); + entry->sample.truncate = tcf_sample_truncate(act); + entry->sample.rate = tcf_sample_rate(act); + } else if (is_tcf_police(act)) { + entry->id = FLOW_ACTION_POLICE; + entry->police.burst = tcf_police_tcfp_burst(act); + entry->police.rate_bytes_ps = + tcf_police_rate_bytes_ps(act); } else { goto err_out; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 687b0af67878..4aafbe3d435c 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_basic.c Basic Packet Classifier. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> */ @@ -185,8 +181,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], - basic_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], + basic_policy, NULL); if (err < 0) return err; @@ -288,7 +284,7 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b4ac58039cb1..27365ed3fe0b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -157,8 +157,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, skip_sw = prog && tc_skip_sw(prog->gen_flags); obj = prog ?: oldprog; - tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, - extack); + tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &obj->exts; cls_bpf.prog = prog ? prog->filter : NULL; @@ -468,8 +467,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], + bpf_policy, NULL); if (ret < 0) return ret; @@ -591,7 +590,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, cls_bpf_offload_update_stats(tp, prog); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c1567854f95..fb881144fa01 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_cgroup.c Control Group Classifier * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> */ @@ -32,6 +28,8 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); + if (unlikely(!head)) + return -1; if (!classid) return -1; if (!tcf_em_tree_match(skb, &head->ematches, NULL)) @@ -104,8 +102,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; new->handle = handle; new->tp = tp; - err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], - cgroup_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX, + tca[TCA_OPTIONS], cgroup_policy, + NULL); if (err < 0) goto errout; @@ -176,7 +175,7 @@ static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = head->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index eece1ee26930..80ae7b9fa90a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_flow.c Generic flow classifier * * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -408,7 +404,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy, + NULL); if (err < 0) return err; @@ -629,7 +626,7 @@ static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c04247b403ed..c388372df0e2 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_flower.c Flower classifier * * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/kernel.h> @@ -14,6 +10,7 @@ #include <linux/module.h> #include <linux/rhashtable.h> #include <linux/workqueue.h> +#include <linux/refcount.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -75,6 +72,7 @@ struct fl_flow_mask { struct list_head filters; struct rcu_work rwork; struct list_head list; + refcount_t refcnt; }; struct fl_flow_tmplt { @@ -86,7 +84,9 @@ struct fl_flow_tmplt { struct cls_fl_head { struct rhashtable ht; + spinlock_t masks_lock; /* Protect masks list */ struct list_head masks; + struct list_head hw_filters; struct rcu_work rwork; struct idr handle_idr; }; @@ -99,11 +99,18 @@ struct cls_fl_filter { struct tcf_result res; struct fl_flow_key key; struct list_head list; + struct list_head hw_list; u32 handle; u32 flags; u32 in_hw_count; struct rcu_work rwork; struct net_device *hw_dev; + /* Flower classifier is unlocked, which means that its reference counter + * can be changed concurrently without any kind of external + * synchronization. Use atomic reference counter to be concurrency-safe. + */ + refcount_t refcnt; + bool deleted; }; static const struct rhashtable_params mask_ht_params = { @@ -304,7 +311,9 @@ static int fl_init(struct tcf_proto *tp) if (!head) return -ENOBUFS; + spin_lock_init(&head->masks_lock); INIT_LIST_HEAD_RCU(&head->masks); + INIT_LIST_HEAD(&head->hw_filters); rcu_assign_pointer(tp->root, head); idr_init(&head->handle_idr); @@ -313,6 +322,7 @@ static int fl_init(struct tcf_proto *tp) static void fl_mask_free(struct fl_flow_mask *mask) { + WARN_ON(!list_empty(&mask->filters)); rhashtable_destroy(&mask->ht); kfree(mask); } @@ -325,22 +335,32 @@ static void fl_mask_free_work(struct work_struct *work) fl_mask_free(mask); } -static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, - bool async) +static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask) { - if (!list_empty(&mask->filters)) + if (!refcount_dec_and_test(&mask->refcnt)) return false; rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); + + spin_lock(&head->masks_lock); list_del_rcu(&mask->list); - if (async) - tcf_queue_work(&mask->rwork, fl_mask_free_work); - else - fl_mask_free(mask); + spin_unlock(&head->masks_lock); + + tcf_queue_work(&mask->rwork, fl_mask_free_work); return true; } +static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) +{ + /* Flower classifier only changes root pointer during init and destroy. + * Users must obtain reference to tcf_proto instance before calling its + * API, so tp->root pointer is protected from concurrent call to + * fl_destroy() by reference counting. + */ + return rcu_dereference_raw(tp->root); +} + static void __fl_destroy_filter(struct cls_fl_filter *f) { tcf_exts_destroy(&f->exts); @@ -353,37 +373,50 @@ static void fl_destroy_filter_work(struct work_struct *work) struct cls_fl_filter *f = container_of(to_rcu_work(work), struct cls_fl_filter, rwork); - rtnl_lock(); __fl_destroy_filter(f); - rtnl_unlock(); } static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; + if (!rtnl_held) + rtnl_lock(); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + spin_lock(&tp->lock); + list_del_init(&f->hw_list); tcf_block_offload_dec(block, &f->flags); + spin_unlock(&tp->lock); + + if (!rtnl_held) + rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, - struct cls_fl_filter *f, + struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { + struct cls_fl_head *head = fl_head_dereference(tp); struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); - int err; + int err = 0; + + if (!rtnl_held) + rtnl_lock(); cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) - return -ENOMEM; + if (!cls_flower.rule) { + err = -ENOMEM; + goto errout; + } tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; @@ -396,35 +429,51 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); - if (skip_sw) { + if (skip_sw) NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + else + err = 0; + goto errout; } err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); kfree(cls_flower.rule); if (err < 0) { - fl_hw_destroy_filter(tp, f, NULL); - return err; + fl_hw_destroy_filter(tp, f, true, NULL); + goto errout; } else if (err > 0) { f->in_hw_count = err; + err = 0; + spin_lock(&tp->lock); tcf_block_offload_inc(block, &f->flags); + spin_unlock(&tp->lock); } - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) - return -EINVAL; + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { + err = -EINVAL; + goto errout; + } - return 0; + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +errout: + if (!rtnl_held) + rtnl_unlock(); + + return err; } -static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, + bool rtnl_held) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; + if (!rtnl_held) + rtnl_lock(); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; @@ -435,27 +484,81 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); + + if (!rtnl_held) + rtnl_unlock(); } -static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, - struct netlink_ext_ack *extack) +static void __fl_put(struct cls_fl_filter *f) { - struct cls_fl_head *head = rtnl_dereference(tp->root); - bool async = tcf_exts_get_net(&f->exts); - bool last; + if (!refcount_dec_and_test(&f->refcnt)) + return; + + if (tcf_exts_get_net(&f->exts)) + tcf_queue_work(&f->rwork, fl_destroy_filter_work); + else + __fl_destroy_filter(f); +} + +static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle) +{ + struct cls_fl_filter *f; + rcu_read_lock(); + f = idr_find(&head->handle_idr, handle); + if (f && !refcount_inc_not_zero(&f->refcnt)) + f = NULL; + rcu_read_unlock(); + + return f; +} + +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp, + unsigned long *handle) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + struct cls_fl_filter *f; + + rcu_read_lock(); + while ((f = idr_get_next_ul(&head->handle_idr, handle))) { + /* don't return filters that are being deleted */ + if (refcount_inc_not_zero(&f->refcnt)) + break; + ++(*handle); + } + rcu_read_unlock(); + + return f; +} + +static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, + bool *last, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + + *last = false; + + spin_lock(&tp->lock); + if (f->deleted) { + spin_unlock(&tp->lock); + return -ENOENT; + } + + f->deleted = true; + rhashtable_remove_fast(&f->mask->ht, &f->ht_node, + f->mask->filter_ht_params); idr_remove(&head->handle_idr, f->handle); list_del_rcu(&f->list); - last = fl_mask_put(head, f->mask, async); + spin_unlock(&tp->lock); + + *last = fl_mask_put(head, f->mask); if (!tc_skip_hw(f->flags)) - fl_hw_destroy_filter(tp, f, extack); + fl_hw_destroy_filter(tp, f, rtnl_held, extack); tcf_unbind_filter(tp, &f->res); - if (async) - tcf_queue_work(&f->rwork, fl_destroy_filter_work); - else - __fl_destroy_filter(f); + __fl_put(f); - return last; + return 0; } static void fl_destroy_sleepable(struct work_struct *work) @@ -472,13 +575,15 @@ static void fl_destroy_sleepable(struct work_struct *work) static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct fl_flow_mask *mask, *next_mask; struct cls_fl_filter *f, *next; + bool last; list_for_each_entry_safe(mask, next_mask, &head->masks, list) { list_for_each_entry_safe(f, next, &mask->filters, list) { - if (__fl_delete(tp, f, extack)) + __fl_delete(tp, f, &last, rtnl_held, extack); + if (last) break; } } @@ -488,11 +593,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, tcf_queue_work(&head->rwork, fl_destroy_sleepable); } +static void fl_put(struct tcf_proto *tp, void *arg) +{ + struct cls_fl_filter *f = arg; + + __fl_put(f); +} + static void *fl_get(struct tcf_proto *tp, u32 handle) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); - return idr_find(&head->handle_idr, handle); + return __fl_get(head, handle); } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -768,8 +880,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, - nla, geneve_opt_policy, extack); + err = nla_parse_nested_deprecated(tb, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); if (err < 0) return err; @@ -831,18 +944,18 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; int err, option_len, key_depth, msk_depth = 0; - err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS], - TCA_FLOWER_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { - err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], - TCA_FLOWER_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; @@ -1227,12 +1340,18 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, INIT_LIST_HEAD_RCU(&newmask->filters); - err = rhashtable_insert_fast(&head->ht, &newmask->ht_node, - mask_ht_params); + refcount_set(&newmask->refcnt, 1); + err = rhashtable_replace_fast(&head->ht, &mask->ht_node, + &newmask->ht_node, mask_ht_params); if (err) goto errout_destroy; + /* Wait until any potential concurrent users of mask are finished */ + synchronize_rcu(); + + spin_lock(&head->masks_lock); list_add_tail_rcu(&newmask->list, &head->masks); + spin_unlock(&head->masks_lock); return newmask; @@ -1250,41 +1369,77 @@ static int fl_check_assign_mask(struct cls_fl_head *head, struct fl_flow_mask *mask) { struct fl_flow_mask *newmask; + int ret = 0; + + rcu_read_lock(); - fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params); + /* Insert mask as temporary node to prevent concurrent creation of mask + * with same key. Any concurrent lookups with same key will return + * -EAGAIN because mask's refcnt is zero. It is safe to insert + * stack-allocated 'mask' to masks hash table because we call + * synchronize_rcu() before returning from this function (either in case + * of error or after replacing it with heap-allocated mask in + * fl_create_new_mask()). + */ + fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht, + &mask->ht_node, + mask_ht_params); if (!fnew->mask) { - if (fold) - return -EINVAL; + rcu_read_unlock(); + + if (fold) { + ret = -EINVAL; + goto errout_cleanup; + } newmask = fl_create_new_mask(head, mask); - if (IS_ERR(newmask)) - return PTR_ERR(newmask); + if (IS_ERR(newmask)) { + ret = PTR_ERR(newmask); + goto errout_cleanup; + } fnew->mask = newmask; + return 0; + } else if (IS_ERR(fnew->mask)) { + ret = PTR_ERR(fnew->mask); } else if (fold && fold->mask != fnew->mask) { - return -EINVAL; + ret = -EINVAL; + } else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) { + /* Mask was deleted concurrently, try again */ + ret = -EAGAIN; } + rcu_read_unlock(); + return ret; - return 0; +errout_cleanup: + rhashtable_remove_fast(&head->ht, &mask->ht_node, + mask_ht_params); + /* Wait until any potential concurrent users of mask are finished */ + synchronize_rcu(); + return ret; } static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr, - struct fl_flow_tmplt *tmplt, + struct fl_flow_tmplt *tmplt, bool rtnl_held, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held, extack); if (err < 0) return err; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + if (!rtnl_held) + rtnl_lock(); tcf_bind_filter(tp, &f->res, base); + if (!rtnl_held) + rtnl_unlock(); } err = fl_set_key(net, tb, &f->key, &mask->key, extack); @@ -1302,25 +1457,52 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, return 0; } +static int fl_ht_insert_unique(struct cls_fl_filter *fnew, + struct cls_fl_filter *fold, + bool *in_ht) +{ + struct fl_flow_mask *mask = fnew->mask; + int err; + + err = rhashtable_lookup_insert_fast(&mask->ht, + &fnew->ht_node, + mask->filter_ht_params); + if (err) { + *in_ht = false; + /* It is okay if filter with same key exists when + * overwriting. + */ + return fold && err == -EEXIST ? 0 : err; + } + + *in_ht = true; + return 0; +} + static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct fl_flow_mask *mask; struct nlattr **tb; + bool in_ht; int err; - if (!tca[TCA_OPTIONS]) - return -EINVAL; + if (!tca[TCA_OPTIONS]) { + err = -EINVAL; + goto errout_fold; + } mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); - if (!mask) - return -ENOBUFS; + if (!mask) { + err = -ENOBUFS; + goto errout_fold; + } tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) { @@ -1328,8 +1510,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout_mask_alloc; } - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], - fl_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, + tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) goto errout_tb; @@ -1343,6 +1525,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, err = -ENOBUFS; goto errout_tb; } + INIT_LIST_HEAD(&fnew->hw_list); + refcount_set(&fnew->refcnt, 1); err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0); if (err < 0) @@ -1358,7 +1542,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, - tp->chain->tmplt_priv, extack); + tp->chain->tmplt_priv, rtnl_held, extack); if (err) goto errout; @@ -1366,169 +1550,247 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; - if (!handle) { - handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - INT_MAX, GFP_KERNEL); - } else if (!fold) { - /* user specifies a handle and it doesn't exist */ - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - handle, GFP_KERNEL); - } + err = fl_ht_insert_unique(fnew, fold, &in_ht); if (err) goto errout_mask; - fnew->handle = handle; - - if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) { - err = -EEXIST; - goto errout_idr; - } - - err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - if (err) - goto errout_idr; if (!tc_skip_hw(fnew->flags)) { - err = fl_hw_replace_filter(tp, fnew, extack); + err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack); if (err) - goto errout_mask_ht; + goto errout_ht; } if (!tc_in_hw(fnew->flags)) fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + spin_lock(&tp->lock); + + /* tp was deleted concurrently. -EAGAIN will cause caller to lookup + * proto again or create new one, if necessary. + */ + if (tp->deleting) { + err = -EAGAIN; + goto errout_hw; + } + if (fold) { + /* Fold filter was deleted concurrently. Retry lookup. */ + if (fold->deleted) { + err = -EAGAIN; + goto errout_hw; + } + + fnew->handle = handle; + + if (!in_ht) { + struct rhashtable_params params = + fnew->mask->filter_ht_params; + + err = rhashtable_insert_fast(&fnew->mask->ht, + &fnew->ht_node, + params); + if (err) + goto errout_hw; + in_ht = true; + } + + refcount_inc(&fnew->refcnt); rhashtable_remove_fast(&fold->mask->ht, &fold->ht_node, fold->mask->filter_ht_params); - if (!tc_skip_hw(fold->flags)) - fl_hw_destroy_filter(tp, fold, NULL); - } - - *arg = fnew; - - if (fold) { idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); + fold->deleted = true; + + spin_unlock(&tp->lock); + + fl_mask_put(head, fold->mask); + if (!tc_skip_hw(fold->flags)) + fl_hw_destroy_filter(tp, fold, rtnl_held, NULL); tcf_unbind_filter(tp, &fold->res); - tcf_exts_get_net(&fold->exts); - tcf_queue_work(&fold->rwork, fl_destroy_filter_work); + /* Caller holds reference to fold, so refcnt is always > 0 + * after this. + */ + refcount_dec(&fold->refcnt); + __fl_put(fold); } else { + if (handle) { + /* user specifies a handle and it doesn't exist */ + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + handle, GFP_ATOMIC); + + /* Filter with specified handle was concurrently + * inserted after initial check in cls_api. This is not + * necessarily an error if NLM_F_EXCL is not set in + * message flags. Returning EAGAIN will cause cls_api to + * try to update concurrently inserted rule. + */ + if (err == -ENOSPC) + err = -EAGAIN; + } else { + handle = 1; + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + INT_MAX, GFP_ATOMIC); + } + if (err) + goto errout_hw; + + refcount_inc(&fnew->refcnt); + fnew->handle = handle; list_add_tail_rcu(&fnew->list, &fnew->mask->filters); + spin_unlock(&tp->lock); } + *arg = fnew; + kfree(tb); kfree(mask); return 0; -errout_mask_ht: - rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - -errout_idr: - if (!fold) - idr_remove(&head->handle_idr, fnew->handle); - +errout_ht: + spin_lock(&tp->lock); +errout_hw: + fnew->deleted = true; + spin_unlock(&tp->lock); + if (!tc_skip_hw(fnew->flags)) + fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL); + if (in_ht) + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); errout_mask: - fl_mask_put(head, fnew->mask, false); - + fl_mask_put(head, fnew->mask); errout: - tcf_exts_destroy(&fnew->exts); - kfree(fnew); + __fl_put(fnew); errout_tb: kfree(tb); errout_mask_alloc: kfree(mask); +errout_fold: + if (fold) + __fl_put(fold); return err; } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct cls_fl_filter *f = arg; + bool last_on_mask; + int err = 0; - rhashtable_remove_fast(&f->mask->ht, &f->ht_node, - f->mask->filter_ht_params); - __fl_delete(tp, f, extack); + err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack); *last = list_empty(&head->masks); - return 0; + __fl_put(f); + + return err; } static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; arg->count = arg->skip; - while ((f = idr_get_next_ul(&head->handle_idr, - &arg->cookie)) != NULL) { + while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) { if (arg->fn(tp, f, arg) < 0) { + __fl_put(f); arg->stop = 1; break; } - arg->cookie = f->handle + 1; + __fl_put(f); + arg->cookie++; arg->count++; } } +static struct cls_fl_filter * +fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + if (list_empty(&head->hw_filters)) { + spin_unlock(&tp->lock); + return NULL; + } + + if (!f) + f = list_entry(&head->hw_filters, struct cls_fl_filter, + hw_list); + list_for_each_entry_continue(f, &head->hw_filters, hw_list) { + if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) { + spin_unlock(&tp->lock); + return f; + } + } + + spin_unlock(&tp->lock); + return NULL; +} + static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - struct fl_flow_mask *mask; - struct cls_fl_filter *f; + struct cls_fl_filter *f = NULL; int err; - list_for_each_entry(mask, &head->masks, list) { - list_for_each_entry(f, &mask->filters, list) { - if (tc_skip_hw(f->flags)) - continue; - - cls_flower.rule = - flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) - return -ENOMEM; - - tc_cls_common_offload_init(&cls_flower.common, tp, - f->flags, extack); - cls_flower.command = add ? - TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; - cls_flower.cookie = (unsigned long)f; - cls_flower.rule->match.dissector = &mask->dissector; - cls_flower.rule->match.mask = &mask->key; - cls_flower.rule->match.key = &f->mkey; - - err = tc_setup_flow_action(&cls_flower.rule->action, - &f->exts); - if (err) { - kfree(cls_flower.rule); - if (tc_skip_sw(f->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - continue; + /* hw_filters list can only be changed by hw offload functions after + * obtaining rtnl lock. Make sure it is not changed while reoffload is + * iterating it. + */ + ASSERT_RTNL(); + + while ((f = fl_get_next_hw_filter(tp, f, add))) { + cls_flower.rule = + flow_rule_alloc(tcf_exts_num_actions(&f->exts)); + if (!cls_flower.rule) { + __fl_put(f); + return -ENOMEM; + } + + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, + extack); + cls_flower.command = add ? + TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; + cls_flower.cookie = (unsigned long)f; + cls_flower.rule->match.dissector = &f->mask->dissector; + cls_flower.rule->match.mask = &f->mask->key; + cls_flower.rule->match.key = &f->mkey; + + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + if (err) { + kfree(cls_flower.rule); + if (tc_skip_sw(f->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + __fl_put(f); + return err; } + goto next_flow; + } - cls_flower.classid = f->res.classid; + cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); - kfree(cls_flower.rule); + err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); - if (err) { - if (add && tc_skip_sw(f->flags)) - return err; - continue; + if (err) { + if (add && tc_skip_sw(f->flags)) { + __fl_put(f); + return err; } - - tc_cls_offload_cnt_update(block, &f->in_hw_count, - &f->flags, add); + goto next_flow; } + + spin_lock(&tp->lock); + tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, + add); + spin_unlock(&tp->lock); +next_flow: + __fl_put(f); } return 0; @@ -1587,8 +1849,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) return ERR_PTR(-ENOBUFS); - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], - fl_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, + tca[TCA_OPTIONS], fl_policy, NULL); if (err) goto errout_tb; @@ -1786,7 +2048,7 @@ static int fl_dump_key_geneve_opt(struct sk_buff *skb, struct nlattr *nest; int opt_off = 0; - nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); if (!nest) goto nla_put_failure; @@ -1822,7 +2084,7 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, if (!enc_opts->len) return 0; - nest = nla_nest_start(skb, enc_opt_type); + nest = nla_nest_start_noflag(skb, enc_opt_type); if (!nest) goto nla_put_failure; @@ -2061,31 +2323,37 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; + bool skip_hw; if (!f) return skb->len; t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; + spin_lock(&tp->lock); + if (f->res.classid && nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) - goto nla_put_failure; + goto nla_put_failure_locked; key = &f->key; mask = &f->mask->key; + skip_hw = tc_skip_hw(f->flags); if (fl_dump_key(skb, net, key, mask)) - goto nla_put_failure; - - if (!tc_skip_hw(f->flags)) - fl_hw_update_stats(tp, f); + goto nla_put_failure_locked; if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) - goto nla_put_failure; + goto nla_put_failure_locked; + + spin_unlock(&tp->lock); + + if (!skip_hw) + fl_hw_update_stats(tp, f, rtnl_held); if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count)) goto nla_put_failure; @@ -2100,6 +2368,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, return skb->len; +nla_put_failure_locked: + spin_unlock(&tp->lock); nla_put_failure: nla_nest_cancel(skb, nest); return -1; @@ -2111,7 +2381,7 @@ static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv) struct fl_flow_key *key, *mask; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; @@ -2144,6 +2414,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .init = fl_init, .destroy = fl_destroy, .get = fl_get, + .put = fl_put, .change = fl_change, .delete = fl_delete, .walk = fl_walk, @@ -2154,6 +2425,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .tmplt_destroy = fl_tmplt_destroy, .tmplt_dump = fl_tmplt_dump, .owner = THIS_MODULE, + .flags = TCF_PROTO_OPS_DOIT_UNLOCKED, }; static int __init cls_fl_init(void) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ad036b00427d..4dab833f66cb 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: @@ -15,7 +11,6 @@ * * JHS: We should remove the CONFIG_NET_CLS_IND from here * eventually when the meta match extension is made available - * */ #include <linux/module.h> @@ -263,7 +258,8 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!opt) return handle ? -EINVAL : 0; /* Succeed if it is old method. */ - err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, + NULL); if (err < 0) return err; @@ -402,7 +398,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 459921bd3d87..38c0a9f0f296 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_matchll.c Match-all classifier * * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/kernel.h> @@ -32,6 +28,9 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_mall_head *head = rcu_dereference_bh(tp->root); + if (unlikely(!head)) + return -1; + if (tc_skip_sw(head->flags)) return -1; @@ -89,12 +88,29 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(head->flags); int err; + cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts)); + if (!cls_mall.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_REPLACE; - cls_mall.exts = &head->exts; cls_mall.cookie = cookie; + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + if (err) { + kfree(cls_mall.rule); + mall_destroy_hw_filter(tp, head, cookie, NULL); + if (skip_sw) + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + else + err = 0; + + return err; + } + err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + kfree(cls_mall.rule); + if (err < 0) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; @@ -130,6 +146,11 @@ static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, static void *mall_get(struct tcf_proto *tp, u32 handle) { + struct cls_mall_head *head = rtnl_dereference(tp->root); + + if (head && head->handle == handle) + return head; + return NULL; } @@ -176,8 +197,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (head) return -EEXIST; - err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], - mall_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_MATCHALL_MAX, + tca[TCA_OPTIONS], mall_policy, NULL); if (err < 0) return err; @@ -267,13 +288,28 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, if (tc_skip_hw(head->flags)) return 0; + cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts)); + if (!cls_mall.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = add ? TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; - cls_mall.exts = &head->exts; cls_mall.cookie = (unsigned long)head; + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + if (err) { + kfree(cls_mall.rule); + if (add && tc_skip_sw(head->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + return 0; + } + err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + kfree(cls_mall.rule); + if (err) { if (add && tc_skip_sw(head->flags)) return err; @@ -285,6 +321,23 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; } +static void mall_stats_hw_filter(struct tcf_proto *tp, + struct cls_mall_head *head, + unsigned long cookie) +{ + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, NULL); + cls_mall.command = TC_CLSMATCHALL_STATS; + cls_mall.cookie = cookie; + + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); + + tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, + cls_mall.stats.pkts, cls_mall.stats.lastused); +} + static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { @@ -296,9 +349,12 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!head) return skb->len; + if (!tc_skip_hw(head->flags)) + mall_stats_hw_filter(tp, head, (unsigned long)head); + t->tcm_handle = head->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index f006af23b64a..2d9e0b4484ea 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_route.c ROUTE4 classifier. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ @@ -484,7 +480,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + route4_policy, NULL); if (err < 0) return err; @@ -607,7 +604,7 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index cbb5e0d600f3..de1c1d4da597 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 0719a21d9c41..2f3c03b25d5d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -1,11 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ @@ -497,7 +493,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy, + NULL); if (err < 0) return err; @@ -706,7 +703,7 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index dd08aea2aee5..64078846000e 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 24e0a62a65cc..e573e5a5c794 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * net/sched/cls_tcindex.c Packet classifier for skb->tc_index * @@ -510,7 +511,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, if (!opt) return 0; - err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt, + tcindex_policy, NULL); if (err < 0) return err; @@ -601,7 +603,7 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, tp, fh, skb, t, p, r); pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 48e76a3acf8a..c7727de5e073 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * The filters are packed to hash tables of key nodes @@ -847,7 +843,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) { kfree(new); @@ -884,7 +880,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } } - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy, + extack); if (err < 0) return err; @@ -1294,7 +1291,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = n->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index ddd883ca55b2..b9a94fdf9397 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * em_canid.c Ematch rule to match CAN frames according to their CAN IDs * - * This program is free software; you can distribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Idea: Oliver Hartkopp <oliver.hartkopp@volkswagen.de> * Copyright: (c) 2011 Czech Technical University in Prague * (c) 2011 Volkswagen Group Research diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index 1c8360a2752a..a4d09b1fb66a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_cmp.c Simple packet data comparison ematch * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> */ diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index a5f34e930eff..243fd22f2248 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_ipt.c IPtables matches Ematch * * (c) 2018 Eyal Birger <eyal.birger@gmail.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/gfp.h> @@ -120,8 +116,8 @@ static int em_ipt_change(struct net *net, void *data, int data_len, struct xt_match *match; int mdata_len, ret; - ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy, - NULL); + ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len, + em_ipt_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d6e97115500b..82bd14e7ac93 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_meta.c Metadata ematch * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> * * ========================================================================== @@ -912,7 +908,8 @@ static int em_meta_change(struct net *net, void *data, int len, struct tcf_meta_hdr *hdr; struct meta_match *meta = NULL; - err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL); + err = nla_parse_deprecated(tb, TCA_EM_META_MAX, data, len, + meta_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 07c10bac06a0..88c7ce42df7e 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_nbyte.c N-Byte ematch * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> */ diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 73e2ed576ceb..6f3c1fb2fb44 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_text.c Textsearch ematch * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> */ diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 797bdb88c010..71b070da0437 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_u32.c U32 Ematch * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 1331a4c2d8ff..8f2ad706784d 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/ematch.c Extended Match API * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> * * ========================================================================== @@ -314,7 +310,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, if (!nla) return 0; - err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_EMATCH_TREE_MAX, nla, + em_policy, NULL); if (err < 0) goto errout; @@ -440,14 +437,14 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) struct nlattr *top_start; struct nlattr *list_start; - top_start = nla_nest_start(skb, tlv); + top_start = nla_nest_start_noflag(skb, tlv); if (top_start == NULL) goto nla_put_failure; if (nla_put(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr)) goto nla_put_failure; - list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST); + list_start = nla_nest_start_noflag(skb, TCA_EMATCH_TREE_LIST); if (list_start == NULL) goto nla_put_failure; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fb8f138b9776..04faee7ccbce 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_api.c Packet scheduler API. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Fixes: @@ -479,7 +475,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, u16 *tab = NULL; int err; - err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, + extack); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) { @@ -542,7 +539,7 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_STAB); + nest = nla_nest_start_noflag(skb, TCA_STAB); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) @@ -998,6 +995,19 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, qdisc_put(old); } +static void qdisc_clear_nolock(struct Qdisc *sch) +{ + sch->flags &= ~TCQ_F_NOLOCK; + if (!(sch->flags & TCQ_F_CPUSTATS)) + return; + + free_percpu(sch->cpu_bstats); + free_percpu(sch->cpu_qstats); + sch->cpu_bstats = NULL; + sch->cpu_qstats = NULL; + sch->flags &= ~TCQ_F_CPUSTATS; +} + /* Graft qdisc "new" to class "classid" of qdisc "parent" or * to device "dev". * @@ -1076,7 +1086,7 @@ skip: /* Only support running class lockless if parent is lockless */ if (new && (new->flags & TCQ_F_NOLOCK) && parent && !(parent->flags & TCQ_F_NOLOCK)) - new->flags &= ~TCQ_F_NOLOCK; + qdisc_clear_nolock(new); if (!cops || !cops->graft) return -EOPNOTSUPP; @@ -1410,8 +1420,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -1495,8 +1505,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -1730,8 +1740,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, - rtm_tca_policy, cb->extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err < 0) return err; @@ -1959,8 +1969,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d714d3747bcb..f4f9b8cdbffb 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ @@ -223,7 +224,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, if (opt == NULL) return -EINVAL; - error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL); + error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, + NULL); if (error < 0) return error; @@ -609,7 +611,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_handle = flow->common.classid; tcm->tcm_info = flow->q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 9c4c2bb547d7..a7f7667ae984 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_blackhole.c Black hole queue * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Thomas Graf <tgraf@suug.ch> * * Note: Quantum tunneling is not supported. diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 1d2a12132abc..53a80bc6b13a 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -211,6 +211,9 @@ struct cake_sched_data { u8 ack_filter; u8 atm_mode; + u32 fwmark_mask; + u16 fwmark_shft; + /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ u16 rate_shft; ktime_t time_next_packet; @@ -258,8 +261,7 @@ enum { CAKE_FLAG_AUTORATE_INGRESS = BIT(1), CAKE_FLAG_INGRESS = BIT(2), CAKE_FLAG_WASH = BIT(3), - CAKE_FLAG_SPLIT_GSO = BIT(4), - CAKE_FLAG_FWMARK = BIT(5) + CAKE_FLAG_SPLIT_GSO = BIT(4) }; /* COBALT operates the Codel and BLUE algorithms in parallel, in order to @@ -1515,16 +1517,27 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { + int wlen = skb_network_offset(skb); u8 dscp; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); return dscp; case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; if (wash && dscp) ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); @@ -1543,7 +1556,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, struct sk_buff *skb) { struct cake_sched_data *q = qdisc_priv(sch); - u32 tin; + u32 tin, mark; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding @@ -1551,14 +1564,13 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, */ dscp = cake_handle_diffserv(skb, q->rate_flags & CAKE_FLAG_WASH); + mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; - else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */ - skb->mark && - skb->mark <= q->tin_cnt) - tin = q->tin_order[skb->mark - 1]; + else if (mark && mark <= q->tin_cnt) + tin = q->tin_order[mark - 1]; else if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && @@ -2172,6 +2184,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { [TCA_CAKE_MPU] = { .type = NLA_U32 }, [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, + [TCA_CAKE_FWMARK] = { .type = NLA_U32 }, }; static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, @@ -2518,7 +2531,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, + extack); if (err < 0) return err; @@ -2619,10 +2633,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_FWMARK]) { - if (!!nla_get_u32(tb[TCA_CAKE_FWMARK])) - q->rate_flags |= CAKE_FLAG_FWMARK; - else - q->rate_flags &= ~CAKE_FLAG_FWMARK; + q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); + q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; } if (q->tins) { @@ -2724,7 +2736,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; @@ -2784,8 +2796,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_FWMARK, - !!(q->rate_flags & CAKE_FLAG_FWMARK))) + if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) goto nla_put_failure; return nla_nest_end(skb, opts); @@ -2796,7 +2807,7 @@ nla_put_failure: static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { - struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP); + struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *tstats, *ts; int i; @@ -2826,7 +2837,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) #undef PUT_STAT_U32 #undef PUT_STAT_U64 - tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS); + tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS); if (!tstats) goto nla_put_failure; @@ -2843,7 +2854,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) for (i = 0; i < q->tin_cnt; i++) { struct cake_tin_data *b = &q->tins[q->tin_order[i]]; - ts = nla_nest_start(d->skb, i + 1); + ts = nla_nest_start_noflag(d->skb, i + 1); if (!ts) goto nla_put_failure; @@ -2963,7 +2974,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (flow) { ktime_t now = ktime_get(); - stats = nla_nest_start(d->skb, TCA_STATS_APP); + stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) return -1; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4dc05409e3fb..06c7a2da21bc 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_cbq.c Class-Based Queueing discipline. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * */ #include <linux/module.h> @@ -1149,7 +1144,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, + extack); if (err < 0) return err; @@ -1305,7 +1301,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) struct cbq_sched_data *q = qdisc_priv(sch); struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (cbq_dump_attr(skb, &q->link) < 0) @@ -1340,7 +1336,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (cbq_dump_attr(skb, cl) < 0) @@ -1358,9 +1354,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; + __u32 qlen; cl->xstats.avgidle = cl->avgidle; cl->xstats.undertime = 0; + qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; @@ -1368,7 +1366,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); @@ -1471,7 +1469,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t return -EINVAL; } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, + extack); if (err < 0) return err; @@ -1665,17 +1664,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); - qlen = cl->q->q.qlen; - backlog = cl->q->qstats.backlog; - qdisc_reset(cl->q); - qdisc_tree_reduce_backlog(cl->q, qlen, backlog); + qdisc_purge_queue(cl->q); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index c6a502933fe7..e16a3d37d2bc 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_cbs.c Credit Based Shaper * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> - * */ /* Credit Based Shaper (CBS) @@ -61,16 +56,20 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <net/netevent.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> +static LIST_HEAD(cbs_list); +static DEFINE_SPINLOCK(cbs_list_lock); + #define BYTES_PER_KBIT (1000LL / 8) struct cbs_sched_data { bool offload; int queue; - s64 port_rate; /* in bytes/s */ + atomic64_t port_rate; /* in bytes/s */ s64 last; /* timestamp in ns */ s64 credits; /* in bytes */ s32 locredit; /* in bytes */ @@ -82,6 +81,7 @@ struct cbs_sched_data { struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; + struct list_head cbs_list; }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -181,6 +181,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) s64 credits; int len; + if (atomic64_read(&q->port_rate) == -1) { + WARN_ONCE(1, "cbs: dequeue() called with unknown port rate."); + return NULL; + } + if (q->credits < 0) { credits = timediff_to_credits(now - q->last, q->idleslope); @@ -207,7 +212,8 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) /* As sendslope is a negative number, this will decrease the * amount of q->credits. */ - credits = credits_from_len(len, q->sendslope, q->port_rate); + credits = credits_from_len(len, q->sendslope, + atomic64_read(&q->port_rate)); credits += q->credits; q->credits = max_t(s64, credits, q->locredit); @@ -294,6 +300,50 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, return 0; } +static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) +{ + struct ethtool_link_ksettings ecmd; + int port_rate = -1; + + if (!__ethtool_get_link_ksettings(dev, &ecmd) && + ecmd.base.speed != SPEED_UNKNOWN) + port_rate = ecmd.base.speed * 1000 * BYTES_PER_KBIT; + + atomic64_set(&q->port_rate, port_rate); + netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n", + dev->name, (long long)atomic64_read(&q->port_rate), + ecmd.base.speed); +} + +static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct cbs_sched_data *q; + struct net_device *qdev; + bool found = false; + + ASSERT_RTNL(); + + if (event != NETDEV_UP && event != NETDEV_CHANGE) + return NOTIFY_DONE; + + spin_lock(&cbs_list_lock); + list_for_each_entry(q, &cbs_list, cbs_list) { + qdev = qdisc_dev(q->qdisc); + if (qdev == dev) { + found = true; + break; + } + } + spin_unlock(&cbs_list_lock); + + if (found) + cbs_set_port_rate(dev, q); + + return NOTIFY_DONE; +} + static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -303,7 +353,8 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct tc_cbs_qopt *qopt; int err; - err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy, + extack); if (err < 0) return err; @@ -315,16 +366,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt, qopt = nla_data(tb[TCA_CBS_PARMS]); if (!qopt->offload) { - struct ethtool_link_ksettings ecmd; - s64 link_speed; - - if (!__ethtool_get_link_ksettings(dev, &ecmd)) - link_speed = ecmd.base.speed; - else - link_speed = SPEED_1000; - - q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; - + cbs_set_port_rate(dev, q); cbs_disable_offload(dev, q); } else { err = cbs_enable_offload(dev, q, qopt, extack); @@ -347,6 +389,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt, { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int err; if (!opt) { NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); @@ -367,7 +410,17 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt, qdisc_watchdog_init(&q->watchdog, sch); - return cbs_change(sch, opt, extack); + err = cbs_change(sch, opt, extack); + if (err) + return err; + + if (!q->offload) { + spin_lock(&cbs_list_lock); + list_add(&q->cbs_list, &cbs_list); + spin_unlock(&cbs_list_lock); + } + + return 0; } static void cbs_destroy(struct Qdisc *sch) @@ -375,8 +428,11 @@ static void cbs_destroy(struct Qdisc *sch) struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - qdisc_watchdog_cancel(&q->watchdog); + spin_lock(&cbs_list_lock); + list_del(&q->cbs_list); + spin_unlock(&cbs_list_lock); + qdisc_watchdog_cancel(&q->watchdog); cbs_disable_offload(dev, q); if (q->qdisc) @@ -389,7 +445,7 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_cbs_qopt opt = { }; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; @@ -487,14 +543,24 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct notifier_block cbs_device_notifier = { + .notifier_call = cbs_dev_notifier, +}; + static int __init cbs_module_init(void) { + int err = register_netdevice_notifier(&cbs_device_notifier); + + if (err) + return err; + return register_qdisc(&cbs_qdisc_ops); } static void __exit cbs_module_exit(void) { unregister_qdisc(&cbs_qdisc_ops); + unregister_netdevice_notifier(&cbs_device_notifier); } module_init(cbs_module_init) module_exit(cbs_module_exit) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index eafc0d17d174..370dbcf49e8b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -358,7 +358,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt, + choke_policy, NULL); if (err < 0) return err; @@ -452,7 +453,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) .Scell_log = q->parms.Scell_log, }; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 17cd81f84b5d..25ef172c23df 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -141,7 +141,8 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, + codel_policy, NULL); if (err < 0) return err; @@ -217,7 +218,7 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 09b800991065..ffcd6654c39d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct drr_class, common); } -static void drr_purge_queue(struct drr_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, }; @@ -79,7 +70,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_DRR_MAX, opt, drr_policy, + extack); if (err < 0) return err; @@ -167,7 +159,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - drr_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); @@ -253,7 +245,7 @@ static int drr_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_DRR_QUANTUM, cl->quantum)) @@ -269,7 +261,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct drr_class *cl = (struct drr_class *)arg; - __u32 qlen = cl->qdisc->q.qlen; + __u32 qlen = qdisc_qlen_sum(cl->qdisc); + struct Qdisc *cl_q = cl->qdisc; struct tc_drr_stats xstats; memset(&xstats, 0, sizeof(xstats)); @@ -279,7 +272,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) + gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 42471464ded3..bad1cbe59a56 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* net/sched/sch_dsmark.c - Differentiated Services field marker */ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ @@ -132,7 +133,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, + dsmark_policy, NULL); if (err < 0) goto errout; @@ -353,7 +355,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, + dsmark_policy, NULL); if (err < 0) goto errout; @@ -432,7 +435,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); tcm->tcm_info = p->q->handle; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || @@ -451,7 +454,7 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opts = NULL; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 1150f22983df..db0c2ba1d156 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -351,7 +351,8 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, + extack); if (err < 0) return err; @@ -460,7 +461,7 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_etf_qopt opt = { }; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 3809c9bf8896..37c8aa75d70c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_fifo.c The simplest FIFO queue. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 1a662f2bb7bb..98dd87ce1510 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Meant to be mostly used for locally generated traffic : * Fast classification depends on skb->sk being set before reaching us. * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash. @@ -54,10 +50,23 @@ #include <net/tcp_states.h> #include <net/tcp.h> +struct fq_skb_cb { + u64 time_to_send; +}; + +static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb)); + return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data; +} + /* - * Per flow structure, dynamically allocated + * Per flow structure, dynamically allocated. + * If packets have monotically increasing time_to_send, they are placed in O(1) + * in linear list (head,tail), otherwise are placed in a rbtree (t_root). */ struct fq_flow { + struct rb_root t_root; struct sk_buff *head; /* list of skbs for this flow : first skb */ union { struct sk_buff *tail; /* last skb in the list */ @@ -257,6 +266,17 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) */ sk = (struct sock *)((hash << 1) | 1UL); skb_orphan(skb); + } else if (sk->sk_state == TCP_CLOSE) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* + * Sockets in TCP_CLOSE are non connected. + * Typical use case is UDP sockets, they can send packets + * with sendto() to many different destinations. + * We probably could use a generic bit advertising + * non connected sockets, instead of sk_state == TCP_CLOSE, + * if we care enough. + */ + sk = (struct sock *)((hash << 1) | 1UL); } root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; @@ -277,7 +297,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) * It not, we need to refill credit with * initial quantum */ - if (unlikely(skb->sk && + if (unlikely(skb->sk == sk && f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; @@ -298,9 +318,11 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) q->stat_allocation_errors++; return &q->internal; } + /* f->t_root is already zeroed after kmem_cache_zalloc() */ + fq_flow_set_detached(f); f->sk = sk; - if (skb->sk) + if (skb->sk == sk) f->socket_hash = sk->sk_hash; f->credit = q->initial_quantum; @@ -312,14 +334,40 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) return f; } +static struct sk_buff *fq_peek(struct fq_flow *flow) +{ + struct sk_buff *skb = skb_rb_first(&flow->t_root); + struct sk_buff *head = flow->head; + + if (!skb) + return head; + + if (!head) + return skb; + + if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send) + return skb; + return head; +} + +static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, + struct sk_buff *skb) +{ + if (skb == flow->head) { + flow->head = skb->next; + } else { + rb_erase(&skb->rbnode, &flow->t_root); + skb->dev = qdisc_dev(sch); + } +} /* remove one skb from head of flow queue */ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) { - struct sk_buff *skb = flow->head; + struct sk_buff *skb = fq_peek(flow); if (skb) { - flow->head = skb->next; + fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); flow->qlen--; qdisc_qstats_backlog_dec(sch, skb); @@ -330,15 +378,36 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) { - struct sk_buff *head = flow->head; + struct rb_node **p, *parent; + struct sk_buff *head, *aux; - skb->next = NULL; - if (!head) - flow->head = skb; - else - flow->tail->next = skb; + fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns(); + + head = flow->head; + if (!head || + fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) { + if (!head) + flow->head = skb; + else + flow->tail->next = skb; + flow->tail = skb; + skb->next = NULL; + return; + } + + p = &flow->t_root.rb_node; + parent = NULL; - flow->tail = skb; + while (*p) { + parent = *p; + aux = rb_to_skb(parent); + if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &flow->t_root); } static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -450,9 +519,9 @@ begin: goto begin; } - skb = f->head; + skb = fq_peek(f); if (skb) { - u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp), + u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send, f->time_next_packet); if (now < time_next_packet) { @@ -533,6 +602,15 @@ out: static void fq_flow_purge(struct fq_flow *flow) { + struct rb_node *p = rb_first(&flow->t_root); + + while (p) { + struct sk_buff *skb = rb_to_skb(p); + + p = rb_next(p); + rb_erase(&skb->rbnode, &flow->t_root); + rtnl_kfree_skbs(skb, skb); + } rtnl_kfree_skbs(flow->head, flow->tail); flow->head = NULL; flow->qlen = 0; @@ -684,7 +762,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, + NULL); if (err < 0) return err; @@ -823,7 +902,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) u64 ce_threshold = q->ce_threshold; struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index cd04d40c30b6..e2faf33d282b 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Fair Queue CoDel discipline * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> */ @@ -387,8 +383,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy, - NULL); + err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt, + fq_codel_policy, NULL); if (err < 0) return err; if (tb[TCA_FQ_CODEL_FLOWS]) { @@ -527,7 +523,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a117d9260558..11c03cf4aa74 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_generic.c Generic packet scheduler routines. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Jamal Hadi Salim, <hadi@cyberus.ca> 990601 * - Ingress support @@ -32,6 +28,7 @@ #include <net/pkt_sched.h> #include <net/dst.h> #include <trace/events/qdisc.h> +#include <trace/events/net.h> #include <net/xfrm.h> /* Qdisc to use by default */ @@ -68,7 +65,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) skb = __skb_dequeue(&q->skb_bad_txq); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); - qdisc_qstats_atomic_qlen_dec(q); + qdisc_qstats_cpu_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; @@ -108,7 +105,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_inc(q, skb); - qdisc_qstats_atomic_qlen_inc(q); + qdisc_qstats_cpu_qlen_inc(q); } else { qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; @@ -118,52 +115,36 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, spin_unlock(lock); } -static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) +static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - while (skb) { - struct sk_buff *next = skb->next; - - __skb_queue_tail(&q->gso_skb, skb); - q->qstats.requeues++; - qdisc_qstats_backlog_inc(q, skb); - q->q.qlen++; /* it's still part of the queue */ + spinlock_t *lock = NULL; - skb = next; + if (q->flags & TCQ_F_NOLOCK) { + lock = qdisc_lock(q); + spin_lock(lock); } - __netif_schedule(q); - - return 0; -} -static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q) -{ - spinlock_t *lock = qdisc_lock(q); - - spin_lock(lock); while (skb) { struct sk_buff *next = skb->next; __skb_queue_tail(&q->gso_skb, skb); - qdisc_qstats_cpu_requeues_inc(q); - qdisc_qstats_cpu_backlog_inc(q, skb); - qdisc_qstats_atomic_qlen_inc(q); + /* it's still part of the queue */ + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_requeues_inc(q); + qdisc_qstats_cpu_backlog_inc(q, skb); + qdisc_qstats_cpu_qlen_inc(q); + } else { + q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); + q->q.qlen++; + } skb = next; } - spin_unlock(lock); - + if (lock) + spin_unlock(lock); __netif_schedule(q); - - return 0; -} - -static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) -{ - if (q->flags & TCQ_F_NOLOCK) - return dev_requeue_skb_locked(skb, q); - else - return __dev_requeue_skb(skb, q); } static void try_bulk_dequeue_skb(struct Qdisc *q, @@ -252,7 +233,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, skb = __skb_dequeue(&q->gso_skb); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); - qdisc_qstats_atomic_qlen_dec(q); + qdisc_qstats_cpu_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; @@ -457,6 +438,7 @@ static void dev_watchdog(struct timer_list *t) } if (some_queue_timedout) { + trace_net_dev_xmit_timeout(dev, i); WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); @@ -645,11 +627,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (unlikely(err)) return qdisc_drop_cpu(skb, qdisc, to_free); - qdisc_qstats_atomic_qlen_inc(qdisc); - /* Note: skb can not be used after skb_array_produce(), - * so we better not use qdisc_qstats_cpu_backlog_inc() - */ - this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len); + qdisc_update_stats_at_enqueue(qdisc, pkt_len); return NET_XMIT_SUCCESS; } @@ -668,9 +646,9 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) skb = __skb_array_consume(q); } if (likely(skb)) { - qdisc_qstats_cpu_backlog_dec(qdisc, skb); - qdisc_bstats_cpu_update(qdisc, skb); - qdisc_qstats_atomic_qlen_dec(qdisc); + qdisc_update_stats_at_dequeue(qdisc, skb); + } else { + qdisc->empty = true; } return skb; @@ -714,6 +692,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i); q->backlog = 0; + q->qlen = 0; } } @@ -880,6 +859,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; + sch->empty = true; dev_hold(dev); refcount_set(&sch->refcnt, 1); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 234afbf9115b..8599c6f31b05 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -1,12 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_gred.c Generic Random Early Detection queue. * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 * * 991129: - Bug fix with grio mode @@ -538,7 +533,8 @@ static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; u32 dp; - nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); + nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, + gred_vq_policy, NULL); dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); @@ -568,8 +564,8 @@ static int gred_vq_validate(struct gred_sched *table, u32 cdp, int err; u32 dp; - err = nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, - extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, + gred_vq_policy, extack); if (err < 0) return err; @@ -610,8 +606,8 @@ static int gred_vqs_validate(struct gred_sched *table, u32 cdp, const struct nlattr *attr; int rem, err; - err = nla_validate_nested(vqs, TCA_GRED_VQ_ENTRY_MAX, - gred_vqe_policy, extack); + err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX, + gred_vqe_policy, extack); if (err < 0) return err; @@ -650,7 +646,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, + extack); if (err < 0) return err; @@ -737,7 +734,8 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, + extack); if (err < 0) return err; @@ -772,7 +770,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (gred_offload_dump_stats(sch)) goto nla_put_failure; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) @@ -790,7 +788,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; /* Old style all-in-one dump of VQs */ - parms = nla_nest_start(skb, TCA_GRED_PARMS); + parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; @@ -841,7 +839,7 @@ append_opt: nla_nest_end(skb, parms); /* Dump the VQs again, in more structured way */ - vqs = nla_nest_start(skb, TCA_GRED_VQ_LIST); + vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST); if (!vqs) goto nla_put_failure; @@ -852,7 +850,7 @@ append_opt: if (!q) continue; - vq = nla_nest_start(skb, TCA_GRED_VQ_ENTRY); + vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY); if (!vq) goto nla_put_failure; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 24cc220a3218..433f2190960f 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -845,16 +845,6 @@ qdisc_peek_len(struct Qdisc *sch) } static void -hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - -static void hfsc_adjust_levels(struct hfsc_class *cl) { struct hfsc_class *p; @@ -936,7 +926,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HFSC_MAX, opt, hfsc_policy, + NULL); if (err < 0) return err; @@ -1076,7 +1067,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) - hfsc_purge_queue(sch, parent); + qdisc_purge_queue(parent->qdisc); hfsc_adjust_levels(parent); sch_tree_unlock(sch); @@ -1112,7 +1103,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); - hfsc_purge_queue(sch, cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); sch_tree_unlock(sch); @@ -1310,7 +1301,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (hfsc_dump_curves(skb, cl) < 0) @@ -1328,8 +1319,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct hfsc_class *cl = (struct hfsc_class *)arg; struct tc_hfsc_stats xstats; + __u32 qlen; - cl->qstats.backlog = cl->qdisc->qstats.backlog; + qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog); xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; @@ -1337,7 +1329,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 9d6a47697406..cee6971c1c82 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF) * * Copyright (C) 2013 Terry Lam <vtlam@google.com> @@ -518,7 +519,8 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy, + NULL); if (err < 0) return err; @@ -654,7 +656,7 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 30f9da7e1076..7bcf20ef9145 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_htb.c Hierarchical token bucket, feed tree version * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Martin Devera, <devik@cdi.cz> * * Credits (in time order) for older HTB versions: @@ -165,7 +161,8 @@ struct htb_sched { /* non shaped skbs; let them go directly thru */ struct qdisc_skb_head direct_queue; - long direct_pkts; + u32 direct_pkts; + u32 overlimits; struct qdisc_watchdog watchdog; @@ -533,8 +530,10 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; - if (new_mode == HTB_CANT_SEND) + if (new_mode == HTB_CANT_SEND) { cl->overlimits++; + q->overlimits++; + } if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) @@ -937,7 +936,6 @@ ok: goto ok; } } - qdisc_qstats_overlimit(sch); if (likely(next_event > q->now)) qdisc_watchdog_schedule_ns(&q->watchdog, next_event); else @@ -1012,7 +1010,8 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) return err; @@ -1047,6 +1046,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. */ @@ -1057,7 +1057,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) gopt.defcls = q->defcls; gopt.debug = 0; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || @@ -1086,7 +1086,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, if (!cl->level && cl->leaf.q) tcm->tcm_info = cl->leaf.q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) }; __u32 qlen = 0; - if (!cl->level && cl->leaf.q) { - qlen = cl->leaf.q->q.qlen; - qs.backlog = cl->leaf.q->qstats.backlog; - } + if (!cl->level && cl->leaf.q) + qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); + cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), @@ -1270,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - if (!cl->level) { - unsigned int qlen = cl->leaf.q->q.qlen; - unsigned int backlog = cl->leaf.q->qstats.backlog; - - qdisc_reset(cl->leaf.q); - qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog); - } + if (!cl->level) + qdisc_purge_queue(cl->leaf.q); /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); @@ -1316,7 +1310,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!opt) goto failure; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) goto failure; @@ -1404,12 +1399,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->leaf.q->q.qlen; - unsigned int backlog = parent->leaf.q->qstats.backlog; - /* turn parent into inner node */ - qdisc_reset(parent->leaf.q); - qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); + qdisc_purge_queue(parent->leaf.q); qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ce3f55259d0d..0f65f617756b 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* net/sched/sch_ingress.c - Ingress and clsact qdisc * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Jamal Hadi Salim 1999 */ @@ -106,7 +102,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 203659bc3906..3a3312467692 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index d364e63c396d..d05086dc3866 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -125,8 +125,9 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, int nested_len = nla_len(nla) - NLA_ALIGN(len); if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + return nla_parse_deprecated(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; @@ -349,7 +350,7 @@ static int dump_rates(struct mqprio_sched *priv, int i; if (priv->flags & TC_MQPRIO_F_MIN_RATE) { - nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64); + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64); if (!nest) goto nla_put_failure; @@ -363,7 +364,7 @@ static int dump_rates(struct mqprio_sched *priv, } if (priv->flags & TC_MQPRIO_F_MAX_RATE) { - nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64); + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64); if (!nest) goto nla_put_failure; @@ -561,8 +562,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, - &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; } return 0; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7410ce4d0321..e1087746f6a2 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ @@ -201,9 +190,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); + qdisc_tree_flush_backlog(child); qdisc_put(child); } } @@ -225,9 +214,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); + qdisc_tree_flush_backlog(old); qdisc_put(old); } sch_tree_unlock(sch); @@ -344,7 +331,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index cc9d8133afcd..956ff3da81f4 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_netem.c Network emulator * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License. - * * Many of the algorithms and ideas for this came from * NIST Net which is not copyrighted. * @@ -935,8 +931,9 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, } if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + return nla_parse_deprecated(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; @@ -1079,7 +1076,7 @@ static int dump_loss_model(const struct netem_sched_data *q, { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_NETEM_LOSS); + nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 1cc0c7b74aa3..8fa129d3943e 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -216,7 +216,8 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, + NULL); if (err < 0) return err; @@ -491,7 +492,7 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 5619d2eb17b6..cbc2ebca4548 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * sch_plug.c Queue traffic until an explicit release command * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * There are two ways to use this qdisc: * 1. A simple "instantaneous" plug/unplug operation, by issuing an alternating * sequence of TCQ_PLUG_BUFFER & TCQ_PLUG_RELEASE_INDEFINITE commands. diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 847141cd900f..0f8fedb8809a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_prio.c Simple 3-band priority "scheduler". * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Fixes: 19990609: J Hadi Salim <hadi@nortelnetworks.com>: * Init -- EINVAL when opt undefined @@ -216,12 +212,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < oldbands; i++) { - struct Qdisc *child = q->queues[i]; - - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); - } + for (i = q->bands; i < oldbands; i++) + qdisc_tree_flush_backlog(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; @@ -365,7 +357,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 29f5c4a24688..3f9e8b425ac6 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static void qfq_purge_queue(struct qfq_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, [TCA_QFQ_LMAX] = { .type = NLA_U32 }, @@ -419,8 +410,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy, - NULL); + err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], + qfq_policy, NULL); if (err < 0) return err; @@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - qfq_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); @@ -628,7 +619,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || @@ -655,8 +646,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, - &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) + qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 9df9942340ea..1695421333e3 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_red.c Random Early Detection queue. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: @@ -205,7 +201,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + NULL); if (err < 0) return err; @@ -233,8 +230,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); old_child = q->qdisc; q->qdisc = child; } @@ -319,7 +315,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (err) goto nla_put_failure; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bab506b01a32..b245d6a2068d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -499,7 +499,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, int err; if (opt) { - err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SFB_MAX, opt, + sfb_policy, NULL); if (err < 0) return -EINVAL; @@ -521,8 +522,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); sch_tree_lock(sch); - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; @@ -581,7 +581,7 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) }; sch->qstats.backlog = q->qdisc->qstats.backlog; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt)) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 2f2678197760..420bd8411677 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 52c0b6d8f1d7..0fb10abf7579 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_skbprio.c SKB Priority Queue. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Nishanth Devarajan, <ndev2021@gmail.com> * Cody Doucette, <doucette@bu.edu> * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 206e4dbed12f..9ecfb8f5902a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -13,13 +13,18 @@ #include <linux/list.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <linux/math64.h> #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/rcupdate.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/sch_generic.h> +static LIST_HEAD(taprio_list); +static DEFINE_SPINLOCK(taprio_list_lock); + #define TAPRIO_ALL_GATES_OPEN -1 struct sched_entry { @@ -37,24 +42,88 @@ struct sched_entry { u8 command; }; +struct sched_gate_list { + struct rcu_head rcu; + struct list_head entries; + size_t num_entries; + ktime_t cycle_close_time; + s64 cycle_time; + s64 cycle_time_extension; + s64 base_time; +}; + struct taprio_sched { struct Qdisc **qdiscs; struct Qdisc *root; - s64 base_time; int clockid; - int picos_per_byte; /* Using picoseconds because for 10Gbps+ - * speeds it's sub-nanoseconds per byte - */ - size_t num_entries; + atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ + * speeds it's sub-nanoseconds per byte + */ /* Protects the update side of the RCU protected current_entry */ spinlock_t current_entry_lock; struct sched_entry __rcu *current_entry; - struct list_head entries; + struct sched_gate_list __rcu *oper_sched; + struct sched_gate_list __rcu *admin_sched; ktime_t (*get_time)(void); struct hrtimer advance_timer; + struct list_head taprio_list; }; +static ktime_t sched_base_time(const struct sched_gate_list *sched) +{ + if (!sched) + return KTIME_MAX; + + return ns_to_ktime(sched->base_time); +} + +static void taprio_free_sched_cb(struct rcu_head *head) +{ + struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu); + struct sched_entry *entry, *n; + + if (!sched) + return; + + list_for_each_entry_safe(entry, n, &sched->entries, list) { + list_del(&entry->list); + kfree(entry); + } + + kfree(sched); +} + +static void switch_schedules(struct taprio_sched *q, + struct sched_gate_list **admin, + struct sched_gate_list **oper) +{ + rcu_assign_pointer(q->oper_sched, *admin); + rcu_assign_pointer(q->admin_sched, NULL); + + if (*oper) + call_rcu(&(*oper)->rcu, taprio_free_sched_cb); + + *oper = *admin; + *admin = NULL; +} + +static ktime_t get_cycle_time(struct sched_gate_list *sched) +{ + struct sched_entry *entry; + ktime_t cycle = 0; + + if (sched->cycle_time != 0) + return sched->cycle_time; + + list_for_each_entry(entry, &sched->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + + sched->cycle_time = cycle; + + return cycle; +} + static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -85,7 +154,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) rcu_read_lock(); entry = rcu_dereference(q->current_entry); - gate_mask = entry ? entry->gate_mask : -1; + gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; rcu_read_unlock(); if (!gate_mask) @@ -107,7 +176,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) tc = netdev_get_prio_tc_map(dev, prio); if (!(gate_mask & BIT(tc))) - return NULL; + continue; return skb; } @@ -117,18 +186,30 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) static inline int length_to_duration(struct taprio_sched *q, int len) { - return (len * q->picos_per_byte) / 1000; + return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); +} + +static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) +{ + atomic_set(&entry->budget, + div64_u64((u64)entry->interval * 1000, + atomic64_read(&q->picos_per_byte))); } static struct sk_buff *taprio_dequeue(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb = NULL; struct sched_entry *entry; - struct sk_buff *skb; u32 gate_mask; int i; + if (atomic64_read(&q->picos_per_byte) == -1) { + WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte."); + return NULL; + } + rcu_read_lock(); entry = rcu_dereference(q->current_entry); /* if there's no entry, it means that the schedule didn't @@ -137,10 +218,9 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) * "AdminGateSates" */ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; - rcu_read_unlock(); if (!gate_mask) - return NULL; + goto done; for (i = 0; i < dev->num_tx_queues; i++) { struct Qdisc *child = q->qdiscs[i]; @@ -171,39 +251,81 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) */ if (gate_mask != TAPRIO_ALL_GATES_OPEN && ktime_after(guard, entry->close_time)) - return NULL; + continue; /* ... and no budget. */ if (gate_mask != TAPRIO_ALL_GATES_OPEN && atomic_sub_return(len, &entry->budget) < 0) - return NULL; + continue; skb = child->ops->dequeue(child); if (unlikely(!skb)) - return NULL; + goto done; qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; - return skb; + goto done; } - return NULL; +done: + rcu_read_unlock(); + + return skb; } -static bool should_restart_cycle(const struct taprio_sched *q, +static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { - WARN_ON(!entry); + if (list_is_last(&entry->list, &oper->entries)) + return true; + + if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0) + return true; + + return false; +} + +static bool should_change_schedules(const struct sched_gate_list *admin, + const struct sched_gate_list *oper, + ktime_t close_time) +{ + ktime_t next_base_time, extension_time; + + if (!admin) + return false; - return list_is_last(&entry->list, &q->entries); + next_base_time = sched_base_time(admin); + + /* This is the simple case, the close_time would fall after + * the next schedule base_time. + */ + if (ktime_compare(next_base_time, close_time) <= 0) + return true; + + /* This is the cycle_time_extension case, if the close_time + * plus the amount that can be extended would fall after the + * next schedule base_time, we can extend the current schedule + * for that amount. + */ + extension_time = ktime_add_ns(close_time, oper->cycle_time_extension); + + /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about + * how precisely the extension should be made. So after + * conformance testing, this logic may change. + */ + if (ktime_compare(next_base_time, extension_time) <= 0) + return true; + + return false; } static enum hrtimer_restart advance_sched(struct hrtimer *timer) { struct taprio_sched *q = container_of(timer, struct taprio_sched, advance_timer); + struct sched_gate_list *oper, *admin; struct sched_entry *entry, *next; struct Qdisc *sch = q->root; ktime_t close_time; @@ -211,29 +333,49 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer) spin_lock(&q->current_entry_lock); entry = rcu_dereference_protected(q->current_entry, lockdep_is_held(&q->current_entry_lock)); + oper = rcu_dereference_protected(q->oper_sched, + lockdep_is_held(&q->current_entry_lock)); + admin = rcu_dereference_protected(q->admin_sched, + lockdep_is_held(&q->current_entry_lock)); - /* This is the case that it's the first time that the schedule - * runs, so it only happens once per schedule. The first entry - * is pre-calculated during the schedule initialization. + if (!oper) + switch_schedules(q, &admin, &oper); + + /* This can happen in two cases: 1. this is the very first run + * of this function (i.e. we weren't running any schedule + * previously); 2. The previous schedule just ended. The first + * entry of all schedules are pre-calculated during the + * schedule initialization. */ - if (unlikely(!entry)) { - next = list_first_entry(&q->entries, struct sched_entry, + if (unlikely(!entry || entry->close_time == oper->base_time)) { + next = list_first_entry(&oper->entries, struct sched_entry, list); close_time = next->close_time; goto first_run; } - if (should_restart_cycle(q, entry)) - next = list_first_entry(&q->entries, struct sched_entry, + if (should_restart_cycle(oper, entry)) { + next = list_first_entry(&oper->entries, struct sched_entry, list); - else + oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time, + oper->cycle_time); + } else { next = list_next_entry(entry, list); + } close_time = ktime_add_ns(entry->close_time, next->interval); + close_time = min_t(ktime_t, close_time, oper->cycle_close_time); + + if (should_change_schedules(admin, oper, close_time)) { + /* Set things so the next time this runs, the new + * schedule runs. + */ + close_time = sched_base_time(admin); + switch_schedules(q, &admin, &oper); + } next->close_time = close_time; - atomic_set(&next->budget, - (next->interval * 1000) / q->picos_per_byte); + taprio_set_budget(q, next); first_run: rcu_assign_pointer(q->current_entry, next); @@ -263,10 +405,12 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) }, - [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, - [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, - [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, - [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, }; static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, @@ -302,8 +446,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; - err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, - entry_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, + entry_policy, NULL); if (err < 0) { NL_SET_ERR_MSG(extack, "Could not parse nested entry"); return -EINVAL; @@ -314,70 +458,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, return fill_sched_entry(tb, entry, extack); } -/* Returns the number of entries in case of success */ -static int parse_sched_single_entry(struct nlattr *n, - struct taprio_sched *q, - struct netlink_ext_ack *extack) -{ - struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; - struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; - struct sched_entry *entry; - bool found = false; - u32 index; - int err; - - err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, - n, entry_list_policy, NULL); - if (err < 0) { - NL_SET_ERR_MSG(extack, "Could not parse nested entry"); - return -EINVAL; - } - - if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { - NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); - return -EINVAL; - } - - err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, - tb_list[TCA_TAPRIO_SCHED_ENTRY], - entry_policy, NULL); - if (err < 0) { - NL_SET_ERR_MSG(extack, "Could not parse nested entry"); - return -EINVAL; - } - - if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { - NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); - return -EINVAL; - } - - index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); - if (index >= q->num_entries) { - NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); - return -EINVAL; - } - - list_for_each_entry(entry, &q->entries, list) { - if (entry->index == index) { - found = true; - break; - } - } - - if (!found) { - NL_SET_ERR_MSG(extack, "Could not find entry"); - return -ENOENT; - } - - err = fill_sched_entry(tb_entry, entry, extack); - if (err < 0) - return err; - - return q->num_entries; -} - static int parse_sched_list(struct nlattr *list, - struct taprio_sched *q, + struct sched_gate_list *sched, struct netlink_ext_ack *extack) { struct nlattr *n; @@ -407,64 +489,42 @@ static int parse_sched_list(struct nlattr *list, return err; } - list_add_tail(&entry->list, &q->entries); + list_add_tail(&entry->list, &sched->entries); i++; } - q->num_entries = i; + sched->num_entries = i; return i; } -/* Returns the number of entries in case of success */ -static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, - struct netlink_ext_ack *extack) +static int parse_taprio_schedule(struct nlattr **tb, + struct sched_gate_list *new, + struct netlink_ext_ack *extack) { int err = 0; - int clockid; - if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && - tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) - return -EINVAL; - - if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) - return -EINVAL; - - if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) - return -EINVAL; + if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) { + NL_SET_ERR_MSG(extack, "Adding a single entry is not supported"); + return -ENOTSUPP; + } if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) - q->base_time = nla_get_s64( - tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); + new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); - if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]) + new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]); - /* We only support static clockids and we don't allow - * for it to be modified after the first init. - */ - if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) - return -EINVAL; - - q->clockid = clockid; - } + if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]) + new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); - else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) - err = parse_sched_single_entry( - tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); - - /* parse_sched_* return the number of entries in the schedule, - * a schedule with zero entries is an error. - */ - if (err == 0) { - NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); - return -EINVAL; - } + tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + if (err < 0) + return err; - return err; + return 0; } static int taprio_parse_mqprio_opt(struct net_device *dev, @@ -473,11 +533,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { int i, j; - if (!qopt) { + if (!qopt && !dev->num_tc) { NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); return -EINVAL; } + /* If num_tc is already set, it means that the user already + * configured the mqprio part + */ + if (dev->num_tc) + return 0; + /* Verify num_tc is not out of max range */ if (qopt->num_tc > TC_MAX_QUEUE) { NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); @@ -523,76 +589,141 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, return 0; } -static ktime_t taprio_get_start_time(struct Qdisc *sch) +static int taprio_get_start_time(struct Qdisc *sch, + struct sched_gate_list *sched, + ktime_t *start) { struct taprio_sched *q = qdisc_priv(sch); - struct sched_entry *entry; ktime_t now, base, cycle; s64 n; - base = ns_to_ktime(q->base_time); - cycle = 0; - - /* Calculate the cycle_time, by summing all the intervals. - */ - list_for_each_entry(entry, &q->entries, list) - cycle = ktime_add_ns(cycle, entry->interval); + base = sched_base_time(sched); + now = q->get_time(); - if (!cycle) - return base; + if (ktime_after(base, now)) { + *start = base; + return 0; + } - now = q->get_time(); + cycle = get_cycle_time(sched); - if (ktime_after(base, now)) - return base; + /* The qdisc is expected to have at least one sched_entry. Moreover, + * any entry must have 'interval' > 0. Thus if the cycle time is zero, + * something went really wrong. In that case, we should warn about this + * inconsistent state and return error. + */ + if (WARN_ON(!cycle)) + return -EFAULT; /* Schedule the start time for the beginning of the next * cycle. */ n = div64_s64(ktime_sub_ns(now, base), cycle); - - return ktime_add_ns(base, (n + 1) * cycle); + *start = ktime_add_ns(base, (n + 1) * cycle); + return 0; } -static void taprio_start_sched(struct Qdisc *sch, ktime_t start) +static void setup_first_close_time(struct taprio_sched *q, + struct sched_gate_list *sched, ktime_t base) { - struct taprio_sched *q = qdisc_priv(sch); struct sched_entry *first; - unsigned long flags; + ktime_t cycle; - spin_lock_irqsave(&q->current_entry_lock, flags); + first = list_first_entry(&sched->entries, + struct sched_entry, list); + + cycle = get_cycle_time(sched); - first = list_first_entry(&q->entries, struct sched_entry, - list); + /* FIXME: find a better place to do this */ + sched->cycle_close_time = ktime_add_ns(base, cycle); - first->close_time = ktime_add_ns(start, first->interval); - atomic_set(&first->budget, - (first->interval * 1000) / q->picos_per_byte); + first->close_time = ktime_add_ns(base, first->interval); + taprio_set_budget(q, first); rcu_assign_pointer(q->current_entry, NULL); +} - spin_unlock_irqrestore(&q->current_entry_lock, flags); +static void taprio_start_sched(struct Qdisc *sch, + ktime_t start, struct sched_gate_list *new) +{ + struct taprio_sched *q = qdisc_priv(sch); + ktime_t expires; + + expires = hrtimer_get_expires(&q->advance_timer); + if (expires == 0) + expires = KTIME_MAX; + + /* If the new schedule starts before the next expiration, we + * reprogram it to the earliest one, so we change the admin + * schedule to the operational one at the right time. + */ + start = min_t(ktime_t, start, expires); hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); } +static void taprio_set_picos_per_byte(struct net_device *dev, + struct taprio_sched *q) +{ + struct ethtool_link_ksettings ecmd; + int picos_per_byte = -1; + + if (!__ethtool_get_link_ksettings(dev, &ecmd) && + ecmd.base.speed != SPEED_UNKNOWN) + picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, + ecmd.base.speed * 1000 * 1000); + + atomic64_set(&q->picos_per_byte, picos_per_byte); + netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", + dev->name, (long long)atomic64_read(&q->picos_per_byte), + ecmd.base.speed); +} + +static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_device *qdev; + struct taprio_sched *q; + bool found = false; + + ASSERT_RTNL(); + + if (event != NETDEV_UP && event != NETDEV_CHANGE) + return NOTIFY_DONE; + + spin_lock(&taprio_list_lock); + list_for_each_entry(q, &taprio_list, taprio_list) { + qdev = qdisc_dev(q->root); + if (qdev == dev) { + found = true; + break; + } + } + spin_unlock(&taprio_list_lock); + + if (found) + taprio_set_picos_per_byte(dev, q); + + return NOTIFY_DONE; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; + struct sched_gate_list *oper, *admin, *new_admin; struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; - struct ethtool_link_ksettings ecmd; - int i, err, size; - s64 link_speed; + int i, err, clockid; + unsigned long flags; ktime_t start; - err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, - taprio_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, + taprio_policy, extack); if (err < 0) return err; - err = -EINVAL; if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); @@ -600,13 +731,78 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - /* A schedule with less than one entry is an error */ - size = parse_taprio_opt(tb, q, extack); - if (size < 0) - return size; + new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL); + if (!new_admin) { + NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule"); + return -ENOMEM; + } + INIT_LIST_HEAD(&new_admin->entries); - hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); + admin = rcu_dereference(q->admin_sched); + rcu_read_unlock(); + + if (mqprio && (oper || admin)) { + NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported"); + err = -ENOTSUPP; + goto free_sched; + } + + err = parse_taprio_schedule(tb, new_admin, extack); + if (err < 0) + goto free_sched; + + if (new_admin->num_entries == 0) { + NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule"); + err = -EINVAL; + goto free_sched; + } + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + /* We only support static clockids and we don't allow + * for it to be modified after the first init. + */ + if (clockid < 0 || + (q->clockid != -1 && q->clockid != clockid)) { + NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported"); + err = -ENOTSUPP; + goto free_sched; + } + + q->clockid = clockid; + } + + if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); + err = -EINVAL; + goto free_sched; + } + + taprio_set_picos_per_byte(dev, q); + + /* Protects against enqueue()/dequeue() */ + spin_lock_bh(qdisc_lock(sch)); + + if (!hrtimer_active(&q->advance_timer)) { + hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; + } + + if (mqprio) { + netdev_set_num_tc(dev, mqprio->num_tc); + for (i = 0; i < mqprio->num_tc; i++) + netdev_set_tc_queue(dev, i, + mqprio->count[i], + mqprio->offset[i]); + + /* Always use supplied priority mappings */ + for (i = 0; i < TC_BITMASK + 1; i++) + netdev_set_prio_tc_map(dev, i, + mqprio->prio_tc_map[i]); + } switch (q->clockid) { case CLOCK_REALTIME: @@ -622,65 +818,52 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->get_time = ktime_get_clocktai; break; default: - return -ENOTSUPP; + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + err = -EINVAL; + goto unlock; } - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *dev_queue; - struct Qdisc *qdisc; - - dev_queue = netdev_get_tx_queue(dev, i); - qdisc = qdisc_create_dflt(dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1)), - extack); - if (!qdisc) - return -ENOMEM; + err = taprio_get_start_time(sch, new_admin, &start); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Internal error: failed get start time"); + goto unlock; + } - if (i < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); + setup_first_close_time(q, new_admin, start); - q->qdiscs[i] = qdisc; - } + /* Protects against advance_sched() */ + spin_lock_irqsave(&q->current_entry_lock, flags); - if (mqprio) { - netdev_set_num_tc(dev, mqprio->num_tc); - for (i = 0; i < mqprio->num_tc; i++) - netdev_set_tc_queue(dev, i, - mqprio->count[i], - mqprio->offset[i]); + taprio_start_sched(sch, start, new_admin); - /* Always use supplied priority mappings */ - for (i = 0; i < TC_BITMASK + 1; i++) - netdev_set_prio_tc_map(dev, i, - mqprio->prio_tc_map[i]); - } + rcu_assign_pointer(q->admin_sched, new_admin); + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); + new_admin = NULL; - if (!__ethtool_get_link_ksettings(dev, &ecmd)) - link_speed = ecmd.base.speed; - else - link_speed = SPEED_1000; + spin_unlock_irqrestore(&q->current_entry_lock, flags); - q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, - link_speed * 1000 * 1000); + err = 0; - start = taprio_get_start_time(sch); - if (!start) - return 0; +unlock: + spin_unlock_bh(qdisc_lock(sch)); - taprio_start_sched(sch, start); +free_sched: + kfree(new_admin); - return 0; + return err; } static void taprio_destroy(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct sched_entry *entry, *n; unsigned int i; + spin_lock(&taprio_list_lock); + list_del(&q->taprio_list); + spin_unlock(&taprio_list_lock); + hrtimer_cancel(&q->advance_timer); if (q->qdiscs) { @@ -693,10 +876,11 @@ static void taprio_destroy(struct Qdisc *sch) netdev_set_num_tc(dev, 0); - list_for_each_entry_safe(entry, n, &q->entries, list) { - list_del(&entry->list); - kfree(entry); - } + if (q->oper_sched) + call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); + + if (q->admin_sched) + call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb); } static int taprio_init(struct Qdisc *sch, struct nlattr *opt, @@ -704,12 +888,12 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int i; - INIT_LIST_HEAD(&q->entries); spin_lock_init(&q->current_entry_lock); - /* We may overwrite the configuration later */ hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; q->root = sch; @@ -735,6 +919,29 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; + spin_lock(&taprio_list_lock); + list_add(&q->taprio_list, &taprio_list); + spin_unlock(&taprio_list_lock); + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + + dev_queue = netdev_get_tx_queue(dev, i); + qdisc = qdisc_create_dflt(dev_queue, + &pfifo_qdisc_ops, + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(i + 1)), + extack); + if (!qdisc) + return -ENOMEM; + + if (i < dev->real_num_tx_queues) + qdisc_hash_add(qdisc, false); + + q->qdiscs[i] = qdisc; + } + return taprio_change(sch, opt, extack); } @@ -781,7 +988,7 @@ static int dump_entry(struct sk_buff *msg, { struct nlattr *item; - item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY); + item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY); if (!item) return -ENOSPC; @@ -806,15 +1013,55 @@ nla_put_failure: return -1; } +static int dump_schedule(struct sk_buff *msg, + const struct sched_gate_list *root) +{ + struct nlattr *entry_list; + struct sched_entry *entry; + + if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, + root->base_time, TCA_TAPRIO_PAD)) + return -1; + + if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, + root->cycle_time, TCA_TAPRIO_PAD)) + return -1; + + if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, + root->cycle_time_extension, TCA_TAPRIO_PAD)) + return -1; + + entry_list = nla_nest_start_noflag(msg, + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); + if (!entry_list) + goto error_nest; + + list_for_each_entry(entry, &root->entries, list) { + if (dump_entry(msg, entry) < 0) + goto error_nest; + } + + nla_nest_end(msg, entry_list); + return 0; + +error_nest: + nla_nest_cancel(msg, entry_list); + return -1; +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sched_gate_list *oper, *admin; struct tc_mqprio_qopt opt = { 0 }; - struct nlattr *nest, *entry_list; - struct sched_entry *entry; + struct nlattr *nest, *sched_nest; unsigned int i; + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); + admin = rcu_dereference(q->admin_sched); + opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); @@ -823,36 +1070,45 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.offset[i] = dev->tc_to_txq[i].offset; } - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) - return -ENOSPC; + goto start_error; if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) goto options_error; - if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, - q->base_time, TCA_TAPRIO_PAD)) + if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; - if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) + if (oper && dump_schedule(skb, oper)) goto options_error; - entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); - if (!entry_list) + if (!admin) + goto done; + + sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); + if (!sched_nest) goto options_error; - list_for_each_entry(entry, &q->entries, list) { - if (dump_entry(skb, entry) < 0) - goto options_error; - } + if (dump_schedule(skb, admin)) + goto admin_error; + + nla_nest_end(skb, sched_nest); - nla_nest_end(skb, entry_list); +done: + rcu_read_unlock(); return nla_nest_end(skb, nest); +admin_error: + nla_nest_cancel(skb, sched_nest); + options_error: nla_nest_cancel(skb, nest); - return -1; + +start_error: + rcu_read_unlock(); + return -ENOSPC; } static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) @@ -895,7 +1151,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } @@ -939,6 +1195,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .id = "taprio", .priv_size = sizeof(struct taprio_sched), .init = taprio_init, + .change = taprio_change, .destroy = taprio_destroy, .peek = taprio_peek, .dequeue = taprio_dequeue, @@ -947,14 +1204,24 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct notifier_block taprio_device_notifier = { + .notifier_call = taprio_dev_notifier, +}; + static int __init taprio_module_init(void) { + int err = register_netdevice_notifier(&taprio_device_notifier); + + if (err) + return err; + return register_qdisc(&taprio_qdisc_ops); } static void __exit taprio_module_exit(void) { unregister_qdisc(&taprio_qdisc_ops); + unregister_netdevice_notifier(&taprio_device_notifier); } module_init(taprio_module_init); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 7f272a9070c5..5f72f3f916a5 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -1,15 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_tbf.c Token Bucket Filter queue. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs - * original idea by Martin Devera - * */ #include <linux/module.h> @@ -308,7 +303,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; - err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, + NULL); if (err < 0) return err; @@ -391,8 +387,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; } @@ -449,7 +444,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_tbf_qopt opt; sch->qstats.backlog = q->qdisc->qstats.backlog; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 93f04cf5cac1..689ef6f3ded8 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* net/sched/sch_teql.c "True" (or "trivial") link equalizer. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ |