96518518cc
This patch adds nftables, which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality, they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianness. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into registers. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close connection, e.g. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support. This set infrastructure is enhanced in the follow-up patch (netfilter: nf_tables: add netlink set API). 
This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes a skeleton expression: net/netfilter/nft_expr_template.c, and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data to the hook, that is used to store the rule list per chain. 
This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add 
missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
341 lines
9.8 KiB
C
341 lines
9.8 KiB
C
#ifndef __LINUX_NETFILTER_H
|
|
#define __LINUX_NETFILTER_H
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/net.h>
|
|
#include <linux/if.h>
|
|
#include <linux/in.h>
|
|
#include <linux/in6.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/list.h>
|
|
#include <uapi/linux/netfilter.h>
|
|
#ifdef CONFIG_NETFILTER
|
|
static inline int NF_DROP_GETERR(int verdict)
|
|
{
|
|
return -(verdict >> NF_VERDICT_QBITS);
|
|
}
|
|
|
|
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
|
|
const union nf_inet_addr *a2)
|
|
{
|
|
return a1->all[0] == a2->all[0] &&
|
|
a1->all[1] == a2->all[1] &&
|
|
a1->all[2] == a2->all[2] &&
|
|
a1->all[3] == a2->all[3];
|
|
}
|
|
|
|
static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
|
|
union nf_inet_addr *result,
|
|
const union nf_inet_addr *mask)
|
|
{
|
|
result->all[0] = a1->all[0] & mask->all[0];
|
|
result->all[1] = a1->all[1] & mask->all[1];
|
|
result->all[2] = a1->all[2] & mask->all[2];
|
|
result->all[3] = a1->all[3] & mask->all[3];
|
|
}
|
|
|
|
int netfilter_init(void);
|
|
|
|
/* Largest hook number + 1 */
|
|
#define NF_MAX_HOOKS 8
|
|
|
|
struct sk_buff;
|
|
|
|
struct nf_hook_ops;

/*
 * Function type of a hook callback.  It is handed a pointer to an
 * nf_hook_ops, the packet, the input and output devices and the okfn
 * continuation, and returns an unsigned int (presumably a netfilter
 * verdict -- confirm against the uapi header).
 */
typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
			       struct sk_buff *skb,
			       const struct net_device *in,
			       const struct net_device *out,
			       int (*okfn)(struct sk_buff *));
|
|
|
|
struct nf_hook_ops {
|
|
struct list_head list;
|
|
|
|
/* User fills in from here down. */
|
|
nf_hookfn *hook;
|
|
struct module *owner;
|
|
void *priv;
|
|
u_int8_t pf;
|
|
unsigned int hooknum;
|
|
/* Hooks are ordered in ascending priority. */
|
|
int priority;
|
|
};
|
|
|
|
struct nf_sockopt_ops {
|
|
struct list_head list;
|
|
|
|
u_int8_t pf;
|
|
|
|
/* Non-inclusive ranges: use 0/0/NULL to never get called. */
|
|
int set_optmin;
|
|
int set_optmax;
|
|
int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len);
|
|
#ifdef CONFIG_COMPAT
|
|
int (*compat_set)(struct sock *sk, int optval,
|
|
void __user *user, unsigned int len);
|
|
#endif
|
|
int get_optmin;
|
|
int get_optmax;
|
|
int (*get)(struct sock *sk, int optval, void __user *user, int *len);
|
|
#ifdef CONFIG_COMPAT
|
|
int (*compat_get)(struct sock *sk, int optval,
|
|
void __user *user, int *len);
|
|
#endif
|
|
/* Use the module struct to lock set/get code in place */
|
|
struct module *owner;
|
|
};
|
|
|
|
/*
 * Functions to register/unregister hook points.  The *_hooks variants take
 * @reg together with a count @n (presumably an array of @n entries --
 * confirm against the implementation).
 */
int nf_register_hook(struct nf_hook_ops *reg);
void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
|
|
|
|
/*
 * Functions to register get/setsockopt ranges (non-inclusive).
 * You need to check permissions yourself!
 */
int nf_register_sockopt(struct nf_sockopt_ops *reg);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
|
|
|
|
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
|
|
|
#if defined(CONFIG_JUMP_LABEL)
|
|
#include <linux/static_key.h>
|
|
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
|
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
|
|
{
|
|
if (__builtin_constant_p(pf) &&
|
|
__builtin_constant_p(hook))
|
|
return static_key_false(&nf_hooks_needed[pf][hook]);
|
|
|
|
return !list_empty(&nf_hooks[pf][hook]);
|
|
}
|
|
#else
|
|
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
|
|
{
|
|
return !list_empty(&nf_hooks[pf][hook]);
|
|
}
|
|
#endif
|
|
|
|
/* Out-of-line hook traversal; nf_hook_thresh() calls it when hooks are active. */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev, struct net_device *outdev,
		 int (*okfn)(struct sk_buff *), int thresh);
|
|
|
|
/**
|
|
* nf_hook_thresh - call a netfilter hook
|
|
*
|
|
* Returns 1 if the hook has allowed the packet to pass. The function
|
|
* okfn must be invoked by the caller in this case. Any other return
|
|
* value indicates the packet has been consumed by the hook.
|
|
*/
|
|
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
|
|
struct sk_buff *skb,
|
|
struct net_device *indev,
|
|
struct net_device *outdev,
|
|
int (*okfn)(struct sk_buff *), int thresh)
|
|
{
|
|
if (nf_hooks_active(pf, hook))
|
|
return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Same as nf_hook_thresh() with the threshold fixed at INT_MIN; the return
 * value is passed through unchanged.
 */
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
			  struct net_device *indev, struct net_device *outdev,
			  int (*okfn)(struct sk_buff *))
{
	const int thresh = INT_MIN;

	return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, thresh);
}
|
|
|
|
/* Activate hook; either okfn or kfree_skb called, unless a hook
|
|
returns NF_STOLEN (in which case, it's up to the hook to deal with
|
|
the consequences).
|
|
|
|
Returns -ERRNO if packet dropped. Zero means queued, stolen or
|
|
accepted.
|
|
*/
|
|
|
|
/* RR:
|
|
> I don't want nf_hook to return anything because people might forget
|
|
> about async and trust the return value to mean "packet was ok".
|
|
|
|
AK:
|
|
Just document it clearly, then you can expect some sense from kernel
|
|
coders :)
|
|
*/
|
|
|
|
static inline int
|
|
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb,
|
|
struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct sk_buff *), int thresh)
|
|
{
|
|
int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh);
|
|
if (ret == 1)
|
|
ret = okfn(skb);
|
|
return ret;
|
|
}
|
|
|
|
static inline int
|
|
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb,
|
|
struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct sk_buff *), bool cond)
|
|
{
|
|
int ret;
|
|
|
|
if (!cond ||
|
|
((ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN)) == 1))
|
|
ret = okfn(skb);
|
|
return ret;
|
|
}
|
|
|
|
static inline int
|
|
NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb,
|
|
struct net_device *in, struct net_device *out,
|
|
int (*okfn)(struct sk_buff *))
|
|
{
|
|
return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN);
|
|
}
|
|
|
|
/* Call setsockopt() */
|
|
int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
|
|
unsigned int len);
|
|
int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
|
|
int *len);
|
|
#ifdef CONFIG_COMPAT
|
|
int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval,
|
|
char __user *opt, unsigned int len);
|
|
int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
|
|
char __user *opt, int *len);
|
|
#endif
|
|
|
|
/*
 * Call this before modifying an existing packet: ensures it is
 * modifiable and linear to the point you care about (writable_len).
 * Returns true or false (as an int).
 */
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
|
|
|
|
struct flowi;
|
|
struct nf_queue_entry;
|
|
|
|
struct nf_afinfo {
|
|
unsigned short family;
|
|
__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
|
|
unsigned int dataoff, u_int8_t protocol);
|
|
__sum16 (*checksum_partial)(struct sk_buff *skb,
|
|
unsigned int hook,
|
|
unsigned int dataoff,
|
|
unsigned int len,
|
|
u_int8_t protocol);
|
|
int (*route)(struct net *net, struct dst_entry **dst,
|
|
struct flowi *fl, bool strict);
|
|
void (*saveroute)(const struct sk_buff *skb,
|
|
struct nf_queue_entry *entry);
|
|
int (*reroute)(struct sk_buff *skb,
|
|
const struct nf_queue_entry *entry);
|
|
int route_key_size;
|
|
};
|
|
|
|
extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
|
|
static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
|
|
{
|
|
return rcu_dereference(nf_afinfo[family]);
|
|
}
|
|
|
|
static inline __sum16
|
|
nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff,
|
|
u_int8_t protocol, unsigned short family)
|
|
{
|
|
const struct nf_afinfo *afinfo;
|
|
__sum16 csum = 0;
|
|
|
|
rcu_read_lock();
|
|
afinfo = nf_get_afinfo(family);
|
|
if (afinfo)
|
|
csum = afinfo->checksum(skb, hook, dataoff, protocol);
|
|
rcu_read_unlock();
|
|
return csum;
|
|
}
|
|
|
|
static inline __sum16
|
|
nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
|
unsigned int dataoff, unsigned int len,
|
|
u_int8_t protocol, unsigned short family)
|
|
{
|
|
const struct nf_afinfo *afinfo;
|
|
__sum16 csum = 0;
|
|
|
|
rcu_read_lock();
|
|
afinfo = nf_get_afinfo(family);
|
|
if (afinfo)
|
|
csum = afinfo->checksum_partial(skb, hook, dataoff, len,
|
|
protocol);
|
|
rcu_read_unlock();
|
|
return csum;
|
|
}
|
|
|
|
int nf_register_afinfo(const struct nf_afinfo *afinfo);
|
|
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
|
|
|
|
#include <net/flow.h>
|
|
extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
|
|
|
|
static inline void
|
|
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
|
|
{
|
|
#ifdef CONFIG_NF_NAT_NEEDED
|
|
void (*decodefn)(struct sk_buff *, struct flowi *);
|
|
|
|
rcu_read_lock();
|
|
decodefn = rcu_dereference(nf_nat_decode_session_hook);
|
|
if (decodefn)
|
|
decodefn(skb, fl);
|
|
rcu_read_unlock();
|
|
#endif
|
|
}
|
|
|
|
#else /* !CONFIG_NETFILTER */
|
|
/*
 * Without CONFIG_NETFILTER there are no hooks to run: both macros expand
 * to a direct call of okfn on the packet and ignore every other argument
 * (including cond).
 */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
|
|
/*
 * !CONFIG_NETFILTER stub: calls @okfn on the packet and returns its result.
 * All other arguments are ignored.
 *
 * NOTE(review): the CONFIG_NETFILTER variant returns 1 and leaves the okfn
 * call to the caller; this stub calls okfn itself.  Confirm that callers
 * built without netfilter expect that.
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
				 struct sk_buff *skb,
				 struct net_device *indev,
				 struct net_device *outdev,
				 int (*okfn)(struct sk_buff *), int thresh)
{
	int rc = okfn(skb);

	return rc;
}
|
|
/*
 * !CONFIG_NETFILTER stub: always returns 1 and does not touch any of its
 * arguments (okfn is not called here).
 */
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
			  struct net_device *indev, struct net_device *outdev,
			  int (*okfn)(struct sk_buff *))
{
	const int pass = 1;

	return pass;
}
|
|
struct flowi;
|
|
static inline void
|
|
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
|
|
{
|
|
}
|
|
#endif /*CONFIG_NETFILTER*/
|
|
|
|
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
|
/* RCU-annotated function pointers, plus the nf_ct_attach() entry point. */
extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
|
|
|
|
struct nf_conn;
|
|
enum ip_conntrack_info;
|
|
struct nlattr;
|
|
|
|
struct nfq_ct_hook {
|
|
size_t (*build_size)(const struct nf_conn *ct);
|
|
int (*build)(struct sk_buff *skb, struct nf_conn *ct);
|
|
int (*parse)(const struct nlattr *attr, struct nf_conn *ct);
|
|
int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct,
|
|
u32 portid, u32 report);
|
|
void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
|
|
enum ip_conntrack_info ctinfo, s32 off);
|
|
};
|
|
extern struct nfq_ct_hook __rcu *nfq_ct_hook;
|
|
#else
|
|
/*
 * Stub used when conntrack is neither built in nor modular: does nothing.
 * The second parameter is const so the signature matches the real
 * nf_ct_attach() prototype and callers holding a const skb compile in
 * both configurations.
 */
static inline void nf_ct_attach(struct sk_buff *new,
				const struct sk_buff *skb) {}
|
|
#endif
|
|
|
|
#endif /*__LINUX_NETFILTER_H*/
|