diff options
Diffstat (limited to 'net')
223 files changed, 5681 insertions, 3898 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index be737539f34d..e34ea9e5e28b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -625,6 +625,19 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } + +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + + return rc; +} #endif static void vlan_dev_change_rx_flags(struct net_device *dev, int change) @@ -707,6 +720,7 @@ static int vlan_dev_init(struct net_device *dev) dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; #endif + dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; @@ -858,6 +872,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, + .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif }; diff --git a/net/9p/Makefile b/net/9p/Makefile index 198a640d53a6..a0874cc1f718 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o util.o \ protocol.o \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index a848bca9fbff..347ec0cd2718 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); + if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + int alloc_msize = min(c->msize, 4096); + req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->tc->capacity = alloc_msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->rc->capacity = alloc_msize; + } else { + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->tc->capacity = c->msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc->capacity = c->msize; + } if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->tc->capacity = c->msize; req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); - req->rc->capacity = c->msize; } p9pdu_reset(req->tc); @@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { int8_t type; int err; + int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); if (err) { @@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (type == P9_RERROR || type == P9_RLERROR) { - int ecode; - - if (!p9_is_proto_dotl(c)) { - char *ename; + if (type != P9_RERROR && type != P9_RLERROR) + return 0; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) - goto out_err; + if (!p9_is_proto_dotl(c)) { + char *ename; + + if (req->tc->pbuf_size) { + /* Handle user buffers */ + size_t len = req->rc->size - req->rc->offset; + if (req->tc->pubuf) { + /* User Buffer */ + err = copy_from_user( + &req->rc->sdata[req->rc->offset], + req->tc->pubuf, len); + if (err) { + err = -EFAULT; + goto out_err; + } + } else { + /* Kernel Buffer */ + memmove(&req->rc->sdata[req->rc->offset], + req->tc->pkbuf, len); + } + } + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (p9_is_proto_dotu(c)) - err = -ecode; + if (p9_is_proto_dotu(c)) + err = -ecode; - if (!err || !IS_ERR_VALUE(err)) { - err = p9_errstr2errno(ename, strlen(ename)); + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, + ename); - kfree(ename); - } - } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); - err = -ecode; - - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + kfree(ename); } + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } - } else - err = 0; return err; @@ -1191,6 +1220,27 @@ error: } EXPORT_SYMBOL(p9_client_fsync); +int p9_client_sync_fs(struct p9_fid *fid) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); + + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_sync_fs); + int p9_client_clunk(struct p9_fid *fid) { int err; @@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (data) { - memmove(data, dataptr, count); - } else { - err = copy_to_user(udata, dataptr, count); - if (err) { - err = -EFAULT; - goto free_and_error; + if (!req->tc->pbuf_size) { + if (data) { + memmove(data, dataptr, count); + } else { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } } } p9_free_req(clnt, req); @@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - if (data) - req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, - rsize, data); - else - req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, - rsize, udata); + + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, + offset, rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, + offset, rsize, udata); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, + offset, rsize, data); + } else { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, + offset, rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (data) + if (!req->tc->pbuf_size && data) memmove(data, dataptr, count); p9_free_req(clnt, req); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 1e308f210928..2ce515b859b3 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } +static size_t +pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, + size_t size) +{ + BUG_ON(pdu->size > P9_IOHDRSZ); + pdu->pubuf = (char __user *)udata; + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + +static size_t +pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) +{ + BUG_ON(pdu->size > P9_READDIRHDRSZ); + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + /* b - int8_t w - int16_t @@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'E':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + const char *u = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_urw(pdu, k, u, cnt)) + errcode = -EFAULT; + } + break; + case 'F':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_readdir(pdu, k, cnt)) + errcode = -EFAULT; + } + break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) { + pdu->id = type; return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } @@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; + pdu->private = NULL; + pdu->pubuf = NULL; + pdu->pkbuf = NULL; + pdu->pbuf_size = 0; } int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 000000000000..d62b9aa58df8 --- /dev/null +++ b/net/9p/trans_common.c @@ -0,0 +1,97 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <linux/scatterlist.h> +#include "trans_common.h" + +/** + * p9_release_req_pages - Release pages after the transaction. + * @*private: PDU's private page of struct trans_rpage_info + */ +void +p9_release_req_pages(struct trans_rpage_info *rpinfo) +{ + int i = 0; + + while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { + put_page(rpinfo->rp_data[i]); + i++; + } +} +EXPORT_SYMBOL(p9_release_req_pages); + +/** + * p9_nr_pages - Return number of pages needed to accomodate the payload. + */ +int +p9_nr_pages(struct p9_req_t *req) +{ + int start_page, end_page; + start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT; + end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size + + PAGE_SIZE - 1) >> PAGE_SHIFT; + return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @req: Request to be sent to server. + * @pdata_off: data offset into the first page after translation (gup). + * @pdata_len: Total length of the IO. gup may not return requested # of pages. + * @nr_pages: number of pages to accomodate the payload + * @rw: Indicates if the pages are for read or write. + */ +int +p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, + int nr_pages, u8 rw) +{ + uint32_t first_page_bytes = 0; + uint32_t pdata_mapped_pages; + struct trans_rpage_info *rpinfo; + + *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); + + if (*pdata_off) + first_page_bytes = min((PAGE_SIZE - *pdata_off), + req->tc->pbuf_size); + + rpinfo = req->tc->private; + pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, + nr_pages, rw, &rpinfo->rp_data[0]); + + if (pdata_mapped_pages < 0) { + printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" + "nr_pages:%d\n", pdata_mapped_pages, + req->tc->pubuf, nr_pages); + pdata_mapped_pages = 0; + return -EIO; + } + rpinfo->rp_nr_pages = pdata_mapped_pages; + if (*pdata_off) { + *pdata_len = first_page_bytes; + *pdata_len += min((req->tc->pbuf_size - *pdata_len), + ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); + } else { + *pdata_len = min(req->tc->pbuf_size, + (size_t)pdata_mapped_pages << PAGE_SHIFT); + } + return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 000000000000..76309223bb02 --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,32 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +/* TRUE if it is user context */ +#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) + +/** + * struct trans_rpage_info - To store mapped page information in PDU. + * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. + * @rp_nr_pages: Number of mapped pages + * @rp_data: Array of page pointers + */ +struct trans_rpage_info { + u8 rp_alloc; + int rp_nr_pages; + struct page *rp_data[0]; +}; + +void p9_release_req_pages(struct trans_rpage_info *); +int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); +int p9_nr_pages(struct p9_req_t *); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 078eb162d9bf..a30471e51740 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -153,10 +153,11 @@ struct p9_conn { unsigned long wsched; }; +static void p9_poll_workfn(struct work_struct *work); + static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); -static struct workqueue_struct *p9_mux_wq; -static struct task_struct *p9_poll_task; +static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static void p9_mux_poll_stop(struct p9_conn *m) { @@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work) if (n & POLLIN) { P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + schedule_work(&m->rq); } else clear_bit(Rworksched, &m->wsched); } else @@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work) if (n & POLLOUT) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); } else clear_bit(Wworksched, &m->wsched); } else @@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; - DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); - /* perform the default wake up operation */ - return default_wake_function(&dummy_wait, mode, sync, key); + schedule_work(&p9_poll_work); + return 1; } /** @@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m) P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + schedule_work(&m->rq); } } @@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m) if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); } } } @@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) n = p9_fd_poll(m->client, NULL); if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); return 0; } @@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = { * */ -static int p9_poll_proc(void *a) +static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); - repeat: + spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, @@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a) } spin_unlock_irqrestore(&p9_poll_lock, flags); - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&p9_poll_pending_list)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); - schedule(); - } - __set_current_state(TASK_RUNNING); - - if (!kthread_should_stop()) - goto repeat; - P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); - return 0; } int p9_trans_fd_init(void) { - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); - if (IS_ERR(p9_poll_task)) { - destroy_workqueue(p9_mux_wq); - printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); - return PTR_ERR(p9_poll_task); - } - v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - kthread_stop(p9_poll_task); + flush_work_sync(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); - - destroy_workqueue(p9_mux_wq); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20e..9b550ed9c711 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -45,6 +45,7 @@ #include <linux/scatterlist.h> #include <linux/virtio.h> #include <linux/virtio_9p.h> +#include "trans_common.h" #define VIRTQUEUE_NUM 128 @@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq) rc->tag); req = p9_tag_lookup(chan->client, rc->tag); req->status = REQ_STATUS_RCVD; + if (req->tc->private) { + struct trans_rpage_info *rp = req->tc->private; + /*Release pages */ + p9_release_req_pages(rp); + if (rp->rp_alloc) + kfree(rp); + req->tc->private = NULL; + } p9_client_cb(chan->client, req); } else { spin_unlock_irqrestore(&chan->lock, flags); @@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) } /** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata_off: Offset into the first page + * @**pdata: a list of pages to add into sg. + * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, + struct page **pdata, int count) +{ + int s; + int i = 0; + int index = start; + + if (pdata_off) { + s = min((int)(PAGE_SIZE - pdata_off), count); + sg_set_page(&sg[index++], pdata[i++], s, pdata_off); + count -= s; + } + + while (count) { + BUG_ON(index > limit); + s = min((int)PAGE_SIZE, count); + sg_set_page(&sg[index++], pdata[i++], s, 0); + count -= s; + } + return index-start; +} + +/** * p9_virtio_request - issue a request * @client: client instance issuing the request * @req: request to be issued @@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out; + int in, out, inp, outp; struct virtio_chan *chan = client->trans; char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; - int err; + size_t pdata_off = 0; + struct trans_rpage_info *rpinfo = NULL; + int err, pdata_len = 0; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req_retry: req->status = REQ_STATUS_SENT; + if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { + int nr_pages = p9_nr_pages(req); + int rpinfo_size = sizeof(struct trans_rpage_info) + + sizeof(struct page *) * nr_pages; + + if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { + /* We can use sdata */ + req->tc->private = req->tc->sdata + req->tc->size; + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 0; + } else { + req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); + if (!req->tc->private) { + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " + "private kmalloc returned NULL"); + return -ENOMEM; + } + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 1; + } + + err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, + req->tc->id == P9_TREAD ? 1 : 0); + if (err < 0) { + if (rpinfo->rp_alloc) + kfree(rpinfo); + return err; + } + } + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, - client->msize); + req->tc->size); + + if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { + /* We have additional write payload buffer to take care */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, + req->tc->pbuf_size); + } + out += outp; + } + + /* Handle in VirtIO ring buffers */ + if (req->tc->pbuf_size && + ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { + /* + * Take care of additional Read payload. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + /* + * Running executables in the filesystem may result in + * a read request with kernel buffer as opposed to user buffer. + */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, + pbuf, req->tc->pbuf_size); + } + in += inp; + } else { + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, + client->msize); + } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -246,6 +362,8 @@ req_retry: P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " "virtio rpc add_buf returned failure"); + if (rpinfo && rpinfo->rp_alloc) + kfree(rpinfo); return -EIO; } } @@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = { .request = p9_virtio_request, .cancel = p9_virtio_cancel, .maxsize = PAGE_SIZE*16, + .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; diff --git a/net/Makefile b/net/Makefile index a3330ebe2c53..a51d9465e628 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ -ifneq ($(CONFIG_IPV6),) -obj-y += ipv6/ -endif +obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index c410b93fda2e..3d4f4b043406 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -54,7 +54,6 @@ #include <linux/capability.h> #include <linux/module.h> #include <linux/if_arp.h> -#include <linux/smp_lock.h> #include <linux/termios.h> /* For TIOCOUTQ/INQ */ #include <linux/compat.h> #include <linux/slab.h> @@ -1052,13 +1051,13 @@ static int atalk_release(struct socket *sock) { struct sock *sk = sock->sk; - lock_kernel(); + lock_sock(sk); if (sk) { sock_orphan(sk); sock->sk = NULL; atalk_destroy_socket(sk); } - unlock_kernel(); + release_sock(sk); return 0; } @@ -1143,7 +1142,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr->sat_family != AF_APPLETALK) return -EAFNOSUPPORT; - lock_kernel(); + lock_sock(sk); if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) { struct atalk_addr *ap = atalk_find_primary(); @@ -1179,7 +1178,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sock_reset_flag(sk, SOCK_ZAPPED); err = 0; out: - unlock_kernel(); + release_sock(sk); return err; } @@ -1215,7 +1214,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, #endif } - lock_kernel(); + lock_sock(sk); err = -EBUSY; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) @@ -1233,7 +1232,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_ESTABLISHED; err = 0; out: - unlock_kernel(); + release_sock(sk); return err; } @@ -1249,7 +1248,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, struct atalk_sock *at = at_sk(sk); int err; - lock_kernel(); + lock_sock(sk); err = -ENOBUFS; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) @@ -1277,17 +1276,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sat, sizeof(sat)); out: - unlock_kernel(); - return err; -} - -static unsigned int atalk_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - int err; - lock_kernel(); - err = datagram_poll(file, sock, wait); - unlock_kernel(); + release_sock(sk); return err; } @@ -1596,7 +1585,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (len > DDP_MAXSZ) return -EMSGSIZE; - lock_kernel(); + lock_sock(sk); if (usat) { err = -EBUSY; if (sock_flag(sk, SOCK_ZAPPED)) @@ -1651,7 +1640,9 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr sk, size, dev->name); size += dev->hard_header_len; + release_sock(sk); skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); + lock_sock(sk); if (!skb) goto out; @@ -1738,7 +1729,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len); out: - unlock_kernel(); + release_sock(sk); return err ? : len; } @@ -1753,9 +1744,10 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr int err = 0; struct sk_buff *skb; - lock_kernel(); skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); + lock_sock(sk); + if (!skb) goto out; @@ -1787,7 +1779,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr skb_free_datagram(sk, skb); /* Free the datagram. */ out: - unlock_kernel(); + release_sock(sk); return err ? : copied; } @@ -1887,7 +1879,7 @@ static const struct proto_ops atalk_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = atalk_getname, - .poll = atalk_poll, + .poll = datagram_poll, .ioctl = atalk_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, diff --git a/net/atm/clip.c b/net/atm/clip.c index 810a1294eddb..1d4be60e1390 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -502,8 +502,6 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = 1 }; struct rtable *rt; if (vcc->push != clip_push) { @@ -520,7 +518,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) unlink_clip_vcc(clip_vcc); return 0; } - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output(&init_net, ip, 0, 1, 0); if (IS_ERR(rt)) return PTR_ERR(rt); neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 88af9eb9aa48..8add9b499912 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -550,10 +550,8 @@ static int __init bt_init(void) goto error; err = l2cap_init(); - if (err < 0) { - hci_sock_cleanup(); + if (err < 0) goto sock_err; - } err = sco_init(); if (err < 0) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a050a6984901..7a6f56b2f49d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -286,6 +286,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->auth_type = HCI_AT_GENERAL_BONDING; conn->io_capability = hdev->io_capability; + conn->remote_auth = 0xff; conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; @@ -429,10 +430,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (type == LE_LINK) { le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + if (le) + return ERR_PTR(-EBUSY); + le = hci_conn_add(hdev, LE_LINK, dst); if (!le) - le = hci_conn_add(hdev, LE_LINK, dst); - if (!le) - return NULL; + return ERR_PTR(-ENOMEM); if (le->state == BT_OPEN) hci_le_connect(le); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 98b5764e4315..3fbfa50c2bff 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -796,6 +796,29 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } +static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + +static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_neg_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); @@ -1401,8 +1424,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (!ev->status) { conn->link_mode |= HCI_LM_AUTH; conn->sec_level = conn->pending_sec_level; - } else + } else { + mgmt_auth_failed(hdev->id, &conn->dst, ev->status); conn->sec_level = BT_SECURITY_LOW; + } clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); @@ -1728,6 +1753,14 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_le_read_buffer_size(hdev, skb); break; + case HCI_OP_USER_CONFIRM_REPLY: + hci_cc_user_confirm_reply(hdev, skb); + break; + + case HCI_OP_USER_CONFIRM_NEG_REPLY: + hci_cc_user_confirm_neg_reply(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; @@ -2362,6 +2395,21 @@ unlock: hci_dev_unlock(hdev); } +static inline void hci_user_confirm_request_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_user_confirm_req *ev = (void *) skb->data; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_request(hdev->id, &ev->bdaddr, ev->passkey); + + hci_dev_unlock(hdev); +} + static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_simple_pair_complete *ev = (void *) skb->data; @@ -2372,9 +2420,20 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_ hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) - hci_conn_put(conn); + if (!conn) + goto unlock; + /* To avoid duplicate auth_failed events to user space we check + * the HCI_CONN_AUTH_PEND flag which will be set if we + * initiated the authentication. A traditional auth_complete + * event gets always produced as initiator and is also mapped to + * the mgmt_auth_failed event */ + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend) && ev->status != 0) + mgmt_auth_failed(hdev->id, &conn->dst, ev->status); + + hci_conn_put(conn); + +unlock: hci_dev_unlock(hdev); } @@ -2580,6 +2639,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_io_capa_reply_evt(hdev, skb); break; + case HCI_EV_USER_CONFIRM_REQUEST: + hci_user_confirm_request_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d50e96136608..295e4a88fff8 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -861,7 +861,7 @@ error: return err; } -void __exit hci_sock_cleanup(void) +void hci_sock_cleanup(void) { if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 2429ca2d7b06..5ec12971af6b 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -36,6 +36,7 @@ #include <linux/file.h> #include <linux/init.h> #include <linux/wait.h> +#include <linux/mutex.h> #include <net/sock.h> #include <linux/input.h> @@ -316,24 +317,144 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep return hidp_queue_report(session, buf, rsize); } +static int hidp_get_raw_report(struct hid_device *hid, + unsigned char report_number, + unsigned char *data, size_t count, + unsigned char report_type) +{ + struct hidp_session *session = hid->driver_data; + struct sk_buff *skb; + size_t len; + int numbered_reports = hid->report_enum[report_type].numbered; + + switch (report_type) { + case HID_FEATURE_REPORT: + report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE; + break; + case HID_INPUT_REPORT: + report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_INPUT; + break; + case HID_OUTPUT_REPORT: + report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_OUPUT; + break; + default: + return -EINVAL; + } + + if (mutex_lock_interruptible(&session->report_mutex)) + return -ERESTARTSYS; + + /* Set up our wait, and send the report request to the device. */ + session->waiting_report_type = report_type & HIDP_DATA_RTYPE_MASK; + session->waiting_report_number = numbered_reports ? report_number : -1; + set_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + data[0] = report_number; + if (hidp_send_ctrl_message(hid->driver_data, report_type, data, 1)) + goto err_eio; + + /* Wait for the return of the report. The returned report + gets put in session->report_return. */ + while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) { + int res; + + res = wait_event_interruptible_timeout(session->report_queue, + !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags), + 5*HZ); + if (res == 0) { + /* timeout */ + goto err_eio; + } + if (res < 0) { + /* signal */ + goto err_restartsys; + } + } + + skb = session->report_return; + if (skb) { + len = skb->len < count ? skb->len : count; + memcpy(data, skb->data, len); + + kfree_skb(skb); + session->report_return = NULL; + } else { + /* Device returned a HANDSHAKE, indicating protocol error. */ + len = -EIO; + } + + clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + mutex_unlock(&session->report_mutex); + + return len; + +err_restartsys: + clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + mutex_unlock(&session->report_mutex); + return -ERESTARTSYS; +err_eio: + clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + mutex_unlock(&session->report_mutex); + return -EIO; +} + static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, unsigned char report_type) { + struct hidp_session *session = hid->driver_data; + int ret; + switch (report_type) { case HID_FEATURE_REPORT: report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; break; case HID_OUTPUT_REPORT: - report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT; break; default: return -EINVAL; } + if (mutex_lock_interruptible(&session->report_mutex)) + return -ERESTARTSYS; + + /* Set up our wait, and send the report request to the device. */ + set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); if (hidp_send_ctrl_message(hid->driver_data, report_type, - data, count)) - return -ENOMEM; - return count; + data, count)) { + ret = -ENOMEM; + goto err; + } + + /* Wait for the ACK from the device. */ + while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) { + int res; + + res = wait_event_interruptible_timeout(session->report_queue, + !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags), + 10*HZ); + if (res == 0) { + /* timeout */ + ret = -EIO; + goto err; + } + if (res < 0) { + /* signal */ + ret = -ERESTARTSYS; + goto err; + } + } + + if (!session->output_report_success) { + ret = -EIO; + goto err; + } + + ret = count; + +err: + clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); + mutex_unlock(&session->report_mutex); + return ret; } static void hidp_idle_timeout(unsigned long arg) @@ -360,16 +481,22 @@ static void hidp_process_handshake(struct hidp_session *session, unsigned char param) { BT_DBG("session %p param 0x%02x", session, param); + session->output_report_success = 0; /* default condition */ switch (param) { case HIDP_HSHK_SUCCESSFUL: /* FIXME: Call into SET_ GET_ handlers here */ + session->output_report_success = 1; break; case HIDP_HSHK_NOT_READY: case HIDP_HSHK_ERR_INVALID_REPORT_ID: case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST: case HIDP_HSHK_ERR_INVALID_PARAMETER: + if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) { + clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + wake_up_interruptible(&session->report_queue); + } /* FIXME: Call into SET_ GET_ handlers here */ break; @@ -388,6 +515,12 @@ static void hidp_process_handshake(struct hidp_session *session, HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); break; } + + /* Wake up the waiting thread. */ + if (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) { + clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); + wake_up_interruptible(&session->report_queue); + } } static void hidp_process_hid_control(struct hidp_session *session, @@ -406,9 +539,11 @@ static void hidp_process_hid_control(struct hidp_session *session, } } -static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, +/* Returns true if the passed-in skb should be freed by the caller. */ +static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb, unsigned char param) { + int done_with_skb = 1; BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param); switch (param) { @@ -420,7 +555,6 @@ static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, if (session->hid) hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0); - break; case HIDP_DATA_RTYPE_OTHER: @@ -432,12 +566,27 @@ static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, __hidp_send_ctrl_message(session, HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); } + + if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) && + param == session->waiting_report_type) { + if (session->waiting_report_number < 0 || + session->waiting_report_number == skb->data[0]) { + /* hidp_get_raw_report() is waiting on this report. */ + session->report_return = skb; + done_with_skb = 0; + clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + wake_up_interruptible(&session->report_queue); + } + } + + return done_with_skb; } static void hidp_recv_ctrl_frame(struct hidp_session *session, struct sk_buff *skb) { unsigned char hdr, type, param; + int free_skb = 1; BT_DBG("session %p skb %p len %d", session, skb, skb->len); @@ -457,7 +606,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session, break; case HIDP_TRANS_DATA: - hidp_process_data(session, skb, param); + free_skb = hidp_process_data(session, skb, param); break; default: @@ -466,7 +615,8 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session, break; } - kfree_skb(skb); + if (free_skb) + kfree_skb(skb); } static void hidp_recv_intr_frame(struct hidp_session *session, @@ -566,6 +716,8 @@ static int hidp_session(void *arg) init_waitqueue_entry(&intr_wait, current); add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); add_wait_queue(sk_sleep(intr_sk), &intr_wait); + session->waiting_for_startup = 0; + wake_up_interruptible(&session->startup_queue); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -757,6 +909,8 @@ static struct hid_ll_driver hidp_hid_driver = { .hidinput_input_event = hidp_hidinput_event, }; +/* This function sets up the hid device. It does not add it + to the HID system. That is done in hidp_add_connection(). */ static int hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req) { @@ -796,18 +950,11 @@ static int hidp_setup_hid(struct hidp_session *session, hid->dev.parent = hidp_get_device(session); hid->ll_driver = &hidp_hid_driver; + hid->hid_get_raw_report = hidp_get_raw_report; hid->hid_output_raw_report = hidp_output_raw_report; - err = hid_add_device(hid); - if (err < 0) - goto failed; - return 0; -failed: - hid_destroy_device(hid); - session->hid = NULL; - fault: kfree(session->rd_data); session->rd_data = NULL; @@ -856,6 +1003,10 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, skb_queue_head_init(&session->ctrl_transmit); skb_queue_head_init(&session->intr_transmit); + mutex_init(&session->report_mutex); + init_waitqueue_head(&session->report_queue); + init_waitqueue_head(&session->startup_queue); + session->waiting_for_startup = 1; session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; @@ -878,6 +1029,14 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, err = kernel_thread(hidp_session, session, CLONE_KERNEL); if (err < 0) goto unlink; + while (session->waiting_for_startup) { + wait_event_interruptible(session->startup_queue, + !session->waiting_for_startup); + } + + err = hid_add_device(session->hid); + if (err < 0) + goto err_add_device; if (session->input) { hidp_send_ctrl_message(session, @@ -891,6 +1050,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, up_write(&hidp_session_sem); return 0; +err_add_device: + hid_destroy_device(session->hid); + session->hid = NULL; + atomic_inc(&session->terminate); + hidp_schedule(session); + unlink: hidp_del_timer(session); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index 8d934a19da0a..13de5fa03480 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -80,6 +80,8 @@ #define HIDP_VIRTUAL_CABLE_UNPLUG 0 #define HIDP_BOOT_PROTOCOL_MODE 1 #define HIDP_BLUETOOTH_VENDOR_ID 9 +#define HIDP_WAITING_FOR_RETURN 10 +#define HIDP_WAITING_FOR_SEND_ACK 11 struct hidp_connadd_req { int ctrl_sock; // Connected control socket @@ -154,9 +156,22 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; + /* Used in hidp_get_raw_report() */ + int waiting_report_type; /* HIDP_DATA_RTYPE_* */ + int waiting_report_number; /* -1 for not numbered */ + struct mutex report_mutex; + struct sk_buff *report_return; + wait_queue_head_t report_queue; + + /* Used in hidp_output_raw_report() */ + int output_report_success; /* boolean */ + /* Report descriptor */ __u8 *rd_data; uint rd_size; + + wait_queue_head_t startup_queue; + int waiting_for_startup; }; static inline void hidp_schedule(struct hidp_session *session) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index efcef0dc1259..c9f9cecca527 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -852,8 +852,6 @@ int l2cap_do_connect(struct sock *sk) hci_dev_lock_bh(hdev); - err = -ENOMEM; - auth_type = l2cap_get_auth_type(sk); if (l2cap_pi(sk)->dcid == L2CAP_CID_LE_DATA) @@ -863,17 +861,18 @@ int l2cap_do_connect(struct sock *sk) hcon = hci_connect(hdev, ACL_LINK, dst, l2cap_pi(sk)->sec_level, auth_type); - if (!hcon) + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); goto done; + } conn = l2cap_conn_add(hcon, 0); if (!conn) { hci_conn_put(hcon); + err = -ENOMEM; goto done; } - err = 0; - /* Update source addr of the socket */ bacpy(src, conn->src); @@ -892,6 +891,8 @@ int l2cap_do_connect(struct sock *sk) l2cap_do_start(sk); } + err = 0; + done: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -4033,8 +4034,6 @@ int __init l2cap_init(void) BT_ERR("Failed to create L2CAP debug file"); } - BT_INFO("L2CAP socket layer initialized"); - return 0; error: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f5ef7a3374c7..0054c74e27b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,17 +38,18 @@ struct pending_cmd { int index; void *cmd; struct sock *sk; + void *user_data; }; LIST_HEAD(cmd_list); -static int cmd_status(struct sock *sk, u16 cmd, u8 status) +static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { struct sk_buff *skb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; - BT_DBG("sock %p", sk); + BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); if (!skb) @@ -57,6 +58,7 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev)); ev = (void *) skb_put(skb, sizeof(*ev)); @@ -69,7 +71,8 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) return 0; } -static int cmd_complete(struct sock *sk, u16 cmd, void *rp, size_t rp_len) +static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, + size_t rp_len) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -84,11 +87,14 @@ static int cmd_complete(struct sock *sk, u16 cmd, void *rp, size_t rp_len) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); put_unaligned_le16(cmd, &ev->opcode); - memcpy(ev->data, rp, rp_len); + + if (rp) + memcpy(ev->data, rp, rp_len); if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); @@ -105,7 +111,8 @@ static int read_version(struct sock *sk) rp.version = MGMT_VERSION; put_unaligned_le16(MGMT_REVISION, &rp.revision); - return cmd_complete(sk, MGMT_OP_READ_VERSION, &rp, sizeof(rp)); + return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, &rp, + sizeof(rp)); } static int read_index_list(struct sock *sk) @@ -151,32 +158,24 @@ static int read_index_list(struct sock *sk) read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_OP_READ_INDEX_LIST, rp, rp_len); + err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, rp, + rp_len); kfree(rp); return err; } -static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) +static int read_controller_info(struct sock *sk, u16 index) { struct mgmt_rp_read_info rp; - struct mgmt_cp_read_info *cp = (void *) data; struct hci_dev *hdev; - u16 dev_id; - - BT_DBG("sock %p", sk); - if (len != 2) - return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL); + BT_DBG("sock %p hci%u", sk, index); - dev_id = get_unaligned_le16(&cp->index); - - BT_DBG("request for hci%u", dev_id); - - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); + return cmd_status(sk, index, MGMT_OP_READ_INFO, ENODEV); hci_del_off_timer(hdev); @@ -184,7 +183,6 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) set_bit(HCI_MGMT, &hdev->flags); - put_unaligned_le16(hdev->id, &rp.index); rp.type = hdev->dev_type; rp.powered = test_bit(HCI_UP, &hdev->flags); @@ -209,7 +207,7 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - return cmd_complete(sk, MGMT_OP_READ_INFO, &rp, sizeof(rp)); + return cmd_complete(sk, index, MGMT_OP_READ_INFO, &rp, sizeof(rp)); } static void mgmt_pending_free(struct pending_cmd *cmd) @@ -219,14 +217,14 @@ static void mgmt_pending_free(struct pending_cmd *cmd) kfree(cmd); } -static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, - void *data, u16 len) +static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + u16 index, void *data, u16 len) { struct pending_cmd *cmd; cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); if (!cmd) - return -ENOMEM; + return NULL; cmd->opcode = opcode; cmd->index = index; @@ -234,7 +232,7 @@ static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, cmd->cmd = kmalloc(len, GFP_ATOMIC); if (!cmd->cmd) { kfree(cmd); - return -ENOMEM; + return NULL; } memcpy(cmd->cmd, data, len); @@ -244,7 +242,7 @@ static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, list_add(&cmd->list, &cmd_list); - return 0; + return cmd; } static void mgmt_pending_foreach(u16 opcode, int index, @@ -289,103 +287,106 @@ static struct pending_cmd *mgmt_pending_find(u16 opcode, int index) return NULL; } -static void mgmt_pending_remove(u16 opcode, int index) +static void mgmt_pending_remove(struct pending_cmd *cmd) { - struct pending_cmd *cmd; - - cmd = mgmt_pending_find(opcode, index); - if (cmd == NULL) - return; - list_del(&cmd->list); mgmt_pending_free(cmd); } -static int set_powered(struct sock *sk, unsigned char *data, u16 len) +static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; - u16 dev_id; - int ret, up; + struct pending_cmd *cmd; + int err, up; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_POWERED, EINVAL); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_POWERED, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV); hci_dev_lock_bh(hdev); up = test_bit(HCI_UP, &hdev->flags); if ((cp->val && up) || (!cp->val && !up)) { - ret = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY); + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EALREADY); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_POWERED, dev_id)) { - ret = cmd_status(sk, MGMT_OP_SET_POWERED, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_POWERED, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EBUSY); goto failed; } - ret = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, dev_id, data, len); - if (ret < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, index, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } if (cp->val) queue_work(hdev->workqueue, &hdev->power_on); else queue_work(hdev->workqueue, &hdev->power_off); - ret = 0; + err = 0; failed: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - return ret; + return err; } -static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) +static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; - u16 dev_id; + struct pending_cmd *cmd; u8 scan; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EINVAL); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) { - err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EBUSY); goto failed; } if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) && test_bit(HCI_PSCAN, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EALREADY); + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EALREADY); goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, index, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } scan = SCAN_PAGE; @@ -394,7 +395,7 @@ static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); if (err < 0) - mgmt_pending_remove(MGMT_OP_SET_DISCOVERABLE, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -403,44 +404,49 @@ failed: return err; } -static int set_connectable(struct sock *sk, unsigned char *data, u16 len) +static int set_connectable(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; - u16 dev_id; + struct pending_cmd *cmd; u8 scan; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EINVAL); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENETDOWN); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) { - err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EBUSY); goto failed; } if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EALREADY); + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EALREADY); goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, index, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } if (cp->val) scan = SCAN_PAGE; @@ -449,7 +455,7 @@ static int set_connectable(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); if (err < 0) - mgmt_pending_remove(MGMT_OP_SET_CONNECTABLE, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -458,7 +464,8 @@ failed: return err; } -static int mgmt_event(u16 event, void *data, u16 data_len, struct sock *skip_sk) +static int mgmt_event(u16 event, u16 index, void *data, u16 data_len, + struct sock *skip_sk) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -471,9 +478,11 @@ static int mgmt_event(u16 event, void *data, u16 data_len, struct sock *skip_sk) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(event); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(data_len); - memcpy(skb_put(skb, data_len), data, data_len); + if (data) + memcpy(skb_put(skb, data_len), data, data_len); hci_send_to_sock(NULL, skb, skip_sk); kfree_skb(skb); @@ -485,27 +494,28 @@ static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) { struct mgmt_mode rp; - put_unaligned_le16(index, &rp.index); rp.val = val; - return cmd_complete(sk, opcode, &rp, sizeof(rp)); + return cmd_complete(sk, index, opcode, &rp, sizeof(rp)); } -static int set_pairable(struct sock *sk, unsigned char *data, u16 len) +static int set_pairable(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_mode *cp, ev; struct hci_dev *hdev; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, EINVAL); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_PAIRABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV); hci_dev_lock_bh(hdev); @@ -514,14 +524,13 @@ static int set_pairable(struct sock *sk, unsigned char *data, u16 len) else clear_bit(HCI_PAIRABLE, &hdev->flags); - err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, dev_id, cp->val); + err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, index, cp->val); if (err < 0) goto failed; - put_unaligned_le16(dev_id, &ev.index); ev.val = cp->val; - err = mgmt_event(MGMT_EV_PAIRABLE, &ev, sizeof(ev), sk); + err = mgmt_event(MGMT_EV_PAIRABLE, index, &ev, sizeof(ev), sk); failed: hci_dev_unlock_bh(hdev); @@ -563,22 +572,23 @@ static int update_class(struct hci_dev *hdev) return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static int add_uuid(struct sock *sk, unsigned char *data, u16 len) +static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct mgmt_cp_add_uuid *cp; struct hci_dev *hdev; struct bt_uuid *uuid; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_ADD_UUID, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_ADD_UUID, ENODEV); + return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV); hci_dev_lock_bh(hdev); @@ -597,7 +607,7 @@ static int add_uuid(struct sock *sk, unsigned char *data, u16 len) if (err < 0) goto failed; - err = cmd_complete(sk, MGMT_OP_ADD_UUID, &dev_id, sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_ADD_UUID, NULL, 0); failed: hci_dev_unlock_bh(hdev); @@ -606,23 +616,24 @@ failed: return err; } -static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) +static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct list_head *p, *n; - struct mgmt_cp_add_uuid *cp; + struct mgmt_cp_remove_uuid *cp; struct hci_dev *hdev; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - u16 dev_id; int err, found; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, EINVAL); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_REMOVE_UUID, ENODEV); + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV); hci_dev_lock_bh(hdev); @@ -644,7 +655,7 @@ static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) } if (found == 0) { - err = cmd_status(sk, MGMT_OP_REMOVE_UUID, ENOENT); + err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENOENT); goto unlock; } @@ -652,7 +663,7 @@ static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) if (err < 0) goto unlock; - err = cmd_complete(sk, MGMT_OP_REMOVE_UUID, &dev_id, sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_REMOVE_UUID, NULL, 0); unlock: hci_dev_unlock_bh(hdev); @@ -661,21 +672,23 @@ unlock: return err; } -static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) +static int set_dev_class(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_set_dev_class *cp; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_DEV_CLASS, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV); hci_dev_lock_bh(hdev); @@ -685,8 +698,7 @@ static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) err = update_class(hdev); if (err == 0) - err = cmd_complete(sk, MGMT_OP_SET_DEV_CLASS, &dev_id, - sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_SET_DEV_CLASS, NULL, 0); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -694,23 +706,25 @@ static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) return err; } -static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) +static int set_service_cache(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_set_service_cache *cp; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_SERVICE_CACHE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV); hci_dev_lock_bh(hdev); - BT_DBG("hci%u enable %d", dev_id, cp->enable); + BT_DBG("hci%u enable %d", index, cp->enable); if (cp->enable) { set_bit(HCI_SERVICE_CACHE, &hdev->flags); @@ -721,8 +735,8 @@ static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) } if (err == 0) - err = cmd_complete(sk, MGMT_OP_SET_SERVICE_CACHE, &dev_id, - sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_SET_SERVICE_CACHE, NULL, + 0); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -730,15 +744,18 @@ static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) return err; } -static int load_keys(struct sock *sk, unsigned char *data, u16 len) +static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_load_keys *cp; - u16 dev_id, key_count, expected_len; + u16 key_count, expected_len; int i; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); + + if (len < sizeof(*cp)) + return -EINVAL; + key_count = get_unaligned_le16(&cp->key_count); expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info); @@ -748,11 +765,11 @@ static int load_keys(struct sock *sk, unsigned char *data, u16 len) return -EINVAL; } - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_LOAD_KEYS, ENODEV); + return cmd_status(sk, index, MGMT_OP_LOAD_KEYS, ENODEV); - BT_DBG("hci%u debug_keys %u key_count %u", dev_id, cp->debug_keys, + BT_DBG("hci%u debug_keys %u key_count %u", index, cp->debug_keys, key_count); hci_dev_lock_bh(hdev); @@ -779,26 +796,27 @@ static int load_keys(struct sock *sk, unsigned char *data, u16 len) return 0; } -static int remove_key(struct sock *sk, unsigned char *data, u16 len) +static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_remove_key *cp; struct hci_conn *conn; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_REMOVE_KEY, ENODEV); + return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV); hci_dev_lock_bh(hdev); err = hci_remove_link_key(hdev, &cp->bdaddr); if (err < 0) { - err = cmd_status(sk, MGMT_OP_REMOVE_KEY, -err); + err = cmd_status(sk, index, MGMT_OP_REMOVE_KEY, -err); goto unlock; } @@ -823,52 +841,56 @@ unlock: return err; } -static int disconnect(struct sock *sk, unsigned char *data, u16 len) +static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_disconnect *cp; struct hci_cp_disconnect dc; + struct pending_cmd *cmd; struct hci_conn *conn; - u16 dev_id; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_DISCONNECT, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV); + return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_DISCONNECT, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENETDOWN); goto failed; } - if (mgmt_pending_find(MGMT_OP_DISCONNECT, dev_id)) { - err = cmd_status(sk, MGMT_OP_DISCONNECT, EBUSY); + if (mgmt_pending_find(MGMT_OP_DISCONNECT, index)) { + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, EBUSY); goto failed; } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); if (!conn) { - err = cmd_status(sk, MGMT_OP_DISCONNECT, ENOTCONN); + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENOTCONN); goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, index, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } put_unaligned_le16(conn->handle, &dc.handle); dc.reason = 0x13; /* Remote User Terminated Connection */ err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); if (err < 0) - mgmt_pending_remove(MGMT_OP_DISCONNECT, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -877,24 +899,20 @@ failed: return err; } -static int get_connections(struct sock *sk, unsigned char *data, u16 len) +static int get_connections(struct sock *sk, u16 index) { - struct mgmt_cp_get_connections *cp; struct mgmt_rp_get_connections *rp; struct hci_dev *hdev; struct list_head *p; size_t rp_len; - u16 dev_id, count; + u16 count; int i, err; BT_DBG(""); - cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_GET_CONNECTIONS, ENODEV); + return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV); hci_dev_lock_bh(hdev); @@ -910,7 +928,6 @@ static int get_connections(struct sock *sk, unsigned char *data, u16 len) goto unlock; } - put_unaligned_le16(dev_id, &rp->index); put_unaligned_le16(count, &rp->conn_count); read_lock(&hci_dev_list_lock); @@ -924,7 +941,7 @@ static int get_connections(struct sock *sk, unsigned char *data, u16 len) read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_OP_GET_CONNECTIONS, rp, rp_len); + err = cmd_complete(sk, index, MGMT_OP_GET_CONNECTIONS, rp, rp_len); unlock: kfree(rp); @@ -933,33 +950,38 @@ unlock: return err; } -static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) +static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_pin_code_reply *cp; struct hci_cp_pin_code_reply reply; - u16 dev_id; + struct pending_cmd *cmd; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV); + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, index, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } bacpy(&reply.bdaddr, &cp->bdaddr); reply.pin_len = cp->pin_len; @@ -967,7 +989,7 @@ static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply); if (err < 0) - mgmt_pending_remove(MGMT_OP_PIN_CODE_REPLY, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -976,38 +998,46 @@ failed: return err; } -static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len) +static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_pin_code_neg_reply *cp; - u16 dev_id; + struct pending_cmd *cmd; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENODEV); + return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + ENETDOWN); goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, dev_id, + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, index, data, len); - if (err < 0) + if (!cmd) { + err = -ENOMEM; goto failed; + } - err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(bdaddr_t), + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->bdaddr), &cp->bdaddr); if (err < 0) - mgmt_pending_remove(MGMT_OP_PIN_CODE_NEG_REPLY, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -1016,40 +1046,217 @@ failed: return err; } -static int set_io_capability(struct sock *sk, unsigned char *data, u16 len) +static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_set_io_capability *cp; - u16 dev_id; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, EINVAL); + + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_IO_CAPABILITY, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV); hci_dev_lock_bh(hdev); hdev->io_capability = cp->io_capability; BT_DBG("%s IO capability set to 0x%02x", hdev->name, - hdev->io_capability); + hdev->io_capability); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - return cmd_complete(sk, MGMT_OP_SET_IO_CAPABILITY, - &dev_id, sizeof(dev_id)); + return cmd_complete(sk, index, MGMT_OP_SET_IO_CAPABILITY, NULL, 0); +} + +static inline struct pending_cmd *find_pairing(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct list_head *p; + + list_for_each(p, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, struct pending_cmd, list); + + if (cmd->opcode != MGMT_OP_PAIR_DEVICE) + continue; + + if (cmd->index != hdev->id) + continue; + + if (cmd->user_data != conn) + continue; + + return cmd; + } + + return NULL; +} + +static void pairing_complete(struct pending_cmd *cmd, u8 status) +{ + struct mgmt_rp_pair_device rp; + struct hci_conn *conn = cmd->user_data; + + bacpy(&rp.bdaddr, &conn->dst); + rp.status = status; + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp)); + + /* So we don't get further callbacks for this connection */ + conn->connect_cfm_cb = NULL; + conn->security_cfm_cb = NULL; + conn->disconn_cfm_cb = NULL; + + hci_conn_put(conn); + + mgmt_pending_remove(cmd); +} + +static void pairing_complete_cb(struct hci_conn *conn, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status %u", status); + + cmd = find_pairing(conn); + if (!cmd) { + BT_DBG("Unable to find a pending command"); + return; + } + + pairing_complete(cmd, status); +} + +static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pair_device *cp; + struct pending_cmd *cmd; + u8 sec_level, auth_type; + struct hci_conn *conn; + int err; + + BT_DBG(""); + + cp = (void *) data; + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (cp->io_cap == 0x03) { + sec_level = BT_SECURITY_MEDIUM; + auth_type = HCI_AT_DEDICATED_BONDING; + } else { + sec_level = BT_SECURITY_HIGH; + auth_type = HCI_AT_DEDICATED_BONDING_MITM; + } + + conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + goto unlock; + } + + if (conn->connect_cfm_cb) { + hci_conn_put(conn); + err = cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EBUSY); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, index, data, len); + if (!cmd) { + err = -ENOMEM; + hci_conn_put(conn); + goto unlock; + } + + conn->connect_cfm_cb = pairing_complete_cb; + conn->security_cfm_cb = pairing_complete_cb; + conn->disconn_cfm_cb = pairing_complete_cb; + conn->io_capability = cp->io_cap; + cmd->user_data = conn; + + if (conn->state == BT_CONNECTED && + hci_conn_security(conn, sec_level, auth_type)) + pairing_complete(cmd, 0); + + err = 0; + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len, int success) +{ + struct mgmt_cp_user_confirm_reply *cp = (void *) data; + u16 mgmt_op, hci_op; + struct pending_cmd *cmd; + struct hci_dev *hdev; + int err; + + BT_DBG(""); + + if (success) { + mgmt_op = MGMT_OP_USER_CONFIRM_REPLY; + hci_op = HCI_OP_USER_CONFIRM_REPLY; + } else { + mgmt_op = MGMT_OP_USER_CONFIRM_NEG_REPLY; + hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY; + } + + if (len != sizeof(*cp)) + return cmd_status(sk, index, mgmt_op, EINVAL); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, mgmt_op, ENODEV); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, mgmt_op, ENETDOWN); + goto failed; + } + + cmd = mgmt_pending_add(sk, mgmt_op, index, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr); + if (err < 0) + mgmt_pending_remove(cmd); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; } int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; struct mgmt_hdr *hdr; - u16 opcode, len; + u16 opcode, index, len; int err; BT_DBG("got %zu bytes", msglen); @@ -1068,6 +1275,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) hdr = (struct mgmt_hdr *) buf; opcode = get_unaligned_le16(&hdr->opcode); + index = get_unaligned_le16(&hdr->index); len = get_unaligned_le16(&hdr->len); if (len != msglen - sizeof(*hdr)) { @@ -1083,56 +1291,65 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) err = read_index_list(sk); break; case MGMT_OP_READ_INFO: - err = read_controller_info(sk, buf + sizeof(*hdr), len); + err = read_controller_info(sk, index); break; case MGMT_OP_SET_POWERED: - err = set_powered(sk, buf + sizeof(*hdr), len); + err = set_powered(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_DISCOVERABLE: - err = set_discoverable(sk, buf + sizeof(*hdr), len); + err = set_discoverable(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_CONNECTABLE: - err = set_connectable(sk, buf + sizeof(*hdr), len); + err = set_connectable(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_PAIRABLE: - err = set_pairable(sk, buf + sizeof(*hdr), len); + err = set_pairable(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_ADD_UUID: - err = add_uuid(sk, buf + sizeof(*hdr), len); + err = add_uuid(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_REMOVE_UUID: - err = remove_uuid(sk, buf + sizeof(*hdr), len); + err = remove_uuid(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_DEV_CLASS: - err = set_dev_class(sk, buf + sizeof(*hdr), len); + err = set_dev_class(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_SERVICE_CACHE: - err = set_service_cache(sk, buf + sizeof(*hdr), len); + err = set_service_cache(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_LOAD_KEYS: - err = load_keys(sk, buf + sizeof(*hdr), len); + err = load_keys(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_REMOVE_KEY: - err = remove_key(sk, buf + sizeof(*hdr), len); + err = remove_key(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_DISCONNECT: - err = disconnect(sk, buf + sizeof(*hdr), len); + err = disconnect(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_GET_CONNECTIONS: - err = get_connections(sk, buf + sizeof(*hdr), len); + err = get_connections(sk, index); break; case MGMT_OP_PIN_CODE_REPLY: - err = pin_code_reply(sk, buf + sizeof(*hdr), len); + err = pin_code_reply(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_PIN_CODE_NEG_REPLY: - err = pin_code_neg_reply(sk, buf + sizeof(*hdr), len); + err = pin_code_neg_reply(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_IO_CAPABILITY: - err = set_io_capability(sk, buf + sizeof(*hdr), len); + err = set_io_capability(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_PAIR_DEVICE: + err = pair_device(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_USER_CONFIRM_REPLY: + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 1); + break; + case MGMT_OP_USER_CONFIRM_NEG_REPLY: + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 0); break; default: BT_DBG("Unknown op %u", opcode); - err = cmd_status(sk, opcode, 0x01); + err = cmd_status(sk, index, opcode, 0x01); break; } @@ -1148,20 +1365,12 @@ done: int mgmt_index_added(u16 index) { - struct mgmt_ev_index_added ev; - - put_unaligned_le16(index, &ev.index); - - return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_INDEX_ADDED, index, NULL, 0, NULL); } int mgmt_index_removed(u16 index) { - struct mgmt_ev_index_added ev; - - put_unaligned_le16(index, &ev.index); - - return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_INDEX_REMOVED, index, NULL, 0, NULL); } struct cmd_lookup { @@ -1197,10 +1406,9 @@ int mgmt_powered(u16 index, u8 powered) mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); ev.val = powered; - ret = mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_POWERED, index, &ev, sizeof(ev), match.sk); if (match.sk) sock_put(match.sk); @@ -1214,13 +1422,12 @@ int mgmt_discoverable(u16 index, u8 discoverable) struct cmd_lookup match = { discoverable, NULL }; int ret; - mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, - mode_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); ev.val = discoverable; - ret = mgmt_event(MGMT_EV_DISCOVERABLE, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_DISCOVERABLE, index, &ev, sizeof(ev), + match.sk); if (match.sk) sock_put(match.sk); @@ -1236,10 +1443,9 @@ int mgmt_connectable(u16 index, u8 connectable) mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); ev.val = connectable; - ret = mgmt_event(MGMT_EV_CONNECTABLE, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_CONNECTABLE, index, &ev, sizeof(ev), match.sk); if (match.sk) sock_put(match.sk); @@ -1253,25 +1459,22 @@ int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type) memset(&ev, 0, sizeof(ev)); - put_unaligned_le16(index, &ev.index); - bacpy(&ev.key.bdaddr, &key->bdaddr); ev.key.type = key->type; memcpy(ev.key.val, key->val, 16); ev.key.pin_len = key->pin_len; ev.old_key_type = old_key_type; - return mgmt_event(MGMT_EV_NEW_KEY, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_NEW_KEY, index, &ev, sizeof(ev), NULL); } int mgmt_connected(u16 index, bdaddr_t *bdaddr) { struct mgmt_ev_connected ev; - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - return mgmt_event(MGMT_EV_CONNECTED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_CONNECTED, index, &ev, sizeof(ev), NULL); } static void disconnect_rsp(struct pending_cmd *cmd, void *data) @@ -1280,16 +1483,14 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) struct sock **sk = data; struct mgmt_rp_disconnect rp; - put_unaligned_le16(cmd->index, &rp.index); bacpy(&rp.bdaddr, &cp->bdaddr); - cmd_complete(cmd->sk, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); + cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); *sk = cmd->sk; sock_hold(*sk); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); } int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) @@ -1300,10 +1501,9 @@ int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk); - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - err = mgmt_event(MGMT_EV_DISCONNECTED, &ev, sizeof(ev), sk); + err = mgmt_event(MGMT_EV_DISCONNECTED, index, &ev, sizeof(ev), sk); if (sk) sock_put(sk); @@ -1320,10 +1520,9 @@ int mgmt_disconnect_failed(u16 index) if (!cmd) return -ENOENT; - err = cmd_status(cmd->sk, MGMT_OP_DISCONNECT, EIO); + err = cmd_status(cmd->sk, index, MGMT_OP_DISCONNECT, EIO); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); return err; } @@ -1332,40 +1531,39 @@ int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status) { struct mgmt_ev_connect_failed ev; - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); ev.status = status; - return mgmt_event(MGMT_EV_CONNECT_FAILED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_CONNECT_FAILED, index, &ev, sizeof(ev), NULL); } int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr) { struct mgmt_ev_pin_code_request ev; - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, index, &ev, sizeof(ev), + NULL); } int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) { struct pending_cmd *cmd; + struct mgmt_rp_pin_code_reply rp; int err; cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index); if (!cmd) return -ENOENT; - if (status != 0) - err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_REPLY, status); - else - err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY, - bdaddr, sizeof(*bdaddr)); + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; - list_del(&cmd->list); - mgmt_pending_free(cmd); + err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_REPLY, &rp, + sizeof(rp)); + + mgmt_pending_remove(cmd); return err; } @@ -1373,20 +1571,75 @@ int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) { struct pending_cmd *cmd; + struct mgmt_rp_pin_code_reply rp; int err; cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index); if (!cmd) return -ENOENT; - if (status != 0) - err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, status); - else - err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, - bdaddr, sizeof(*bdaddr)); + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; - list_del(&cmd->list); - mgmt_pending_free(cmd); + err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, &rp, + sizeof(rp)); + + mgmt_pending_remove(cmd); + + return err; +} + +int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value) +{ + struct mgmt_ev_user_confirm_request ev; + + BT_DBG("hci%u", index); + + bacpy(&ev.bdaddr, bdaddr); + put_unaligned_le32(value, &ev.value); + + return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, index, &ev, sizeof(ev), + NULL); +} + +static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, + u8 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_rp_user_confirm_reply rp; + int err; + + cmd = mgmt_pending_find(opcode, index); + if (!cmd) + return -ENOENT; + + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + err = cmd_complete(cmd->sk, index, opcode, &rp, sizeof(rp)); + + mgmt_pending_remove(cmd); return err; } + +int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + return confirm_reply_complete(index, bdaddr, status, + MGMT_OP_USER_CONFIRM_REPLY); +} + +int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + return confirm_reply_complete(index, bdaddr, status, + MGMT_OP_USER_CONFIRM_NEG_REPLY); +} + +int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct mgmt_ev_auth_failed ev; + + bacpy(&ev.bdaddr, bdaddr); + ev.status = status; + + return mgmt_event(MGMT_EV_AUTH_FAILED, index, &ev, sizeof(ev), NULL); +} diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index d7b9af4703d0..c258796313e0 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -832,7 +832,7 @@ static int rfcomm_tty_write_room(struct tty_struct *tty) return room; } -static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, unsigned long arg) +static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { BT_DBG("tty %p cmd 0x%02x", tty, cmd); @@ -1091,7 +1091,7 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) } } -static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) +static int rfcomm_tty_tiocmget(struct tty_struct *tty) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; @@ -1100,7 +1100,7 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) return dev->modem_status; } -static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear) +static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; struct rfcomm_dlc *dlc = dev->dlc; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c9348ddda877..42fdffd1d76c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -190,20 +190,21 @@ static int sco_connect(struct sock *sk) hci_dev_lock_bh(hdev); - err = -ENOMEM; - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); - if (!hcon) + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); goto done; + } conn = sco_conn_add(hcon, 0); if (!conn) { hci_conn_put(hcon); + err = -ENOMEM; goto done; } diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 9190ae462cb4..6dee7bf648a9 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -6,6 +6,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP + depends on IPV6 || IPV6=n ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 1461b19efd38..21e5901186ea 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -78,6 +78,8 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + netif_carrier_off(dev); + br_features_recompute(br); netif_start_queue(dev); br_stp_enable_bridge(br); @@ -94,6 +96,8 @@ static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + netif_carrier_off(dev); + br_stp_disable_bridge(br); br_multicast_stop(br); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 88e4aa9cb1f9..e2160792e1bc 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -139,21 +139,22 @@ static inline int is_link_local(const unsigned char *dest) * Return NULL if skb is handled * note: already called with rcu_read_lock */ -struct sk_buff *br_handle_frame(struct sk_buff *skb) +rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { struct net_bridge_port *p; + struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) - return skb; + return RX_HANDLER_PASS; if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) - return NULL; + return RX_HANDLER_CONSUMED; p = br_port_get_rcu(skb->dev); @@ -167,10 +168,12 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) goto forward; if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) - return NULL; /* frame consumed by filter */ - else - return skb; /* continue processing */ + NULL, br_handle_local_finish)) { + return RX_HANDLER_CONSUMED; /* consumed by filter */ + } else { + *pskb = skb; + return RX_HANDLER_PASS; /* continue processing */ + } } forward: @@ -178,8 +181,10 @@ forward: case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); if (rhook) { - if ((*rhook)(skb)) - return skb; + if ((*rhook)(skb)) { + *pskb = skb; + return RX_HANDLER_PASS; + } dest = eth_hdr(skb)->h_dest; } /* fall through */ @@ -194,5 +199,5 @@ forward: drop: kfree_skb(skb); } - return NULL; + return RX_HANDLER_CONSUMED; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 45b57b173f70..008ff6c4eecf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -412,10 +412,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { - struct flowi fl = { - .fl4_dst = iph->daddr, - .fl4_tos = RT_TOS(iph->tos), - }; struct in_device *in_dev = __in_dev_get_rcu(dev); /* If err equals -EHOSTUNREACH the error is due to a @@ -428,7 +424,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - rt = ip_route_output_key(dev_net(dev), &fl); + rt = ip_route_output(dev_net(dev), iph->daddr, 0, + RT_TOS(iph->tos), 0); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ @@ -742,6 +739,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } + if (br_parse_ip_options(skb)) + return NF_DROP; + /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f7afc364d777..19e2f46ed086 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -379,7 +379,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct sk_buff *skb); +extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 57186d84d2bd..7370d14f634d 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -375,12 +375,12 @@ static void br_make_forwarding(struct net_bridge_port *p) if (p->state != BR_STATE_BLOCKING) return; - if (br->forward_delay == 0) { + if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { p->state = BR_STATE_FORWARDING; br_topology_change_detection(br); del_timer(&p->forward_delay_timer); } - else if (p->br->stp_enabled == BR_KERNEL_STP) + else if (br->stp_enabled == BR_KERNEL_STP) p->state = BR_STATE_LISTENING; else p->state = BR_STATE_LEARNING; @@ -397,28 +397,37 @@ static void br_make_forwarding(struct net_bridge_port *p) void br_port_state_selection(struct net_bridge *br) { struct net_bridge_port *p; + unsigned int liveports = 0; /* Don't change port states if userspace is handling STP */ if (br->stp_enabled == BR_USER_STP) return; list_for_each_entry(p, &br->port_list, list) { - if (p->state != BR_STATE_DISABLED) { - if (p->port_no == br->root_port) { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_forwarding(p); - } else if (br_is_designated_port(p)) { - del_timer(&p->message_age_timer); - br_make_forwarding(p); - } else { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_blocking(p); - } + if (p->state == BR_STATE_DISABLED) + continue; + + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); } + if (p->state == BR_STATE_FORWARDING) + ++liveports; } + + if (liveports == 0) + netif_carrier_off(br->dev); + else + netif_carrier_on(br->dev); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7b22456023c5..3e965140051e 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -94,6 +94,7 @@ static void br_forward_delay_timer_expired(unsigned long arg) p->state = BR_STATE_FORWARDING; if (br_is_designated_for_some_port(br)) br_topology_change_detection(br); + netif_carrier_on(br->dev); } br_log_state(p); spin_unlock(&br->lock); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 35b36b86d762..05f357828a2f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } - con->out_keepalive_pending = false; con->in_seq = 0; con->in_seq_acked = 0; } @@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con) con->auth_retry); if (con->auth_retry == 2) { con->error_msg = "connect authorization failure"; - reset_connection(con); - set_bit(CLOSED, &con->state); return -1; } con->auth_retry = 1; @@ -1715,14 +1712,6 @@ more: /* open the socket first? */ if (con->sock == NULL) { - /* - * if we were STANDBY and are reconnecting _this_ - * connection, bump connect_seq now. Always bump - * global_seq. - */ - if (test_and_clear_bit(STANDBY, &con->state)) - con->connect_seq++; - prepare_write_banner(msgr, con); prepare_write_connect(msgr, con, 1); prepare_read_banner(con); @@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work) work.work); mutex_lock(&con->mutex); + if (test_and_clear_bit(BACKOFF, &con->state)) { + dout("con_work %p backing off\n", con); + if (queue_delayed_work(ceph_msgr_wq, &con->work, + round_jiffies_relative(con->delay))) { + dout("con_work %p backoff %lu\n", con, con->delay); + mutex_unlock(&con->mutex); + return; + } else { + con->ops->put(con); + dout("con_work %p FAILED to back off %lu\n", con, + con->delay); + } + } + if (test_bit(STANDBY, &con->state)) { + dout("con_work %p STANDBY\n", con); + goto done; + } if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ dout("con_work CLOSED\n"); con_close_socket(con); @@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con) /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); - /* If there are no messages in the queue, place the connection - * in a STANDBY state (i.e., don't try to reconnect just yet). */ - if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { - dout("fault setting STANDBY\n"); + /* If there are no messages queued or keepalive pending, place + * the connection in a STANDBY state */ + if (list_empty(&con->out_queue) && + !test_bit(KEEPALIVE_PENDING, &con->state)) { + dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); + clear_bit(WRITE_PENDING, &con->state); set_bit(STANDBY, &con->state); } else { /* retry after a delay. */ @@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - dout("fault queueing %p delay %lu\n", con, con->delay); con->ops->get(con); if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay)) == 0) + round_jiffies_relative(con->delay))) { + dout("fault queued %p delay %lu\n", con, con->delay); + } else { con->ops->put(con); + dout("fault failed to queue %p delay %lu, backoff\n", + con, con->delay); + /* + * In many cases we see a socket state change + * while con_work is running and end up + * queuing (non-delayed) work, such that we + * can't backoff with a delay. Set a flag so + * that when con_work restarts we schedule the + * delay then. + */ + set_bit(BACKOFF, &con->state); + } } out_unlock: @@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) } EXPORT_SYMBOL(ceph_messenger_destroy); +static void clear_standby(struct ceph_connection *con) +{ + /* come back from STANDBY? */ + if (test_and_clear_bit(STANDBY, &con->state)) { + mutex_lock(&con->mutex); + dout("clear_standby %p and ++connect_seq\n", con); + con->connect_seq++; + WARN_ON(test_bit(WRITE_PENDING, &con->state)); + WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); + mutex_unlock(&con->mutex); + } +} + /* * Queue up an outgoing message on the given connection. */ @@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ + clear_standby(con); if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } @@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) */ void ceph_con_keepalive(struct ceph_connection *con) { + dout("con_keepalive %p\n", con); + clear_standby(con); if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 1a040e64c69f..cd9c21df87d1 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data, int num_pages, bool write_page) { struct page **pages; - int rc; + int got = 0; + int rc = 0; pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); if (!pages) return ERR_PTR(-ENOMEM); down_read(¤t->mm->mmap_sem); - rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, write_page, 0, pages, NULL); + while (got < num_pages) { + rc = get_user_pages(current, current->mm, + (unsigned long)data + ((unsigned long)got * PAGE_SIZE), + num_pages - got, write_page, 0, pages + got, NULL); + if (rc < 0) + break; + BUG_ON(rc == 0); + got += rc; + } up_read(¤t->mm->mmap_sem); - if (rc < num_pages) + if (rc < 0) goto fail; return pages; fail: - ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); + ceph_put_page_vector(pages, got, false); return ERR_PTR(rc); } EXPORT_SYMBOL(ceph_get_direct_page_vector); diff --git a/net/core/dev.c b/net/core/dev.c index 9f66de9c0572..0b88eba97dab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1115,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change); void dev_load(struct net *net, const char *name) { struct net_device *dev; + int no_module; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); rcu_read_unlock(); - if (!dev && capable(CAP_NET_ADMIN)) - request_module("%s", name); + no_module = !dev; + if (no_module && capable(CAP_NET_ADMIN)) + no_module = request_module("netdev-%s", name); + if (no_module && capable(CAP_SYS_MODULE)) { + if (!request_module("%s", name)) + pr_err("Loading kernel module for a network device " +"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " +"instead\n", name); + } } EXPORT_SYMBOL(dev_load); @@ -3062,6 +3070,8 @@ out: * on a failure. * * The caller must hold the rtnl_mutex. + * + * For a general description of rx_handler, see enum rx_handler_result. */ int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, @@ -3121,6 +3131,7 @@ static int __netif_receive_skb(struct sk_buff *skb) rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *null_or_dev; + bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3173,18 +3184,22 @@ ncls: rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { - struct net_device *prev_dev; - if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - prev_dev = skb->dev; - skb = rx_handler(skb); - if (!skb) + switch (rx_handler(&skb)) { + case RX_HANDLER_CONSUMED: goto out; - if (skb->dev != prev_dev) + case RX_HANDLER_ANOTHER: goto another_round; + case RX_HANDLER_EXACT: + deliver_exact = true; + case RX_HANDLER_PASS: + break; + default: + BUG(); + } } if (vlan_tx_tag_present(skb)) { @@ -3202,7 +3217,7 @@ ncls: vlan_on_bond_hook(skb); /* deliver only exact match when indicated */ - null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL; + null_or_dev = deliver_exact ? skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 133fd22ea287..7b39f3ed2fda 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -357,8 +357,8 @@ EXPORT_SYMBOL(dev_addr_add_multiple); /** * dev_addr_del_multiple - Delete device addresses by another device * @to_dev: device where the addresses will be deleted - * @from_dev: device by which addresses the addresses will be deleted - * @addr_type: address type - 0 means type will used from from_dev + * @from_dev: device supplying the addresses to be deleted + * @addr_type: address type - 0 means type will be used from from_dev * * Deletes addresses in to device by the list of addresses in from device. * diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c1a71bb738da..a1086fb0c0c7 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1457,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (!dev->ethtool_ops->set_sg) + return -EOPNOTSUPP; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) return -EINVAL; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a20e5d3bbfa0..8248ebb5891d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->oif)) + if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; - if ((rule->mark ^ fl->mark) & rule->mark_mask) + if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; ret = ops->match(rule, fl, flags); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d73b77adb676..0c55eaa70e39 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2620,8 +2620,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, datalen -= sizeof(*pgh); if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen); + memset(skb_put(skb, datalen), 0, datalen); } else { int frags = pkt_dev->nfrags; int i, len; @@ -3271,7 +3270,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) pkt_dev->started_at); ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); - p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", + p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", (unsigned long long)ktime_to_us(elapsed), (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), (unsigned long long)ktime_to_us(idle), diff --git a/net/core/scm.c b/net/core/scm.c index bbe454450801..4c1ef026d695 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) int fd = fdp[i]; struct file *file; - if (fd < 0 || !(file = fget(fd))) + if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; *fpp++ = file; fpl->count++; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1eb526a848ff..801dd08908f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -523,7 +523,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; - new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 118392f3872e..3609eacaf4ce 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, Intel Corporation. + * Copyright (c) 2008-2011, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7882377bc62e..ae451c6d83ba 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -465,17 +465,18 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .fl4_dst = ip_hdr(skb)->saddr, - .fl4_src = ip_hdr(skb)->daddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .fl_ip_sport = dccp_hdr(skb)->dccph_dport, - .fl_ip_dport = dccp_hdr(skb)->dccph_sport - }; - - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_flow(net, &fl, sk); + struct flowi4 fl4 = { + .flowi4_oif = skb_rtable(skb)->rt_iif, + .daddr = ip_hdr(skb)->saddr, + .saddr = ip_hdr(skb)->daddr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .fl4_sport = dccp_hdr(skb)->dccph_dport, + .fl4_dport = dccp_hdr(skb)->dccph_sport, + }; + + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5efc57f5e605..de1b7e37ad5b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -147,22 +147,22 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct flowi fl; + struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; @@ -243,25 +243,25 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; struct in6_addr *final_p, final; - struct flowi fl; + struct flowi6 fl6; int err = -1; struct dst_entry *dst; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.fl6_flowlabel = 0; - fl.oif = ireq6->iif; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowlabel = 0; + fl6.flowi6_oif = ireq6->iif; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -275,8 +275,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, dh->dccph_checksum = dccp_v6_csum_finish(skb, &ireq6->loc_addr, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + err = ip6_xmit(sk, skb, &fl6, opt); err = net_xmit_eval(err); } @@ -298,7 +298,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { struct ipv6hdr *rxip6h; struct sk_buff *skb; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v6_ctl_sk; struct dst_entry *dst; @@ -317,21 +317,21 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, &rxip6h->daddr); - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr); + ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr); - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; - fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.flowi6_oif = inet6_iif(rxskb); + fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; + fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; + security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL); + ip6_xmit(ctl_sk, skb, &fl6, NULL); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); return; @@ -527,19 +527,19 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (dst == NULL) { struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_sk_classify_flow(sk, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + final_p = fl6_update_dst(&fl6, opt, &final); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) goto out; } @@ -859,7 +859,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -872,14 +872,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -916,7 +916,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 @@ -953,24 +953,24 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; + saddr = &fl6.saddr; ipv6_addr_copy(&np->rcv_saddr, saddr); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2af15b15d1fa..ea3b6ee21fc9 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -908,7 +908,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct socket *sock = sk->sk_socket; struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; - struct flowi fl; + struct flowidn fld; if (sock->state == SS_CONNECTED) goto out; @@ -947,13 +947,13 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn)); err = -EHOSTUNREACH; - memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; - fl.fld_dst = dn_saddr2dn(&scp->peer); - fl.fld_src = dn_saddr2dn(&scp->addr); - dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0) + memset(&fld, 0, sizeof(fld)); + fld.flowidn_oif = sk->sk_bound_dev_if; + fld.daddr = dn_saddr2dn(&scp->peer); + fld.saddr = dn_saddr2dn(&scp->addr); + dn_sk_ports_copy(&fld, scp); + fld.flowidn_proto = DNPROTO_NSP; + if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; sk->sk_route_caps = sk->sk_dst_cache->dev->features; sock->state = SS_CONNECTING; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 0ef0a81bcd72..1c74ed36ce8f 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -201,7 +201,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct int err; if (nh->nh_gw) { - struct flowi fl; + struct flowidn fld; struct dn_fib_res res; if (nh->nh_flags&RTNH_F_ONLINK) { @@ -221,15 +221,15 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct return 0; } - memset(&fl, 0, sizeof(fl)); - fl.fld_dst = nh->nh_gw; - fl.oif = nh->nh_oif; - fl.fld_scope = r->rtm_scope + 1; + memset(&fld, 0, sizeof(fld)); + fld.daddr = nh->nh_gw; + fld.flowidn_oif = nh->nh_oif; + fld.flowidn_scope = r->rtm_scope + 1; - if (fl.fld_scope < RT_SCOPE_LINK) - fl.fld_scope = RT_SCOPE_LINK; + if (fld.flowidn_scope < RT_SCOPE_LINK) + fld.flowidn_scope = RT_SCOPE_LINK; - if ((err = dn_fib_lookup(&fl, &res)) != 0) + if ((err = dn_fib_lookup(&fld, &res)) != 0) return err; err = -EINVAL; @@ -404,7 +404,7 @@ failure: return NULL; } -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *fl, struct dn_fib_res *res) +int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res) { int err = dn_fib_props[type].error; @@ -424,7 +424,8 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * for_nexthops(fi) { if (nh->nh_flags & RTNH_F_DEAD) continue; - if (!fl->oif || fl->oif == nh->nh_oif) + if (!fld->flowidn_oif || + fld->flowidn_oif == nh->nh_oif) break; } if (nhsel < fi->fib_nhs) { @@ -445,7 +446,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * return err; } -void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res) +void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; int w; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 2ef115277bea..bd78836a81eb 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -78,7 +78,7 @@ static void dn_nsp_send(struct sk_buff *skb) struct sock *sk = skb->sk; struct dn_scp *scp = DN_SK(sk); struct dst_entry *dst; - struct flowi fl; + struct flowidn fld; skb_reset_transport_header(skb); scp->stamp = jiffies; @@ -91,13 +91,13 @@ try_again: return; } - memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; - fl.fld_src = dn_saddr2dn(&scp->addr); - fl.fld_dst = dn_saddr2dn(&scp->peer); - dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) { + memset(&fld, 0, sizeof(fld)); + fld.flowidn_oif = sk->sk_bound_dev_if; + fld.saddr = dn_saddr2dn(&scp->addr); + fld.daddr = dn_saddr2dn(&scp->peer); + dn_sk_ports_copy(&fld, scp); + fld.flowidn_proto = DNPROTO_NSP; + if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) { dst = sk_dst_get(sk); sk->sk_route_caps = dst->dev->features; goto try_again; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 484fdbf92bd8..9f09d4fc2880 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -282,14 +282,14 @@ static void dn_dst_link_failure(struct sk_buff *skb) { } -static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) +static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2) { - return ((fl1->fld_dst ^ fl2->fld_dst) | - (fl1->fld_src ^ fl2->fld_src) | - (fl1->mark ^ fl2->mark) | - (fl1->fld_scope ^ fl2->fld_scope) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return ((fl1->daddr ^ fl2->daddr) | + (fl1->saddr ^ fl2->saddr) | + (fl1->flowidn_mark ^ fl2->flowidn_mark) | + (fl1->flowidn_scope ^ fl2->flowidn_scope) | + (fl1->flowidn_oif ^ fl2->flowidn_oif) | + (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) @@ -303,7 +303,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * spin_lock_bh(&dn_rt_hash_table[hash].lock); while ((rth = rcu_dereference_protected(*rthp, lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { - if (compare_keys(&rth->fl, &rt->fl)) { + if (compare_keys(&rth->fld, &rt->fld)) { /* Put it first */ *rthp = rth->dst.dn_next; rcu_assign_pointer(rth->dst.dn_next, @@ -903,14 +903,16 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re return (daddr&~mask)|res->fi->fib_nh->nh_gw; } -static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) +static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard) { - struct flowi fl = { .fld_dst = oldflp->fld_dst, - .fld_src = oldflp->fld_src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = oldflp->mark, - .iif = init_net.loopback_dev->ifindex, - .oif = oldflp->oif }; + struct flowidn fld = { + .daddr = oldflp->daddr, + .saddr = oldflp->saddr, + .flowidn_scope = RT_SCOPE_UNIVERSE, + .flowidn_mark = oldflp->flowidn_mark, + .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_oif = oldflp->flowidn_oif, + }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; struct neighbour *neigh = NULL; @@ -924,13 +926,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), - le16_to_cpu(oldflp->fld_src), - oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); + " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), + le16_to_cpu(oldflp->saddr), + oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_oif); /* If we have an output interface, verify its a DECnet device */ - if (oldflp->oif) { - dev_out = dev_get_by_index(&init_net, oldflp->oif); + if (oldflp->flowidn_oif) { + dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -941,11 +944,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old } /* If we have a source address, verify that its a local address */ - if (oldflp->fld_src) { + if (oldflp->saddr) { err = -EADDRNOTAVAIL; if (dev_out) { - if (dn_dev_islocal(dev_out, oldflp->fld_src)) + if (dn_dev_islocal(dev_out, oldflp->saddr)) goto source_ok; dev_put(dev_out); goto out; @@ -954,11 +957,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; - if (!dn_dev_islocal(dev, oldflp->fld_src)) + if (!dn_dev_islocal(dev, oldflp->saddr)) continue; if ((dev->flags & IFF_LOOPBACK) && - oldflp->fld_dst && - !dn_dev_islocal(dev, oldflp->fld_dst)) + oldflp->daddr && + !dn_dev_islocal(dev, oldflp->daddr)) continue; dev_out = dev; @@ -973,22 +976,22 @@ source_ok: } /* No destination? Assume its local */ - if (!fl.fld_dst) { - fl.fld_dst = fl.fld_src; + if (!fld.daddr) { + fld.daddr = fld.saddr; err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - if (!fl.fld_dst) { - fl.fld_dst = - fl.fld_src = dnet_select_source(dev_out, 0, + if (!fld.daddr) { + fld.daddr = + fld.saddr = dnet_select_source(dev_out, 0, RT_SCOPE_HOST); - if (!fl.fld_dst) + if (!fld.daddr) goto out; } - fl.oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -997,8 +1000,8 @@ source_ok: printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", - le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), - fl.oif, try_hard); + le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), + fld.flowidn_oif, try_hard); /* * N.B. If the kernel is compiled without router support then @@ -1006,7 +1009,7 @@ source_ok: * will always be executed. */ err = -ESRCH; - if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) { + if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) { struct dn_dev *dn_db; if (err != -ESRCH) goto out; @@ -1021,19 +1024,19 @@ source_ok: * here */ if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); + neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr); if (neigh) { - if ((oldflp->oif && - (neigh->dev->ifindex != oldflp->oif)) || - (oldflp->fld_src && + if ((oldflp->flowidn_oif && + (neigh->dev->ifindex != oldflp->flowidn_oif)) || + (oldflp->saddr && (!dn_dev_islocal(neigh->dev, - oldflp->fld_src)))) { + oldflp->saddr)))) { neigh_release(neigh); neigh = NULL; } else { if (dev_out) dev_put(dev_out); - if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { + if (dn_dev_islocal(neigh->dev, fld.daddr)) { dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; } else { @@ -1053,7 +1056,7 @@ source_ok: goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); /* Possible improvement - check all devices for local addr */ - if (dn_dev_islocal(dev_out, fl.fld_dst)) { + if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); @@ -1069,16 +1072,16 @@ select_source: if (neigh) gateway = ((struct dn_neigh *)neigh)->addr; if (gateway == 0) - gateway = fl.fld_dst; - if (fl.fld_src == 0) { - fl.fld_src = dnet_select_source(dev_out, gateway, - res.type == RTN_LOCAL ? - RT_SCOPE_HOST : - RT_SCOPE_LINK); - if (fl.fld_src == 0 && res.type != RTN_LOCAL) + gateway = fld.daddr; + if (fld.saddr == 0) { + fld.saddr = dnet_select_source(dev_out, gateway, + res.type == RTN_LOCAL ? + RT_SCOPE_HOST : + RT_SCOPE_LINK); + if (fld.saddr == 0 && res.type != RTN_LOCAL) goto e_addr; } - fl.oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; goto make_route; } free_res = 1; @@ -1087,35 +1090,35 @@ select_source: goto e_inval; if (res.type == RTN_LOCAL) { - if (!fl.fld_src) - fl.fld_src = fl.fld_dst; + if (!fld.saddr) + fld.saddr = fld.daddr; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); res.fi = NULL; goto make_route; } - if (res.fi->fib_nhs > 1 && fl.oif == 0) - dn_fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) + dn_fib_select_multipath(&fld, &res); /* * We could add some logic to deal with default routes here and * get rid of some of the special casing above. */ - if (!fl.fld_src) - fl.fld_src = DN_FIB_RES_PREFSRC(res); + if (!fld.saddr) + fld.saddr = DN_FIB_RES_PREFSRC(res); if (dev_out) dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; gateway = DN_FIB_RES_GW(res); make_route: @@ -1129,19 +1132,19 @@ make_route: atomic_set(&rt->dst.__refcnt, 1); rt->dst.flags = DST_HOST; - rt->fl.fld_src = oldflp->fld_src; - rt->fl.fld_dst = oldflp->fld_dst; - rt->fl.oif = oldflp->oif; - rt->fl.iif = 0; - rt->fl.mark = oldflp->mark; + rt->fld.saddr = oldflp->saddr; + rt->fld.daddr = oldflp->daddr; + rt->fld.flowidn_oif = oldflp->flowidn_oif; + rt->fld.flowidn_iif = 0; + rt->fld.flowidn_mark = oldflp->flowidn_mark; - rt->rt_saddr = fl.fld_src; - rt->rt_daddr = fl.fld_dst; - rt->rt_gateway = gateway ? gateway : fl.fld_dst; - rt->rt_local_src = fl.fld_src; + rt->rt_saddr = fld.saddr; + rt->rt_daddr = fld.daddr; + rt->rt_gateway = gateway ? gateway : fld.daddr; + rt->rt_local_src = fld.saddr; - rt->rt_dst_map = fl.fld_dst; - rt->rt_src_map = fl.fld_src; + rt->rt_dst_map = fld.daddr; + rt->rt_src_map = fld.saddr; rt->dst.dev = dev_out; dev_hold(dev_out); @@ -1159,7 +1162,7 @@ make_route: if (err) goto e_neighbour; - hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); + hash = dn_hash(rt->fld.saddr, rt->fld.daddr); dn_insert_route(rt, hash, (struct dn_route **)pprt); done: @@ -1190,20 +1193,20 @@ e_neighbour: /* * N.B. The flags may be moved into the flowi at some future stage. */ -static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags) +static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags) { - unsigned hash = dn_hash(flp->fld_src, flp->fld_dst); + unsigned hash = dn_hash(flp->saddr, flp->daddr); struct dn_route *rt = NULL; if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; rt = rcu_dereference_bh(rt->dst.dn_next)) { - if ((flp->fld_dst == rt->fl.fld_dst) && - (flp->fld_src == rt->fl.fld_src) && - (flp->mark == rt->fl.mark) && + if ((flp->daddr == rt->fld.daddr) && + (flp->saddr == rt->fld.saddr) && + (flp->flowidn_mark == rt->fld.flowidn_mark) && dn_is_output_route(rt) && - (rt->fl.oif == flp->oif)) { + (rt->fld.flowidn_oif == flp->flowidn_oif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; @@ -1216,13 +1219,14 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl return dn_route_output_slow(pprt, flp, flags); } -static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags) +static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags) { int err; err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->proto) { - *pprt = xfrm_lookup(&init_net, *pprt, flp, NULL, 0); + if (err == 0 && flp->flowidn_proto) { + *pprt = xfrm_lookup(&init_net, *pprt, + flowidn_to_flowi(flp), NULL, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); *pprt = NULL; @@ -1231,15 +1235,16 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->proto) { + if (err == 0 && fl->flowidn_proto) { if (!(flags & MSG_DONTWAIT)) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; - *pprt = xfrm_lookup(&init_net, *pprt, fl, sk, 0); + fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP; + *pprt = xfrm_lookup(&init_net, *pprt, + flowidn_to_flowi(fl), sk, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); *pprt = NULL; @@ -1260,11 +1265,13 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .fld_dst = cb->dst, - .fld_src = cb->src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = skb->dev->ifindex }; + struct flowidn fld = { + .daddr = cb->dst, + .saddr = cb->src, + .flowidn_scope = RT_SCOPE_UNIVERSE, + .flowidn_mark = skb->mark, + .flowidn_iif = skb->dev->ifindex, + }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; int err = -EINVAL; int free_res = 0; @@ -1275,7 +1282,7 @@ static int dn_route_input_slow(struct sk_buff *skb) goto out; /* Zero source addresses are not allowed */ - if (fl.fld_src == 0) + if (fld.saddr == 0) goto out; /* @@ -1289,7 +1296,7 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_dev_islocal(in_dev, cb->src)) goto out; - err = dn_fib_lookup(&fl, &res); + err = dn_fib_lookup(&fld, &res); if (err) { if (err != -ESRCH) goto out; @@ -1301,7 +1308,7 @@ static int dn_route_input_slow(struct sk_buff *skb) res.type = RTN_LOCAL; } else { - __le16 src_map = fl.fld_src; + __le16 src_map = fld.saddr; free_res = 1; out_dev = DN_FIB_RES_DEV(res); @@ -1314,22 +1321,22 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = fl.fld_src; /* no NAT support for now */ + src_map = fld.saddr; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { - fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res); + fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res); dn_fib_res_put(&res); free_res = 0; - if (dn_fib_lookup(&fl, &res)) + if (dn_fib_lookup(&fld, &res)) goto e_inval; free_res = 1; if (res.type != RTN_UNICAST) goto e_inval; flags |= RTCF_DNAT; - gateway = fl.fld_dst; + gateway = fld.daddr; } - fl.fld_src = src_map; + fld.saddr = src_map; } switch(res.type) { @@ -1343,8 +1350,8 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_db->parms.forwarding == 0) goto e_inval; - if (res.fi->fib_nhs > 1 && fl.oif == 0) - dn_fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) + dn_fib_select_multipath(&fld, &res); /* * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT @@ -1362,8 +1369,8 @@ static int dn_route_input_slow(struct sk_buff *skb) break; case RTN_LOCAL: flags |= RTCF_LOCAL; - fl.fld_src = cb->dst; - fl.fld_dst = cb->src; + fld.saddr = cb->dst; + fld.daddr = cb->src; /* Routing tables gave us a gateway */ if (gateway) @@ -1396,21 +1403,21 @@ make_route: if (rt == NULL) goto e_nobufs; - rt->rt_saddr = fl.fld_src; - rt->rt_daddr = fl.fld_dst; - rt->rt_gateway = fl.fld_dst; + rt->rt_saddr = fld.saddr; + rt->rt_daddr = fld.daddr; + rt->rt_gateway = fld.daddr; if (gateway) rt->rt_gateway = gateway; rt->rt_local_src = local_src ? local_src : rt->rt_saddr; - rt->rt_dst_map = fl.fld_dst; - rt->rt_src_map = fl.fld_src; + rt->rt_dst_map = fld.daddr; + rt->rt_src_map = fld.saddr; - rt->fl.fld_src = cb->src; - rt->fl.fld_dst = cb->dst; - rt->fl.oif = 0; - rt->fl.iif = in_dev->ifindex; - rt->fl.mark = fl.mark; + rt->fld.saddr = cb->src; + rt->fld.daddr = cb->dst; + rt->fld.flowidn_oif = 0; + rt->fld.flowidn_iif = in_dev->ifindex; + rt->fld.flowidn_mark = fld.flowidn_mark; rt->dst.flags = DST_HOST; rt->dst.neighbour = neigh; @@ -1440,7 +1447,7 @@ make_route: if (err) goto e_neighbour; - hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); + hash = dn_hash(rt->fld.saddr, rt->fld.daddr); dn_insert_route(rt, hash, &rt); skb_dst_set(skb, &rt->dst); @@ -1480,11 +1487,11 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; rt = rcu_dereference(rt->dst.dn_next)) { - if ((rt->fl.fld_src == cb->src) && - (rt->fl.fld_dst == cb->dst) && - (rt->fl.oif == 0) && - (rt->fl.mark == skb->mark) && - (rt->fl.iif == cb->iif)) { + if ((rt->fld.saddr == cb->src) && + (rt->fld.daddr == cb->dst) && + (rt->fld.flowidn_oif == 0) && + (rt->fld.flowidn_mark == skb->mark) && + (rt->fld.flowidn_iif == cb->iif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); @@ -1520,9 +1527,9 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr); - if (rt->fl.fld_src) { + if (rt->fld.saddr) { r->rtm_src_len = 16; - RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); + RTA_PUT(skb, RTA_SRC, 2, &rt->fld.saddr); } if (rt->dst.dev) RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); @@ -1541,7 +1548,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, rt->dst.error) < 0) goto rtattr_failure; if (dn_is_input_route(rt)) - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fld.flowidn_iif); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -1564,13 +1571,13 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void struct dn_skb_cb *cb; int err; struct sk_buff *skb; - struct flowi fl; + struct flowidn fld; if (!net_eq(net, &init_net)) return -EINVAL; - memset(&fl, 0, sizeof(fl)); - fl.proto = DNPROTO_NSP; + memset(&fld, 0, sizeof(fld)); + fld.flowidn_proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) @@ -1579,15 +1586,15 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void cb = DN_SKB_CB(skb); if (rta[RTA_SRC-1]) - memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2); + memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); if (rta[RTA_DST-1]) - memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); + memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); if (rta[RTA_IIF-1]) - memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); - if (fl.iif) { + if (fld.flowidn_iif) { struct net_device *dev; - if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fld.flowidn_iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1598,8 +1605,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void } skb->protocol = htons(ETH_P_DNA_RT); skb->dev = dev; - cb->src = fl.fld_src; - cb->dst = fl.fld_dst; + cb->src = fld.saddr; + cb->dst = fld.daddr; local_bh_disable(); err = dn_route_input(skb); local_bh_enable(); @@ -1611,8 +1618,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void int oif = 0; if (rta[RTA_OIF - 1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; - err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); + fld.flowidn_oif = oif; + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } if (skb->dev) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6eb91df3c550..f0efb0ccfeca 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -49,14 +49,15 @@ struct dn_fib_rule }; -int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) +int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res) { struct fib_lookup_arg arg = { .result = res, }; int err; - err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(dn_fib_rules_ops, + flowidn_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; @@ -65,6 +66,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct flowidn *fld = &flp->u.dn; int err = -EAGAIN; struct dn_fib_table *tbl; @@ -90,7 +92,7 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, if (tbl == NULL) goto errout; - err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result); if (err > 0) err = -EAGAIN; errout: @@ -104,8 +106,9 @@ static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = { static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - __le16 daddr = fl->fld_dst; - __le16 saddr = fl->fld_src; + struct flowidn *fld = &fl->u.dn; + __le16 daddr = fld->daddr; + __le16 saddr = fld->saddr; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) @@ -175,7 +178,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .fld_dst = addr }; + struct flowidn fld = { .daddr = addr }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); @@ -183,7 +186,7 @@ unsigned dnet_addr_type(__le16 addr) res.r = NULL; if (tb) { - if (!tb->lookup(tb, &fl, &res)) { + if (!tb->lookup(tb, &fld, &res)) { ret = res.type; dn_fib_res_put(&res); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index b66600b3f4b5..99d8d3a40998 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -764,7 +764,7 @@ static int dn_fib_table_flush(struct dn_fib_table *tb) return found; } -static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res) { int err; struct dn_zone *dz; @@ -773,7 +773,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, read_lock(&dn_fib_tables_lock); for(dz = t->dh_zone_list; dz; dz = dz->dz_next) { struct dn_fib_node *f; - dn_fib_key_t k = dz_key(flp->fld_dst, dz); + dn_fib_key_t k = dz_key(flp->daddr, dz); for(f = dz_chain(k, dz); f; f = f->fn_next) { if (!dn_key_eq(k, f->fn_key)) { @@ -788,7 +788,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, if (f->fn_state&DN_S_ZOMBIE) continue; - if (f->fn_scope < flp->fld_scope) + if (f->fn_scope < flp->flowidn_scope) continue; err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res); diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 83277f463af7..8f4ff5a2c813 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c @@ -18,7 +18,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - return mdiobus_read(ds->master_mii_bus, addr, reg); + return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg); } #define REG_READ(addr, reg) \ @@ -34,7 +34,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - return mdiobus_write(ds->master_mii_bus, addr, reg, val); + return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr, + reg, val); } #define REG_WRITE(addr, reg, val) \ @@ -50,7 +51,7 @@ static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) { int ret; - ret = mdiobus_read(bus, REG_PORT(0), 0x03); + ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; if (ret == 0x0600) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 0c2826337919..116d3fd3d669 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpdest.sin_addr.s_addr = htonl(network | addr.station); } + memset(&ah, 0, sizeof(ah)); ah.port = port; ah.cb = cb & 0x7f; ah.code = 2; /* magic */ - ah.pad = 0; /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 35a502055018..807d83c02ef6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1157,22 +1157,10 @@ int inet_sk_rebuild_header(struct sock *sk) daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; - { - struct flowi fl = { - .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport, - }; - - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - } + rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, + inet->inet_dport, inet->inet_sport, + sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); if (!IS_ERR(rt)) { err = 0; sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 325053df6e70..4286fd3cc0e2 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -208,7 +208,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fa9988da1da4..090d273d7865 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -433,14 +433,12 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .fl4_dst = sip, - .fl4_src = tip }; struct rtable *rt; int flag = 0; /*unsigned long now; */ struct net *net = dev_net(dev); - rt = ip_route_output_key(net, &fl); + rt = ip_route_output(net, sip, tip, 0, 0); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { @@ -1062,9 +1060,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = RTO_ONLINK }; - struct rtable *rt = ip_route_output_key(net, &fl); + struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1185,9 +1181,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = RTO_ONLINK }; - struct rtable *rt = ip_route_output_key(net, &fl); + struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); dev = rt->dst.dev; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ff53860d1e56..6d85800daeb7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -741,7 +741,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifap = &ifa->ifa_next) { if (!strcmp(ifr.ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == - ifa->ifa_address) { + ifa->ifa_local) { break; /* found */ } } @@ -1112,8 +1112,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, return; arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_address, dev, - ifa->ifa_address, NULL, + ifa->ifa_local, dev, + ifa->ifa_local, NULL, dev->dev_addr, NULL); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e42a905180f0..03f994bcf7de 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) { unsigned int len; - len = crypto_aead_ivsize(aead); + len = seqhilen; + + len += crypto_aead_ivsize(aead); + if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); @@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) return kmalloc(len, GFP_ATOMIC); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +static inline __be32 *esp_tmp_seqhi(void *tmp) +{ + return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); +} +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; + PTR_ALIGN((u8 *)tmp + seqhilen, + crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_givcrypt_request *esp_tmp_givreq( @@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int plen; int tfclen; int nfrags; + int assoclen; + int sglists; + int seqhilen; + __be32 *seqhi; /* skb is pure payload to encrypt */ @@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; nfrags = err; - tmp = esp_alloc_tmp(aead, nfrags + 1); + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto error; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_givreq(aead, iv); asg = esp_givreq_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -215,19 +238,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, esph->enc_data + crypto_aead_ivsize(aead) - skb->data, clen + alen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output); + XFRM_SKB_CB(skb)->seq.output.low); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); @@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; + int assoclen; + int sglists; + int seqhilen; + __be32 *seqhi; void *tmp; u8 *iv; struct scatterlist *sg; @@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; nfrags = err; + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + err = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + 1); + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; skb->ip_summed = CHECKSUM_NONE; @@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, sizeof(*esph)); + aead_request_set_assoc(req, asg, assoclen); err = crypto_aead_decrypt(req); if (err == -EINPROGRESS) @@ -500,10 +554,20 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; err = -ENAMETOOLONG; - if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", - x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + + if ((x->props.flags & XFRM_STATE_ESN)) { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authencesn(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } else { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d2233cd99e6..a373a259253c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -140,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - struct flowi fl = { .fl4_dst = addr }; + struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; unsigned ret = RTN_BROADCAST; struct fib_table *local_table; @@ -158,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, if (local_table) { ret = RTN_UNICAST; rcu_read_lock(); - if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { + if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } @@ -193,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, u32 *itag, u32 mark) { struct in_device *in_dev; - struct flowi fl = { - .fl4_dst = src, - .fl4_src = dst, - .fl4_tos = tos, - .mark = mark, - .iif = oif - }; + struct flowi4 fl4; struct fib_result res; int no_addr, rpf, accept_local; bool dev_match; int ret; struct net *net; + fl4.flowi4_oif = 0; + fl4.flowi4_iif = oif; + fl4.flowi4_mark = mark; + fl4.daddr = src; + fl4.saddr = dst; + fl4.flowi4_tos = tos; + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + no_addr = rpf = accept_local = 0; in_dev = __in_dev_get_rcu(dev); if (in_dev) { @@ -213,14 +215,14 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl.mark = 0; + fl4.flowi4_mark = 0; } if (in_dev == NULL) goto e_inval; net = dev_net(dev); - if (fib_lookup(net, &fl, &res)) + if (fib_lookup(net, &fl4, &res)) goto last_resort; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !accept_local) @@ -251,10 +253,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (rpf == 1) goto e_rpf; - fl.oif = dev->ifindex; + fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl, &res) == 0) { + if (fib_lookup(net, &fl4, &res) == 0) { if (res.type == RTN_UNICAST) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; @@ -794,11 +796,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) { struct fib_result res; - struct flowi fl = { - .mark = frn->fl_mark, - .fl4_dst = frn->fl_addr, - .fl4_tos = frn->fl_tos, - .fl4_scope = frn->fl_scope, + struct flowi4 fl4 = { + .flowi4_mark = frn->fl_mark, + .daddr = frn->fl_addr, + .flowi4_tos = frn->fl_tos, + .flowi4_scope = frn->fl_scope, }; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -811,7 +813,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) frn->tb_id = tb->tb_id; rcu_read_lock(); - frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); + frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 84db2da5c848..4ec323875a02 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -25,9 +25,6 @@ static inline void fib_alias_accessed(struct fib_alias *fa) } /* Exported by fib_semantics.c */ -extern int fib_semantic_match(struct fib_table *tb, struct list_head *head, - const struct flowi *flp, - struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 3018efbaea77..a53bb1b5b118 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -53,7 +53,7 @@ u32 fib_rules_tclass(const struct fib_result *res) } #endif -int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) +int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, @@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) }; int err; - err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; @@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if (!tbl) goto errout; - err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); + err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); if (err > 0) err = -EAGAIN; errout: @@ -106,14 +106,15 @@ errout: static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib4_rule *r = (struct fib4_rule *) rule; - __be32 daddr = fl->fl4_dst; - __be32 saddr = fl->fl4_src; + struct flowi4 *fl4 = &fl->u.ip4; + __be32 daddr = fl4->daddr; + __be32 saddr = fl4->saddr; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) return 0; - if (r->tos && (r->tos != fl->fl4_tos)) + if (r->tos && (r->tos != fl4->flowi4_tos)) return 0; return 1; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 952c737f2a27..622ac4c95026 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -560,16 +560,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } rcu_read_lock(); { - struct flowi fl = { - .fl4_dst = nh->nh_gw, - .fl4_scope = cfg->fc_scope + 1, - .oif = nh->nh_oif, + struct flowi4 fl4 = { + .daddr = nh->nh_gw, + .flowi4_scope = cfg->fc_scope + 1, + .flowi4_oif = nh->nh_oif, }; /* It is not necessary, but requires a bit of thinking */ - if (fl.fl4_scope < RT_SCOPE_LINK) - fl.fl4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl, &res); + if (fl4.flowi4_scope < RT_SCOPE_LINK) + fl4.flowi4_scope = RT_SCOPE_LINK; + err = fib_lookup(net, &fl4, &res); if (err) { rcu_read_unlock(); return err; @@ -854,9 +854,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } change_nexthops(fi) { + nexthop_nh->nh_cfg_scope = cfg->fc_scope; nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, nexthop_nh->nh_gw, - nexthop_nh->nh_scope); + nexthop_nh->nh_cfg_scope); } endfor_nexthops(fi) link_it: @@ -1141,7 +1142,7 @@ void fib_update_nh_saddrs(struct net_device *dev) continue; nh->nh_saddr = inet_select_addr(nh->nh_dev, nh->nh_gw, - nh->nh_scope); + nh->nh_cfg_scope); } } @@ -1209,7 +1210,7 @@ int fib_sync_up(struct net_device *dev) * The algorithm is suboptimal, but it provides really * fair weighted route distribution. */ -void fib_select_multipath(const struct flowi *flp, struct fib_result *res) +void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; int w; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a4109a544778..3d28a35c2e1a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1341,7 +1341,7 @@ err: /* should be called with rcu_read_lock */ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, - t_key key, const struct flowi *flp, + t_key key, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct leaf_info *li; @@ -1360,9 +1360,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, struct fib_info *fi = fa->fa_info; int nhsel, err; - if (fa->fa_tos && fa->fa_tos != flp->fl4_tos) + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; - if (fa->fa_scope < flp->fl4_scope) + if (fa->fa_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; @@ -1379,7 +1379,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (nh->nh_flags & RTNH_F_DEAD) continue; - if (flp->oif && flp->oif != nh->nh_oif) + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1406,7 +1406,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, return 1; } -int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, +int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; @@ -1414,7 +1414,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, struct rt_trie_node *n; struct tnode *pn; unsigned int pos, bits; - t_key key = ntohl(flp->fl4_dst); + t_key key = ntohl(flp->daddr); unsigned int chopped_off; t_key cindex = 0; unsigned int current_prefix_length = KEYLENGTH; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1771ce662548..a91dc1611081 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -353,12 +353,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .fl4_dst= daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .proto = IPPROTO_ICMP }; - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_key(net, &fl); + struct flowi4 fl4 = { + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_proto = IPPROTO_ICMP, + }; + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; } @@ -376,30 +378,31 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, int type, int code, struct icmp_bxm *param) { - struct flowi fl = { - .fl4_dst = (param->replyopts.srr ? - param->replyopts.faddr : iph->saddr), - .fl4_src = saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_ICMP, - .fl_icmp_type = type, - .fl_icmp_code = code, + struct flowi4 fl4 = { + .daddr = (param->replyopts.srr ? + param->replyopts.faddr : iph->saddr), + .saddr = saddr, + .flowi4_tos = RT_TOS(tos), + .flowi4_proto = IPPROTO_ICMP, + .fl4_icmp_type = type, + .fl4_icmp_code = code, }; struct rtable *rt, *rt2; int err; - security_skb_classify_flow(skb_in, &fl); - rt = __ip_route_output_key(net, &fl); + security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); + rt = __ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return rt; /* No need to clone since we're just using its address. */ rt2 = rt; - if (!fl.fl4_src) - fl.fl4_src = rt->rt_src; + if (!fl4.saddr) + fl4.saddr = rt->rt_src; - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, &fl, NULL, 0); + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(&fl4), NULL, 0); if (!IS_ERR(rt)) { if (rt != rt2) return rt; @@ -408,27 +411,27 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, } else return rt; - err = xfrm_decode_session_reverse(skb_in, &fl, AF_INET); + err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); if (err) goto relookup_failed; - if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) { - rt2 = __ip_route_output_key(net, &fl); + if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { + rt2 = __ip_route_output_key(net, &fl4); if (IS_ERR(rt2)) err = PTR_ERR(rt2); } else { - struct flowi fl2 = {}; + struct flowi4 fl4_2 = {}; unsigned long orefdst; - fl2.fl4_dst = fl.fl4_src; - rt2 = ip_route_output_key(net, &fl2); + fl4_2.daddr = fl4.saddr; + rt2 = ip_route_output_key(net, &fl4_2); if (IS_ERR(rt2)) { err = PTR_ERR(rt2); goto relookup_failed; } /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ - err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, + err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, RT_TOS(tos), rt2->dst.dev); dst_release(&rt2->dst); @@ -439,7 +442,9 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, if (err) goto relookup_failed; - rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, &fl, NULL, XFRM_LOOKUP_ICMP); + rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, + flowi4_to_flowi(&fl4), NULL, + XFRM_LOOKUP_ICMP); if (!IS_ERR(rt2)) { dst_release(&rt->dst); rt = rt2; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44ba9068b72f..1fd3d9ce8398 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -321,15 +321,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) } igmp_skb_size(skb) = size; - { - struct flowi fl = { .oif = dev->ifindex, - .fl4_dst = IGMPV3_ALL_MCR, - .proto = IPPROTO_IGMP }; - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) { - kfree_skb(skb); - return NULL; - } + rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) { + kfree_skb(skb); + return NULL; } if (rt->rt_src == 0) { kfree_skb(skb); @@ -667,14 +664,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, else dst = group; - { - struct flowi fl = { .oif = dev->ifindex, - .fl4_dst = dst, - .proto = IPPROTO_IGMP }; - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) - return -1; - } + rt = ip_route_output_ports(net, NULL, dst, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) + return -1; + if (rt->rt_src == 0) { ip_rt_put(rt); return -1; @@ -1441,7 +1436,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { - struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; struct net_device *dev = NULL; struct in_device *idev = NULL; @@ -1456,7 +1450,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) } if (!dev) { - struct rtable *rt = ip_route_output_key(net, &fl); + struct rtable *rt = ip_route_output(net, + imr->imr_multiaddr.s_addr, + 0, 0, 0); if (!IS_ERR(rt)) { dev = rt->dst.dev; ip_rt_put(rt); @@ -2333,13 +2329,13 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) +/* called with rcu_read_lock() */ +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; struct ip_sf_list *psf; int rv = 0; - rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; @@ -2361,7 +2357,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - rcu_read_unlock(); return rv; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4e301a61c5b..6c0b7f4a3d7d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,20 +356,22 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet_sk(sk)->inet_sport, - .fl_ip_dport = ireq->rmt_port }; + struct flowi4 fl4 = { + .flowi4_oif = sk->sk_bound_dev_if, + .flowi4_mark = sk->sk_mark, + .daddr = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .saddr = ireq->loc_addr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = inet_sk_flowi_flags(sk), + .fl4_sport = inet_sk(sk)->inet_sport, + .fl4_dport = ireq->rmt_port, + }; struct net *net = sock_net(sk); - security_req_classify_flow(req, &fl); - rt = ip_route_output_flow(net, &fl, sk); + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f604ffdbea27..dd1b20eca1a2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -206,16 +206,16 @@ static int addr_compare(const struct inetpeer_addr *a, }) /* - * Called with rcu_read_lock_bh() + * Called with rcu_read_lock() * Because we hold no lock against a writer, its quite possible we fall * in an endless loop. * But every pointer we follow is guaranteed to be valid thanks to RCU. * We exit from this function if number of links exceeds PEER_MAXDEPTH */ -static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, + struct inet_peer_base *base) { - struct inet_peer *u = rcu_dereference_bh(base->root); + struct inet_peer *u = rcu_dereference(base->root); int count = 0; while (u != peer_avl_empty) { @@ -231,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, return u; } if (cmp == -1) - u = rcu_dereference_bh(u->avl_left); + u = rcu_dereference(u->avl_left); else - u = rcu_dereference_bh(u->avl_right); + u = rcu_dereference(u->avl_right); if (unlikely(++count == PEER_MAXDEPTH)) break; } @@ -399,7 +399,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) write_sequnlock_bh(&base->lock); if (do_free) - call_rcu_bh(&p->rcu, inetpeer_free_rcu); + call_rcu(&p->rcu, inetpeer_free_rcu); else /* The node is used again. Decrease the reference counter * back. The loop "cleanup -> unlink_from_unused @@ -470,11 +470,11 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. */ - rcu_read_lock_bh(); + rcu_read_lock(); sequence = read_seqbegin(&base->lock); - p = lookup_rcu_bh(daddr, base); + p = lookup_rcu(daddr, base); invalidated = read_seqretry(&base->lock, sequence); - rcu_read_unlock_bh(); + rcu_read_unlock(); if (p) { /* The existing node has been found. @@ -511,6 +511,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) p->rate_tokens = 0; p->rate_last = 0; p->pmtu_expires = 0; + p->pmtu_orig = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->unused); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f9af98dd7561..da5941f18c3c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -769,20 +769,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); } - { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = dst, - .fl4_src = tiph->saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_GRE, - .fl_gre_key = tunnel->parms.o_key - }; - rt = ip_route_output_key(dev_net(dev), &fl); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } + rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, + tunnel->parms.o_key, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; } tdev = rt->dst.dev; @@ -946,15 +938,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_GRE, - .fl_gre_key = tunnel->parms.o_key - }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_gre(dev_net(dev), + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; @@ -1208,15 +1196,12 @@ static int ipgre_open(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr)) { - struct flowi fl = { - .oif = t->parms.link, - .fl4_dst = t->parms.iph.daddr, - .fl4_src = t->parms.iph.saddr, - .fl4_tos = RT_TOS(t->parms.iph.tos), - .proto = IPPROTO_GRE, - .fl_gre_key = t->parms.o_key - }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_gre(dev_net(dev), + t->parms.iph.daddr, + t->parms.iph.saddr, + t->parms.o_key, + RT_TOS(t->parms.iph.tos), + t->parms.link); if (IS_ERR(rt)) return -EADDRNOTAVAIL; @@ -1766,4 +1751,4 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); -MODULE_ALIAS("gre0"); +MODULE_ALIAS_NETDEV("gre0"); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 171f483b21d5..67f241b97649 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -339,26 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) if(opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. - */ - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - if (IS_ERR(rt)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. + */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, + inet->inet_sport, + sk->sk_protocol, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set_noref(skb, &rt->dst); @@ -1481,16 +1474,18 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, - .proto = sk->sk_protocol, - .flags = ip_reply_arg_flowi_flags(arg) }; - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_key(sock_net(sk), &fl); + struct flowi4 fl4 = { + .flowi4_oif = arg->bound_dev_if, + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl4_sport = tcp_hdr(skb)->dest, + .fl4_dport = tcp_hdr(skb)->source, + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = ip_reply_arg_flowi_flags(arg), + }; + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) return; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e1e17576baa6..bfc17c5914e7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -460,20 +460,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } - { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = dst, - .fl4_src= tiph->saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_IPIP - }; - - rt = ip_route_output_key(dev_net(dev), &fl); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } + rt = ip_route_output_ports(dev_net(dev), NULL, + dst, tiph->saddr, + 0, 0, + IPPROTO_IPIP, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; } tdev = rt->dst.dev; @@ -584,14 +578,12 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; @@ -914,4 +906,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); -MODULE_ALIAS("tunl0"); +MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d5f6340af13..1f62eaeb6de4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -148,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return NULL; } -static int ipmr_fib_lookup(struct net *net, struct flowi *flp, +static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { struct ipmr_result res; struct fib_lookup_arg arg = { .result = &res, }; int err; - err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv4.mr_rules_ops, + flowi4_to_flowi(flp4), 0, &arg); if (err < 0) return err; *mrt = res.mrt; @@ -283,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return net->ipv4.mrt; } -static int ipmr_fib_lookup(struct net *net, struct flowi *flp, +static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { *mrt = net->ipv4.mrt; @@ -435,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); struct mr_table *mrt; - struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + struct flowi4 fl4 = { + .flowi4_oif = dev->ifindex, + .flowi4_iif = skb->skb_iif, + .flowi4_mark = skb->mark, }; int err; - err = ipmr_fib_lookup(net, &fl, &mrt); + err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { kfree_skb(skb); return err; @@ -1611,25 +1612,19 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, #endif if (vif->flags & VIFF_TUNNEL) { - struct flowi fl = { - .oif = vif->link, - .fl4_dst = vif->remote, - .fl4_src = vif->local, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_ports(net, NULL, + vif->remote, vif->local, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; encap = sizeof(struct iphdr); } else { - struct flowi fl = { - .oif = vif->link, - .fl4_dst = iph->daddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_ports(net, NULL, iph->daddr, 0, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; } @@ -1793,6 +1788,24 @@ dont_forward: return 0; } +static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) +{ + struct flowi4 fl4 = { + .daddr = rt->rt_key_dst, + .saddr = rt->rt_key_src, + .flowi4_tos = rt->rt_tos, + .flowi4_oif = rt->rt_oif, + .flowi4_iif = rt->rt_iif, + .flowi4_mark = rt->rt_mark, + }; + struct mr_table *mrt; + int err; + + err = ipmr_fib_lookup(net, &fl4, &mrt); + if (err) + return ERR_PTR(err); + return mrt; +} /* * Multicast packets for forwarding arrive here @@ -1805,7 +1818,6 @@ int ip_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; - int err; /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. @@ -1813,21 +1825,10 @@ int ip_mr_input(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; - { - struct rtable *rt = skb_rtable(skb); - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, - }; - err = ipmr_fib_lookup(net, &fl, &mrt); - if (err < 0) { - kfree_skb(skb); - return err; - } + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) { + kfree_skb(skb); + return PTR_ERR(mrt); } if (!local) { if (IPCB(skb)->opt.router_alert) { @@ -1956,19 +1957,9 @@ int pim_rcv_v1(struct sk_buff *skb) pim = igmp_hdr(skb); - { - struct rtable *rt = skb_rtable(skb); - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, - }; - if (ipmr_fib_lookup(net, &fl, &mrt) < 0) - goto drop; - } + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) + goto drop; if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ -1998,19 +1989,9 @@ static int pim_rcv(struct sk_buff *skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - { - struct rtable *rt = skb_rtable(skb); - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, - }; - if (ipmr_fib_lookup(net, &fl, &mrt) < 0) - goto drop; - } + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) + goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 67bf709180de..f3c0b549b8e1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - struct flowi fl = {}; + struct flowi4 fl4 = {}; unsigned long orefdst; unsigned int hh_len; unsigned int type; @@ -31,14 +31,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { - fl.fl4_dst = iph->daddr; + fl4.daddr = iph->daddr; if (type == RTN_LOCAL) - fl.fl4_src = iph->saddr; - fl.fl4_tos = RT_TOS(iph->tos); - fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl.mark = skb->mark; - fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - rt = ip_route_output_key(net, &fl); + fl4.saddr = iph->saddr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; @@ -48,8 +48,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ - fl.fl4_dst = iph->saddr; - rt = ip_route_output_key(net, &fl); + fl4.daddr = iph->saddr; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; @@ -68,10 +68,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET) == 0) { + xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); @@ -223,7 +223,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) { - struct rtable *rt = ip_route_output_key(&init_net, fl); + struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); if (IS_ERR(rt)) return PTR_ERR(rt); *dst = &rt->dst; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f926a310075d..1dfc18a03fd4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -64,16 +64,6 @@ config IP_NF_IPTABLES if IP_NF_IPTABLES # The matches. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. - config IP_NF_MATCH_AH tristate '"ah" match support' depends on NETFILTER_ADVANCED diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19eb59d01037..dca2082ec683 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e95054c690c6..4b5d457c2d76 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1066,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user, /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1488,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1740,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef7d7b9680ea..ffcea0d1678e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - if (*stackptr == 0) { + if (*stackptr <= origptr) { e = get_entry(table_base, private->underflow[hook]); pr_debug("Underflow (this is normal) " @@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else @@ -1262,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1807,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -2036,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 403ca57f6011..d609ac3cb9a4 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -664,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, char buffer[PROC_WRITELEN+1]; unsigned long nodenum; - if (copy_from_user(buffer, input, PROC_WRITELEN)) + if (size > PROC_WRITELEN) + return -EIO; + if (copy_from_user(buffer, input, size)) return -EFAULT; + buffer[size] = 0; if (*buffer == '+') { nodenum = simple_strtoul(buffer+1, NULL, 10); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c deleted file mode 100644 index db8bff0fb86d..000000000000 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * iptables module to match inet_addr_type() of an ip. - * - * Copyright (c) 2004 Patrick McHardy <[email protected]> - * (C) 2007 Laszlo Attila Toth <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/ip.h> -#include <net/route.h> - -#include <linux/netfilter_ipv4/ipt_addrtype.h> -#include <linux/netfilter/x_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <[email protected]>"); -MODULE_DESCRIPTION("Xtables: address type match for IPv4"); - -static inline bool match_type(struct net *net, const struct net_device *dev, - __be32 addr, u_int16_t mask) -{ - return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); -} - -static bool -addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - const struct ipt_addrtype_info *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - bool ret = true; - - if (info->source) - ret &= match_type(net, NULL, iph->saddr, info->source) ^ - info->invert_source; - if (info->dest) - ret &= match_type(net, NULL, iph->daddr, info->dest) ^ - info->invert_dest; - - return ret; -} - -static bool -addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - const struct ipt_addrtype_info_v1 *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - const struct net_device *dev = NULL; - bool ret = true; - - if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; - else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; - - if (info->source) - ret &= match_type(net, dev, iph->saddr, info->source) ^ - (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); - if (ret && info->dest) - ret &= match_type(net, dev, iph->daddr, info->dest) ^ - !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); - return ret; -} - -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) -{ - struct ipt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("both incoming and outgoing " - "interface limitation cannot be selected\n"); - return -EINVAL; - } - - if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN)) && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("output interface limitation " - "not valid in PREROUTING and INPUT\n"); - return -EINVAL; - } - - if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT)) && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { - pr_info("input interface limitation " - "not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; - } - - return 0; -} - -static struct xt_match addrtype_mt_reg[] __read_mostly = { - { - .name = "addrtype", - .family = NFPROTO_IPV4, - .match = addrtype_mt_v0, - .matchsize = sizeof(struct ipt_addrtype_info), - .me = THIS_MODULE - }, - { - .name = "addrtype", - .family = NFPROTO_IPV4, - .revision = 1, - .match = addrtype_mt_v1, - .checkentry = addrtype_mt_checkentry_v1, - .matchsize = sizeof(struct ipt_addrtype_info_v1), - .me = THIS_MODULE - } -}; - -static int __init addrtype_mt_init(void) -{ - return xt_register_matches(addrtype_mt_reg, - ARRAY_SIZE(addrtype_mt_reg)); -} - -static void __exit addrtype_mt_exit(void) -{ - xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); -} - -module_init(addrtype_mt_init); -module_exit(addrtype_mt_exit); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 95481fee8bdb..7317bdf1d457 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -31,6 +31,7 @@ #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { + struct flowi4 *fl4 = &fl->u.ip4; const struct nf_conn *ct; const struct nf_conntrack_tuple *t; enum ip_conntrack_info ctinfo; @@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) statusbit = IPS_SRC_NAT; if (ct->status & statusbit) { - fl->fl4_dst = t->dst.u3.ip; + fl4->daddr = t->dst.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl_ip_dport = t->dst.u.tcp.port; + fl4->fl4_dport = t->dst.u.tcp.port; } statusbit ^= IPS_NAT_MASK; if (ct->status & statusbit) { - fl->fl4_src = t->src.u3.ip; + fl4->saddr = t->src.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl_ip_sport = t->src.u.tcp.port; + fl4->fl4_sport = t->src.u.tcp.port; } } #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 467d570d087a..e837ffd3edc3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -402,7 +402,7 @@ error: return err; } -static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -418,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl4->flowi4_proto) { case IPPROTO_ICMP: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -433,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl_icmp_type, type) || - get_user(fl->fl_icmp_code, code)) + if (get_user(fl4->fl4_icmp_type, type) || + get_user(fl4->fl4_icmp_code, code)) return -EFAULT; probed = 1; } @@ -548,23 +548,25 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = saddr, - .fl4_tos = tos, - .proto = inet->hdrincl ? IPPROTO_RAW : - sk->sk_protocol, - .flags = FLOWI_FLAG_CAN_SLEEP, + struct flowi4 fl4 = { + .flowi4_oif = ipc.oif, + .flowi4_mark = sk->sk_mark, + .daddr = daddr, + .saddr = saddr, + .flowi4_tos = tos, + .flowi4_proto = (inet->hdrincl ? + IPPROTO_RAW : + sk->sk_protocol), + .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, }; if (!inet->hdrincl) { - err = raw_probe_proto_opt(&fl, msg); + err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92a24ea34c1b..870b5182ddd8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,8 +109,8 @@ #include <linux/sysctl.h> #endif -#define RT_FL_TOS(oldflp) \ - ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) +#define RT_FL_TOS(oldflp4) \ + ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) #define IP_MAX_MTU 0xFFF0 @@ -204,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = { const __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, - ECN_OR_COST(FILLER), + ECN_OR_COST(BESTEFFORT), TC_PRIO_BESTEFFORT, ECN_OR_COST(BESTEFFORT), TC_PRIO_BULK, @@ -1533,9 +1533,15 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + unsigned long pmtu_expires; + + pmtu_expires = jiffies + ip_rt_mtu_expires; + if (!pmtu_expires) + pmtu_expires = 1UL; + est_mtu = mtu; peer->pmtu_learned = mtu; - peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + peer->pmtu_expires = pmtu_expires; } inet_putpeer(peer); @@ -1549,7 +1555,7 @@ static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { unsigned long expires = peer->pmtu_expires; - if (time_before(expires, jiffies)) { + if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { if (!peer->pmtu_orig) @@ -1574,14 +1580,20 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + unsigned long pmtu_expires; + + pmtu_expires = jiffies + ip_rt_mtu_expires; + if (!pmtu_expires) + pmtu_expires = 1UL; + peer->pmtu_learned = mtu; - peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + peer->pmtu_expires = pmtu_expires; atomic_inc(&__rt_peer_genid); rt->rt_peer_genid = rt_peer_genid(); - - check_peer_pmtu(dst, peer); } + check_peer_pmtu(dst, peer); + inet_putpeer(peer); } } @@ -1697,17 +1709,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt_is_output_route(rt)) src = rt->rt_src; else { - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, + struct flowi4 fl4 = { + .daddr = rt->rt_key_dst, + .saddr = rt->rt_key_src, + .flowi4_tos = rt->rt_tos, + .flowi4_oif = rt->rt_oif, + .flowi4_iif = rt->rt_iif, + .flowi4_mark = rt->rt_mark, }; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) src = FIB_RES_PREFSRC(res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, @@ -1757,7 +1769,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, +static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, struct fib_info *fi) { struct inet_peer *peer; @@ -1766,7 +1778,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, /* If a peer entry exists for this destination, we must hook * it up in order to get at cached metrics. */ - if (oldflp && (oldflp->flags & FLOWI_FLAG_PRECOW_METRICS)) + if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); @@ -1793,7 +1805,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, } } -static void rt_set_nexthop(struct rtable *rt, const struct flowi *oldflp, +static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { @@ -1803,7 +1815,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi *oldflp, if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - rt_init_metrics(rt, oldflp, fi); + rt_init_metrics(rt, oldflp4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -2038,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb, static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, - const struct flowi *fl, + const struct flowi4 *fl4, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { @@ -2047,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb, unsigned hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) - fib_select_multipath(fl, res); + if (res->fi && res->fi->fib_nhs > 1) + fib_select_multipath(res); #endif /* create a routing cache entry */ @@ -2057,9 +2069,9 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif, + hash = rt_hash(daddr, saddr, fl4->flowi4_iif, rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl->iif); + rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); if (IS_ERR(rth)) return PTR_ERR(rth); return 0; @@ -2081,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl = { .fl4_dst = daddr, - .fl4_src = saddr, - .fl4_tos = tos, - .fl4_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = dev->ifindex }; + struct flowi4 fl4; unsigned flags = 0; u32 itag = 0; struct rtable * rth; @@ -2123,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - err = fib_lookup(net, &fl, &res); + fl4.flowi4_oif = 0; + fl4.flowi4_iif = dev->ifindex; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_tos = tos; + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.daddr = daddr; + fl4.saddr = saddr; + err = fib_lookup(net, &fl4, &res); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; @@ -2152,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); out: return err; brd_input: @@ -2203,8 +2217,8 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl.iif); + hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); + rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; if (IS_ERR(rth)) err = PTR_ERR(rth); @@ -2307,8 +2321,8 @@ skip_cache: struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev) { - int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + int our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2333,25 +2347,25 @@ EXPORT_SYMBOL(ip_route_input_common); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, + const struct flowi4 *fl4, + const struct flowi4 *oldflp4, struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; - u32 tos = RT_FL_TOS(oldflp); + u32 tos = RT_FL_TOS(oldflp4); struct in_device *in_dev; u16 type = res->type; struct rtable *rth; - if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) return ERR_PTR(-EINVAL); - if (ipv4_is_lbcast(fl->fl4_dst)) + if (ipv4_is_lbcast(fl4->daddr)) type = RTN_BROADCAST; - else if (ipv4_is_multicast(fl->fl4_dst)) + else if (ipv4_is_multicast(fl4->daddr)) type = RTN_MULTICAST; - else if (ipv4_is_zeronet(fl->fl4_dst)) + else if (ipv4_is_zeronet(fl4->daddr)) return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) @@ -2366,8 +2380,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; - if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, - oldflp->proto)) + if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, + oldflp4->flowi4_proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. @@ -2382,20 +2396,20 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (!rth) return ERR_PTR(-ENOBUFS); - rth->rt_key_dst = oldflp->fl4_dst; + rth->rt_key_dst = oldflp4->daddr; rth->rt_tos = tos; - rth->rt_key_src = oldflp->fl4_src; - rth->rt_oif = oldflp->oif; - rth->rt_mark = oldflp->mark; - rth->rt_dst = fl->fl4_dst; - rth->rt_src = fl->fl4_src; + rth->rt_key_src = oldflp4->saddr; + rth->rt_oif = oldflp4->flowi4_oif; + rth->rt_mark = oldflp4->flowi4_mark; + rth->rt_dst = fl4->daddr; + rth->rt_src = fl4->saddr; rth->rt_iif = 0; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; dev_hold(dev_out); - rth->rt_gateway = fl->fl4_dst; - rth->rt_spec_dst= fl->fl4_src; + rth->rt_gateway = fl4->daddr; + rth->rt_spec_dst= fl4->saddr; rth->dst.output=ip_output; rth->rt_genid = rt_genid(dev_net(dev_out)); @@ -2404,10 +2418,10 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (flags & RTCF_LOCAL) { rth->dst.input = ip_local_deliver; - rth->rt_spec_dst = fl->fl4_dst; + rth->rt_spec_dst = fl4->daddr; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - rth->rt_spec_dst = fl->fl4_src; + rth->rt_spec_dst = fl4->saddr; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; @@ -2416,7 +2430,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #ifdef CONFIG_IP_MROUTE if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && - !ipv4_is_local_multicast(oldflp->fl4_dst)) { + !ipv4_is_local_multicast(oldflp4->daddr)) { rth->dst.input = ip_mr_input; rth->dst.output = ip_mc_output; } @@ -2424,7 +2438,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #endif } - rt_set_nexthop(rth, oldflp, res, fi, type, 0); + rt_set_nexthop(rth, oldflp4, res, fi, type, 0); rth->rt_flags = flags; return rth; @@ -2436,10 +2450,10 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ static struct rtable *ip_route_output_slow(struct net *net, - const struct flowi *oldflp) + const struct flowi4 *oldflp4) { - u32 tos = RT_FL_TOS(oldflp); - struct flowi fl; + u32 tos = RT_FL_TOS(oldflp4); + struct flowi4 fl4; struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; @@ -2450,21 +2464,21 @@ static struct rtable *ip_route_output_slow(struct net *net, res.r = NULL; #endif - fl.oif = oldflp->oif; - fl.iif = net->loopback_dev->ifindex; - fl.mark = oldflp->mark; - fl.fl4_dst = oldflp->fl4_dst; - fl.fl4_src = oldflp->fl4_src; - fl.fl4_tos = tos & IPTOS_RT_MASK; - fl.fl4_scope = ((tos & RTO_ONLINK) ? + fl4.flowi4_oif = oldflp4->flowi4_oif; + fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_mark = oldflp4->flowi4_mark; + fl4.daddr = oldflp4->daddr; + fl4.saddr = oldflp4->saddr; + fl4.flowi4_tos = tos & IPTOS_RT_MASK; + fl4.flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); rcu_read_lock(); - if (oldflp->fl4_src) { + if (oldflp4->saddr) { rth = ERR_PTR(-EINVAL); - if (ipv4_is_multicast(oldflp->fl4_src) || - ipv4_is_lbcast(oldflp->fl4_src) || - ipv4_is_zeronet(oldflp->fl4_src)) + if (ipv4_is_multicast(oldflp4->saddr) || + ipv4_is_lbcast(oldflp4->saddr) || + ipv4_is_zeronet(oldflp4->saddr)) goto out; /* I removed check for oif == dev_out->oif here. @@ -2475,11 +2489,11 @@ static struct rtable *ip_route_output_slow(struct net *net, of another iface. --ANK */ - if (oldflp->oif == 0 && - (ipv4_is_multicast(oldflp->fl4_dst) || - ipv4_is_lbcast(oldflp->fl4_dst))) { + if (oldflp4->flowi4_oif == 0 && + (ipv4_is_multicast(oldflp4->daddr) || + ipv4_is_lbcast(oldflp4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = __ip_dev_find(net, oldflp->fl4_src, false); + dev_out = __ip_dev_find(net, oldflp4->saddr, false); if (dev_out == NULL) goto out; @@ -2498,20 +2512,20 @@ static struct rtable *ip_route_output_slow(struct net *net, Luckily, this hack is good workaround. */ - fl.oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; goto make_route; } - if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - if (!__ip_dev_find(net, oldflp->fl4_src, false)) + if (!__ip_dev_find(net, oldflp4->saddr, false)) goto out; } } - if (oldflp->oif) { - dev_out = dev_get_by_index_rcu(net, oldflp->oif); + if (oldflp4->flowi4_oif) { + dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; @@ -2521,37 +2535,37 @@ static struct rtable *ip_route_output_slow(struct net *net, rth = ERR_PTR(-ENETUNREACH); goto out; } - if (ipv4_is_local_multicast(oldflp->fl4_dst) || - ipv4_is_lbcast(oldflp->fl4_dst)) { - if (!fl.fl4_src) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (ipv4_is_local_multicast(oldflp4->daddr) || + ipv4_is_lbcast(oldflp4->daddr)) { + if (!fl4.saddr) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); goto make_route; } - if (!fl.fl4_src) { - if (ipv4_is_multicast(oldflp->fl4_dst)) - fl.fl4_src = inet_select_addr(dev_out, 0, - fl.fl4_scope); - else if (!oldflp->fl4_dst) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_HOST); + if (!fl4.saddr) { + if (ipv4_is_multicast(oldflp4->daddr)) + fl4.saddr = inet_select_addr(dev_out, 0, + fl4.flowi4_scope); + else if (!oldflp4->daddr) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_HOST); } } - if (!fl.fl4_dst) { - fl.fl4_dst = fl.fl4_src; - if (!fl.fl4_dst) - fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); + if (!fl4.daddr) { + fl4.daddr = fl4.saddr; + if (!fl4.daddr) + fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl.oif = net->loopback_dev->ifindex; + fl4.flowi4_oif = net->loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - if (fib_lookup(net, &fl, &res)) { + if (fib_lookup(net, &fl4, &res)) { res.fi = NULL; - if (oldflp->oif) { + if (oldflp4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2570,9 +2584,9 @@ static struct rtable *ip_route_output_slow(struct net *net, likely IPv6, but we do not. */ - if (fl.fl4_src == 0) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (fl4.saddr == 0) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; } @@ -2581,42 +2595,42 @@ static struct rtable *ip_route_output_slow(struct net *net, } if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) { + if (!fl4.saddr) { if (res.fi->fib_prefsrc) - fl.fl4_src = res.fi->fib_prefsrc; + fl4.saddr = res.fi->fib_prefsrc; else - fl.fl4_src = fl.fl4_dst; + fl4.saddr = fl4.daddr; } dev_out = net->loopback_dev; - fl.oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl.oif == 0) - fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) + fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) + if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) fib_select_default(&res); - if (!fl.fl4_src) - fl.fl4_src = FIB_RES_PREFSRC(res); + if (!fl4.saddr) + fl4.saddr = FIB_RES_PREFSRC(res); dev_out = FIB_RES_DEV(res); - fl.oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; make_route: - rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags); + rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); if (!IS_ERR(rth)) { unsigned int hash; - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, oldflp->oif); + rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); } out: @@ -2624,7 +2638,7 @@ out: return rth; } -struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) +struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) { struct rtable *rth; unsigned int hash; @@ -2632,17 +2646,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) if (!rt_caching(net)) goto slow_output; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); + hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->rt_key_dst == flp->fl4_dst && - rth->rt_key_src == flp->fl4_src && + if (rth->rt_key_dst == flp4->daddr && + rth->rt_key_src == flp4->saddr && rt_is_output_route(rth) && - rth->rt_oif == flp->oif && - rth->rt_mark == flp->mark && - !((rth->rt_tos ^ flp->fl4_tos) & + rth->rt_oif == flp4->flowi4_oif && + rth->rt_mark == flp4->flowi4_mark && + !((rth->rt_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { @@ -2656,7 +2670,7 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) rcu_read_unlock_bh(); slow_output: - return ip_route_output_slow(net, flp); + return ip_route_output_slow(net, flp4); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2731,20 +2745,22 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or return rt ? &rt->dst : ERR_PTR(-ENOMEM); } -struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp, +struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, struct sock *sk) { - struct rtable *rt = __ip_route_output_key(net, flp); + struct rtable *rt = __ip_route_output_key(net, flp4); if (IS_ERR(rt)) return rt; - if (flp->proto) { - if (!flp->fl4_src) - flp->fl4_src = rt->rt_src; - if (!flp->fl4_dst) - flp->fl4_dst = rt->rt_dst; - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, flp, sk, 0); + if (flp4->flowi4_proto) { + if (!flp4->saddr) + flp4->saddr = rt->rt_src; + if (!flp4->daddr) + flp4->daddr = rt->rt_dst; + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); } return rt; @@ -2911,14 +2927,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - struct flowi fl = { - .fl4_dst = dst, - .fl4_src = src, - .fl4_tos = rtm->rtm_tos, - .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .mark = mark, + struct flowi4 fl4 = { + .daddr = dst, + .saddr = src, + .flowi4_tos = rtm->rtm_tos, + .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .flowi4_mark = mark, }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_key(net, &fl4); err = 0; if (IS_ERR(rt)) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0ad6ddf638a7..8b44c6d2a79b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,17 +345,19 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi fl = { .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = IPPROTO_TCP, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = th->dest, - .fl_ip_dport = th->source }; - security_req_classify_flow(req, &fl); - rt = ip_route_output_key(sock_net(sk), &fl); + struct flowi4 fl4 = { + .flowi4_mark = sk->sk_mark, + .daddr = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .saddr = ireq->loc_addr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = IPPROTO_TCP, + .flowi4_flags = inet_sk_flowi_flags(sk), + .fl4_sport = th->dest, + .fl4_dport = th->source, + }; + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { reqsk_free(req); goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a17a5a72b98d..b22d45010545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) else answ = tp->write_seq - tp->snd_una; break; + case SIOCOUTQNSD: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) + answ = 0; + else + answ = tp->write_seq - tp->snd_nxt; + break; default: return -ENOIOCTLCMD; } diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 3b53fd1af23f..6187eb4d1dcf 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) } -static struct tcp_congestion_ops bictcp = { +static struct tcp_congestion_ops bictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f29fa6..34340c9c95fa 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -39,7 +39,7 @@ /* Number of delay samples for detecting the increase of delay */ #define HYSTART_MIN_SAMPLES 8 -#define HYSTART_DELAY_MIN (2U<<3) +#define HYSTART_DELAY_MIN (4U<<3) #define HYSTART_DELAY_MAX (16U<<3) #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) @@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1; static int hystart __read_mostly = 1; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_low_window __read_mostly = 16; +static int hystart_ack_delta __read_mostly = 2; static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; @@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); +module_param(hystart_ack_delta, int, 0644); +MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)"); /* BIC TCP Parameters */ struct bictcp { @@ -85,7 +88,7 @@ struct bictcp { u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ u32 bic_K; /* time to origin point from the beginning of the current epoch */ - u32 delay_min; /* min delay */ + u32 delay_min; /* min delay (msec << 3) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ @@ -95,7 +98,7 @@ struct bictcp { u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ u32 end_seq; /* end_seq of the round */ - u32 last_jiffies; /* last time when the ACK spacing is close */ + u32 last_ack; /* last time when the ACK spacing is close */ u32 curr_rtt; /* the minimum rtt of current round */ }; @@ -116,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca) ca->found = 0; } +static inline u32 bictcp_clock(void) +{ +#if HZ < 1000 + return ktime_to_ms(ktime_get_real()); +#else + return jiffies_to_msecs(jiffies); +#endif +} + static inline void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - ca->round_start = ca->last_jiffies = jiffies; + ca->round_start = ca->last_ack = bictcp_clock(); ca->end_seq = tp->snd_nxt; ca->curr_rtt = 0; ca->sample_cnt = 0; @@ -236,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) */ /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) - << BICTCP_HZ) / HZ; + t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) + - ca->epoch_start) << BICTCP_HZ) / HZ; if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; @@ -258,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 100 * cwnd; /* very small increment*/ } + /* + * The initial growth of cubic function may be too conservative + * when the available bandwidth is still unknown. + */ + if (ca->loss_cwnd == 0 && ca->cnt > 20) + ca->cnt = 20; /* increase cwnd 5% per RTT */ + /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; @@ -339,12 +358,12 @@ static void hystart_update(struct sock *sk, u32 delay) struct bictcp *ca = inet_csk_ca(sk); if (!(ca->found & hystart_detect)) { - u32 curr_jiffies = jiffies; + u32 now = bictcp_clock(); /* first detection parameter - ack-train detection */ - if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { - ca->last_jiffies = curr_jiffies; - if (curr_jiffies - ca->round_start >= ca->delay_min>>4) + if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { + ca->last_ack = now; + if ((s32)(now - ca->round_start) > ca->delay_min >> 4) ca->found |= HYSTART_ACK_TRAIN; } @@ -391,7 +410,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; - delay = usecs_to_jiffies(rtt_us) << 3; + delay = (rtt_us << 3) / USEC_PER_MSEC; if (delay == 0) delay = 1; @@ -405,7 +424,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) hystart_update(sk, delay); } -static struct tcp_congestion_ops cubictcp = { +static struct tcp_congestion_ops cubictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, @@ -447,6 +466,10 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); + /* hystart needs ms clock resolution */ + if (hystart && HZ < 1000) + cubictcp.flags |= TCP_CONG_RTT_STAMP; + return tcp_register_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b6caaf75bb9..30f27f6b3655 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk) } -static struct tcp_congestion_ops tcp_highspeed = { +static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, .cong_avoid = hstcp_cong_avoid, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 7c94a4955416..c1a8175361e8 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state) } } -static struct tcp_congestion_ops htcp = { +static struct tcp_congestion_ops htcp __read_mostly = { .init = htcp_init, .ssthresh = htcp_recalc_ssthresh, .cong_avoid = htcp_cong_avoid, diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 377bc9349371..fe3ecf484b44 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); } -static struct tcp_congestion_ops tcp_hybla = { +static struct tcp_congestion_ops tcp_hybla __read_mostly = { .init = hybla_init, .ssthresh = tcp_reno_ssthresh, .min_cwnd = tcp_reno_min_cwnd, diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00ca688d8964..813b43a76fec 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } } -static struct tcp_congestion_ops tcp_illinois = { +static struct tcp_congestion_ops tcp_illinois __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08ea735b9d72..da782e7ab16d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3350,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, net_invalid_timestamp())) rtt_us = ktime_us_delta(ktime_get_real(), last_ackt); - else if (ca_seq_rtt > 0) + else if (ca_seq_rtt >= 0) rtt_us = jiffies_to_usecs(ca_seq_rtt); } diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index de870377fbba..656d431c99ad 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) lp->last_drop = tcp_time_stamp; } -static struct tcp_congestion_ops tcp_lp = { +static struct tcp_congestion_ops tcp_lp __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index a76513779e2b..8ce55b8aaec8 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) } -static struct tcp_congestion_ops tcp_scalable = { +static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, .cong_avoid = tcp_scalable_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c6743eec9b7d..80fa2bfd7ede 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); -static struct tcp_congestion_ops tcp_vegas = { +static struct tcp_congestion_ops tcp_vegas __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 38bc0b52d745..ac43cd747bce 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk) return max(tp->snd_cwnd >> 1U, 2U); } -static struct tcp_congestion_ops tcp_veno = { +static struct tcp_congestion_ops tcp_veno __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index a534dda5456e..1b91bf48e277 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, } -static struct tcp_congestion_ops tcp_westwood = { +static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a0f240358892..dc7f43179c9a 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { return tp->snd_cwnd - reduction; } -static struct tcp_congestion_ops tcp_yeah = { +static struct tcp_congestion_ops tcp_yeah __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c9a73e5b26a3..588f47af5faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -770,7 +770,7 @@ static int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; + struct flowi4 *fl4 = &inet->cork.fl.u.ip4; struct sk_buff *skb; int err = 0; @@ -778,7 +778,7 @@ static int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl->fl4_dst, fl->fl_ip_dport); + err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); out: up->len = 0; @@ -791,6 +791,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); + struct flowi4 *fl4; int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; @@ -908,21 +909,22 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .fl4_dst = faddr, - .fl4_src = saddr, - .fl4_tos = tos, - .proto = sk->sk_protocol, - .flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = dport + struct flowi4 fl4 = { + .flowi4_oif = ipc.oif, + .flowi4_mark = sk->sk_mark, + .daddr = faddr, + .saddr = saddr, + .flowi4_tos = tos, + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = (inet_sk_flowi_flags(sk) | + FLOWI_FLAG_CAN_SLEEP), + .fl4_sport = inet->inet_sport, + .fl4_dport = dport, }; struct net *net = sock_net(sk); - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(net, &fl, sk); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; @@ -971,10 +973,11 @@ back_from_confirm: /* * Now cork the socket to pend data. */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->inet_sport; + fl4 = &inet->cork.fl.u.ip4; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->fl4_dport = dport; + fl4->fl4_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c70c42e7e77b..13e0e7f659ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -22,16 +22,16 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { - struct flowi fl = { - .fl4_dst = daddr->a4, - .fl4_tos = tos, + struct flowi4 fl4 = { + .daddr = daddr->a4, + .flowi4_tos = tos, }; struct rtable *rt; if (saddr) - fl.fl4_src = saddr->a4; + fl4.saddr = saddr->a4; - rt = __ip_route_output_key(net, &fl); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) return &rt->dst; @@ -56,7 +56,7 @@ static int xfrm4_get_saddr(struct net *net, static int xfrm4_get_tos(const struct flowi *fl) { - return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ + return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, @@ -69,13 +69,14 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; + const struct flowi4 *fl4 = &fl->u.ip4; - rt->rt_key_dst = fl->fl4_dst; - rt->rt_key_src = fl->fl4_src; - rt->rt_tos = fl->fl4_tos; - rt->rt_iif = fl->iif; - rt->rt_oif = fl->oif; - rt->rt_mark = fl->mark; + rt->rt_key_dst = fl4->daddr; + rt->rt_key_src = fl4->saddr; + rt->rt_tos = fl4->flowi4_tos; + rt->rt_iif = fl4->flowi4_iif; + rt->rt_oif = fl4->flowi4_oif; + rt->rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); @@ -102,9 +103,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; - memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { @@ -117,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; - fl->fl_ip_sport = ports[!!reverse]; - fl->fl_ip_dport = ports[!reverse]; + fl4->fl4_sport = ports[!!reverse]; + fl4->fl4_dport = ports[!reverse]; } break; @@ -126,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; - fl->fl_icmp_type = icmp[0]; - fl->fl_icmp_code = icmp[1]; + fl4->fl4_icmp_type = icmp[0]; + fl4->fl4_icmp_code = icmp[1]; } break; @@ -135,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; - fl->fl_ipsec_spi = ehdr[0]; + fl4->fl4_ipsec_spi = ehdr[0]; } break; @@ -143,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32*)xprth; - fl->fl_ipsec_spi = ah_hdr[1]; + fl4->fl4_ipsec_spi = ah_hdr[1]; } break; @@ -151,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; - fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; @@ -163,20 +165,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) gre_hdr++; - fl->fl_gre_key = gre_hdr[1]; + fl4->fl4_gre_key = gre_hdr[1]; } } break; default: - fl->fl_ipsec_spi = 0; + fl4->fl4_ipsec_spi = 0; break; } } - fl->proto = iph->protocol; - fl->fl4_dst = reverse ? iph->saddr : iph->daddr; - fl->fl4_src = reverse ? iph->daddr : iph->saddr; - fl->fl4_tos = iph->tos; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; } static inline int xfrm4_garbage_collect(struct dst_ops *ops) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 983eff248988..1717c64628d1 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -23,17 +23,19 @@ static int xfrm4_init_flags(struct xfrm_state *x) static void __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { - sel->daddr.a4 = fl->fl4_dst; - sel->saddr.a4 = fl->fl4_src; - sel->dport = xfrm_flowi_dport(fl); + const struct flowi4 *fl4 = &fl->u.ip4; + + sel->daddr.a4 = fl4->daddr; + sel->saddr.a4 = fl4->saddr; + sel->dport = xfrm_flowi_dport(fl, &fl4->uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl); + sel->sport = xfrm_flowi_sport(fl, &fl4->uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET; sel->prefixlen_d = 32; sel->prefixlen_s = 32; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl4->flowi4_proto; + sel->ifindex = fl4->flowi4_oif; } static void diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a88b2e9d25f1..4b13d5d8890e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -652,22 +652,22 @@ int inet6_sk_rebuild_header(struct sock *sk) if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - final_p = fl6_update_dst(&fl, np->opt, &final); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowlabel = np->flow_label; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + final_p = fl6_update_dst(&fl6, np->opt, &final); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { sk->sk_route_caps = 0; sk->sk_err_soft = -PTR_ERR(dst); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 1aba54ae53c4..2195ae651923 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -409,7 +409,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index be3a781c0085..16560336eb72 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; struct ip6_flowlabel *flowlabel = NULL; struct ipv6_txoptions *opt; int addr_type; @@ -59,11 +59,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -137,7 +137,7 @@ ipv4_connected: } ipv6_addr_copy(&np->daddr, daddr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -146,23 +146,23 @@ ipv4_connected: * destination cache for it. */ - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; - if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) + fl6.flowi6_oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); opt = flowlabel ? flowlabel->opt : np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -172,20 +172,20 @@ ipv4_connected: /* source address lookup done in ip6_dst_lookup */ if (ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&np->saddr, &fl.fl6_src); + ipv6_addr_copy(&np->saddr, &fl6.saddr); if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); + ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr); inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl6.daddr, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : #endif NULL); @@ -231,7 +231,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) +void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -250,7 +250,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl6->daddr); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -261,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); - serr->port = fl->fl_ip_dport; + serr->port = fl6->fl6_dport; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); @@ -270,7 +270,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) kfree_skb(skb); } -void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) +void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; @@ -287,7 +287,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl6->daddr); mtu_info = IP6CBMTU(skb); if (!mtu_info) { @@ -299,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) mtu_info->ip6m_addr.sin6_family = AF_INET6; mtu_info->ip6m_addr.sin6_port = 0; mtu_info->ip6m_addr.sin6_flowinfo = 0; - mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif; ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); __skb_pull(skb, skb_tail_pointer(skb) - skb->data); @@ -593,7 +593,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } int datagram_send_ctl(struct net *net, - struct msghdr *msg, struct flowi *fl, + struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) { @@ -629,16 +629,17 @@ int datagram_send_ctl(struct net *net, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { - if (fl->oif && src_info->ipi6_ifindex != fl->oif) + if (fl6->flowi6_oif && + src_info->ipi6_ifindex != fl6->flowi6_oif) return -EINVAL; - fl->oif = src_info->ipi6_ifindex; + fl6->flowi6_oif = src_info->ipi6_ifindex; } addr_type = __ipv6_addr_type(&src_info->ipi6_addr); rcu_read_lock(); - if (fl->oif) { - dev = dev_get_by_index_rcu(net, fl->oif); + if (fl6->flowi6_oif) { + dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); if (!dev) { rcu_read_unlock(); return -ENODEV; @@ -654,7 +655,7 @@ int datagram_send_ctl(struct net *net, strict ? dev : NULL, 0)) err = -EINVAL; else - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr); } rcu_read_unlock(); @@ -671,13 +672,13 @@ int datagram_send_ctl(struct net *net, goto exit_f; } - if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) { - if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { + if (fl6->flowlabel&IPV6_FLOWINFO_MASK) { + if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { err = -EINVAL; goto exit_f; } } - fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); + fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); break; case IPV6_2292HOPOPTS: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1b5c9825743b..5aa8ec88f194 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -54,16 +54,20 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu); /* * Allocate an AEAD request structure with extra space for SG and IV. * - * For alignment considerations the IV is placed at the front, followed - * by the request and finally the SG list. + * For alignment considerations the upper 32 bits of the sequence number are + * placed at the front, if present. Followed by the IV, the request and finally + * the SG list. * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) { unsigned int len; - len = crypto_aead_ivsize(aead); + len = seqihlen; + + len += crypto_aead_ivsize(aead); + if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); @@ -78,10 +82,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) return kmalloc(len, GFP_ATOMIC); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +static inline __be32 *esp_tmp_seqhi(void *tmp) +{ + return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); +} + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; + PTR_ALIGN((u8 *)tmp + seqhilen, + crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_givcrypt_request *esp_tmp_givreq( @@ -145,8 +155,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int plen; int tfclen; int nfrags; + int assoclen; + int sglists; + int seqhilen; u8 *iv; u8 *tail; + __be32 *seqhi; struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ @@ -175,14 +189,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) goto error; nfrags = err; - tmp = esp_alloc_tmp(aead, nfrags + 1); + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto error; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_givreq(aead, iv); asg = esp_givreq_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -204,19 +229,27 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, esph->enc_data + crypto_aead_ivsize(aead) - skb->data, clen + alen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output); + XFRM_SKB_CB(skb)->seq.output.low); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); @@ -292,8 +325,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; + int assoclen; + int sglists; + int seqhilen; int ret = 0; void *tmp; + __be32 *seqhi; u8 *iv; struct scatterlist *sg; struct scatterlist *asg; @@ -314,12 +351,24 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } ret = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + 1); + + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); sg = asg + 1; @@ -333,11 +382,19 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, sizeof(*esph)); + aead_request_set_assoc(req, asg, assoclen); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) @@ -443,10 +500,20 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; err = -ENAMETOOLONG; - if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", - x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + + if ((x->props.flags & XFRM_STATE_ESN)) { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authencesn(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } else { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 262f105d23b9..79a485e8a700 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -876,22 +876,22 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, * fl6_update_dst - update flowi destination address with info given * by srcrt option, if any. * - * @fl: flowi for which fl6_dst is to be updated + * @fl6: flowi6 for which daddr is to be updated * @opt: struct ipv6_txoptions in which to look for srcrt opt - * @orig: copy of original fl6_dst address if modified + * @orig: copy of original daddr address if modified * * Returns NULL if no txoptions or no srcrt, otherwise returns orig - * and initial value of fl->fl6_dst set in orig + * and initial value of fl6->daddr set in orig */ -struct in6_addr *fl6_update_dst(struct flowi *fl, +struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, struct in6_addr *orig) { if (!opt || !opt->srcrt) return NULL; - ipv6_addr_copy(orig, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + ipv6_addr_copy(orig, &fl6->daddr); + ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr); return orig; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index d829874d8946..34d244df907d 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,7 +29,7 @@ struct fib6_rule u8 tclass; }; -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { struct fib_lookup_arg arg = { @@ -37,7 +37,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, .flags = FIB_LOOKUP_NOREF, }; - fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); + fib_rules_lookup(net->ipv6.fib6_rules_ops, + flowi6_to_flowi(fl6), flags, &arg); if (arg.result) return arg.result; @@ -49,6 +50,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct flowi6 *flp6 = &flp->u.ip6; struct rt6_info *rt = NULL; struct fib6_table *table; struct net *net = rule->fr_net; @@ -71,7 +73,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, table = fib6_get_table(net, rule->table); if (table) - rt = lookup(net, table, flp, flags); + rt = lookup(net, table, flp6, flags); if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; @@ -86,14 +88,14 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->dst)->dev, - &flp->fl6_dst, + &flp6->daddr, rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) goto again; - ipv6_addr_copy(&flp->fl6_src, &saddr); + ipv6_addr_copy(&flp6->saddr, &saddr); } goto out; } @@ -113,9 +115,10 @@ out: static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib6_rule *r = (struct fib6_rule *) rule; + struct flowi6 *fl6 = &fl->u.ip6; if (r->dst.plen && - !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen)) return 0; /* @@ -125,14 +128,14 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) */ if (r->src.plen) { if (flags & RT6_LOOKUP_F_HAS_SADDR) { - if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, + if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr, r->src.plen)) return 0; } else if (!(r->common.flags & FIB_RULE_FIND_SADDR)) return 0; } - if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) + if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff)) return 0; return 1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 55665956b3a8..83cb4f9add81 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -158,7 +158,7 @@ static int is_ineligible(struct sk_buff *skb) * Check the ICMP output rate limit */ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi *fl) + struct flowi6 *fl6) { struct dst_entry *dst; struct net *net = sock_net(sk); @@ -177,7 +177,7 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ - dst = ip6_route_output(net, sk, fl); + dst = ip6_route_output(net, sk, fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -217,7 +217,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len) +static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -233,9 +233,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct if (skb_queue_len(&sk->sk_write_queue) == 1) { skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->proto, + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, + &fl6->daddr, + len, fl6->flowi6_proto, skb->csum); } else { __wsum tmp_csum = 0; @@ -246,9 +246,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->proto, + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, + &fl6->daddr, + len, fl6->flowi6_proto, tmp_csum); } ip6_push_pending_frames(sk); @@ -301,13 +301,13 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi *fl) + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; - struct flowi fl2; + struct flowi6 fl2; int err; - err = ip6_dst_lookup(sk, &dst, fl); + err = ip6_dst_lookup(sk, &dst, fl6); if (err) return ERR_PTR(err); @@ -324,7 +324,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk /* No need to clone since we're just using its address. */ dst2 = dst; - dst = xfrm_lookup(net, dst, fl, sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); if (!IS_ERR(dst)) { if (dst != dst2) return dst; @@ -335,7 +335,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk return dst; } - err = xfrm_decode_session_reverse(skb, &fl2, AF_INET6); + err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; @@ -343,7 +343,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk if (err) goto relookup_failed; - dst2 = xfrm_lookup(net, dst2, &fl2, sk, XFRM_LOOKUP_ICMP); + dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); if (!IS_ERR(dst2)) { dst_release(dst); dst = dst2; @@ -375,7 +375,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct in6_addr *saddr = NULL; struct dst_entry *dst; struct icmp6hdr tmp_hdr; - struct flowi fl; + struct flowi6 fl6; struct icmpv6_msg msg; int iif = 0; int addr_type = 0; @@ -442,22 +442,22 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) mip6_addr_swap(skb); - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) - ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = iif; - fl.fl_icmp_type = type; - fl.fl_icmp_code = code; - security_skb_classify_flow(skb, &fl); + ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_oif = iif; + fl6.fl6_icmp_type = type; + fl6.fl6_icmp_code = code; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; np = inet6_sk(sk); - if (!icmpv6_xrlim_allow(sk, type, &fl)) + if (!icmpv6_xrlim_allow(sk, type, &fl6)) goto out; tmp_hdr.icmp6_type = type; @@ -465,14 +465,14 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; - dst = icmpv6_route_lookup(net, skb, sk, &fl); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -495,14 +495,14 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, - np->tclass, NULL, &fl, (struct rt6_info*)dst, + np->tclass, NULL, &fl6, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } - err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); out_put: if (likely(idev != NULL)) @@ -524,7 +524,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); struct icmp6hdr tmp_hdr; - struct flowi fl; + struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; int err = 0; @@ -538,31 +538,31 @@ static void icmpv6_echo_reply(struct sk_buff *skb) memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); if (saddr) - ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = skb->dev->ifindex; - fl.fl_icmp_type = ICMPV6_ECHO_REPLY; - security_skb_classify_flow(skb, &fl); + ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_oif = skb->dev->ifindex; + fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; np = inet6_sk(sk); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl); + err = ip6_dst_lookup(sk, &dst, &fl6); if (err) goto out; - dst = xfrm_lookup(net, dst, &fl, sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -576,7 +576,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.type = ICMPV6_ECHO_REPLY; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); @@ -585,7 +585,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ip6_flush_pending_frames(sk); goto out_put; } - err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); out_put: if (likely(idev != NULL)) @@ -784,20 +784,20 @@ drop_no_count: return 0; } -void icmpv6_flow_init(struct sock *sk, struct flowi *fl, +void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif) { - memset(fl, 0, sizeof(*fl)); - ipv6_addr_copy(&fl->fl6_src, saddr); - ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; - fl->fl_icmp_type = type; - fl->fl_icmp_code = 0; - fl->oif = oif; - security_sk_classify_flow(sk, fl); + memset(fl6, 0, sizeof(*fl6)); + ipv6_addr_copy(&fl6->saddr, saddr); + ipv6_addr_copy(&fl6->daddr, daddr); + fl6->flowi6_proto = IPPROTO_ICMPV6; + fl6->fl6_icmp_type = type; + fl6->fl6_icmp_code = 0; + fl6->flowi6_oif = oif; + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } /* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d687e1397333..166054650466 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -61,20 +61,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - final_p = fl6_update_dst(&fl, np->opt, &final); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + final_p = fl6_update_dst(&fl6, np->opt, &final); + ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) return NULL; @@ -208,28 +208,28 @@ int inet6_csk_xmit(struct sk_buff *skb) struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; struct in6_addr *final_p, final; - memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_sport = inet->inet_sport; - fl.fl_ip_dport = inet->inet_dport; - security_sk_classify_flow(sk, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6.flowlabel); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); @@ -244,9 +244,9 @@ int inet6_csk_xmit(struct sk_buff *skb) skb_dst_set(skb, dst_clone(dst)); /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl6.daddr, &np->daddr); - return ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt); } EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 633a6c266136..b53197233709 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -124,7 +124,7 @@ out: } EXPORT_SYMBOL(__inet6_lookup_established); -static int inline compute_score(struct sock *sk, struct net *net, +static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index de382114609b..7548905e79e1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -260,10 +260,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return net->ipv6.fib6_main_tbl; } -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); + return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); } static void __net_init fib6_tables_init(struct net *net) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 13654686aeab..f3caf1b8d572 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -342,7 +342,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, if (olen > 0) { struct msghdr msg; - struct flowi flowi; + struct flowi6 flowi6; int junk; err = -ENOMEM; @@ -358,9 +358,9 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); - flowi.oif = 0; + memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, + err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index adaffaf84555..18208876aa8a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -174,15 +174,15 @@ int ip6_output(struct sk_buff *skb) * xmit an sk_buff (used by TCP, SCTP and DCCP) */ -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, +int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *first_hop = &fl->fl6_dst; + struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; - u8 proto = fl->proto; + u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; int tclass = 0; @@ -230,13 +230,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl6->saddr); ipv6_addr_copy(&hdr->daddr, first_hop); skb->priority = sk->sk_priority; @@ -879,7 +879,7 @@ static inline int ip6_rt_check(struct rt6key *rt_key, static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, - struct flowi *fl) + struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *)dst; @@ -904,11 +904,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * sockets. * 2. oif also should be the same. */ - if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || + if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || #ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || + ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl->oif && fl->oif != dst->dev->ifindex)) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -918,22 +918,22 @@ out: } static int ip6_dst_lookup_tail(struct sock *sk, - struct dst_entry **dst, struct flowi *fl) + struct dst_entry **dst, struct flowi6 *fl6) { int err; struct net *net = sock_net(sk); if (*dst == NULL) - *dst = ip6_route_output(net, sk, fl); + *dst = ip6_route_output(net, sk, fl6); if ((err = (*dst)->error)) goto out_err_release; - if (ipv6_addr_any(&fl->fl6_src)) { + if (ipv6_addr_any(&fl6->saddr)) { err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, - &fl->fl6_dst, + &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, - &fl->fl6_src); + &fl6->saddr); if (err) goto out_err_release; } @@ -949,10 +949,10 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; - struct flowi fl_gw; + struct flowi6 fl_gw6; int redirect; - ifp = ipv6_get_ifaddr(net, &fl->fl6_src, + ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -965,9 +965,9 @@ static int ip6_dst_lookup_tail(struct sock *sk, * default router instead */ dst_release(*dst); - memcpy(&fl_gw, fl, sizeof(struct flowi)); - memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(net, sk, &fl_gw); + memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); + memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(net, sk, &fl_gw6); if ((err = (*dst)->error)) goto out_err_release; } @@ -988,23 +988,23 @@ out_err_release: * ip6_dst_lookup - perform route lookup on flow * @sk: socket which provides route info * @dst: pointer to dst_entry * for result - * @fl: flow to lookup + * @fl6: flow to lookup * * This function performs a route lookup on the given flow. * * It returns zero on success, or a standard errno code on error. */ -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; - return ip6_dst_lookup_tail(sk, dst, fl); + return ip6_dst_lookup_tail(sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** * ip6_dst_lookup_flow - perform route lookup on flow with ipsec * @sk: socket which provides route info - * @fl: flow to lookup + * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @can_sleep: we are in a sleepable context * @@ -1013,29 +1013,29 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * It returns a valid dst pointer on success, or a pointer encoded * error code. */ -struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, +struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool can_sleep) { struct dst_entry *dst = NULL; int err; - err = ip6_dst_lookup_tail(sk, &dst, fl); + err = ip6_dst_lookup_tail(sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) - ipv6_addr_copy(&fl->fl6_dst, final_dst); + ipv6_addr_copy(&fl6->daddr, final_dst); if (can_sleep) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); /** * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info - * @fl: flow to lookup + * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @can_sleep: we are in a sleepable context * @@ -1047,24 +1047,24 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * It returns a valid dst pointer on success, or a pointer encoded * error code. */ -struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, +struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool can_sleep) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; - dst = ip6_sk_dst_check(sk, dst, fl); + dst = ip6_sk_dst_check(sk, dst, fl6); - err = ip6_dst_lookup_tail(sk, &dst, fl); + err = ip6_dst_lookup_tail(sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) - ipv6_addr_copy(&fl->fl6_dst, final_dst); + ipv6_addr_copy(&fl6->daddr, final_dst); if (can_sleep) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); @@ -1145,7 +1145,7 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); @@ -1203,7 +1203,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } dst_hold(&rt->dst); inet->cork.dst = &rt->dst; - inet->cork.fl = *fl; + inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? @@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, transhdrlen += exthdrlen; } else { rt = (struct rt6_info *)inet->cork.dst; - fl = &inet->cork.fl; + fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; @@ -1239,7 +1239,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); return -EMSGSIZE; } } @@ -1271,7 +1271,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (length > mtu) { int proto = sk->sk_protocol; if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); return -EMSGSIZE; } @@ -1516,8 +1516,8 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; - struct flowi *fl = &inet->cork.fl; - unsigned char proto = fl->proto; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + unsigned char proto = fl6->flowi6_proto; int err = 0; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) @@ -1542,7 +1542,7 @@ int ip6_push_pending_frames(struct sock *sk) if (np->pmtudisc < IPV6_PMTUDISC_DO) skb->local_df = 1; - ipv6_addr_copy(final_dst, &fl->fl6_dst); + ipv6_addr_copy(final_dst, &fl6->daddr); __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -1553,12 +1553,12 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl->fl6_flowlabel | + *(__be32*)hdr = fl6->flowlabel | htonl(0x60000000 | ((int)np->cork.tclass << 20)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl6->saddr); ipv6_addr_copy(&hdr->daddr, final_dst); skb->priority = sk->sk_priority; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 02730ef26b0f..c1b1bd312df2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) @@ -535,7 +536,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct sk_buff *skb2; struct iphdr *eiph; - struct flowi fl; struct rtable *rt; err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, @@ -577,11 +577,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, eiph = ip_hdr(skb2); /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - rt = ip_route_output_key(dev_net(skb->dev), &fl); + rt = ip_route_output_ports(dev_net(skb->dev), NULL, + eiph->saddr, 0, + 0, 0, + IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt)) goto out; @@ -591,10 +590,11 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - rt = ip_route_output_key(dev_net(skb->dev), &fl); + rt = ip_route_output_ports(dev_net(skb->dev), NULL, + eiph->daddr, eiph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(eiph->tos), 0); if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { if (!IS_ERR(rt)) @@ -884,7 +884,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, - struct flowi *fl, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu) { @@ -904,11 +904,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(net, NULL, fl); + dst = ip6_route_output(net, NULL, fl6); if (dst->error) goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -963,7 +963,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->transport_header = skb->network_header; - proto = fl->proto; + proto = fl6->flowi6_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -971,13 +971,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); + *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; - ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); - ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); + ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr); + ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr); nf_reset(skb); pkt_len = skb->len; err = ip6_local_out(skb); @@ -1007,7 +1007,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct iphdr *iph = ip_hdr(skb); int encap_limit = -1; - struct flowi fl; + struct flowi6 fl6; __u8 dsfield; __u32 mtu; int err; @@ -1019,16 +1019,16 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPIP; + memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -1047,7 +1047,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct ipv6hdr *ipv6h = ipv6_hdr(skb); int encap_limit = -1; __u16 offset; - struct flowi fl; + struct flowi6 fl6; __u8 dsfield; __u32 mtu; int err; @@ -1069,16 +1069,16 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPV6; + memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1141,21 +1141,21 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct ip6_tnl_parm *p = &t->parms; - struct flowi *fl = &t->fl; + struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ - ipv6_addr_copy(&fl->fl6_src, &p->laddr); - ipv6_addr_copy(&fl->fl6_dst, &p->raddr); - fl->oif = p->link; - fl->fl6_flowlabel = 0; + ipv6_addr_copy(&fl6->saddr, &p->laddr); + ipv6_addr_copy(&fl6->daddr, &p->raddr); + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) - fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; ip6_tnl_set_cap(t); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 618f67ccda31..7ff0343e05c7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -135,14 +135,15 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) return NULL; } -static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, +static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { struct ip6mr_result res; struct fib_lookup_arg arg = { .result = &res, }; int err; - err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, + flowi6_to_flowi(flp6), 0, &arg); if (err < 0) return err; *mrt = res.mrt; @@ -270,7 +271,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) return net->ipv6.mrt6; } -static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, +static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { *mrt = net->ipv6.mrt6; @@ -617,9 +618,9 @@ static int pim6_rcv(struct sk_buff *skb) struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); struct mr6_table *mrt; - struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; int reg_vif_num; @@ -644,7 +645,7 @@ static int pim6_rcv(struct sk_buff *skb) ntohs(encap->payload_len) + sizeof(*pim) > skb->len) goto drop; - if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto drop; reg_vif_num = mrt->mroute_reg_vif_num; @@ -687,14 +688,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, { struct net *net = dev_net(dev); struct mr6_table *mrt; - struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_oif = dev->ifindex, + .flowi6_iif = skb->skb_iif, + .flowi6_mark = skb->mark, }; int err; - err = ip6mr_fib_lookup(net, &fl, &mrt); + err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) return err; @@ -1547,13 +1548,13 @@ int ip6mr_sk_done(struct sock *sk) struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; - struct flowi fl = { - .iif = skb->skb_iif, - .oif = skb->dev->ifindex, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->skb_iif, + .flowi6_oif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; - if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) return NULL; return mrt->mroute6_sk; @@ -1897,7 +1898,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, struct mif_device *vif = &mrt->vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; if (vif->dev == NULL) goto out_free; @@ -1915,12 +1916,12 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, ipv6h = ipv6_hdr(skb); - fl = (struct flowi) { - .oif = vif->link, - .fl6_dst = ipv6h->daddr, + fl6 = (struct flowi6) { + .flowi6_oif = vif->link, + .daddr = ipv6h->daddr, }; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (!dst) goto out_free; @@ -2043,13 +2044,13 @@ int ip6_mr_input(struct sk_buff *skb) struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); struct mr6_table *mrt; - struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; int err; - err = ip6mr_fib_lookup(net, &fl, &mrt); + err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1770e061c08..9cb191ecaba8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -444,12 +444,12 @@ sticky_done: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; - struct flowi fl; + struct flowi6 fl6; int junk; - fl.fl6_flowlabel = 0; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, + retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f2c9b6930ffc..76b893771e6e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1396,7 +1396,7 @@ static void mld_sendpack(struct sk_buff *skb) struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; rcu_read_lock(); @@ -1419,11 +1419,11 @@ static void mld_sendpack(struct sk_buff *skb) goto err_out; } - icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, + icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -1731,7 +1731,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; if (type == ICMPV6_MGM_REDUCTION) @@ -1791,11 +1791,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) goto err_out; } - icmpv6_flow_init(sk, &fl, type, + icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f3e3ca938a54..9b210482fb05 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -208,14 +208,15 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, { struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + const struct flowi6 *fl6 = &fl->u.ip6; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; int offset; struct timeval stamp; int err = 0; - if (unlikely(fl->proto == IPPROTO_MH && - fl->fl_mh_type <= IP6_MH_TYPE_MAX)) + if (unlikely(fl6->flowi6_proto == IPPROTO_MH && + fl6->fl6_mh_type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { @@ -240,14 +241,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, sizeof(sel.saddr)); sel.prefixlen_s = 128; sel.family = AF_INET6; - sel.proto = fl->proto; - sel.dport = xfrm_flowi_dport(fl); + sel.proto = fl6->flowi6_proto; + sel.dport = xfrm_flowi_dport(fl, &fl6->uli); if (sel.dport) sel.dport_mask = htons(~0); - sel.sport = xfrm_flowi_sport(fl); + sel.sport = xfrm_flowi_sport(fl, &fl6->uli); if (sel.sport) sel.sport_mask = htons(~0); - sel.ifindex = fl->oif; + sel.ifindex = fl6->flowi6_oif; err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9360d3be94f0..0e49c9db3c98 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -511,7 +511,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *saddr, struct icmp6hdr *icmp6h) { - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; @@ -521,7 +521,7 @@ void ndisc_send_skb(struct sk_buff *skb, type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); dst = icmp6_dst_alloc(dev, neigh, daddr); if (!dst) { @@ -529,7 +529,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { kfree_skb(skb); return; @@ -1515,7 +1515,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct rt6_info *rt; struct dst_entry *dst; struct inet6_dev *idev; - struct flowi fl; + struct flowi6 fl6; u8 *opt; int rd_len; int err; @@ -1535,14 +1535,14 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, + icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL) return; - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8d74116ae27d..39aaca2b4fd2 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -15,14 +15,14 @@ int ip6_route_me_harder(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; - struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .mark = skb->mark, - .fl6_dst = iph->daddr, - .fl6_src = iph->saddr, + struct flowi6 fl6 = { + .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .flowi6_mark = skb->mark, + .daddr = iph->daddr, + .saddr = iph->saddr, }; - dst = ip6_route_output(net, skb->sk, &fl); + dst = ip6_route_output(net, skb->sk, &fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); @@ -37,9 +37,9 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET6) == 0) { + xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); @@ -92,7 +92,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) { - *dst = ip6_route_output(&init_net, NULL, fl); + *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); return (*dst)->error; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 47b7b8df7fac..0b2af9b85cec 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -410,7 +410,7 @@ ip6t_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - if (*stackptr == 0) + if (*stackptr <= origptr) e = get_entry(table_base, private->underflow[hook]); else @@ -441,8 +441,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); *stackptr = origptr; + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -1275,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1822,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -2051,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 91f6a61cefab..28e74488a329 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -47,7 +47,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; u8 proto; - struct flowi fl; + struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { @@ -89,19 +89,19 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) return; } - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); - ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); - fl.fl_ip_sport = otcph.dest; - fl.fl_ip_dport = otcph.source; - security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(net, NULL, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.saddr, &oip6h->daddr); + ipv6_addr_copy(&fl6.daddr, &oip6h->saddr); + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL || dst->error) { dst_release(dst); return; } - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dc29b07caf42..4a1c3b46c56b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -524,7 +524,7 @@ csum_copy_err: goto out; } -static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, +static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { struct sk_buff *skb; @@ -586,11 +586,10 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, if (unlikely(csum)) tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); - csum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - total_len, fl->proto, tmp_csum); + csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + total_len, fl6->flowi6_proto, tmp_csum); - if (csum == 0 && fl->proto == IPPROTO_UDP) + if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; if (skb_store_bits(skb, offset, &csum, 2)) @@ -603,7 +602,7 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct dst_entry **dstp, + struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -613,7 +612,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct rt6_info *rt = (struct rt6_info *)*dstp; if (length > rt->dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) @@ -662,7 +661,7 @@ error: return err; } -static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -679,7 +678,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl6->flowi6_proto) { case IPPROTO_ICMPV6: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -694,8 +693,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl_icmp_type, type) || - get_user(fl->fl_icmp_code, code)) + if (get_user(fl6->fl6_icmp_type, type) || + get_user(fl6->fl6_icmp_code, code)) return -EFAULT; probed = 1; } @@ -706,7 +705,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) /* check if type field is readable or not. */ if (iov->iov_len > 2 - len) { u8 __user *p = iov->iov_base; - if (get_user(fl->fl_mh_type, &p[2 - len])) + if (get_user(fl6->fl6_mh_type, &p[2 - len])) return -EFAULT; probed = 1; } else @@ -735,7 +734,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; - struct flowi fl; + struct flowi6 fl6; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -756,9 +755,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, /* * Get and verify the address. */ - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); - fl.mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -780,9 +779,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -800,32 +799,32 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; proto = inet->inet_num; daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.flowlabel = np->flow_label; } - if (fl.oif == 0) - fl.oif = sk->sk_bound_dev_if; + if (fl6.flowi6_oif == 0) + fl6.flowi6_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -838,31 +837,31 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = proto; - err = rawv6_probe_proto_opt(&fl, msg); + fl6.flowi6_proto = proto; + err = rawv6_probe_proto_opt(&fl6, msg); if (err) goto out; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + ipv6_addr_copy(&fl6.daddr, daddr); else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl6.saddr, &np->saddr); - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; } if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -881,17 +880,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, - len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, msg->msg_flags, dontfrag); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) - err = rawv6_push_pending_frames(sk, &fl, rp); + err = rawv6_push_pending_frames(sk, &fl6, rp); release_sock(sk); } done: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d55d00c2a824..6814c8722fa7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -599,17 +599,17 @@ do { \ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { struct fib6_node *fn; struct rt6_info *rt; read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); - BACKTRACK(net, &fl->fl6_src); + rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -620,19 +620,19 @@ out: struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { - struct flowi fl = { - .oif = oif, - .fl6_dst = *daddr, + struct flowi6 fl6 = { + .flowi6_oif = oif, + .daddr = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; if (saddr) { - memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); + memcpy(&fl6.saddr, saddr, sizeof(*saddr)); flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -753,7 +753,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d } static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -768,12 +768,12 @@ relookup: read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(net, &fl->fl6_src); + BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -782,9 +782,11 @@ restart: read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); + nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); + else if (!(rt->dst.flags & DST_HOST)) + nrt = rt6_alloc_clone(rt, &fl6->daddr); else - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); + goto out2; dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; @@ -821,9 +823,9 @@ out2: } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { - return ip6_pol_route(net, table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } void ip6_route_input(struct sk_buff *skb) @@ -831,41 +833,41 @@ void ip6_route_input(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; - struct flowi fl = { - .iif = skb->dev->ifindex, - .fl6_dst = iph->daddr, - .fl6_src = iph->saddr, - .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - .mark = skb->mark, - .proto = iph->nexthdr, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, }; if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); + skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input)); } static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { - return ip6_pol_route(net, table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, - struct flowi *fl) + struct flowi6 *fl6) { int flags = 0; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; - if (!ipv6_addr_any(&fl->fl6_src)) + if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); - return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); @@ -1442,16 +1444,16 @@ static int ip6_route_del(struct fib6_config *cfg) * Handle redirects */ struct ip6rd_flowi { - struct flowi fl; + struct flowi6 fl6; struct in6_addr gateway; }; static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, - struct flowi *fl, + struct flowi6 *fl6, int flags) { - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; + struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; struct rt6_info *rt; struct fib6_node *fn; @@ -1467,7 +1469,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, */ read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* @@ -1482,7 +1484,7 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl->oif != rt->rt6i_dev->ifindex) + if (fl6->flowi6_oif != rt->rt6i_dev->ifindex) continue; if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; @@ -1491,7 +1493,7 @@ restart: if (!rt) rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl->fl6_src); + BACKTRACK(net, &fl6->saddr); out: dst_hold(&rt->dst); @@ -1508,10 +1510,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, int flags = RT6_LOOKUP_F_HAS_SADDR; struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { - .fl = { - .oif = dev->ifindex, - .fl6_dst = *dest, - .fl6_src = *src, + .fl6 = { + .flowi6_oif = dev->ifindex, + .daddr = *dest, + .saddr = *src, }, }; @@ -1520,7 +1522,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, flags, __ip6_route_redirect); } @@ -2383,7 +2385,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi fl; + struct flowi6 fl6; int err, iif = 0; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2391,27 +2393,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; err = -EINVAL; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); + ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); } if (tb[RTA_DST]) { if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); + ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); } if (tb[RTA_IIF]) iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) - fl.oif = nla_get_u32(tb[RTA_OIF]); + fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; @@ -2434,10 +2436,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); skb_dst_set(skb, &rt->dst); - err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b8c8adbd7cf6..43b33373adb2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -732,17 +732,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst = addr6->s6_addr32[3]; } - { - struct flowi fl = { .fl4_dst = dst, - .fl4_src = tiph->saddr, - .fl4_tos = RT_TOS(tos), - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - rt = ip_route_output_key(dev_net(dev), &fl); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } + rt = ip_route_output_ports(dev_net(dev), NULL, + dst, tiph->saddr, + 0, 0, + IPPROTO_IPV6, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); @@ -858,12 +855,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPV6, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; @@ -1295,4 +1292,4 @@ static int __init sit_init(void) module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); -MODULE_ALIAS("sit0"); +MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0b4cf350631b..352c26081f5d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -232,19 +232,19 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) */ { struct in6_addr *final_p, final; - struct flowi fl; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, np->opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->inet_sport; - security_req_classify_flow(req, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + final_p = fl6_update_dst(&fl6, np->opt, &final); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_sk(sk)->inet_sport; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e59a31c48baf..2b0c186862c8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -131,7 +131,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct rt6_info *rt; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -142,14 +142,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; /* * TCP over IPv4 @@ -242,27 +242,27 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, (saddr ? saddr : &np->saddr)); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; + saddr = &fl6.saddr; ipv6_addr_copy(&np->rcv_saddr, saddr); } @@ -389,23 +389,23 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct flowi fl; + struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_skb_classify_flow(skb, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; @@ -482,25 +482,25 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int err; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.fl6_flowlabel = 0; - fl.oif = treq->iif; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.flowlabel = 0; + fl6.flowi6_oif = treq->iif; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto done; @@ -510,8 +510,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + err = ip6_xmit(sk, skb, &fl6, opt); err = net_xmit_eval(err); } @@ -992,7 +992,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, { struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); @@ -1046,29 +1046,29 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr); buff->ip_summed = CHECKSUM_PARTIAL; buff->csum = 0; - __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); + __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); - fl.proto = IPPROTO_TCP; - fl.oif = inet6_iif(skb); - fl.fl_ip_dport = t1->dest; - fl.fl_ip_sport = t1->source; - security_skb_classify_flow(skb, &fl); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.flowi6_oif = inet6_iif(skb); + fl6.fl6_dport = t1->dest; + fl6.fl6_sport = t1->source; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network * namespace */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL); + ip6_xmit(ctl_sk, buff, &fl6, NULL); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d86d7f67a597..d7037c006e13 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -886,7 +886,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; @@ -899,23 +899,23 @@ static int udp_v6_push_pending_frames(struct sock *sk) * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; + uh->source = fl6->fl6_sport; + uh->dest = fl6->fl6_dport; uh->len = htons(up->len); uh->check = 0; if (is_udplite) csum = udplite_csum_outgoing(sk, skb); else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; } else csum = udp_csum_outgoing(sk, skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->proto, csum ); + uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + up->len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -947,7 +947,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -1030,19 +1030,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl.fl_ip_dport = sin6->sin6_port; + fl6.fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -1060,38 +1060,38 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl_ip_dport = inet->inet_dport; + fl6.fl6_dport = inet->inet_dport; daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.flowlabel = np->flow_label; connected = 1; } - if (!fl.oif) - fl.oif = sk->sk_bound_dev_if; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = sk->sk_bound_dev_if; - if (!fl.oif) - fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl.mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -1105,27 +1105,27 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + ipv6_addr_copy(&fl6.daddr, daddr); else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->inet_sport; + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = 0; - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { + fl6.flowi6_oif = np->mcast_oif; connected = 0; } - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_sk_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1133,7 +1133,7 @@ do_udp_sendmsg: } if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -1168,7 +1168,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, &fl, + sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) @@ -1181,10 +1181,10 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl6.daddr, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : #endif NULL); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 48ce496802fd..05e34c8ec913 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -30,15 +30,16 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { - struct flowi fl = {}; + struct flowi6 fl6; struct dst_entry *dst; int err; - memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) - memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); + memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); err = dst->error; if (dst->error) { @@ -120,6 +121,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, static inline void _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { + struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -127,11 +129,11 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; - memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; - ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); - ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); + ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr); + ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr); while (nh + offset + 1 < skb->data || pskb_may_pull(skb, nh + offset + 1 - skb->data)) { @@ -158,20 +160,20 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; - fl->fl_ip_sport = ports[!!reverse]; - fl->fl_ip_dport = ports[!reverse]; + fl6->fl6_sport = ports[!!reverse]; + fl6->fl6_dport = ports[!reverse]; } - fl->proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; - fl->fl_icmp_type = icmp[0]; - fl->fl_icmp_code = icmp[1]; + fl6->fl6_icmp_type = icmp[0]; + fl6->fl6_icmp_code = icmp[1]; } - fl->proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -180,9 +182,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; - fl->fl_mh_type = mh->ip6mh_type; + fl6->fl6_mh_type = mh->ip6mh_type; } - fl->proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; #endif @@ -191,8 +193,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ESP: case IPPROTO_COMP: default: - fl->fl_ipsec_spi = 0; - fl->proto = nexthdr; + fl6->fl6_ipsec_spi = 0; + fl6->flowi6_proto = nexthdr; return; } } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a02598e0079a..afe941e9415c 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -22,19 +22,21 @@ static void __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { + const struct flowi6 *fl6 = &fl->u.ip6; + /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); - sel->dport = xfrm_flowi_dport(fl); + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr); + sel->dport = xfrm_flowi_dport(fl, &fl6->uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl); + sel->sport = xfrm_flowi_sport(fl, &fl6->uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET6; sel->prefixlen_d = 128; sel->prefixlen_s = 128; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl6->flowi6_proto; + sel->ifindex = fl6->flowi6_oif; } static void diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig index 02549cb2c328..e9ad0062fbb6 100644 --- a/net/ipx/Kconfig +++ b/net/ipx/Kconfig @@ -3,7 +3,6 @@ # config IPX tristate "The IPX protocol" - depends on BKL # should be fixable select LLC ---help--- This is support for the Novell networking protocol, IPX, commonly diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index da3d21c41d90..2731b51923d1 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -42,7 +42,6 @@ #include <linux/uio.h> #include <linux/slab.h> #include <linux/skbuff.h> -#include <linux/smp_lock.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/string.h> @@ -1299,7 +1298,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname, int opt; int rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (optlen != sizeof(int)) goto out; @@ -1314,7 +1313,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname, ipx_sk(sk)->type = opt; rc = 0; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1326,7 +1325,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, int len; int rc = -ENOPROTOOPT; - lock_kernel(); + lock_sock(sk); if (!(level == SOL_IPX && optname == IPX_TYPE)) goto out; @@ -1347,7 +1346,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, rc = 0; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1396,7 +1395,7 @@ static int ipx_release(struct socket *sock) if (!sk) goto out; - lock_kernel(); + lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); @@ -1404,7 +1403,7 @@ static int ipx_release(struct socket *sock) sock->sk = NULL; sk_refcnt_debug_release(sk); ipx_destroy_socket(sk); - unlock_kernel(); + release_sock(sk); out: return 0; } @@ -1530,11 +1529,12 @@ out: static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { + struct sock *sk = sock->sk; int rc; - lock_kernel(); + lock_sock(sk); rc = __ipx_bind(sock, uaddr, addr_len); - unlock_kernel(); + release_sock(sk); return rc; } @@ -1551,7 +1551,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; - lock_kernel(); + lock_sock(sk); if (addr_len != sizeof(*addr)) goto out; addr = (struct sockaddr_ipx *)uaddr; @@ -1598,7 +1598,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, ipxrtr_put(rt); rc = 0; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1614,7 +1614,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, *uaddr_len = sizeof(struct sockaddr_ipx); - lock_kernel(); + lock_sock(sk); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1649,19 +1649,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, rc = 0; out: - unlock_kernel(); - return rc; -} - -static unsigned int ipx_datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - int rc; - - lock_kernel(); - rc = datagram_poll(file, sock, wait); - unlock_kernel(); - + release_sock(sk); return rc; } @@ -1736,7 +1724,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, int rc = -EINVAL; int flags = msg->msg_flags; - lock_kernel(); + lock_sock(sk); /* Socket gets bound below anyway */ /* if (sk->sk_zapped) return -EIO; */ /* Socket not bound */ @@ -1788,7 +1776,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, if (rc >= 0) rc = len; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1803,7 +1791,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, rc; - lock_kernel(); + lock_sock(sk); /* put the autobinding in */ if (!ipxs->port) { struct sockaddr_ipx uaddr; @@ -1862,7 +1850,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1874,7 +1862,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; - lock_kernel(); + lock_sock(sk); switch (cmd) { case TIOCOUTQ: amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); @@ -1937,7 +1925,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -ENOIOCTLCMD; break; } - unlock_kernel(); + release_sock(sk); return rc; } @@ -1984,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = ipx_getname, - .poll = ipx_datagram_poll, + .poll = datagram_poll, .ioctl = ipx_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ipx_compat_ioctl, diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index 24cb3aa2bbfb..77c5e6499f8f 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -189,12 +189,12 @@ void ircomm_tty_set_termios(struct tty_struct *tty, } /* - * Function ircomm_tty_tiocmget (tty, file) + * Function ircomm_tty_tiocmget (tty) * * * */ -int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file) +int ircomm_tty_tiocmget(struct tty_struct *tty) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; unsigned int result; @@ -214,12 +214,12 @@ int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file) } /* - * Function ircomm_tty_tiocmset (tty, file, set, clear) + * Function ircomm_tty_tiocmset (tty, set, clear) * * * */ -int ircomm_tty_tiocmset(struct tty_struct *tty, struct file *file, +int ircomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; @@ -365,12 +365,12 @@ static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self, } /* - * Function ircomm_tty_ioctl (tty, file, cmd, arg) + * Function ircomm_tty_ioctl (tty, cmd, arg) * * * */ -int ircomm_tty_ioctl(struct tty_struct *tty, struct file *file, +int ircomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 2a698ff89db6..fce9bd3bd3fe 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -475,25 +475,17 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. - */ - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - if (IS_ERR(rt)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. + */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, inet->inet_sport, + sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set(skb, dst_clone(&rt->dst)); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7b701dcddb50..334213571ad0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -834,6 +834,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, rcu_read_unlock(); + if (sdata->vif.type == NL80211_IFTYPE_STATION && + params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ieee80211_recalc_ps(local, -1); + return 0; } @@ -2008,6 +2012,21 @@ static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) return drv_get_antenna(local, tx_ant, rx_ant); } +static int ieee80211_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return drv_set_ringparam(local, tx, rx); +} + +static void ieee80211_get_ringparam(struct wiphy *wiphy, + u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + drv_get_ringparam(local, tx, tx_max, rx, rx_max); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -2065,4 +2084,6 @@ struct cfg80211_ops mac80211_config_ops = { .mgmt_frame_register = ieee80211_mgmt_frame_register, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, + .set_ringparam = ieee80211_set_ringparam, + .get_ringparam = ieee80211_get_ringparam, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5b24740fc0b0..889c3e93e0f4 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -77,6 +77,9 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local, switch (tmp->vif.bss_conf.channel_type) { case NL80211_CHAN_NO_HT: case NL80211_CHAN_HT20: + if (superchan > tmp->vif.bss_conf.channel_type) + break; + superchan = tmp->vif.bss_conf.channel_type; break; case NL80211_CHAN_HT40PLUS: diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 3729296f6f95..9c0d62bb0ea3 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -526,4 +526,30 @@ static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local) return ret; } +static inline int drv_set_ringparam(struct ieee80211_local *local, + u32 tx, u32 rx) +{ + int ret = -ENOTSUPP; + + might_sleep(); + + trace_drv_set_ringparam(local, tx, rx); + if (local->ops->set_ringparam) + ret = local->ops->set_ringparam(&local->hw, tx, rx); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline void drv_get_ringparam(struct ieee80211_local *local, + u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) +{ + might_sleep(); + + trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max); + if (local->ops->get_ringparam) + local->ops->get_ringparam(&local->hw, tx, tx_max, rx, rx_max); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 520fe2444893..45aab80738e2 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -912,6 +912,58 @@ TRACE_EVENT(drv_offchannel_tx, ) ); +TRACE_EVENT(drv_set_ringparam, + TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx), + + TP_ARGS(local, tx, rx), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx) + __field(u32, rx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx = tx; + __entry->rx = rx; + ), + + TP_printk( + LOCAL_PR_FMT " tx:%d rx %d", + LOCAL_PR_ARG, __entry->tx, __entry->rx + ) +); + +TRACE_EVENT(drv_get_ringparam, + TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max, + u32 *rx, u32 *rx_max), + + TP_ARGS(local, tx, tx_max, rx, rx_max), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx) + __field(u32, tx_max) + __field(u32, rx) + __field(u32, rx_max) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx = *tx; + __entry->tx_max = *tx_max; + __entry->rx = *rx; + __entry->rx_max = *rx_max; + ), + + TP_printk( + LOCAL_PR_FMT " tx:%d tx_max %d rx %d rx_max %d", + LOCAL_PR_ARG, + __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max + ) +); + DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 8106aa1b7466..4ddbe27eb570 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -21,7 +21,6 @@ #define WEP_IV_LEN 4 #define WEP_ICV_LEN 4 -#define ALG_TKIP_KEY_LEN 32 #define ALG_CCMP_KEY_LEN 16 #define CCMP_HDR_LEN 8 #define CCMP_MIC_LEN 8 diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2543e48bd813..562d2984c482 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -380,6 +380,9 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) trace_api_restart_hw(local); + wiphy_info(hw->wiphy, + "Hardware restart was requested\n"); + /* use this reason, ieee80211_reconfig will unblock it */ ieee80211_stop_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cc984bd861cf..64d92d5a7f40 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -613,6 +613,37 @@ static void ieee80211_change_ps(struct ieee80211_local *local) } } +static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *mgd = &sdata->u.mgd; + struct sta_info *sta = NULL; + u32 sta_flags = 0; + + if (!mgd->powersave) + return false; + + if (!mgd->associated) + return false; + + if (!mgd->associated->beacon_ies) + return false; + + if (mgd->flags & (IEEE80211_STA_BEACON_POLL | + IEEE80211_STA_CONNECTION_POLL)) + return false; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgd->bssid); + if (sta) + sta_flags = get_sta_flags(sta); + rcu_read_unlock(); + + if (!(sta_flags & WLAN_STA_AUTHORIZED)) + return false; + + return true; +} + /* need to hold RTNL or interface lock */ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) { @@ -647,11 +678,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) count++; } - if (count == 1 && found->u.mgd.powersave && - found->u.mgd.associated && - found->u.mgd.associated->beacon_ies && - !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL))) { + if (count == 1 && ieee80211_powersave_allowed(found)) { struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 165a4518bb48..8212a8bebf06 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -415,10 +415,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->sample_count--; } - if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; - minstrel_next_sample_idx(mi); - } for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || @@ -519,9 +517,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, rate->count = mr->retry_count; rate->flags = IEEE80211_TX_RC_MCS | group->flags; - if (txrc->short_preamble) - rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - if (txrc->rts || rtscts) + if (rtscts) rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; } @@ -553,13 +549,14 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) sample_idx = sample_table[mg->column][mg->index]; mr = &mg->rates[sample_idx]; sample_idx += mi->sample_group * MCS_GROUP_RATES; + minstrel_next_sample_idx(mi); /* * When not using MRR, do not sample if the probability is already * higher than 95% to avoid wasting airtime */ if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) - goto next; + return -1; /* * Make sure that lower rates get sampled only occasionally, @@ -568,17 +565,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (minstrel_get_duration(sample_idx) > minstrel_get_duration(mi->max_tp_rate)) { if (mr->sample_skipped < 20) - goto next; + return -1; if (mi->sample_slow++ > 2) - goto next; + return -1; } return sample_idx; - -next: - minstrel_next_sample_idx(mi); - return -1; } static void @@ -605,19 +598,46 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample = true; minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, txrc, true, false); - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, - txrc, false, false); info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, txrc, false, false); - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, - txrc, false, true); } - minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample); - ar[3].count = 0; - ar[3].idx = -1; + if (mp->hw->max_rates >= 3) { + /* + * At least 3 tx rates supported, use + * sample_rate -> max_tp_rate -> max_prob_rate for sampling and + * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default. + */ + if (sample_idx >= 0) + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, + txrc, false, false); + else + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, + txrc, false, true); + + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, + txrc, false, !sample); + + ar[3].count = 0; + ar[3].idx = -1; + } else if (mp->hw->max_rates == 2) { + /* + * Only 2 tx rates supported, use + * sample_rate -> max_prob_rate for sampling and + * max_tp_rate -> max_prob_rate by default. + */ + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate, + txrc, false, !sample); + + ar[2].count = 0; + ar[2].idx = -1; + } else { + /* Not using MRR, only use the first rate */ + ar[1].count = 0; + ar[1].idx = -1; + } mi->total_packets++; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 1a873f00691a..6510f8ee738e 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -24,9 +24,6 @@ /* Fixed point arithmetic shifting amount. */ #define RC_PID_ARITH_SHIFT 8 -/* Fixed point arithmetic factor. */ -#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT) - /* Proportional PID component coefficient. */ #define RC_PID_COEFF_P 15 /* Integral PID component coefficient. */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 842954509925..489b6ad200d4 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -258,10 +258,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); + bool on_oper_chan; + bool enable_beacons = false; lockdep_assert_held(&local->mtx); @@ -275,12 +277,12 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, aborted = true; if (WARN_ON(!local->scan_req)) - return false; + return; if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req); if (rc == 0) - return false; + return; } kfree(local->hw_scan_req); @@ -294,26 +296,11 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, local->scanning = 0; local->scan_channel = NULL; - return true; -} - -static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, - bool was_hw_scan) -{ - struct ieee80211_local *local = hw_to_local(hw); - bool on_oper_chan; - bool enable_beacons = false; - - mutex_lock(&local->mtx); on_oper_chan = ieee80211_cfg_on_oper_channel(local); - WARN_ON(local->scanning & (SCAN_SW_SCANNING | SCAN_HW_SCANNING)); - - if (was_hw_scan || !on_oper_chan) { - if (WARN_ON(local->scan_channel)) - local->scan_channel = NULL; + if (was_hw_scan || !on_oper_chan) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } else + else /* Set power back to normal operating levels. */ ieee80211_hw_config(local, 0); @@ -331,7 +318,6 @@ static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, } ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); @@ -686,12 +672,14 @@ void ieee80211_scan_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, scan_work.work); - struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_sub_if_data *sdata; unsigned long next_delay = 0; - bool aborted, hw_scan, finish; + bool aborted, hw_scan; mutex_lock(&local->mtx); + sdata = local->scan_sdata; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); goto out_complete; @@ -755,17 +743,11 @@ void ieee80211_scan_work(struct work_struct *work) } while (next_delay == 0); ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); - mutex_unlock(&local->mtx); - return; + goto out; out_complete: hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); - finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan); - mutex_unlock(&local->mtx); - if (finish) - __ieee80211_scan_completed_finish(&local->hw, hw_scan); - return; - + __ieee80211_scan_completed(&local->hw, aborted, hw_scan); out: mutex_unlock(&local->mtx); } @@ -835,7 +817,6 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, void ieee80211_scan_cancel(struct ieee80211_local *local) { bool abortscan; - bool finish = false; /* * We are only canceling software scan, or deferred scan that was not @@ -855,14 +836,17 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_lock(&local->mtx); abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning); - if (abortscan) - finish = __ieee80211_scan_completed(&local->hw, true, false); - mutex_unlock(&local->mtx); - if (abortscan) { - /* The scan is canceled, but stop work from being pending */ - cancel_delayed_work_sync(&local->scan_work); + /* + * The scan is canceled, but stop work from being pending. + * + * If the work is currently running, it must be blocked on + * the mutex, but we'll set scan_sdata = NULL and it'll + * simply exit once it acquires the mutex. + */ + cancel_delayed_work(&local->scan_work); + /* and clean up */ + __ieee80211_scan_completed(&local->hw, true, false); } - if (finish) - __ieee80211_scan_completed_finish(&local->hw, false); + mutex_unlock(&local->mtx); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 081dcaf6577b..ce4596ed1268 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -169,7 +169,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, return cpu_to_le16(dur); } -static int inline is_ieee80211_device(struct ieee80211_local *local, +static inline int is_ieee80211_device(struct ieee80211_local *local, struct net_device *dev) { return local == wdev_priv(dev->ieee80211_ptr); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 204f0a4db969..e73c8cae036b 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -30,7 +30,6 @@ #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_MAX_PROBE_TRIES 5 enum work_action { WORK_ACT_MISMATCH, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 82a6e0d80f05..c3f988aa1152 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -649,6 +649,17 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP comment "Xtables matches" +config NETFILTER_XT_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' + depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) + ---help--- + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... + + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index d57a890eaee5..1a02853df863 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches +obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8b1a54c1e400..d6b48230a540 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -94,16 +94,28 @@ static int find_set_type_get(const char *name, u8 family, u8 revision, struct ip_set_type **found) { + struct ip_set_type *type; + int err; + rcu_read_lock(); *found = find_set_type(name, family, revision); if (*found) { - int err = !try_module_get((*found)->me); - rcu_read_unlock(); - return err ? -EFAULT : 0; + err = !try_module_get((*found)->me) ? -EFAULT : 0; + goto unlock; } + /* Make sure the type is loaded but we don't support the revision */ + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name)) { + err = -IPSET_ERR_FIND_TYPE; + goto unlock; + } rcu_read_unlock(); return try_to_load_type(name); + +unlock: + rcu_read_unlock(); + return err; } /* Find a given set type by name and family. @@ -116,7 +128,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) struct ip_set_type *type; bool found = false; - *min = *max = 0; + *min = 255; *max = 0; rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && @@ -124,7 +136,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) found = true; if (type->revision < *min) *min = type->revision; - else if (type->revision > *max) + if (type->revision > *max) *max = type->revision; } rcu_read_unlock(); @@ -612,7 +624,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set *set, *clash; + struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; const char *name, *typename; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index adbe787ea5dc..b9214145d357 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -150,6 +150,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -172,21 +173,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -195,7 +190,6 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -219,13 +213,12 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -361,6 +354,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -385,21 +379,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -407,9 +395,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 22e23abb86c6..4642872df6e1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -154,6 +154,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -180,21 +181,15 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -203,7 +198,6 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -227,13 +221,12 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -375,6 +368,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -403,21 +397,15 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -425,9 +413,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6033e8b54bbd..2cb84a54b7ad 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -174,6 +174,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -208,21 +209,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -231,7 +226,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -255,13 +249,12 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -429,6 +422,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -465,21 +459,15 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -487,9 +475,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 34a165626ee9..8598676f2a05 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -170,6 +170,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport4_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -198,21 +199,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -220,9 +215,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -390,6 +383,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -418,21 +412,15 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -440,9 +428,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9c2a517b69c8..f289306cbf12 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_dec(&dest->refcnt); } +static int expire_quiescent_template(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest) +{ +#ifdef CONFIG_SYSCTL + return ipvs->sysctl_expire_quiescent_template && + (atomic_read(&dest->weight) == 0); +#else + return 0; +#endif +} /* * Checking if the destination of a connection template is available. @@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (ipvs->sysctl_expire_quiescent_template && - (atomic_read(&dest->weight) == 0))) { + expire_quiescent_template(ipvs, dest)) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2d1f932add46..07accf6b2401 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); s->ustats.inbytes += skb->len; @@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); s->ustats.outbytes += skb->len; @@ -183,7 +183,7 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) s = this_cpu_ptr(svc->stats.cpustats); s->ustats.conns++; - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.conns++; } @@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { - struct net *net; - struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; +#ifdef CONFIG_SYSCTL + struct net *net; + struct netns_ipvs *ipvs; int unicast; +#endif ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); @@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_service_put(svc); return NF_DROP; } + +#ifdef CONFIG_SYSCTL net = skb_net(skb); #ifdef CONFIG_IP_VS_IPV6 @@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_put(cp); return ret; } +#endif /* * When the virtual ftp service is presented, packets destined @@ -599,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } +#ifdef CONFIG_SYSCTL + +static int sysctl_snat_reroute(struct sk_buff *skb) +{ + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + return ipvs->sysctl_snat_reroute; +} + +static int sysctl_nat_icmp_send(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + return ipvs->sysctl_nat_icmp_send; +} + +static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_expire_nodest_conn; +} + +#else + +static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } +static int sysctl_nat_icmp_send(struct net *net) { return 0; } +static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } + +#endif + __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -631,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) } #endif +static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) + return 1; + } else +#endif + if ((sysctl_snat_reroute(skb) || + skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + ip_route_me_harder(skb, RTN_LOCAL) != 0) + return 1; + + return 0; +} + /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in @@ -737,7 +785,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { - struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -759,8 +806,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; - ipvs = net_ipvs(skb_net(skb)); - #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -768,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) - goto out; - } else -#endif - if ((ipvs->sysctl_snat_reroute || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto out; + if (ip_vs_route_me_harder(af, skb)) + goto out; /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -985,7 +1022,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { struct ip_vs_protocol *pp = pd->pp; - struct netns_ipvs *ipvs; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); @@ -1021,18 +1057,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - ipvs = net_ipvs(skb_net(skb)); - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) - goto drop; - } else -#endif - if ((ipvs->sysctl_snat_reroute || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto drop; + if (ip_vs_route_me_harder(af, skb)) + goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); @@ -1066,7 +1092,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; - struct netns_ipvs *ipvs; EnterFunction(11); @@ -1141,11 +1166,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); - ipvs = net_ipvs(net); if (likely(cp)) return handle_response(af, skb, pd, cp, iph.len); - if (ipvs->sysctl_nat_icmp_send && + if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1570,7 +1594,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (ipvs->sysctl_expire_nodest_conn) { + if (sysctl_expire_nodest_conn(ipvs)) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } @@ -1600,15 +1624,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = ipvs->sysctl_sync_threshold[0]; + pkts = sysctl_sync_threshold(ipvs); else pkts = atomic_add_return(1, &cp->in_pkts); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % ipvs->sysctl_sync_threshold[1] - == ipvs->sysctl_sync_threshold[0])) || + (pkts % sysctl_sync_period(ipvs) + == sysctl_sync_threshold(ipvs))) || (cp->old_state != cp->state && ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || @@ -1622,8 +1646,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % ipvs->sysctl_sync_threshold[1] - == ipvs->sysctl_sync_threshold[0])) || + (pkts % sysctl_sync_period(ipvs) + == sysctl_sync_threshold(ipvs))) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d69ec26b6bd4..b799cea31f95 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -75,19 +75,19 @@ static int __ip_vs_addr_is_local_v6(struct net *net, const struct in6_addr *addr) { struct rt6_info *rt; - struct flowi fl = { - .oif = 0, - .fl6_dst = *addr, - .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, + struct flowi6 fl6 = { + .daddr = *addr, }; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) - return 1; + return 1; return 0; } #endif + +#ifdef CONFIG_SYSCTL /* * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs @@ -229,6 +229,7 @@ static void defense_work_handler(struct work_struct *work) ip_vs_random_dropentry(ipvs->net); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } +#endif int ip_vs_use_count_inc(void) @@ -411,9 +412,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - svc = __ip_vs_svc_fwm_find(net, af, fwmark); - if (fwmark && svc) - goto out; + if (fwmark) { + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (svc) + goto out; + } /* * Check the table hashed by <protocol,addr,port> @@ -709,13 +712,39 @@ static void ip_vs_trash_cleanup(struct net *net) } } +static void +ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) +{ +#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c + + spin_lock_bh(&src->lock); + + IP_VS_SHOW_STATS_COUNTER(conns); + IP_VS_SHOW_STATS_COUNTER(inpkts); + IP_VS_SHOW_STATS_COUNTER(outpkts); + IP_VS_SHOW_STATS_COUNTER(inbytes); + IP_VS_SHOW_STATS_COUNTER(outbytes); + + ip_vs_read_estimator(dst, src); + + spin_unlock_bh(&src->lock); +} static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(&stats->ustats, 0, sizeof(stats->ustats)); + /* get current counters as zero point, rates are zeroed */ + +#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c + + IP_VS_ZERO_STATS_COUNTER(conns); + IP_VS_ZERO_STATS_COUNTER(inpkts); + IP_VS_ZERO_STATS_COUNTER(outpkts); + IP_VS_ZERO_STATS_COUNTER(inbytes); + IP_VS_ZERO_STATS_COUNTER(outbytes); + ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -774,7 +803,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock_bh(&dest->dst_lock); if (add) - ip_vs_new_estimator(svc->net, &dest->stats); + ip_vs_start_estimator(svc->net, &dest->stats); write_lock_bh(&__ip_vs_svc_lock); @@ -980,7 +1009,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_kill_estimator(net, &dest->stats); + ip_vs_stop_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1173,7 +1202,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - ip_vs_new_estimator(net, &svc->stats); + ip_vs_start_estimator(net, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1325,7 +1354,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) if (svc->af == AF_INET) ipvs->num_services--; - ip_vs_kill_estimator(svc->net, &svc->stats); + ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ old_sched = svc->scheduler; @@ -1479,11 +1508,11 @@ static int ip_vs_zero_all(struct net *net) } } - ip_vs_zero_stats(net_ipvs(net)->tot_stats); + ip_vs_zero_stats(&net_ipvs(net)->tot_stats); return 0; } - +#ifdef CONFIG_SYSCTL static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1505,7 +1534,6 @@ proc_do_defense_mode(ctl_table *table, int write, return rc; } - static int proc_do_sync_threshold(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1739,6 +1767,7 @@ const struct ctl_path net_vs_ctl_path[] = { { } }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); +#endif #ifdef CONFIG_PROC_FS @@ -1961,7 +1990,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + struct ip_vs_stats_user show; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -1969,22 +1998,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_printf(seq, " Conns Packets Packets Bytes Bytes\n"); - spin_lock_bh(&tot_stats->lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, - tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, - (unsigned long long) tot_stats->ustats.inbytes, - (unsigned long long) tot_stats->ustats.outbytes); + ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, + show.inpkts, show.outpkts, + (unsigned long long) show.inbytes, + (unsigned long long) show.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq,"%8X %8X %8X %16X %16X\n", - tot_stats->ustats.cps, - tot_stats->ustats.inpps, - tot_stats->ustats.outpps, - tot_stats->ustats.inbps, - tot_stats->ustats.outbps); - spin_unlock_bh(&tot_stats->lock); + seq_printf(seq, "%8X %8X %8X %16X %16X\n", + show.cps, show.inpps, show.outpps, + show.inbps, show.outbps); return 0; } @@ -2005,7 +2030,9 @@ static const struct file_operations ip_vs_stats_fops = { static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; + struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats; + struct ip_vs_stats_user rates; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2015,30 +2042,43 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) "CPU Conns Packets Packets Bytes Bytes\n"); for_each_possible_cpu(i) { - struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); + unsigned int start; + __u64 inbytes, outbytes; + + do { + start = u64_stats_fetch_begin_bh(&u->syncp); + inbytes = u->ustats.inbytes; + outbytes = u->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&u->syncp, start)); + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", - i, u->ustats.conns, u->ustats.inpkts, - u->ustats.outpkts, (__u64)u->ustats.inbytes, - (__u64)u->ustats.outbytes); + i, u->ustats.conns, u->ustats.inpkts, + u->ustats.outpkts, (__u64)inbytes, + (__u64)outbytes); } spin_lock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, (unsigned long long) tot_stats->ustats.inbytes, (unsigned long long) tot_stats->ustats.outbytes); + ip_vs_read_estimator(&rates, tot_stats); + + spin_unlock_bh(&tot_stats->lock); + /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq, " %8X %8X %8X %16X %16X\n", - tot_stats->ustats.cps, - tot_stats->ustats.inpps, - tot_stats->ustats.outpps, - tot_stats->ustats.inbps, - tot_stats->ustats.outbps); - spin_unlock_bh(&tot_stats->lock); + rates.cps, + rates.inpps, + rates.outpps, + rates.inbps, + rates.outbps); return 0; } @@ -2286,14 +2326,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void -ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) -{ - spin_lock_bh(&src->lock); - memcpy(dst, &src->ustats, sizeof(*dst)); - spin_unlock_bh(&src->lock); -} - -static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; @@ -2679,31 +2711,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, struct ip_vs_stats *stats) { + struct ip_vs_stats_user ustats; struct nlattr *nl_stats = nla_nest_start(skb, container_type); if (!nl_stats) return -EMSGSIZE; - spin_lock_bh(&stats->lock); - - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); + ip_vs_copy_stats(&ustats, stats); - spin_unlock_bh(&stats->lock); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps); nla_nest_end(skb, nl_stats); return 0; nla_put_failure: - spin_unlock_bh(&stats->lock); nla_nest_cancel(skb, nl_stats); return -EMSGSIZE; } @@ -3482,7 +3512,8 @@ static void ip_vs_genl_unregister(void) /* * per netns intit/exit func. */ -int __net_init __ip_vs_control_init(struct net *net) +#ifdef CONFIG_SYSCTL +int __net_init __ip_vs_control_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3492,38 +3523,11 @@ int __net_init __ip_vs_control_init(struct net *net) spin_lock_init(&ipvs->dropentry_lock); spin_lock_init(&ipvs->droppacket_lock); spin_lock_init(&ipvs->securetcp_lock); - ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); - - /* Initialize rs_table */ - for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); - - INIT_LIST_HEAD(&ipvs->dest_trash); - atomic_set(&ipvs->ftpsvc_counter, 0); - atomic_set(&ipvs->nullsvc_counter, 0); - - /* procfs stats */ - ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); - if (ipvs->tot_stats == NULL) { - pr_err("%s(): no memory.\n", __func__); - return -ENOMEM; - } - ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->cpustats) { - pr_err("%s() alloc_percpu failed\n", __func__); - goto err_alloc; - } - spin_lock_init(&ipvs->tot_stats->lock); - - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); - proc_net_fops_create(net, "ip_vs_stats_percpu", 0, - &ip_vs_stats_percpu_fops); if (!net_eq(net, &init_net)) { tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); if (tbl == NULL) - goto err_dup; + return -ENOMEM; } else tbl = vs_vars; /* Initialize sysctl defaults */ @@ -3545,52 +3549,94 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; - ipvs->sysctl_sync_threshold[0] = 3; - ipvs->sysctl_sync_threshold[1] = 50; + ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; + ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; -#ifdef CONFIG_SYSCTL ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl); if (ipvs->sysctl_hdr == NULL) { if (!net_eq(net, &init_net)) kfree(tbl); - goto err_dup; + return -ENOMEM; } -#endif - ip_vs_new_estimator(net, ipvs->tot_stats); + ip_vs_start_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); - return 0; -err_dup: - free_percpu(ipvs->cpustats); -err_alloc: - kfree(ipvs->tot_stats); - return -ENOMEM; + return 0; } -static void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_trash_cleanup(net); - ip_vs_kill_estimator(net, ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->sysctl_hdr); +} + +#else + +int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; } +void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } + #endif + +int __net_init __ip_vs_control_init(struct net *net) +{ + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + + INIT_LIST_HEAD(&ipvs->dest_trash); + atomic_set(&ipvs->ftpsvc_counter, 0); + atomic_set(&ipvs->nullsvc_counter, 0); + + /* procfs stats */ + ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (ipvs->tot_stats.cpustats) { + pr_err("%s(): alloc_percpu.\n", __func__); + return -ENOMEM; + } + spin_lock_init(&ipvs->tot_stats.lock); + + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, + &ip_vs_stats_percpu_fops); + + if (__ip_vs_control_init_sysctl(net)) + goto err; + + return 0; + +err: + free_percpu(ipvs->tot_stats.cpustats); + return -ENOMEM; +} + +static void __net_exit __ip_vs_control_cleanup(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_trash_cleanup(net); + ip_vs_stop_estimator(net, &ipvs->tot_stats); + __ip_vs_control_cleanup_sysctl(net); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); - free_percpu(ipvs->cpustats); - kfree(ipvs->tot_stats); + free_percpu(ipvs->tot_stats.cpustats); } static struct pernet_operations ipvs_control_ops = { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index f560a05c965a..8c8766ca56ad 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -69,10 +69,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inpkts += s->ustats.inpkts; sum->outpkts += s->ustats.outpkts; do { - start = u64_stats_fetch_begin_bh(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); inbytes = s->ustats.inbytes; outbytes = s->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); sum->inbytes += inbytes; sum->outbytes += outbytes; } else { @@ -80,10 +80,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inpkts = s->ustats.inpkts; sum->outpkts = s->ustats.outpkts; do { - start = u64_stats_fetch_begin_bh(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); sum->inbytes = s->ustats.inbytes; sum->outbytes = s->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); } } } @@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg) struct netns_ipvs *ipvs; ipvs = net_ipvs(net); - ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); - ip_vs_read_cpu_stats(&s->ustats, s->cpustats); spin_lock(&s->lock); + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); n_conns = s->ustats.conns; n_inpkts = s->ustats.inpkts; n_outpkts = s->ustats.outpkts; @@ -118,61 +117,41 @@ static void estimation_timer(unsigned long arg) rate = (n_conns - e->last_conns) << 9; e->last_conns = n_conns; e->cps += ((long)rate - (long)e->cps) >> 2; - s->ustats.cps = (e->cps + 0x1FF) >> 10; rate = (n_inpkts - e->last_inpkts) << 9; e->last_inpkts = n_inpkts; e->inpps += ((long)rate - (long)e->inpps) >> 2; - s->ustats.inpps = (e->inpps + 0x1FF) >> 10; rate = (n_outpkts - e->last_outpkts) << 9; e->last_outpkts = n_outpkts; e->outpps += ((long)rate - (long)e->outpps) >> 2; - s->ustats.outpps = (e->outpps + 0x1FF) >> 10; rate = (n_inbytes - e->last_inbytes) << 4; e->last_inbytes = n_inbytes; e->inbps += ((long)rate - (long)e->inbps) >> 2; - s->ustats.inbps = (e->inbps + 0xF) >> 5; rate = (n_outbytes - e->last_outbytes) << 4; e->last_outbytes = n_outbytes; e->outbps += ((long)rate - (long)e->outbps) >> 2; - s->ustats.outbps = (e->outbps + 0xF) >> 5; spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); - est->last_conns = stats->ustats.conns; - est->cps = stats->ustats.cps<<10; - - est->last_inpkts = stats->ustats.inpkts; - est->inpps = stats->ustats.inpps<<10; - - est->last_outpkts = stats->ustats.outpkts; - est->outpps = stats->ustats.outpps<<10; - - est->last_inbytes = stats->ustats.inbytes; - est->inbps = stats->ustats.inbps<<5; - - est->last_outbytes = stats->ustats.outbytes; - est->outbps = stats->ustats.outbps<<5; - spin_lock_bh(&ipvs->est_lock); list_add(&est->list, &ipvs->est_list); spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; @@ -185,13 +164,14 @@ void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; - - /* set counters zero, caller must hold the stats->lock lock */ - est->last_inbytes = 0; - est->last_outbytes = 0; - est->last_conns = 0; - est->last_inpkts = 0; - est->last_outpkts = 0; + struct ip_vs_stats_user *u = &stats->ustats; + + /* reset counters, caller must hold the stats->lock lock */ + est->last_inbytes = u->inbytes; + est->last_outbytes = u->outbytes; + est->last_conns = u->conns; + est->last_inpkts = u->inpkts; + est->last_outpkts = u->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; @@ -199,6 +179,19 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->outbps = 0; } +/* Get decoded rates */ +void ip_vs_read_estimator(struct ip_vs_stats_user *dst, + struct ip_vs_stats *stats) +{ + struct ip_vs_estimator *e = &stats->est; + + dst->cps = (e->cps + 0x1FF) >> 10; + dst->inpps = (e->inpps + 0x1FF) >> 10; + dst->outpps = (e->outpps + 0x1FF) >> 10; + dst->inbps = (e->inbps + 0xF) >> 5; + dst->outbps = (e->outbps + 0xF) >> 5; +} + static int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 6bf7a807649c..f276df9896b3 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -63,6 +63,8 @@ #define CHECK_EXPIRE_INTERVAL (60*HZ) #define ENTRY_TIMEOUT (6*60*HZ) +#define DEFAULT_EXPIRATION (24*60*60*HZ) + /* * It is for full expiration check. * When there is no partial expiration check (garbage collection) @@ -112,7 +114,7 @@ struct ip_vs_lblc_table { /* * IPVS LBLC sysctl table */ - +#ifdef CONFIG_SYSCTL static ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", @@ -123,6 +125,7 @@ static ctl_table vs_vars_table[] = { }, { } }; +#endif static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { @@ -238,6 +241,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) } } +static int sysctl_lblc_expiration(struct ip_vs_service *svc) +{ +#ifdef CONFIG_SYSCTL + struct netns_ipvs *ipvs = net_ipvs(svc->net); + return ipvs->sysctl_lblc_expiration; +#else + return DEFAULT_EXPIRATION; +#endif +} static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { @@ -245,7 +257,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; - struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; @@ -254,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + - ipvs->sysctl_lblc_expiration)) + sysctl_lblc_expiration(svc))) continue; ip_vs_lblc_free(en); @@ -538,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = /* * per netns init. */ +#ifdef CONFIG_SYSCTL static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -550,10 +562,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net) return -ENOMEM; } else ipvs->lblc_ctl_table = vs_vars_table; - ipvs->sysctl_lblc_expiration = 24*60*60*HZ; + ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; -#ifdef CONFIG_SYSCTL ipvs->lblc_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblc_ctl_table); @@ -562,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net) kfree(ipvs->lblc_ctl_table); return -ENOMEM; } -#endif return 0; } @@ -571,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblc_ctl_header); -#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); } +#else + +static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; } +static void __net_exit __ip_vs_lblc_exit(struct net *net) { } + +#endif + static struct pernet_operations ip_vs_lblc_ops = { .init = __ip_vs_lblc_init, .exit = __ip_vs_lblc_exit, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 00631765b92a..cb1c9913d38b 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -63,6 +63,8 @@ #define CHECK_EXPIRE_INTERVAL (60*HZ) #define ENTRY_TIMEOUT (6*60*HZ) +#define DEFAULT_EXPIRATION (24*60*60*HZ) + /* * It is for full expiration check. * When there is no partial expiration check (garbage collection) @@ -283,6 +285,7 @@ struct ip_vs_lblcr_table { }; +#ifdef CONFIG_SYSCTL /* * IPVS LBLCR sysctl table */ @@ -297,6 +300,7 @@ static ctl_table vs_vars_table[] = { }, { } }; +#endif static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { @@ -410,6 +414,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) } } +static int sysctl_lblcr_expiration(struct ip_vs_service *svc) +{ +#ifdef CONFIG_SYSCTL + struct netns_ipvs *ipvs = net_ipvs(svc->net); + return ipvs->sysctl_lblcr_expiration; +#else + return DEFAULT_EXPIRATION; +#endif +} static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) { @@ -417,15 +430,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; - struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { - if (time_after(en->lastuse - + ipvs->sysctl_lblcr_expiration, now)) + if (time_after(en->lastuse + + sysctl_lblcr_expiration(svc), now)) continue; ip_vs_lblcr_free(en); @@ -650,7 +662,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -662,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + - ipvs->sysctl_lblcr_expiration)) { + sysctl_lblcr_expiration(svc))) { struct ip_vs_dest *m; write_lock(&en->set.lock); @@ -734,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = /* * per netns init. */ +#ifdef CONFIG_SYSCTL static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -746,10 +758,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) return -ENOMEM; } else ipvs->lblcr_ctl_table = vs_vars_table; - ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; + ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; -#ifdef CONFIG_SYSCTL ipvs->lblcr_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblcr_ctl_table); @@ -758,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) kfree(ipvs->lblcr_ctl_table); return -ENOMEM; } -#endif return 0; } @@ -767,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblcr_ctl_header); -#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); } +#else + +static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; } +static void __net_exit __ip_vs_lblcr_exit(struct net *net) { } + +#endif + static struct pernet_operations ip_vs_lblcr_ops = { .init = __ip_vs_lblcr_init, .exit = __ip_vs_lblcr_exit, diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0d83bc01fed4..13d607ae9c52 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -92,14 +92,13 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) return -EINVAL; - p->pe_data = kmalloc(matchlen, GFP_ATOMIC); - if (!p->pe_data) - return -ENOMEM; - /* N.B: pe_data is only set on success, * this allows fallback to the default persistence logic on failure */ - memcpy(p->pe_data, dptr + matchoff, matchlen); + p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC); + if (!p->pe_data) + return -ENOMEM; + p->pe_data_len = matchlen; return 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index fecf24de4af3..3e7961e85e9c 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) return; - if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) + if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff) return; spin_lock_bh(&ipvs->sync_buff_lock); @@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (ipvs->sysctl_sync_ver == 0) { + if (sysctl_sync_ver(ipvs) == 0) { ip_vs_sync_conn_v0(net, cp); return; } @@ -650,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % ipvs->sysctl_sync_threshold[1] != 1) + if (pkts % sysctl_sync_period(ipvs) != 1) return; } goto sloop; @@ -697,13 +697,12 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, return 1; } - p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); + p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); if (!p->pe_data) { if (p->pe->module) module_put(p->pe->module); return -ENOMEM; } - memcpy(p->pe_data, pe_data, pe_data_len); p->pe_data_len = pe_data_len; } return 0; @@ -795,7 +794,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); + atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; /* diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 878f6dd9dbad..6132b213eddc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -98,12 +98,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi fl = { - .fl4_dst = dest->addr.ip, - .fl4_tos = rtos, - }; - - rt = ip_route_output_key(net, &fl); + rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0); if (IS_ERR(rt)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", @@ -117,12 +112,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } spin_unlock(&dest->dst_lock); } else { - struct flowi fl = { - .fl4_dst = daddr, - .fl4_tos = rtos, - }; - - rt = ip_route_output_key(net, &fl); + rt = ip_route_output(net, daddr, 0, rtos, 0); if (IS_ERR(rt)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -175,14 +165,14 @@ __ip_vs_reroute_locally(struct sk_buff *skb) return 0; refdst_drop(orefdst); } else { - struct flowi fl = { - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .mark = skb->mark, + struct flowi4 fl4 = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowi4_tos = RT_TOS(iph->tos), + .flowi4_mark = skb->mark, }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return 0; if (!(rt->rt_flags & RTCF_LOCAL)) { @@ -208,27 +198,27 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, struct in6_addr *ret_saddr, int do_xfrm) { struct dst_entry *dst; - struct flowi fl = { - .fl6_dst = *daddr, + struct flowi6 fl6 = { + .daddr = *daddr, }; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst->error) goto out_err; if (!ret_saddr) return dst; - if (ipv6_addr_any(&fl.fl6_src) && + if (ipv6_addr_any(&fl6.saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, - &fl.fl6_dst, 0, &fl.fl6_src) < 0) + &fl6.daddr, 0, &fl6.saddr) < 0) goto out_err; if (do_xfrm) { - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { dst = NULL; goto out_err; } } - ipv6_addr_copy(ret_saddr, &fl.fl6_src); + ipv6_addr_copy(ret_saddr, &fl6.saddr); return dst; out_err: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2f454efa1a8b..941286ca911d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1301,6 +1301,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_conntrack_ecache_fini(net); + nf_conntrack_tstamp_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); kmem_cache_destroy(net->ct.nf_conntrack_cachep); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index b969025cf82f..533a183e6661 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -714,7 +714,6 @@ static int callforward_do_filter(const union nf_inet_addr *src, u_int8_t family) { const struct nf_afinfo *afinfo; - struct flowi fl1, fl2; int ret = 0; /* rcu_read_lock()ed by nf_hook_slow() */ @@ -722,17 +721,20 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo) return 0; - memset(&fl1, 0, sizeof(fl1)); - memset(&fl2, 0, sizeof(fl2)); - switch (family) { case AF_INET: { + struct flowi4 fl1, fl2; struct rtable *rt1, *rt2; - fl1.fl4_dst = src->ip; - fl2.fl4_dst = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { - if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { + memset(&fl1, 0, sizeof(fl1)); + fl1.daddr = src->ip; + + memset(&fl2, 0, sizeof(fl2)); + fl2.daddr = dst->ip; + if (!afinfo->route((struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1))) { + if (!afinfo->route((struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2))) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -745,12 +747,18 @@ static int callforward_do_filter(const union nf_inet_addr *src, #if defined(CONFIG_NF_CONNTRACK_IPV6) || \ defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) case AF_INET6: { + struct flowi6 fl1, fl2; struct rt6_info *rt1, *rt2; - memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); - memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); - if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { - if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { + memset(&fl1, 0, sizeof(fl1)); + ipv6_addr_copy(&fl1.daddr, &src->in6); + + memset(&fl2, 0, sizeof(fl2)); + ipv6_addr_copy(&fl2.daddr, &dst->in6); + if (!afinfo->route((struct dst_entry **)&rt1, + flowi6_to_flowi(&fl1))) { + if (!afinfo->route((struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2))) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0a77d2ff2154..a9adf4c6b299 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -183,14 +183,14 @@ EXPORT_SYMBOL(xt_unregister_matches); /* * These are weird, but module loading must not be done with mutex * held (since they will register), and we have to have a single - * function to use try_then_request_module(). + * function to use. */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; - int err = 0; + int err = -ENOENT; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); @@ -221,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) { struct xt_match *match; - match = try_then_request_module(xt_find_match(nfproto, name, revision), - "%st_%s", xt_prefix[nfproto], name); - return (match != NULL) ? match : ERR_PTR(-ENOENT); + match = xt_find_match(nfproto, name, revision); + if (IS_ERR(match)) { + request_module("%st_%s", xt_prefix[nfproto], name); + match = xt_find_match(nfproto, name, revision); + } + + return match; } EXPORT_SYMBOL_GPL(xt_request_find_match); @@ -231,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match); struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; - int err = 0; + int err = -ENOENT; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); @@ -261,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; - target = try_then_request_module(xt_find_target(af, name, revision), - "%st_%s", xt_prefix[af], name); - return (target != NULL) ? target : ERR_PTR(-ENOENT); + target = xt_find_target(af, name, revision); + if (IS_ERR(target)) { + request_module("%st_%s", xt_prefix[af], name); + target = xt_find_target(af, name, revision); + } + + return target; } EXPORT_SYMBOL_GPL(xt_request_find_target); diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 81802d27346e..363a99ec0637 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -19,6 +19,7 @@ #include <linux/if_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_AUDIT.h> +#include <linux/netfilter_bridge/ebtables.h> #include <net/ipv6.h> #include <net/ip.h> @@ -168,6 +169,13 @@ errout: return XT_CONTINUE; } +static unsigned int +audit_tg_ebt(struct sk_buff *skb, const struct xt_action_param *par) +{ + audit_tg(skb, par); + return EBT_CONTINUE; +} + static int audit_tg_check(const struct xt_tgchk_param *par) { const struct xt_audit_info *info = par->targinfo; @@ -181,23 +189,33 @@ static int audit_tg_check(const struct xt_tgchk_param *par) return 0; } -static struct xt_target audit_tg_reg __read_mostly = { - .name = "AUDIT", - .family = NFPROTO_UNSPEC, - .target = audit_tg, - .targetsize = sizeof(struct xt_audit_info), - .checkentry = audit_tg_check, - .me = THIS_MODULE, +static struct xt_target audit_tg_reg[] __read_mostly = { + { + .name = "AUDIT", + .family = NFPROTO_UNSPEC, + .target = audit_tg, + .targetsize = sizeof(struct xt_audit_info), + .checkentry = audit_tg_check, + .me = THIS_MODULE, + }, + { + .name = "AUDIT", + .family = NFPROTO_BRIDGE, + .target = audit_tg_ebt, + .targetsize = sizeof(struct xt_audit_info), + .checkentry = audit_tg_check, + .me = THIS_MODULE, + }, }; static int __init audit_tg_init(void) { - return xt_register_target(&audit_tg_reg); + return xt_register_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg)); } static void __exit audit_tg_exit(void) { - xt_unregister_target(&audit_tg_reg); + xt_unregister_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg)); } module_init(audit_tg_init); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index eb81c380da1b..6e6b46cb1db9 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -148,16 +148,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, unsigned int family) { - struct flowi fl = {}; + struct flowi fl; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; - if (family == PF_INET) - fl.fl4_dst = ip_hdr(skb)->saddr; - else - fl.fl6_dst = ipv6_hdr(skb)->saddr; + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + memset(fl6, 0, sizeof(*fl6)); + ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr); + } rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 624725b5286f..5f054a0dbbb1 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -62,18 +62,18 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) const struct iphdr *iph = ip_hdr(skb); struct net *net = pick_net(skb); struct rtable *rt; - struct flowi fl; + struct flowi4 fl4; - memset(&fl, 0, sizeof(fl)); + memset(&fl4, 0, sizeof(fl4)); if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl4.flowi4_oif = info->priv->oif; } - fl.fl4_dst = info->gw.ip; - fl.fl4_tos = RT_TOS(iph->tos); - fl.fl4_scope = RT_SCOPE_UNIVERSE; - rt = ip_route_output_key(net, &fl); + fl4.daddr = info->gw.ip; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return false; @@ -143,18 +143,18 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = pick_net(skb); struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl6.flowi6_oif = info->priv->oif; } - fl.fl6_dst = info->gw.in6; - fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + fl6.daddr = info->gw.in6; + fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL) return false; diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c new file mode 100644 index 000000000000..2220b85e9519 --- /dev/null +++ b/net/netfilter/xt_addrtype.c @@ -0,0 +1,229 @@ +/* + * iptables module to match inet_addr_type() of an ip. + * + * Copyright (c) 2004 Patrick McHardy <[email protected]> + * (C) 2007 Laszlo Attila Toth <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <net/route.h> + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/ip6_fib.h> +#endif + +#include <linux/netfilter/xt_addrtype.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <[email protected]>"); +MODULE_DESCRIPTION("Xtables: address type match"); +MODULE_ALIAS("ipt_addrtype"); +MODULE_ALIAS("ip6t_addrtype"); + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +{ + u32 ret; + + if (!rt) + return XT_ADDRTYPE_UNREACHABLE; + + if (rt->rt6i_flags & RTF_REJECT) + ret = XT_ADDRTYPE_UNREACHABLE; + else + ret = 0; + + if (rt->rt6i_flags & RTF_LOCAL) + ret |= XT_ADDRTYPE_LOCAL; + if (rt->rt6i_flags & RTF_ANYCAST) + ret |= XT_ADDRTYPE_ANYCAST; + return ret; +} + +static bool match_type6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr, u16 mask) +{ + int addr_type = ipv6_addr_type(addr); + + if ((mask & XT_ADDRTYPE_MULTICAST) && + !(addr_type & IPV6_ADDR_MULTICAST)) + return false; + if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST)) + return false; + if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY) + return false; + + if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | + XT_ADDRTYPE_UNREACHABLE) & mask) { + struct rt6_info *rt; + u32 type; + int ifindex = dev ? dev->ifindex : 0; + + rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); + + type = xt_addrtype_rt6_to_type(rt); + + dst_release(&rt->dst); + return !!(mask & type); + } + return true; +} + +static bool +addrtype_mt6(struct net *net, const struct net_device *dev, + const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + bool ret = true; + + if (info->source) + ret &= match_type6(net, dev, &iph->saddr, info->source) ^ + (info->flags & XT_ADDRTYPE_INVERT_SOURCE); + if (ret && info->dest) + ret &= match_type6(net, dev, &iph->daddr, info->dest) ^ + !!(info->flags & XT_ADDRTYPE_INVERT_DEST); + return ret; +} +#endif + +static inline bool match_type(struct net *net, const struct net_device *dev, + __be32 addr, u_int16_t mask) +{ + return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); +} + +static bool +addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? par->in : par->out); + const struct xt_addrtype_info *info = par->matchinfo; + const struct iphdr *iph = ip_hdr(skb); + bool ret = true; + + if (info->source) + ret &= match_type(net, NULL, iph->saddr, info->source) ^ + info->invert_source; + if (info->dest) + ret &= match_type(net, NULL, iph->daddr, info->dest) ^ + info->invert_dest; + + return ret; +} + +static bool +addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? par->in : par->out); + const struct xt_addrtype_info_v1 *info = par->matchinfo; + const struct iphdr *iph; + const struct net_device *dev = NULL; + bool ret = true; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) + dev = par->in; + else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + dev = par->out; + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + if (par->family == NFPROTO_IPV6) + return addrtype_mt6(net, dev, skb, info); +#endif + iph = ip_hdr(skb); + if (info->source) + ret &= match_type(net, dev, iph->saddr, info->source) ^ + (info->flags & XT_ADDRTYPE_INVERT_SOURCE); + if (ret && info->dest) + ret &= match_type(net, dev, iph->daddr, info->dest) ^ + !!(info->flags & XT_ADDRTYPE_INVERT_DEST); + return ret; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { + pr_info("both incoming and outgoing " + "interface limitation cannot be selected\n"); + return -EINVAL; + } + + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { + pr_info("output interface limitation " + "not valid in PREROUTING and INPUT\n"); + return -EINVAL; + } + + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { + pr_info("input interface limitation " + "not valid in POSTROUTING and OUTPUT\n"); + return -EINVAL; + } + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + if (par->family == NFPROTO_IPV6) { + if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { + pr_err("ipv6 BLACKHOLE matching not supported\n"); + return -EINVAL; + } + if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { + pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n"); + return -EINVAL; + } + if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { + pr_err("ipv6 does not support BROADCAST matching\n"); + return -EINVAL; + } + } +#endif + return 0; +} + +static struct xt_match addrtype_mt_reg[] __read_mostly = { + { + .name = "addrtype", + .family = NFPROTO_IPV4, + .match = addrtype_mt_v0, + .matchsize = sizeof(struct xt_addrtype_info), + .me = THIS_MODULE + }, + { + .name = "addrtype", + .family = NFPROTO_UNSPEC, + .revision = 1, + .match = addrtype_mt_v1, + .checkentry = addrtype_mt_checkentry_v1, + .matchsize = sizeof(struct xt_addrtype_info_v1), + .me = THIS_MODULE + } +}; + +static int __init addrtype_mt_init(void) +{ + return xt_register_matches(addrtype_mt_reg, + ARRAY_SIZE(addrtype_mt_reg)); +} + +static void __exit addrtype_mt_exit(void) +{ + xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); +} + +module_init(addrtype_mt_init); +module_exit(addrtype_mt_exit); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index e029c4807404..c6d5a83450c9 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -33,17 +33,17 @@ /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { - struct list_head list; - struct nf_conntrack_tuple tuple; + struct hlist_node node; + struct nf_conntrack_tuple tuple; + union nf_inet_addr addr; }; struct xt_connlimit_data { - struct list_head iphash[256]; - spinlock_t lock; + struct hlist_head iphash[256]; + spinlock_t lock; }; static u_int32_t connlimit_rnd __read_mostly; -static bool connlimit_rnd_inited __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { @@ -101,9 +101,9 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct xt_connlimit_conn *tmp; + struct hlist_node *pos, *n; struct nf_conn *found_ct; - struct list_head *hash; + struct hlist_head *hash; bool addit = true; int matches = 0; @@ -115,7 +115,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - list_for_each_entry_safe(conn, tmp, hash, list) { + hlist_for_each_entry_safe(conn, pos, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -135,7 +135,7 @@ static int count_them(struct net *net, if (found == NULL) { /* this one is gone */ - list_del(&conn->list); + hlist_del(&conn->node); kfree(conn); continue; } @@ -146,12 +146,12 @@ static int count_them(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - list_del(&conn->list); + hlist_del(&conn->node); kfree(conn); continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) + if (same_source_net(addr, mask, &conn->addr, family)) /* same source network -> be counted! */ ++matches; nf_ct_put(found_ct); @@ -161,11 +161,12 @@ static int count_them(struct net *net, if (addit) { /* save the new connection in our list */ - conn = kzalloc(sizeof(*conn), GFP_ATOMIC); + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); if (conn == NULL) return -ENOMEM; conn->tuple = *tuple; - list_add(&conn->list, hash); + conn->addr = *addr; + hlist_add_head(&conn->node, hash); ++matches; } @@ -185,15 +186,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) int connections; ct = nf_ct_get(skb, &ctinfo); - if (ct != NULL) { - if (info->flags & XT_CONNLIMIT_DADDR) - tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - else - tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) { + if (ct != NULL) + tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + par->family, &tuple)) goto hotdrop; - } if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -228,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) unsigned int i; int ret; - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; + if (unlikely(!connlimit_rnd)) { + u_int32_t rand; + + do { + get_random_bytes(&rand, sizeof(rand)); + } while (!rand); + cmpxchg(&connlimit_rnd, 0, rand); } ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) { @@ -248,7 +249,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) spin_lock_init(&info->data->lock); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) - INIT_LIST_HEAD(&info->data->iphash[i]); + INIT_HLIST_HEAD(&info->data->iphash[i]); return 0; } @@ -257,15 +258,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct xt_connlimit_conn *tmp; - struct list_head *hash = info->data->iphash; + struct hlist_node *pos, *n; + struct hlist_head *hash = info->data->iphash; unsigned int i; nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - list_for_each_entry_safe(conn, tmp, &hash[i], list) { - list_del(&conn->list); + hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_del(&conn->node); kfree(conn); } } diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 0d9b8a220a78..6ec7d55b1769 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -14,15 +14,3 @@ config PHONET To compile this driver as a module, choose M here: the module will be called phonet. If unsure, say N. - -config PHONET_PIPECTRLR - bool "Phonet Pipe Controller (EXPERIMENTAL)" - depends on PHONET && EXPERIMENTAL - default N - help - The Pipe Controller implementation in Phonet stack to support Pipe - data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500 - platform. - - This option is incompatible with older Nokia modems. - Say N here unless you really know what you are doing. diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 30cc676c35fd..c6fffd946d42 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -195,11 +195,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, if (skb->pkt_type == PACKET_LOOPBACK) { skb_reset_mac_header(skb); skb_orphan(skb); - if (irq) - netif_rx(skb); - else - netif_rx_ni(skb); - err = 0; + err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0; } else { err = dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len); @@ -208,6 +204,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, goto drop; } err = dev_queue_xmit(skb); + if (unlikely(err > 0)) + err = net_xmit_errno(err); } return err; @@ -262,10 +260,9 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, else if (phonet_address_lookup(net, daddr) == 0) { dev = phonet_device_get(net); skb->pkt_type = PACKET_LOOPBACK; - } else if (pn_sockaddr_get_object(target) == 0) { + } else if (dst == 0) { /* Resource routing (small race until phonet_rcv()) */ - struct sock *sk = pn_find_sock_by_res(net, - target->spn_resource); + struct sock *sk = pn_find_sock_by_res(net, res); if (sk) { sock_put(sk); dev = phonet_device_get(net); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 875e86cadcfd..f17fd841f948 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -42,7 +42,7 @@ * TCP_ESTABLISHED connected pipe in enabled state * * pep_sock locking: - * - sk_state, ackq, hlist: sock lock needed + * - sk_state, hlist: sock lock needed * - listener: read only * - pipe_handle: read only */ @@ -77,165 +77,95 @@ static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, return data; } -static int pep_reply(struct sock *sk, struct sk_buff *oskb, - u8 code, const void *data, int len, gfp_t priority) +static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload, + int len, gfp_t priority) { - const struct pnpipehdr *oph = pnp_hdr(oskb); - struct pnpipehdr *ph; - struct sk_buff *skb; - struct sockaddr_pn peer; - - skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); if (!skb) - return -ENOMEM; + return NULL; skb_set_owner_w(skb, sk); skb_reserve(skb, MAX_PNPIPE_HEADER); __skb_put(skb, len); - skb_copy_to_linear_data(skb, data, len); - __skb_push(skb, sizeof(*ph)); + skb_copy_to_linear_data(skb, payload, len); + __skb_push(skb, sizeof(struct pnpipehdr)); skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = oph->utid; - ph->message_id = oph->message_id + 1; /* REQ -> RESP */ - ph->pipe_handle = oph->pipe_handle; - ph->error_code = code; - - pn_skb_get_src_sockaddr(oskb, &peer); - return pn_skb_send(sk, skb, &peer); + return skb; } -#define PAD 0x00 - -#ifdef CONFIG_PHONET_PIPECTRLR -static int pipe_handler_send_req(struct sock *sk, u8 msg_id, gfp_t priority) +static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code, + const void *data, int len, gfp_t priority) { - int len; + const struct pnpipehdr *oph = pnp_hdr(oskb); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - - static const u8 data[4] = { - PAD, PAD, PAD, PAD, - }; - - switch (msg_id) { - case PNS_PEP_CONNECT_REQ: - len = sizeof(data); - break; - - case PNS_PEP_DISCONNECT_REQ: - case PNS_PEP_ENABLE_REQ: - case PNS_PEP_DISABLE_REQ: - len = 0; - break; - - default: - return -EINVAL; - } + struct sockaddr_pn peer; - skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + skb = pep_alloc_skb(sk, data, len, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - if (len) { - __skb_put(skb, len); - skb_copy_to_linear_data(skb, data, len); - } - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = msg_id; /* whatever */ - ph->message_id = msg_id; - ph->pipe_handle = pn->pipe_handle; - ph->error_code = PN_PIPE_NO_ERROR; + ph->utid = oph->utid; + ph->message_id = oph->message_id + 1; /* REQ -> RESP */ + ph->pipe_handle = oph->pipe_handle; + ph->error_code = code; - return pn_skb_send(sk, skb, NULL); + pn_skb_get_src_sockaddr(oskb, &peer); + return pn_skb_send(sk, skb, &peer); } -static int pipe_handler_send_created_ind(struct sock *sk, u8 msg_id) +static int pep_indicate(struct sock *sk, u8 id, u8 code, + const void *data, int len, gfp_t priority) { - int err_code; + struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - static u8 data[4] = { - 0x03, 0x04, - }; - data[2] = pn->tx_fc; - data[3] = pn->rx_fc; - - /* - * actually, below is number of sub-blocks and not error code. - * Pipe_created_ind message format does not have any - * error code field. However, the Phonet stack will always send - * an error code as part of pnpipehdr. So, use that err_code to - * specify the number of sub-blocks. - */ - err_code = 0x01; - - skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC); + skb = pep_alloc_skb(sk, data, len, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_put(skb, sizeof(data)); - skb_copy_to_linear_data(skb, data, sizeof(data)); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; - ph->message_id = msg_id; + ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->error_code = err_code; - + ph->data[0] = code; return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_send_ind(struct sock *sk, u8 msg_id) +#define PAD 0x00 + +static int pipe_handler_request(struct sock *sk, u8 id, u8 code, + const void *data, int len) { - int err_code; + struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - - /* - * actually, below is a filler. - * Pipe_enabled/disabled_ind message format does not have any - * error code field. However, the Phonet stack will always send - * an error code as part of pnpipehdr. So, use that err_code to - * specify the filler value. - */ - err_code = 0x0; - skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + skb = pep_alloc_skb(sk, data, len, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = msg_id; + ph->utid = id; /* whatever */ + ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->error_code = err_code; - + ph->data[0] = code; return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_enable_pipe(struct sock *sk, int enable) +static int pipe_handler_send_created_ind(struct sock *sk) { - u8 id = enable ? PNS_PEP_ENABLE_REQ : PNS_PEP_DISABLE_REQ; + struct pep_sock *pn = pep_sk(sk); + u8 data[4] = { + PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2), + pn->tx_fc, pn->rx_fc, + }; - return pipe_handler_send_req(sk, id, GFP_KERNEL); + return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */, + data, 4, GFP_ATOMIC); } -#endif static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) { @@ -258,11 +188,12 @@ static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) GFP_KERNEL); } -static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code, + gfp_t priority) { static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; WARN_ON(code == PN_PIPE_NO_ERROR); - return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); + return pep_reply(sk, skb, code, data, sizeof(data), priority); } /* Control requests are not sent by the pipe service and have a specific @@ -274,23 +205,21 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct sk_buff *skb; struct pnpipehdr *ph; struct sockaddr_pn dst; + u8 data[4] = { + oph->data[0], /* PEP type */ + code, /* error code, at an unusual offset */ + PAD, PAD, + }; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + skb = pep_alloc_skb(sk, data, 4, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - - skb_reserve(skb, MAX_PHONET_HEADER); - ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + ph = pnp_hdr(skb); ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; ph->data[0] = oph->data[1]; /* CTRL id */ - ph->data[1] = oph->data[0]; /* PEP type */ - ph->data[2] = code; /* error code, at an usual offset */ - ph->data[3] = PAD; - ph->data[4] = PAD; pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -298,34 +227,15 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) { - struct pep_sock *pn = pep_sk(sk); - struct pnpipehdr *ph; - struct sk_buff *skb; - - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); - if (!skb) - return -ENOMEM; - skb_set_owner_w(skb, sk); + u8 data[4] = { type, PAD, PAD, status }; - skb_reserve(skb, MAX_PNPIPE_HEADER + 4); - __skb_push(skb, sizeof(*ph) + 4); - skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = PNS_PEP_STATUS_IND; - ph->pipe_handle = pn->pipe_handle; - ph->pep_type = PN_PEP_TYPE_COMMON; - ph->data[1] = type; - ph->data[2] = PAD; - ph->data[3] = PAD; - ph->data[4] = status; - - return pn_skb_send(sk, skb, NULL); + return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON, + data, 4, priority); } /* Send our RX flow control information to the sender. * Socket must be locked. */ -static void pipe_grant_credits(struct sock *sk) +static void pipe_grant_credits(struct sock *sk, gfp_t priority) { struct pep_sock *pn = pep_sk(sk); @@ -335,16 +245,16 @@ static void pipe_grant_credits(struct sock *sk) case PN_LEGACY_FLOW_CONTROL: /* TODO */ break; case PN_ONE_CREDIT_FLOW_CONTROL: - pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, - PEP_IND_READY, GFP_ATOMIC); - pn->rx_credits = 1; + if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, priority) == 0) + pn->rx_credits = 1; break; case PN_MULTI_CREDIT_FLOW_CONTROL: if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) break; if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, CREDITS_MAX - pn->rx_credits, - GFP_ATOMIC) == 0) + priority) == 0) pn->rx_credits = CREDITS_MAX; break; } @@ -442,7 +352,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC); break; case PNS_PEP_DISCONNECT_REQ: @@ -452,32 +362,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_DISCONNECT_RESP: - sk->sk_state = TCP_CLOSE; - break; -#endif - case PNS_PEP_ENABLE_REQ: /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_ENABLE_RESP: - pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND); - - if (!pn_flow_safe(pn->tx_fc)) { - atomic_set(&pn->tx_credits, 1); - sk->sk_write_space(sk); - } - if (sk->sk_state == TCP_ESTABLISHED) - break; /* Nothing to do */ - sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk); - break; -#endif - case PNS_PEP_RESET_REQ: switch (hdr->state_after_reset) { case PN_PIPE_DISABLE: @@ -496,15 +385,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_DISABLE_RESP: - atomic_set(&pn->tx_credits, 0); - pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND); - sk->sk_state = TCP_SYN_RECV; - pn->rx_credits = 0; - break; -#endif - case PNS_PEP_CTRL_REQ: if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { atomic_inc(&sk->sk_drops); @@ -522,7 +402,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (!pn_flow_safe(pn->rx_fc)) { err = sock_queue_rcv_skb(sk, skb); if (!err) - return 0; + return NET_RX_SUCCESS; + err = -ENOBUFS; break; } @@ -560,7 +441,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) break; /* Nothing to do */ sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); break; case PNS_PIPE_DISABLED_IND: @@ -575,7 +456,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) } out: kfree_skb(skb); - return err; + return (err == -ENOBUFS) ? NET_RX_DROP : NET_RX_SUCCESS; queue: skb->dev = NULL; @@ -584,7 +465,7 @@ queue: skb_queue_tail(queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, err); - return 0; + return NET_RX_SUCCESS; } /* Destroy connected sock. */ @@ -596,7 +477,6 @@ static void pipe_destruct(struct sock *sk) skb_queue_purge(&pn->ctrlreq_queue); } -#ifdef CONFIG_PHONET_PIPECTRLR static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n) { unsigned i; @@ -621,6 +501,8 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return -EINVAL; hdr = pnp_hdr(skb); + if (hdr->error_code != PN_PIPE_NO_ERROR) + return -ECONNREFUSED; /* Parse sub-blocks */ n_sb = hdr->data[4]; @@ -648,109 +530,73 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) n_sb--; } - sk->sk_state = TCP_SYN_RECV; - sk->sk_backlog_rcv = pipe_do_rcv; - sk->sk_destruct = pipe_destruct; - pn->rx_credits = 0; - sk->sk_state_change(sk); - - return pipe_handler_send_created_ind(sk, PNS_PIPE_CREATED_IND); + return pipe_handler_send_created_ind(sk); } -#endif -static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) +/* Queue an skb to an actively connected sock. + * Socket lock must be held. */ +static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) { - struct sock *newsk; - struct pep_sock *newpn, *pn = pep_sk(sk); - struct pnpipehdr *hdr; - struct sockaddr_pn dst, src; - u16 peer_type; - u8 pipe_handle, enabled, n_sb; - u8 aligned = 0; - - if (!pskb_pull(skb, sizeof(*hdr) + 4)) - return -EINVAL; - - hdr = pnp_hdr(skb); - pipe_handle = hdr->pipe_handle; - switch (hdr->state_after_connect) { - case PN_PIPE_DISABLE: - enabled = 0; - break; - case PN_PIPE_ENABLE: - enabled = 1; - break; - default: - pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); - return -EINVAL; - } - peer_type = hdr->other_pep_type << 8; + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + int err = NET_RX_SUCCESS; - if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); - return -ENOBUFS; - } + switch (hdr->message_id) { + case PNS_PIPE_ALIGNED_DATA: + __skb_pull(skb, 1); + /* fall through */ + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return NET_RX_SUCCESS; + err = NET_RX_DROP; + break; + } - /* Parse sub-blocks (options) */ - n_sb = hdr->data[4]; - while (n_sb > 0) { - u8 type, buf[1], len = sizeof(buf); - const u8 *data = pep_get_sb(skb, &type, &len, buf); + if (pn->rx_credits == 0) { + atomic_inc(&sk->sk_drops); + err = NET_RX_DROP; + break; + } + pn->rx_credits--; + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(&sk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return NET_RX_SUCCESS; - if (data == NULL) - return -EINVAL; - switch (type) { - case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: - if (len < 1) - return -EINVAL; - peer_type = (peer_type & 0xff00) | data[0]; + case PNS_PEP_CONNECT_RESP: + if (sk->sk_state != TCP_SYN_SENT) break; - case PN_PIPE_SB_ALIGNED_DATA: - aligned = data[0] != 0; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + if (pep_connresp_rcv(sk, skb)) { + sk->sk_state = TCP_CLOSE_WAIT; break; } - n_sb--; - } - - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); - if (!newsk) { - kfree_skb(skb); - return -ENOMEM; - } - sock_init_data(NULL, newsk); - newsk->sk_state = TCP_SYN_RECV; - newsk->sk_backlog_rcv = pipe_do_rcv; - newsk->sk_protocol = sk->sk_protocol; - newsk->sk_destruct = pipe_destruct; - newpn = pep_sk(newsk); - pn_skb_get_dst_sockaddr(skb, &dst); - pn_skb_get_src_sockaddr(skb, &src); - newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); - newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); - newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); - skb_queue_head_init(&newpn->ctrlreq_queue); - newpn->pipe_handle = pipe_handle; - atomic_set(&newpn->tx_credits, 0); - newpn->peer_type = peer_type; - newpn->rx_credits = 0; - newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; - newpn->init_enable = enabled; - newpn->aligned = aligned; + sk->sk_state = TCP_ESTABLISHED; + if (!pn_flow_safe(pn->tx_fc)) { + atomic_set(&pn->tx_credits, 1); + sk->sk_write_space(sk); + } + pipe_grant_credits(sk, GFP_ATOMIC); + break; - BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); - skb_queue_head(&newsk->sk_receive_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); + case PNS_PEP_DISCONNECT_RESP: + /* sock should already be dead, nothing to do */ + break; - sk_acceptq_added(sk); - sk_add_node(newsk, &pn->ackq); - return 0; + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + } + kfree_skb(skb); + return err; } /* Listening sock must be locked */ @@ -790,7 +636,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *sknode; struct pnpipehdr *hdr; struct sockaddr_pn dst; - int err = NET_RX_SUCCESS; u8 pipe_handle; if (!pskb_may_pull(skb, sizeof(*hdr))) @@ -808,26 +653,18 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) if (sknode) return sk_receive_skb(sknode, skb, 1); - /* Look for a pipe handle pending accept */ - sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); - if (sknode) { - sock_put(sknode); - if (net_ratelimit()) - printk(KERN_WARNING"Phonet unconnected PEP ignored"); - err = NET_RX_DROP; - goto drop; - } - switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - err = pep_connreq_rcv(sk, skb); - break; - -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_CONNECT_RESP: - err = pep_connresp_rcv(sk, skb); - break; -#endif + if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, + GFP_ATOMIC); + break; + } + skb_queue_head(&sk->sk_receive_queue, skb); + sk_acceptq_added(sk); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + return NET_RX_SUCCESS; case PNS_PEP_DISCONNECT_REQ: pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); @@ -841,37 +678,36 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) case PNS_PEP_ENABLE_REQ: case PNS_PEP_DISABLE_REQ: /* invalid handle is not even allowed here! */ + break; + default: - err = NET_RX_DROP; + if ((1 << sk->sk_state) + & ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT)) + /* actively connected socket */ + return pipe_handler_do_rcv(sk, skb); } drop: kfree_skb(skb); - return err; + return NET_RX_SUCCESS; } -#ifndef CONFIG_PHONET_PIPECTRLR static int pipe_do_remove(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL); + skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; ph->message_id = PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; ph->data[0] = PAD; - return pn_skb_send(sk, skb, NULL); } -#endif /* associated socket ceases to exist */ static void pep_sock_close(struct sock *sk, long timeout) @@ -883,24 +719,15 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_common_release(sk); lock_sock(sk); - if (sk->sk_state == TCP_LISTEN) { - /* Destroy the listen queue */ - struct sock *sknode; - struct hlist_node *p, *n; - - sk_for_each_safe(sknode, p, n, &pn->ackq) - sk_del_node_init(sknode); - sk->sk_state = TCP_CLOSE; - } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { -#ifndef CONFIG_PHONET_PIPECTRLR - /* Forcefully remove dangling Phonet pipe */ - pipe_do_remove(sk); -#else - /* send pep disconnect request */ - pipe_handler_send_req(sk, PNS_PEP_DISCONNECT_REQ, GFP_KERNEL); - sk->sk_state = TCP_CLOSE; -#endif + if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { + if (sk->sk_backlog_rcv == pipe_do_rcv) + /* Forcefully remove dangling Phonet pipe */ + pipe_do_remove(sk); + else + pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD, + NULL, 0); } + sk->sk_state = TCP_CLOSE; ifindex = pn->ifindex; pn->ifindex = 0; @@ -911,84 +738,141 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static int pep_wait_connreq(struct sock *sk, int noblock) +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) { - struct task_struct *tsk = current; - struct pep_sock *pn = pep_sk(sk); - long timeo = sock_rcvtimeo(sk, noblock); - - for (;;) { - DEFINE_WAIT(wait); + struct pep_sock *pn = pep_sk(sk), *newpn; + struct sock *newsk = NULL; + struct sk_buff *skb; + struct pnpipehdr *hdr; + struct sockaddr_pn dst, src; + int err; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; - if (sk->sk_state != TCP_LISTEN) - return -EINVAL; - if (!hlist_empty(&pn->ackq)) - break; - if (!timeo) - return -EWOULDBLOCK; - if (signal_pending(tsk)) - return sock_intr_errno(timeo); + skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); + if (!skb) + return NULL; - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - finish_wait(sk_sleep(sk), &wait); + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN) { + err = -EINVAL; + goto drop; } + sk_acceptq_removed(sk); - return 0; -} + err = -EPROTO; + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + goto drop; -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) -{ - struct pep_sock *pn = pep_sk(sk); - struct sock *newsk = NULL; - struct sk_buff *oskb; - int err; + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM, + GFP_KERNEL); + goto drop; + } + peer_type = hdr->other_pep_type << 8; - lock_sock(sk); - err = pep_wait_connreq(sk, flags & O_NONBLOCK); - if (err) - goto out; + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); - newsk = __sk_head(&pn->ackq); + if (data == NULL) + goto drop; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + goto drop; + peer_type = (peer_type & 0xff00) | data[0]; + break; + case PN_PIPE_SB_ALIGNED_DATA: + aligned = data[0] != 0; + break; + } + n_sb--; + } - oskb = skb_dequeue(&newsk->sk_receive_queue); - err = pep_accept_conn(newsk, oskb); - if (err) { - skb_queue_head(&newsk->sk_receive_queue, oskb); + /* Check for duplicate pipe handle */ + newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (unlikely(newsk)) { + __sock_put(newsk); newsk = NULL; - goto out; + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL); + goto drop; } - kfree_skb(oskb); + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + if (!newsk) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); + err = -ENOBUFS; + goto drop; + } + + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + pn_skb_get_src_sockaddr(skb, &src); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); + newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); sock_hold(sk); - pep_sk(newsk)->listener = sk; + newpn->listener = sk; + skb_queue_head_init(&newpn->ctrlreq_queue); + newpn->pipe_handle = pipe_handle; + atomic_set(&newpn->tx_credits, 0); + newpn->ifindex = 0; + newpn->peer_type = peer_type; + newpn->rx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + newpn->aligned = aligned; - sock_hold(newsk); - sk_del_node_init(newsk); - sk_acceptq_removed(sk); + err = pep_accept_conn(newsk, skb); + if (err) { + sock_put(newsk); + newsk = NULL; + goto drop; + } sk_add_node(newsk, &pn->hlist); - __sock_put(newsk); - -out: +drop: release_sock(sk); + kfree_skb(skb); *errp = err; return newsk; } -#ifdef CONFIG_PHONET_PIPECTRLR static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) { struct pep_sock *pn = pep_sk(sk); - const struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + int err; + u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD }; - pn->pn_sk.dobject = pn_sockaddr_get_object(spn); - pn->pn_sk.resource = pn_sockaddr_get_resource(spn); - return pipe_handler_send_req(sk, PNS_PEP_CONNECT_REQ, GFP_KERNEL); + pn->pipe_handle = 1; /* anything but INVALID_HANDLE */ + err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, + PN_PIPE_ENABLE, data, 4); + if (err) { + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return err; + } + sk->sk_state = TCP_SYN_SENT; + return 0; } -#endif static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { @@ -1019,10 +903,18 @@ static int pep_init(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); - INIT_HLIST_HEAD(&pn->ackq); + sk->sk_destruct = pipe_destruct; INIT_HLIST_HEAD(&pn->hlist); + pn->listener = NULL; skb_queue_head_init(&pn->ctrlreq_queue); + atomic_set(&pn->tx_credits, 0); + pn->ifindex = 0; + pn->peer_type = 0; pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + pn->rx_credits = 0; + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + pn->init_enable = 1; + pn->aligned = 0; return 0; } @@ -1041,14 +933,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_PIPE_HANDLE: - if (val) { - pn->pipe_handle = val; - break; - } -#endif - case PNPIPE_ENCAP: if (val && val != PNPIPE_ENCAP_IP) { err = -EINVAL; @@ -1075,16 +959,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, } goto out_norel; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_ENABLE: - if ((1 << sk->sk_state) & ~(TCPF_SYN_RECV|TCPF_ESTABLISHED)) { - err = -ENOTCONN; - break; - } - err = pipe_handler_enable_pipe(sk, val); - break; -#endif - default: err = -ENOPROTOOPT; } @@ -1114,11 +988,11 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, val = pn->ifindex; break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_ENABLE: - val = sk->sk_state == TCP_ESTABLISHED; + case PNPIPE_HANDLE: + val = pn->pipe_handle; + if (val == PN_PIPE_INVALID_HANDLE) + return -EINVAL; break; -#endif default: return -ENOPROTOOPT; @@ -1181,7 +1055,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, if (!skb) return err; - skb_reserve(skb, MAX_PHONET_HEADER + 3); + skb_reserve(skb, MAX_PHONET_HEADER + 3 + pn->aligned); err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if (err < 0) goto outfree; @@ -1283,7 +1157,7 @@ struct sk_buff *pep_read(struct sock *sk) struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); if (sk->sk_state == TCP_ESTABLISHED) - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); return skb; } @@ -1328,7 +1202,7 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, } if (sk->sk_state == TCP_ESTABLISHED) - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_KERNEL); release_sock(sk); copy: msg->msg_flags |= MSG_EOR; @@ -1352,9 +1226,9 @@ static void pep_sock_unhash(struct sock *sk) lock_sock(sk); -#ifndef CONFIG_PHONET_PIPECTRLR - if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + if (pn->listener != NULL) { skparent = pn->listener; + pn->listener = NULL; release_sock(sk); pn = pep_sk(skparent); @@ -1362,7 +1236,7 @@ static void pep_sock_unhash(struct sock *sk) sk_del_node_init(sk); sk = skparent; } -#endif + /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. */ if (hlist_empty(&pn->hlist)) @@ -1376,9 +1250,7 @@ static void pep_sock_unhash(struct sock *sk) static struct proto pep_proto = { .close = pep_sock_close, .accept = pep_sock_accept, -#ifdef CONFIG_PHONET_PIPECTRLR .connect = pep_sock_connect, -#endif .ioctl = pep_ioctl, .init = pep_init, .setsockopt = pep_setsockopt, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 65a03338769e..b1adafab377c 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -225,15 +225,18 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } -#ifdef CONFIG_PHONET_PIPECTRLR static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, int len, int flags) { struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; - long timeo; + struct task_struct *tsk = current; + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); int err; + if (pn_socket_autobind(sock)) + return -ENOBUFS; if (len < sizeof(struct sockaddr_pn)) return -EINVAL; if (spn->spn_family != AF_PHONET) @@ -243,82 +246,61 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, switch (sock->state) { case SS_UNCONNECTED: - sk->sk_state = TCP_CLOSE; - break; - case SS_CONNECTING: - switch (sk->sk_state) { - case TCP_SYN_RECV: - sock->state = SS_CONNECTED; + if (sk->sk_state != TCP_CLOSE) { err = -EISCONN; goto out; - case TCP_CLOSE: - err = -EALREADY; - if (flags & O_NONBLOCK) - goto out; - goto wait_connect; } break; - case SS_CONNECTED: - switch (sk->sk_state) { - case TCP_SYN_RECV: - err = -EISCONN; - goto out; - case TCP_CLOSE: - sock->state = SS_UNCONNECTED; - break; - } - break; - case SS_DISCONNECTING: - case SS_FREE: - break; + case SS_CONNECTING: + err = -EALREADY; + goto out; + default: + err = -EISCONN; + goto out; } - sk->sk_state = TCP_CLOSE; - sk_stream_kill_queues(sk); + pn->dobject = pn_sockaddr_get_object(spn); + pn->resource = pn_sockaddr_get_resource(spn); sock->state = SS_CONNECTING; + err = sk->sk_prot->connect(sk, addr, len); - if (err < 0) { + if (err) { sock->state = SS_UNCONNECTED; - sk->sk_state = TCP_CLOSE; + pn->dobject = 0; goto out; } - err = -EINPROGRESS; -wait_connect: - if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK)) - goto out; + while (sk->sk_state == TCP_SYN_SENT) { + DEFINE_WAIT(wait); - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - release_sock(sk); - - err = -ERESTARTSYS; - timeo = wait_event_interruptible_timeout(*sk_sleep(sk), - sk->sk_state != TCP_CLOSE, - timeo); - - lock_sock(sk); - if (timeo < 0) - goto out; /* -ERESTARTSYS */ - - err = -ETIMEDOUT; - if (timeo == 0 && sk->sk_state != TCP_SYN_RECV) - goto out; + if (!timeo) { + err = -EINPROGRESS; + goto out; + } + if (signal_pending(tsk)) { + err = sock_intr_errno(timeo); + goto out; + } - if (sk->sk_state != TCP_SYN_RECV) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); - if (!err) - err = -ECONNREFUSED; - goto out; + prepare_to_wait_exclusive(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(sk_sleep(sk), &wait); } - sock->state = SS_CONNECTED; - err = 0; + if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) + err = 0; + else if (sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + else + err = -ECONNREFUSED; + sock->state = err ? SS_UNCONNECTED : SS_CONNECTED; out: release_sock(sk); return err; } -#endif static int pn_socket_accept(struct socket *sock, struct socket *newsock, int flags) @@ -327,6 +309,9 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, struct sock *newsk; int err; + if (unlikely(sk->sk_state != TCP_LISTEN)) + return -EINVAL; + newsk = sk->sk_prot->accept(sk, flags, &err); if (!newsk) return err; @@ -363,13 +348,8 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); - switch (sk->sk_state) { - case TCP_LISTEN: - return hlist_empty(&pn->ackq) ? 0 : POLLIN; - case TCP_CLOSE: + if (sk->sk_state == TCP_CLOSE) return POLLERR; - } - if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (!skb_queue_empty(&pn->ctrlreq_queue)) @@ -488,11 +468,7 @@ const struct proto_ops phonet_stream_ops = { .owner = THIS_MODULE, .release = pn_socket_release, .bind = pn_socket_bind, -#ifdef CONFIG_PHONET_PIPECTRLR .connect = pn_socket_connect, -#else - .connect = sock_no_connect, -#endif .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, diff --git a/net/rds/ib.c b/net/rds/ib.c index 4123967d4d65..cce19f95c624 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -364,7 +364,6 @@ void rds_ib_exit(void) rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); - rds_ib_fmr_exit(); } struct rds_transport rds_ib_transport = { @@ -400,13 +399,9 @@ int rds_ib_init(void) INIT_LIST_HEAD(&rds_ib_devices); - ret = rds_ib_fmr_init(); - if (ret) - goto out; - ret = ib_register_client(&rds_ib_client); if (ret) - goto out_fmr_exit; + goto out; ret = rds_ib_sysctl_init(); if (ret) @@ -430,8 +425,6 @@ out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); -out_fmr_exit: - rds_ib_fmr_exit(); out: return ret; } diff --git a/net/rds/ib.h b/net/rds/ib.h index e34ad032b66d..4297d92788dc 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); -int rds_ib_fmr_init(void); -void rds_ib_fmr_exit(void); /* ib_recv.c */ int rds_ib_recv_init(void); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 18a833c450c8..819c35a0d9cb 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -38,8 +38,6 @@ #include "ib.h" #include "xlist.h" -static struct workqueue_struct *rds_ib_fmr_wq; - static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) int err = 0, iter = 0; if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); while (1) { ibmr = rds_ib_reuse_fmr(pool); @@ -696,24 +694,6 @@ out_nolock: return ret; } -int rds_ib_fmr_init(void) -{ - rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd"); - if (!rds_ib_fmr_wq) - return -ENOMEM; - return 0; -} - -/* - * By the time this is called all the IB devices should have been torn down and - * had their pools freed. As each pool is freed its work struct is waited on, - * so the pool flushing work queue should be idle by the time we get here. - */ -void rds_ib_fmr_exit(void) -{ - destroy_workqueue(rds_ib_fmr_wq); -} - static void rds_ib_mr_pool_flush_worker(struct work_struct *work) { struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); @@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { @@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) } else { /* We get here if the user created a MR marked * as use_once and invalidate at the same time. */ - queue_delayed_work(rds_ib_fmr_wq, - &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); } } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 71f373c421bc..c47a511f203d 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + scat = &rm->data.op_sg[sg]; + ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); + return ret; } /* FIXME we may overallocate here */ diff --git a/net/rds/loop.c b/net/rds/loop.c index aeec1d483b17..bca6761a3ca2 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off) { + struct scatterlist *sgp = &rm->data.op_sg[sg]; + int ret = sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len); + /* Do not send cong updates to loopback */ if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off); + goto out; } BUG_ON(hdr_off || sg || off); @@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, NULL); rds_inc_put(&rm->m_inc); - - return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); +out: + return ret; } /* diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index d763793d39de..43ea7de2fc8e 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -25,6 +25,7 @@ #include <keys/user-type.h> #include "ar-internal.h" +static int rxrpc_vet_description_s(const char *); static int rxrpc_instantiate(struct key *, const void *, size_t); static int rxrpc_instantiate_s(struct key *, const void *, size_t); static void rxrpc_destroy(struct key *); @@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc); */ struct key_type key_type_rxrpc_s = { .name = "rxrpc_s", + .vet_description = rxrpc_vet_description_s, .instantiate = rxrpc_instantiate_s, .match = user_match, .destroy = rxrpc_destroy_s, @@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = { }; /* + * Vet the description for an RxRPC server key + */ +static int rxrpc_vet_description_s(const char *desc) +{ + unsigned long num; + char *p; + + num = simple_strtoul(desc, &p, 10); + if (*p != ':' || num > 65535) + return -EINVAL; + num = simple_strtoul(p + 1, &p, 10); + if (*p || num < 1 || num > 255) + return -EINVAL; + return 0; +} + +/* * parse an RxKAD type XDR format token * - the caller guarantees we have at least 4 words */ diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 3620c569275f..55b93dc60d0c 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -36,28 +36,13 @@ static void rxrpc_destroy_peer(struct work_struct *work); static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { struct rtable *rt; - struct flowi fl; peer->if_mtu = 1500; - memset(&fl, 0, sizeof(fl)); - - switch (peer->srx.transport.family) { - case AF_INET: - fl.oif = 0; - fl.proto = IPPROTO_UDP, - fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr; - fl.fl4_src = 0; - fl.fl4_tos = 0; - /* assume AFS.CM talking to AFS.FS */ - fl.fl_ip_sport = htons(7001); - fl.fl_ip_dport = htons(7000); - break; - default: - BUG(); - } - - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output_ports(&init_net, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), + IPPROTO_UDP, 0, 0); if (IS_ERR(rt)) { _leave(" [route err %ld]", PTR_ERR(rt)); return; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 95e0c8eda1a0..865ce7ba4e14 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -201,40 +201,40 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) { struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); - fl.proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. */ - ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr); - ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr); + ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr); + ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) - fl.oif = transport->saddr.v6.sin6_scope_id; + fl6.flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6.flowlabel); + if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id; else - fl.oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = sk->sk_bound_dev_if; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + ipv6_addr_copy(&fl6.daddr, rt0->addr); } SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, - &fl.fl6_src, &fl.fl6_dst); + &fl6.saddr, &fl6.daddr); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt); } /* Returns the dst cache entry for the given source and destination ip @@ -245,22 +245,22 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - fl.oif = daddr->v6.sin6_scope_id; + fl6.flowi6_oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); + SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6.daddr); if (saddr) { - ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); + ipv6_addr_copy(&fl6.saddr, &saddr->v6.sin6_addr); + SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6.saddr); } - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl6); if (!dst->error) { struct rt6_info *rt; rt = (struct rt6_info *)dst; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e55e6c49ec9..152976ec0b74 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -468,30 +468,30 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct rtable *rt; - struct flowi fl; + struct flowi4 fl4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; - memset(&fl, 0x0, sizeof(struct flowi)); - fl.fl4_dst = daddr->v4.sin_addr.s_addr; - fl.fl_ip_dport = daddr->v4.sin_port; - fl.proto = IPPROTO_SCTP; + memset(&fl4, 0x0, sizeof(struct flowi4)); + fl4.daddr = daddr->v4.sin_addr.s_addr; + fl4.fl4_dport = daddr->v4.sin_port; + fl4.flowi4_proto = IPPROTO_SCTP; if (asoc) { - fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); - fl.oif = asoc->base.sk->sk_bound_dev_if; - fl.fl_ip_sport = htons(asoc->base.bind_addr.port); + fl4.flowi4_tos = RT_CONN_FLAGS(asoc->base.sk); + fl4.flowi4_oif = asoc->base.sk->sk_bound_dev_if; + fl4.fl4_sport = htons(asoc->base.bind_addr.port); } if (saddr) { - fl.fl4_src = saddr->v4.sin_addr.s_addr; - fl.fl_ip_sport = saddr->v4.sin_port; + fl4.saddr = saddr->v4.sin_addr.s_addr; + fl4.fl4_sport = saddr->v4.sin_port; } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", - __func__, &fl.fl4_dst, &fl.fl4_src); + __func__, &fl4.daddr, &fl4.saddr); - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output_key(&init_net, &fl4); if (!IS_ERR(rt)) dst = &rt->dst; @@ -533,9 +533,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl.fl4_src = laddr->a.v4.sin_addr.s_addr; - fl.fl_ip_sport = laddr->a.v4.sin_port; - rt = ip_route_output_key(&init_net, &fl); + fl4.saddr = laddr->a.v4.sin_addr.s_addr; + fl4.fl4_sport = laddr->a.v4.sin_port; + rt = ip_route_output_key(&init_net, &fl4); if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; diff --git a/net/socket.c b/net/socket.c index 937d0fcf74bc..5212447c86e7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2588,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { + struct compat_ethtool_rxnfc __user *compat_rxnfc; + bool convert_in = false, convert_out = false; + size_t buf_size = ALIGN(sizeof(struct ifreq), 8); + struct ethtool_rxnfc __user *rxnfc; struct ifreq __user *ifr; + u32 rule_cnt = 0, actual_rule_cnt; + u32 ethcmd; u32 data; - void __user *datap; + int ret; + + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; - ifr = compat_alloc_user_space(sizeof(*ifr)); + compat_rxnfc = compat_ptr(data); - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (get_user(ethcmd, &compat_rxnfc->cmd)) return -EFAULT; - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + /* Most ethtool structures are defined without padding. + * Unfortunately struct ethtool_rxnfc is an exception. + */ + switch (ethcmd) { + default: + break; + case ETHTOOL_GRXCLSRLALL: + /* Buffer size is variable */ + if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) + return -EFAULT; + if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) + return -ENOMEM; + buf_size += rule_cnt * sizeof(u32); + /* fall through */ + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + convert_out = true; + /* fall through */ + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + buf_size += sizeof(struct ethtool_rxnfc); + convert_in = true; + break; + } + + ifr = compat_alloc_user_space(buf_size); + rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + if (put_user(convert_in ? rxnfc : compat_ptr(data), + &ifr->ifr_ifru.ifru_data)) return -EFAULT; - return dev_ioctl(net, SIOCETHTOOL, ifr); + if (convert_in) { + /* We expect there to be holes between fs.m_u and + * fs.ring_cookie and at the end of fs, but nowhere else. + */ + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + + sizeof(compat_rxnfc->fs.m_u) != + offsetof(struct ethtool_rxnfc, fs.m_u) + + sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON( + offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_in_user(rxnfc, compat_rxnfc, + (void *)(&rxnfc->fs.m_u + 1) - + (void *)rxnfc) || + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void *)(&rxnfc->fs.location + 1) - + (void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + + ret = dev_ioctl(net, SIOCETHTOOL, ifr); + if (ret) + return ret; + + if (convert_out) { + if (copy_in_user(compat_rxnfc, rxnfc, + (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)rxnfc) || + copy_in_user(&compat_rxnfc->fs.ring_cookie, + &rxnfc->fs.ring_cookie, + (const void *)(&rxnfc->fs.location + 1) - + (const void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + /* As an optimisation, we only copy the actual + * number of rules that the underlying + * function returned. Since Mallory might + * change the rule count in user memory, we + * check that it is less than the rule count + * originally given (as the user buffer size), + * which has been range-checked. + */ + if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + if (actual_rule_cnt < rule_cnt) + rule_cnt = actual_rule_cnt; + if (copy_in_user(&compat_rxnfc->rule_locs[0], + &rxnfc->rule_locs[0], + rule_cnt * sizeof(u32))) + return -EFAULT; + } + } + + return 0; } static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 45dbf1521b9a..f3914d0c5079 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -417,7 +417,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, gss_msg->msg.len += len; } if (mech->gm_upcall_enctypes) { - len = sprintf(p, mech->gm_upcall_enctypes); + len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes); p += len; gss_msg->msg.len += len; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index f375decc024b..9022f0a6503e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -750,7 +750,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ", + .gm_upcall_enctypes = "18,17,16,23,3,1,2", }; static int __init init_kerberos_module(void) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 57d344cf2256..e7a96e478f63 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -436,7 +436,9 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); - rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); + if (RPC_IS_QUEUED(rovr)) + rpc_wake_up_queued_task(rovr->tk_waitqueue, + rovr); } } spin_unlock(&clnt->cl_lock); @@ -597,6 +599,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) } } +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + rpc_task_release_client(task); + rpc_task_set_client(task, clnt); +} +EXPORT_SYMBOL_GPL(rpc_task_reset_client); + + static void rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) { @@ -636,12 +646,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) rpc_task_set_client(task, task_setup_data->rpc_client); rpc_task_set_rpc_message(task, task_setup_data->rpc_message); - if (task->tk_status != 0) { - int ret = task->tk_status; - rpc_put_task(task); - return ERR_PTR(ret); - } - if (task->tk_action == NULL) rpc_call_start(task); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 243fc09b164e..ffb687671da0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task) /* * Mark an RPC call as having completed by clearing the 'active' bit + * and then waking up all tasks that were sleeping. */ -static void rpc_mark_complete_task(struct rpc_task *task) +static int rpc_complete_task(struct rpc_task *task) { - smp_mb__before_clear_bit(); + void *m = &task->tk_runstate; + wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE); + struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE); + unsigned long flags; + int ret; + + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); - smp_mb__after_clear_bit(); - wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); + ret = atomic_dec_and_test(&task->tk_count); + if (waitqueue_active(wq)) + __wake_up_locked_key(wq, TASK_NORMAL, &k); + spin_unlock_irqrestore(&wq->lock, flags); + return ret; } /* * Allow callers to wait for completion of an RPC call + * + * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit() + * to enforce taking of the wq->lock and hence avoid races with + * rpc_complete_task(). */ int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) { if (action == NULL) action = rpc_wait_bit_killable; - return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, action, TASK_KILLABLE); } EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); @@ -285,15 +299,8 @@ static void rpc_make_runnable(struct rpc_task *task) if (rpc_test_and_set_running(task)) return; if (RPC_IS_ASYNC(task)) { - int status; - INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(rpciod_workqueue, &task->u.tk_work); - if (status < 0) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - return; - } + queue_work(rpciod_workqueue, &task->u.tk_work); } else wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } @@ -623,14 +630,12 @@ static void __rpc_execute(struct rpc_task *task) save_callback = task->tk_callback; task->tk_callback = NULL; save_callback(task); - } - - /* - * Perform the next FSM step. - * tk_action may be NULL when the task has been killed - * by someone else. - */ - if (!RPC_IS_QUEUED(task)) { + } else { + /* + * Perform the next FSM step. + * tk_action may be NULL when the task has been killed + * by someone else. + */ if (task->tk_action == NULL) break; task->tk_action(task); @@ -829,12 +834,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) } rpc_init_task(task, setup_data); - if (task->tk_status < 0) { - int err = task->tk_status; - rpc_put_task(task); - return ERR_PTR(err); - } - task->tk_flags |= flags; dprintk("RPC: allocated task %p\n", task); return task; @@ -857,34 +856,67 @@ static void rpc_async_release(struct work_struct *work) rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); } -void rpc_put_task(struct rpc_task *task) +static void rpc_release_resources_task(struct rpc_task *task) { - if (!atomic_dec_and_test(&task->tk_count)) - return; - /* Release resources */ if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) put_rpccred(task->tk_msg.rpc_cred); rpc_task_release_client(task); - if (task->tk_workqueue != NULL) { +} + +static void rpc_final_put_task(struct rpc_task *task, + struct workqueue_struct *q) +{ + if (q != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); - queue_work(task->tk_workqueue, &task->u.tk_work); + queue_work(q, &task->u.tk_work); } else rpc_free_task(task); } + +static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q) +{ + if (atomic_dec_and_test(&task->tk_count)) { + rpc_release_resources_task(task); + rpc_final_put_task(task, q); + } +} + +void rpc_put_task(struct rpc_task *task) +{ + rpc_do_put_task(task, NULL); +} EXPORT_SYMBOL_GPL(rpc_put_task); +void rpc_put_task_async(struct rpc_task *task) +{ + rpc_do_put_task(task, task->tk_workqueue); +} +EXPORT_SYMBOL_GPL(rpc_put_task_async); + static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); BUG_ON (RPC_IS_QUEUED(task)); - /* Wake up anyone who is waiting for task completion */ - rpc_mark_complete_task(task); + rpc_release_resources_task(task); - rpc_put_task(task); + /* + * Note: at this point we have been removed from rpc_clnt->cl_tasks, + * so it should be safe to use task->tk_count as a test for whether + * or not any other processes still hold references to our rpc_task. + */ + if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) { + /* Wake up anyone who may be waiting for task completion */ + if (!rpc_complete_task(task)) + return; + } else { + if (!atomic_dec_and_test(&task->tk_count)) + return; + } + rpc_final_put_task(task, task->tk_workqueue); } int rpciod_up(void) @@ -908,7 +940,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 856274d7e85c..9494c3767356 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -202,10 +202,9 @@ int xprt_reserve_xprt(struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; + return 1; out_sleep: @@ -213,7 +212,7 @@ out_sleep: task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) + if (req->rq_ntrans) rpc_sleep_on(&xprt->resend, task, NULL); else rpc_sleep_on(&xprt->sending, task, NULL); @@ -965,7 +964,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; - kref_init(&xprt->kref); + atomic_set(&xprt->count, 1); xprt->max_reqs = max_req; xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); @@ -1145,13 +1144,11 @@ found: /** * xprt_destroy - destroy an RPC transport, killing off all requests. - * @kref: kref for the transport to destroy + * @xprt: transport to destroy * */ -static void xprt_destroy(struct kref *kref) +static void xprt_destroy(struct rpc_xprt *xprt) { - struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); - dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); @@ -1175,7 +1172,8 @@ static void xprt_destroy(struct kref *kref) */ void xprt_put(struct rpc_xprt *xprt) { - kref_put(&xprt->kref, xprt_destroy); + if (atomic_dec_and_test(&xprt->count)) + xprt_destroy(xprt); } /** @@ -1185,6 +1183,7 @@ void xprt_put(struct rpc_xprt *xprt) */ struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) { - kref_get(&xprt->kref); - return xprt; + if (atomic_inc_not_zero(&xprt->count)) + return xprt; + return NULL; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2ac3f6e8adff..554d0814c875 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) { int len, n = 0, p; + int page_base; + struct page **ppages; if (pos == 0 && xdrbuf->head[0].iov_len) { seg[n].mr_page = NULL; @@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, ++n; } - if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { - if (n == nsegs) - return 0; - seg[n].mr_page = xdrbuf->pages[0]; - seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; - seg[n].mr_len = min_t(u32, - PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); - len = xdrbuf->page_len - seg[n].mr_len; + len = xdrbuf->page_len; + ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); + page_base = xdrbuf->page_base & ~PAGE_MASK; + p = 0; + while (len && n < nsegs) { + seg[n].mr_page = ppages[p]; + seg[n].mr_offset = (void *)(unsigned long) page_base; + seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); + BUG_ON(seg[n].mr_len > PAGE_SIZE); + len -= seg[n].mr_len; ++n; - p = 1; - while (len > 0) { - if (n == nsegs) - return 0; - seg[n].mr_page = xdrbuf->pages[p]; - seg[n].mr_offset = NULL; - seg[n].mr_len = min_t(u32, PAGE_SIZE, len); - len -= seg[n].mr_len; - ++n; - ++p; - } + ++p; + page_base = 0; /* page offset only applies to first page */ } + /* Message overflows the seg array */ + if (len && n == nsegs) + return 0; + if (xdrbuf->tail[0].iov_len) { /* the rpcrdma protocol allows us to omit any trailing * xdr pad bytes, saving the server an RDMA operation. */ if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) return n; if (n == nsegs) + /* Tail remains, but we're out of segments */ return 0; seg[n].mr_page = NULL; seg[n].mr_offset = xdrbuf->tail[0].iov_base; @@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) int copy_len; unsigned char *srcp, *destp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + int page_base; + struct page **ppages; destp = rqst->rq_svec[0].iov_base; curlen = rqst->rq_svec[0].iov_len; @@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) __func__, destp + copy_len, curlen); rqst->rq_svec[0].iov_len += curlen; } - r_xprt->rx_stats.pullup_copy_count += copy_len; - npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; + + page_base = rqst->rq_snd_buf.page_base; + ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT); + page_base &= ~PAGE_MASK; + npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT; for (i = 0; copy_len && i < npages; i++) { - if (i == 0) - curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; - else - curlen = PAGE_SIZE; + curlen = PAGE_SIZE - page_base; if (curlen > copy_len) curlen = copy_len; dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", __func__, i, destp, copy_len, curlen); - srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], - KM_SKB_SUNRPC_DATA); - if (i == 0) - memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); - else - memcpy(destp, srcp, curlen); + srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA); + memcpy(destp, srcp+page_base, curlen); kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); rqst->rq_svec[0].iov_len += curlen; destp += curlen; copy_len -= curlen; + page_base = 0; } /* header now contains entire send message */ return pad; @@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) { int i, npages, curlen, olen; char *destp; + struct page **ppages; + int page_base; curlen = rqst->rq_rcv_buf.head[0].iov_len; if (curlen > copy_len) { /* write chunk header fixup */ @@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) olen = copy_len; i = 0; rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; + page_base = rqst->rq_rcv_buf.page_base; + ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT); + page_base &= ~PAGE_MASK; + if (copy_len && rqst->rq_rcv_buf.page_len) { - npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + + npages = PAGE_ALIGN(page_base + rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; for (; i < npages; i++) { - if (i == 0) - curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base; - else - curlen = PAGE_SIZE; + curlen = PAGE_SIZE - page_base; if (curlen > copy_len) curlen = copy_len; dprintk("RPC: %s: page %d" " srcp 0x%p len %d curlen %d\n", __func__, i, srcp, copy_len, curlen); - destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], - KM_SKB_SUNRPC_DATA); - if (i == 0) - memcpy(destp + rqst->rq_rcv_buf.page_base, - srcp, curlen); - else - memcpy(destp, srcp, curlen); - flush_dcache_page(rqst->rq_rcv_buf.pages[i]); + destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA); + memcpy(destp + page_base, srcp, curlen); + flush_dcache_page(ppages[i]); kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); srcp += curlen; copy_len -= curlen; if (copy_len == 0) break; + page_base = 0; } rqst->rq_rcv_buf.page_len = olen - copy_len; } else diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9df1eadc912a..1a10dcd999ea 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, p, 0, length, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { put_page(p); + svc_rdma_put_context(ctxt, 1); return; } atomic_inc(&xprt->sc_dma_used); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 5f4c7b3bc711..d4297dc43dc4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) static inline void rpcrdma_event_process(struct ib_wc *wc) { + struct rpcrdma_mw *frmr; struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long) wc->wr_id; @@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc) return; if (IB_WC_SUCCESS != wc->status) { - dprintk("RPC: %s: %s WC status %X, connection lost\n", - __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", - wc->status); + dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", + __func__, wc->opcode, wc->status); rep->rr_len = ~0U; - rpcrdma_schedule_tasklet(rep); + if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) + rpcrdma_schedule_tasklet(rep); return; } switch (wc->opcode) { + case IB_WC_FAST_REG_MR: + frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + frmr->r.frmr.state = FRMR_IS_VALID; + break; + case IB_WC_LOCAL_INV: + frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + frmr->r.frmr.state = FRMR_IS_INVALID; + break; case IB_WC_RECV: rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu( @@ -1450,6 +1459,12 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) seg->mr_dma = ib_dma_map_single(ia->ri_id->device, seg->mr_offset, seg->mr_dmalen, seg->mr_dir); + if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { + dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", + __func__, + (unsigned long long)seg->mr_dma, + seg->mr_offset, seg->mr_dmalen); + } } static void @@ -1469,7 +1484,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_mr_seg *seg1 = seg; - struct ib_send_wr frmr_wr, *bad_wr; + struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr; + u8 key; int len, pageoff; int i, rc; @@ -1484,6 +1500,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, rpcrdma_map_one(ia, seg, writing); seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; len += seg->mr_len; + BUG_ON(seg->mr_len > PAGE_SIZE); ++seg; ++i; /* Check for holes */ @@ -1494,26 +1511,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments\n", __func__, seg1->mr_chunk.rl_mw, i); + if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) { + dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n", + __func__, + seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey); + /* Invalidate before using. */ + memset(&invalidate_wr, 0, sizeof invalidate_wr); + invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + invalidate_wr.next = &frmr_wr; + invalidate_wr.opcode = IB_WR_LOCAL_INV; + invalidate_wr.send_flags = IB_SEND_SIGNALED; + invalidate_wr.ex.invalidate_rkey = + seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + post_wr = &invalidate_wr; + } else + post_wr = &frmr_wr; + /* Bump the key */ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); /* Prepare FRMR WR */ memset(&frmr_wr, 0, sizeof frmr_wr); + frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; frmr_wr.opcode = IB_WR_FAST_REG_MR; - frmr_wr.send_flags = 0; /* unsignaled */ + frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; + BUG_ON(frmr_wr.wr.fast_reg.length < len); frmr_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); if (rc) { dprintk("RPC: %s: failed ib_post_send for register," @@ -1542,8 +1578,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, rpcrdma_unmap_one(ia, seg++); memset(&invalidate_wr, 0, sizeof invalidate_wr); + invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.send_flags = 0; /* unsignaled */ + invalidate_wr.send_flags = IB_SEND_SIGNALED; invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c7a7eba991bc..cae761a8536c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ struct { struct ib_fast_reg_page_list *fr_pgl; struct ib_mr *fr_mr; + enum { FRMR_IS_INVALID, FRMR_IS_VALID } state; } frmr; } r; struct list_head mw_list; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c431f5a57960..be96d429b475 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, } xs_reclassify_socket(family, sock); - if (xs_bind(transport, sock)) { + err = xs_bind(transport, sock); + if (err) { sock_release(sock); goto out; } diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 0436927369f3..2c5954b85933 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -29,18 +29,6 @@ config TIPC_ADVANCED Saying Y here will open some advanced configuration for TIPC. Most users do not need to bother; if unsure, just say N. -config TIPC_NODES - int "Maximum number of nodes in a cluster" - depends on TIPC_ADVANCED - range 8 2047 - default "255" - help - Specifies how many nodes can be supported in a TIPC cluster. - Can range from 8 to 2047 nodes; default is 255. - - Setting this to a smaller value saves some memory; - setting it to higher allows for more nodes. - config TIPC_PORTS int "Maximum number of ports in a node" depends on TIPC_ADVANCED diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 88463d9a6f12..a6fdab33877e 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -2,7 +2,7 @@ * net/tipc/addr.c: TIPC address utility routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ * tipc_addr_domain_valid - validates a network domain address * * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, - * where Z, C, and N are non-zero and do not exceed the configured limits. + * where Z, C, and N are non-zero. * * Returns 1 if domain address is valid, otherwise 0 */ @@ -51,10 +51,6 @@ int tipc_addr_domain_valid(u32 addr) u32 n = tipc_node(addr); u32 c = tipc_cluster(addr); u32 z = tipc_zone(addr); - u32 max_nodes = tipc_max_nodes; - - if (n > max_nodes) - return 0; if (n && (!z || !c)) return 0; @@ -66,8 +62,7 @@ int tipc_addr_domain_valid(u32 addr) /** * tipc_addr_node_valid - validates a proposed network address for this node * - * Accepts <Z.C.N>, where Z, C, and N are non-zero and do not exceed - * the configured limits. + * Accepts <Z.C.N>, where Z, C, and N are non-zero. * * Returns 1 if address can be used, otherwise 0 */ @@ -81,9 +76,9 @@ int tipc_in_scope(u32 domain, u32 addr) { if (!domain || (domain == addr)) return 1; - if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */ + if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ return 1; - if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */ + if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */ return 1; return 0; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 2490fadd0caf..8971aba99aea 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,6 +37,16 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H +static inline u32 tipc_zone_mask(u32 addr) +{ + return addr & 0xff000000u; +} + +static inline u32 tipc_cluster_mask(u32 addr) +{ + return addr & 0xfffff000u; +} + static inline int in_own_cluster(u32 addr) { return !((addr ^ tipc_own_addr) >> 12); @@ -49,14 +59,13 @@ static inline int in_own_cluster(u32 addr) * after a network hop. */ -static inline int addr_domain(int sc) +static inline u32 addr_domain(u32 sc) { if (likely(sc == TIPC_NODE_SCOPE)) return tipc_own_addr; if (sc == TIPC_CLUSTER_SCOPE) - return tipc_addr(tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), 0); - return tipc_addr(tipc_zone(tipc_own_addr), 0, 0); + return tipc_cluster_mask(tipc_own_addr); + return tipc_zone_mask(tipc_own_addr); } int tipc_addr_domain_valid(u32); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f2839b0f6b65..411719feb803 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -158,7 +158,6 @@ int tipc_register_media(u32 media_type, m_ptr->disable_bearer = disable; m_ptr->addr2str = addr2str; memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr)); - m_ptr->bcast = 1; strcpy(m_ptr->name, name); m_ptr->priority = bearer_priority; m_ptr->tolerance = link_tolerance; @@ -474,7 +473,7 @@ int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr) * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) +int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) { struct tipc_bearer *b_ptr; struct media *m_ptr; @@ -494,9 +493,9 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) warn("Bearer <%s> rejected, illegal name\n", name); return -EINVAL; } - if (!tipc_addr_domain_valid(bcast_scope) || - !tipc_in_scope(bcast_scope, tipc_own_addr)) { - warn("Bearer <%s> rejected, illegal broadcast scope\n", name); + if (!tipc_addr_domain_valid(disc_domain) || + !tipc_in_scope(disc_domain, tipc_own_addr)) { + warn("Bearer <%s> rejected, illegal discovery domain\n", name); return -EINVAL; } if ((priority < TIPC_MIN_LINK_PRI || @@ -560,18 +559,15 @@ restart: b_ptr->media = m_ptr; b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; - b_ptr->detect_scope = bcast_scope; b_ptr->priority = priority; INIT_LIST_HEAD(&b_ptr->cong_links); INIT_LIST_HEAD(&b_ptr->links); - if (m_ptr->bcast) { - b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, - bcast_scope); - } + b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, + disc_domain); spin_lock_init(&b_ptr->lock); write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", - name, tipc_addr_string_fill(addr_string, bcast_scope), priority); + name, tipc_addr_string_fill(addr_string, disc_domain), priority); return 0; failed: write_unlock_bh(&tipc_net_lock); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 255dea64f7bd..31d6172b20fd 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -70,7 +70,6 @@ struct tipc_bearer; * @disable_bearer: routine which disables a bearer * @addr2str: routine which converts bearer's address to string form * @bcast_addr: media address used in broadcasting - * @bcast: non-zero if media supports broadcasting [currently mandatory] * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion @@ -87,7 +86,6 @@ struct media { char *(*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); struct tipc_media_addr bcast_addr; - int bcast; u32 priority; u32 tolerance; u32 window; @@ -105,7 +103,6 @@ struct media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @priority: default link priority for bearer - * @detect_scope: network address mask used during automatic link creation * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @links: list of non-congested links associated with bearer @@ -128,7 +125,6 @@ struct tipc_bearer { spinlock_t lock; struct media *media; u32 priority; - u32 detect_scope; u32 identity; struct link_req *link_req; struct list_head links; @@ -167,7 +163,7 @@ void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); int tipc_block_bearer(const char *name); void tipc_continue(struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); +int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); /* diff --git a/net/tipc/config.c b/net/tipc/config.c index e16750dcf3c1..b25a396b7e1e 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -148,7 +148,7 @@ static struct sk_buff *cfg_enable_bearer(void) args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); if (tipc_enable_bearer(args->name, - ntohl(args->detect_scope), + ntohl(args->disc_domain), ntohl(args->priority))) return tipc_cfg_reply_error_string("unable to enable bearer"); @@ -260,25 +260,6 @@ static struct sk_buff *cfg_set_max_ports(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_nodes(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_nodes) - return tipc_cfg_reply_none(); - if (value != delimit(value, 8, 2047)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max nodes must be 8-2047)"); - if (tipc_mode == TIPC_NET_MODE) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max nodes once TIPC has joined a network)"); - tipc_max_nodes = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_netid(void) { u32 value; @@ -397,9 +378,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_SUBSCR: rep_tlv_buf = cfg_set_max_subscriptions(); break; - case TIPC_CMD_SET_MAX_NODES: - rep_tlv_buf = cfg_set_max_nodes(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -415,9 +393,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SUBSCR: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); break; - case TIPC_CMD_GET_MAX_NODES: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -431,6 +406,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SLAVES: case TIPC_CMD_SET_MAX_CLUSTERS: case TIPC_CMD_GET_MAX_CLUSTERS: + case TIPC_CMD_SET_MAX_NODES: + case TIPC_CMD_GET_MAX_NODES: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (obsolete command)"); break; diff --git a/net/tipc/core.c b/net/tipc/core.c index 2da1fc75ad65..c9a73e7763f6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -41,10 +41,6 @@ #include "config.h" -#ifndef CONFIG_TIPC_NODES -#define CONFIG_TIPC_NODES 255 -#endif - #ifndef CONFIG_TIPC_PORTS #define CONFIG_TIPC_PORTS 8191 #endif @@ -64,7 +60,6 @@ const char tipc_alphabet[] = /* configurable TIPC parameters */ u32 tipc_own_addr; -int tipc_max_nodes; int tipc_max_ports; int tipc_max_subscriptions; int tipc_max_publications; @@ -192,7 +187,6 @@ static int __init tipc_init(void) tipc_max_publications = 10000; tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_max_nodes = CONFIG_TIPC_NODES; tipc_net_id = 4711; res = tipc_core_start(); diff --git a/net/tipc/core.h b/net/tipc/core.h index 37544d9f73e1..436dda1159d2 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -147,7 +147,6 @@ void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *); */ extern u32 tipc_own_addr; -extern int tipc_max_nodes; extern int tipc_max_ports; extern int tipc_max_subscriptions; extern int tipc_max_publications; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 09ce2318b89e..491eff56b9da 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -75,12 +75,12 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; if (buf) { msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); @@ -119,17 +119,21 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) { + struct tipc_node *n_ptr; struct link *link; - struct tipc_media_addr media_addr; + struct tipc_media_addr media_addr, *addr; + struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); u32 dest = msg_dest_domain(msg); u32 orig = msg_prevnode(msg); u32 net_id = msg_bc_netid(msg); u32 type = msg_type(msg); + int link_fully_up; msg_get_media_addr(msg, &media_addr); buf_discard(buf); + /* Validate discovery message from requesting node */ if (net_id != tipc_net_id) return; if (!tipc_addr_domain_valid(dest)) @@ -143,57 +147,70 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) } if (!tipc_in_scope(dest, tipc_own_addr)) return; - if (in_own_cluster(orig)) { - /* Always accept link here */ - struct sk_buff *rbuf; - struct tipc_media_addr *addr; - struct tipc_node *n_ptr = tipc_node_find(orig); - int link_fully_up; - - if (n_ptr == NULL) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - spin_lock_bh(&n_ptr->lock); - - /* Don't talk to neighbor during cleanup after last session */ + if (!in_own_cluster(orig)) + return; - if (n_ptr->cleanup_required) { - spin_unlock_bh(&n_ptr->lock); + /* Locate structure corresponding to requesting node */ + n_ptr = tipc_node_find(orig); + if (!n_ptr) { + n_ptr = tipc_node_create(orig); + if (!n_ptr) return; - } + } + tipc_node_lock(n_ptr); + + /* Don't talk to neighbor during cleanup after last session */ + if (n_ptr->cleanup_required) { + tipc_node_unlock(n_ptr); + return; + } + + link = n_ptr->links[b_ptr->identity]; - link = n_ptr->links[b_ptr->identity]; + /* Create a link endpoint for this bearer, if necessary */ + if (!link) { + link = tipc_link_create(n_ptr, b_ptr, &media_addr); if (!link) { - link = tipc_link_create(b_ptr, orig, &media_addr); - if (!link) { - spin_unlock_bh(&n_ptr->lock); - return; - } - } - addr = &link->media_addr; - if (memcmp(addr, &media_addr, sizeof(*addr))) { - if (tipc_link_is_up(link) || (!link->started)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - spin_unlock_bh(&n_ptr->lock); - return; - } - warn("Resetting link <%s>, peer interface address changed\n", - link->name); - memcpy(addr, &media_addr, sizeof(*addr)); - tipc_link_reset(link); + tipc_node_unlock(n_ptr); + return; } - link_fully_up = link_working_working(link); - spin_unlock_bh(&n_ptr->lock); - if ((type == DSC_RESP_MSG) || link_fully_up) + } + + /* + * Ensure requesting node's media address is correct + * + * If media address doesn't match and the link is working, reject the + * request (must be from a duplicate node). + * + * If media address doesn't match and the link is not working, accept + * the new media address and reset the link to ensure it starts up + * cleanly. + */ + addr = &link->media_addr; + if (memcmp(addr, &media_addr, sizeof(*addr))) { + if (tipc_link_is_up(link) || (!link->started)) { + disc_dupl_alert(b_ptr, orig, &media_addr); + tipc_node_unlock(n_ptr); return; + } + warn("Resetting link <%s>, peer interface address changed\n", + link->name); + memcpy(addr, &media_addr, sizeof(*addr)); + tipc_link_reset(link); + } + + /* Accept discovery message & send response, if necessary */ + link_fully_up = link_working_working(link); + + if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) { rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); - if (rbuf != NULL) { + if (rbuf) { b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); buf_discard(rbuf); } } + + tipc_node_unlock(n_ptr); } /** diff --git a/net/tipc/link.c b/net/tipc/link.c index 89fbb6d6e956..43639ff1cbec 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -293,19 +293,35 @@ static void link_set_timer(struct link *l_ptr, u32 time) /** * tipc_link_create - create a new link + * @n_ptr: pointer to associated node * @b_ptr: pointer to associated bearer - * @peer: network address of node at other end of link * @media_addr: media address to use when sending messages over link * * Returns pointer to link. */ -struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { struct link *l_ptr; struct tipc_msg *msg; char *if_name; + char addr_string[16]; + u32 peer = n_ptr->addr; + + if (n_ptr->link_cnt >= 2) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + err("Attempt to establish third link to %s\n", addr_string); + return NULL; + } + + if (n_ptr->links[b_ptr->identity]) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + err("Attempt to establish second link on <%s> to %s\n", + b_ptr->name, addr_string); + return NULL; + } l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); if (!l_ptr) { @@ -322,6 +338,7 @@ struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f is appended to link name by reset/activate */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); + l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->b_ptr = b_ptr; link_set_supervision_props(l_ptr, b_ptr->media->tolerance); @@ -345,11 +362,7 @@ struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, link_reset_statistics(l_ptr); - l_ptr->owner = tipc_node_attach_link(l_ptr); - if (!l_ptr->owner) { - kfree(l_ptr); - return NULL; - } + tipc_node_attach_link(n_ptr, l_ptr); k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); @@ -548,7 +561,7 @@ void tipc_link_reset(struct link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); - if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && + if (was_active_link && tipc_node_active_links(l_ptr->owner) && l_ptr->owner->permit_changeover) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; @@ -1733,10 +1746,6 @@ deliver: tipc_node_unlock(n_ptr); tipc_link_recv_bundle(buf); continue; - case ROUTE_DISTRIBUTOR: - tipc_node_unlock(n_ptr); - buf_discard(buf); - continue; case NAME_DISTRIBUTOR: tipc_node_unlock(n_ptr); tipc_named_recv(buf); @@ -1763,6 +1772,10 @@ deliver: goto protocol_check; } break; + default: + buf_discard(buf); + buf = NULL; + break; } } tipc_node_unlock(n_ptr); @@ -1898,6 +1911,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; u32 msg_size = sizeof(l_ptr->proto_msg); + int r_flag; if (link_blocked(l_ptr)) return; @@ -1954,10 +1968,8 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_max_pkt(msg, l_ptr->max_pkt_target); } - if (tipc_node_has_redundant_links(l_ptr->owner)) - msg_set_redundant_link(msg); - else - msg_clear_redundant_link(msg); + r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); + msg_set_redundant_link(msg, r_flag); msg_set_linkprio(msg, l_ptr->priority); /* Ensure sequence number will not fit : */ @@ -1977,7 +1989,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); return; } - msg_set_timestamp(msg, jiffies_to_msecs(jiffies)); /* Message can be sent */ @@ -2065,7 +2076,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) l_ptr->peer_bearer_id = msg_bearer_id(msg); /* Synchronize broadcast sequence numbers */ - if (!tipc_node_has_redundant_links(l_ptr->owner)) + if (!tipc_node_redundant_links(l_ptr->owner)) l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg)); break; case STATE_MSG: @@ -2412,9 +2423,6 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) else destaddr = msg_destnode(inmsg); - if (msg_routed(inmsg)) - msg_set_prevnode(inmsg, tipc_own_addr); - /* Prepare reusable fragment header: */ tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, diff --git a/net/tipc/link.h b/net/tipc/link.h index a7794e7ede29..e6a30dbe1aaa 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -207,7 +207,8 @@ struct link { struct tipc_port; -struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct link *l_ptr); void tipc_link_changeover(struct link *l_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0787e12423b8..6d92d17e7fb5 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -192,8 +192,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg)); } - if (msg_routed(msg) && !msg_non_seq(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); @@ -210,8 +208,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN:%x", msg_type(msg)); } - if (msg_routed(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); @@ -232,13 +228,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } - if (msg_routed(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); break; case LINK_PROTOCOL: - tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg)); switch (msg_type(msg)) { case STATE_MSG: tipc_printf(buf, "STATE:"); @@ -275,33 +268,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } break; - case ROUTE_DISTRIBUTOR: - tipc_printf(buf, "ROUTING_MNG:"); - switch (msg_type(msg)) { - case EXT_ROUTING_TABLE: - tipc_printf(buf, "EXT_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case LOCAL_ROUTING_TABLE: - tipc_printf(buf, "LOCAL_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case SLAVE_ROUTING_TABLE: - tipc_printf(buf, "DP_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case ROUTE_ADDITION: - tipc_printf(buf, "ADD:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case ROUTE_REMOVAL: - tipc_printf(buf, "REMOVE:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - default: - tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); - } - break; case LINK_CONFIG: tipc_printf(buf, "CFG:"); switch (msg_type(msg)) { diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9d643a1b7d22..de02339fc175 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -421,13 +421,6 @@ static inline int msg_is_dest(struct tipc_msg *m, u32 d) return msg_short(m) || (msg_destnode(m) == d); } -static inline u32 msg_routed(struct tipc_msg *m) -{ - if (likely(msg_short(m))) - return 0; - return (msg_destnode(m) ^ msg_orignode(m)) >> 11; -} - static inline u32 msg_nametype(struct tipc_msg *m) { return msg_word(m, 8); @@ -438,16 +431,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } -static inline void msg_set_timestamp(struct tipc_msg *m, u32 n) -{ - msg_set_word(m, 8, n); -} - -static inline u32 msg_timestamp(struct tipc_msg *m) -{ - return msg_word(m, 8); -} - static inline u32 msg_nameinst(struct tipc_msg *m) { return msg_word(m, 9); @@ -535,7 +518,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 -#define DSC_H_SIZE 40 /* * Connection management protocol messages @@ -729,14 +711,9 @@ static inline u32 msg_redundant_link(struct tipc_msg *m) return msg_bits(m, 5, 12, 0x1); } -static inline void msg_set_redundant_link(struct tipc_msg *m) +static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) { - msg_set_bits(m, 5, 12, 0x1, 1); -} - -static inline void msg_clear_redundant_link(struct tipc_msg *m) -{ - msg_set_bits(m, 5, 12, 0x1, 0); + msg_set_bits(m, 5, 12, 0x1, r); } @@ -785,21 +762,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } /* - * Routing table message data - */ - - -static inline u32 msg_remote_node(struct tipc_msg *m) -{ - return msg_word(m, msg_hdr_sz(m)/4); -} - -static inline void msg_set_remote_node(struct tipc_msg *m, u32 a) -{ - msg_set_word(m, msg_hdr_sz(m)/4, a); -} - -/* * Segmentation message types */ diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 483c226c9581..c9fa6dfcf287 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -2,7 +2,7 @@ * net/tipc/name_distr.c: TIPC name distribution code * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -109,11 +109,9 @@ static void named_cluster_distribute(struct sk_buff *buf) { struct sk_buff *buf_copy; struct tipc_node *n_ptr; - u32 n_num; - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { - n_ptr = tipc_net.nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) { + list_for_each_entry(n_ptr, &tipc_node_list, list) { + if (tipc_node_active_links(n_ptr)) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) break; @@ -214,17 +212,16 @@ exit: } /** - * node_is_down - remove publication associated with a failed node + * named_purge_publ - remove publication associated with a failed node * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. * In rare cases the link may have come back up again when this * function is called, and we have two items representing the same * publication. Nudge this item's key to distinguish it from the other. - * (Note: Publication's node subscription is already unsubscribed.) */ -static void node_is_down(struct publication *publ) +static void named_purge_publ(struct publication *publ) { struct publication *p; @@ -232,6 +229,8 @@ static void node_is_down(struct publication *publ) publ->key += 1222345; p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + if (p) + tipc_nodesub_unsubscribe(&p->subscr); write_unlock_bh(&tipc_nametbl_lock); if (p != publ) { @@ -268,7 +267,8 @@ void tipc_named_recv(struct sk_buff *buf) tipc_nodesub_subscribe(&publ->subscr, msg_orignode(msg), publ, - (net_ev_handler)node_is_down); + (net_ev_handler) + named_purge_publ); } } else if (msg_type(msg) == WITHDRAWAL) { publ = tipc_nametbl_remove_publ(ntohl(item->type), diff --git a/net/tipc/net.c b/net/tipc/net.c index 9bacfd00b91e..68b3dd637291 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -2,7 +2,7 @@ * net/tipc/net.c: TIPC network routing code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ #include "name_distr.h" #include "subscr.h" #include "port.h" +#include "node.h" #include "config.h" /* @@ -108,26 +109,6 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net; - -static int net_start(void) -{ - tipc_net.nodes = kcalloc(tipc_max_nodes + 1, - sizeof(*tipc_net.nodes), GFP_ATOMIC); - tipc_net.highest_node = 0; - - return tipc_net.nodes ? 0 : -ENOMEM; -} - -static void net_stop(void) -{ - u32 n_num; - - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) - tipc_node_delete(tipc_net.nodes[n_num]); - kfree(tipc_net.nodes); - tipc_net.nodes = NULL; -} static void net_route_named_msg(struct sk_buff *buf) { @@ -217,9 +198,6 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - res = net_start(); - if (res) - return res; res = tipc_bclink_init(); if (res) return res; @@ -235,14 +213,16 @@ int tipc_net_start(u32 addr) void tipc_net_stop(void) { + struct tipc_node *node, *t_node; + if (tipc_mode != TIPC_NET_MODE) return; write_lock_bh(&tipc_net_lock); tipc_bearer_stop(); tipc_mode = TIPC_NODE_MODE; tipc_bclink_stop(); - net_stop(); + list_for_each_entry_safe(node, t_node, &tipc_node_list, list) + tipc_node_delete(node); write_unlock_bh(&tipc_net_lock); info("Left network mode\n"); } - diff --git a/net/tipc/net.h b/net/tipc/net.h index 4ae59ad04893..9eb4b9e220eb 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -2,7 +2,7 @@ * net/tipc/net.h: Include file for TIPC network routing code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,23 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -struct tipc_node; - -/** - * struct network - TIPC network structure - * @nodes: array of pointers to all nodes within cluster - * @highest_node: id of highest numbered node within cluster - * @links: number of (unicast) links to cluster - */ - -struct network { - struct tipc_node **nodes; - u32 highest_node; - u32 links; -}; - - -extern struct network tipc_net; extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); diff --git a/net/tipc/node.c b/net/tipc/node.c index e4dba1dfb6ea..2d106ef4fa4c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -44,9 +44,33 @@ static void node_established_contact(struct tipc_node *n_ptr); static DEFINE_SPINLOCK(node_create_lock); +static struct hlist_head node_htable[NODE_HTABLE_SIZE]; +LIST_HEAD(tipc_node_list); +static u32 tipc_num_nodes; + +static atomic_t tipc_num_links = ATOMIC_INIT(0); u32 tipc_own_tag; /** + * tipc_node_find - locate specified node object, if it exists + */ + +struct tipc_node *tipc_node_find(u32 addr) +{ + struct tipc_node *node; + struct hlist_node *pos; + + if (unlikely(!in_own_cluster(addr))) + return NULL; + + hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + if (node->addr == addr) + return node; + } + return NULL; +} + +/** * tipc_node_create - create neighboring node * * Currently, this routine is called by neighbor discovery code, which holds @@ -58,8 +82,7 @@ u32 tipc_own_tag; struct tipc_node *tipc_node_create(u32 addr) { - struct tipc_node *n_ptr; - u32 n_num; + struct tipc_node *n_ptr, *temp_node; spin_lock_bh(&node_create_lock); @@ -78,12 +101,19 @@ struct tipc_node *tipc_node_create(u32 addr) n_ptr->addr = addr; spin_lock_init(&n_ptr->lock); + INIT_HLIST_NODE(&n_ptr->hash); + INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->nsub); - n_num = tipc_node(addr); - tipc_net.nodes[n_num] = n_ptr; - if (n_num > tipc_net.highest_node) - tipc_net.highest_node = n_num; + hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + + list_for_each_entry(temp_node, &tipc_node_list, list) { + if (n_ptr->addr < temp_node->addr) + break; + } + list_add_tail(&n_ptr->list, &temp_node->list); + + tipc_num_nodes++; spin_unlock_bh(&node_create_lock); return n_ptr; @@ -91,18 +121,11 @@ struct tipc_node *tipc_node_create(u32 addr) void tipc_node_delete(struct tipc_node *n_ptr) { - u32 n_num; - - if (!n_ptr) - return; - - n_num = tipc_node(n_ptr->addr); - tipc_net.nodes[n_num] = NULL; + list_del(&n_ptr->list); + hlist_del(&n_ptr->hash); kfree(n_ptr); - while (!tipc_net.nodes[tipc_net.highest_node]) - if (--tipc_net.highest_node == 0) - break; + tipc_num_nodes--; } @@ -200,54 +223,32 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr) node_lost_contact(n_ptr); } -int tipc_node_has_active_links(struct tipc_node *n_ptr) +int tipc_node_active_links(struct tipc_node *n_ptr) { return n_ptr->active_links[0] != NULL; } -int tipc_node_has_redundant_links(struct tipc_node *n_ptr) +int tipc_node_redundant_links(struct tipc_node *n_ptr) { return n_ptr->working_links > 1; } int tipc_node_is_up(struct tipc_node *n_ptr) { - return tipc_node_has_active_links(n_ptr); + return tipc_node_active_links(n_ptr); } -struct tipc_node *tipc_node_attach_link(struct link *l_ptr) +void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr) { - struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr); - - if (!n_ptr) - n_ptr = tipc_node_create(l_ptr->addr); - if (n_ptr) { - u32 bearer_id = l_ptr->b_ptr->identity; - char addr_string[16]; - - if (n_ptr->link_cnt >= 2) { - err("Attempt to create third link to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); - return NULL; - } - - if (!n_ptr->links[bearer_id]) { - n_ptr->links[bearer_id] = l_ptr; - tipc_net.links++; - n_ptr->link_cnt++; - return n_ptr; - } - err("Attempt to establish second link on <%s> to %s\n", - l_ptr->b_ptr->name, - tipc_addr_string_fill(addr_string, l_ptr->addr)); - } - return NULL; + n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + atomic_inc(&tipc_num_links); + n_ptr->link_cnt++; } void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; - tipc_net.links--; + atomic_dec(&tipc_num_links); n_ptr->link_cnt--; } @@ -327,7 +328,6 @@ static void node_cleanup_finished(unsigned long node_addr) static void node_lost_contact(struct tipc_node *n_ptr) { - struct tipc_node_subscr *ns, *tns; char addr_string[16]; u32 i; @@ -365,12 +365,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) } /* Notify subscribers */ - list_for_each_entry_safe(ns, tns, &n_ptr->nsub, nodesub_list) { - ns->node = NULL; - list_del_init(&ns->nodesub_list); - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); - } + tipc_nodesub_notify(n_ptr); /* Prevent re-contact with node until all cleanup is done */ @@ -385,7 +380,6 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_node_info node_info; u32 payload_size; - u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -396,15 +390,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) " (network address)"); read_lock_bh(&tipc_net_lock); - if (!tipc_net.nodes) { + if (!tipc_num_nodes) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * - (tipc_net.highest_node - 1); + payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -418,9 +411,8 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { - n_ptr = tipc_net.nodes[n_num]; - if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) + list_for_each_entry(n_ptr, &tipc_node_list, list) { + if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); node_info.up = htonl(tipc_node_is_up(n_ptr)); @@ -439,7 +431,6 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_link_info link_info; u32 payload_size; - u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -456,7 +447,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Get space for all unicast links + multicast link */ - payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1); + payload_size = TLV_SPACE(sizeof(link_info)) * + (atomic_read(&tipc_num_links) + 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -470,18 +462,17 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_own_addr & 0xfffff00); + link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); link_info.up = htonl(1); strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); /* Add TLVs for any other links in scope */ - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { + list_for_each_entry(n_ptr, &tipc_node_list, list) { u32 i; - n_ptr = tipc_net.nodes[n_num]; - if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) + if (!tipc_in_scope(domain, n_ptr->addr)) continue; tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 206a8efa410e..5c61afc7a0b9 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -2,7 +2,7 @@ * net/tipc/node.h: Include file for TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,7 +46,8 @@ * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure - * @next: pointer to next node in sorted list of cluster's nodes + * @hash: links to adjacent nodes in unsorted hash chain + * @list: links to adjacent nodes in sorted list of cluster's nodes * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node @@ -69,7 +70,8 @@ struct tipc_node { u32 addr; spinlock_t lock; - struct tipc_node *next; + struct hlist_node hash; + struct list_head list; struct list_head nsub; struct link *active_links[2]; struct link *links[MAX_BEARERS]; @@ -90,27 +92,35 @@ struct tipc_node { } bclink; }; +#define NODE_HTABLE_SIZE 512 +extern struct list_head tipc_node_list; + +/* + * A trivial power-of-two bitmask technique is used for speed, since this + * operation is done for every incoming TIPC packet. The number of hash table + * entries has been chosen so that no hash chain exceeds 8 nodes and will + * usually be much smaller (typically only a single node). + */ +static inline unsigned int tipc_hashfn(u32 addr) +{ + return addr & (NODE_HTABLE_SIZE - 1); +} + extern u32 tipc_own_tag; +struct tipc_node *tipc_node_find(u32 addr); struct tipc_node *tipc_node_create(u32 addr); void tipc_node_delete(struct tipc_node *n_ptr); -struct tipc_node *tipc_node_attach_link(struct link *l_ptr); +void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); -int tipc_node_has_active_links(struct tipc_node *n_ptr); -int tipc_node_has_redundant_links(struct tipc_node *n_ptr); +int tipc_node_active_links(struct tipc_node *n_ptr); +int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -static inline struct tipc_node *tipc_node_find(u32 addr) -{ - if (likely(in_own_cluster(addr))) - return tipc_net.nodes[tipc_node(addr)]; - return NULL; -} - static inline void tipc_node_lock(struct tipc_node *n_ptr) { spin_lock_bh(&n_ptr->lock); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 018a55332d91..c3c2815ae630 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -2,7 +2,7 @@ * net/tipc/node_subscr.c: TIPC "node down" subscription handling * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -76,3 +76,22 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) list_del_init(&node_sub->nodesub_list); tipc_node_unlock(node_sub->node); } + +/** + * tipc_nodesub_notify - notify subscribers that a node is unreachable + * + * Note: node is locked by caller + */ + +void tipc_nodesub_notify(struct tipc_node *node) +{ + struct tipc_node_subscr *ns; + + list_for_each_entry(ns, &node->nsub, nodesub_list) { + if (ns->handle_node_down) { + tipc_k_signal((Handler)ns->handle_node_down, + (unsigned long)ns->usr_handle); + ns->handle_node_down = NULL; + } + } +} diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index 006ed739f515..4bc2ca0867a1 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -2,7 +2,7 @@ * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,5 +59,6 @@ struct tipc_node_subscr { void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); +void tipc_nodesub_notify(struct tipc_node *node); #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 125dcb0737b2..29d94d53198d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -58,6 +58,9 @@ struct tipc_sock { #define tipc_sk(sk) ((struct tipc_sock *)(sk)) #define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p)) +#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \ + (sock->state == SS_DISCONNECTING)) + static int backlog_rcv(struct sock *sk, struct sk_buff *skb); static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); @@ -289,7 +292,7 @@ static int release(struct socket *sock) if (buf == NULL) break; atomic_dec(&tipc_queue_size); - if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) + if (TIPC_SKB_CB(buf)->handle != 0) buf_discard(buf); else { if ((sock->state == SS_CONNECTING) || @@ -911,15 +914,13 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; + long timeout; unsigned int sz; u32 err; int res; /* Catch invalid receive requests */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */ - if (unlikely(!buf_len)) return -EINVAL; @@ -930,6 +931,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ @@ -939,17 +941,15 @@ restart: res = -ENOTCONN; goto exit; } - if (flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout <= 0L) { + res = timeout ? timeout : -EWOULDBLOCK; goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue) || - (sock->state == SS_DISCONNECTING))); + timeout = wait_event_interruptible_timeout(*sk_sleep(sk), + tipc_rx_ready(sock), + timeout); lock_sock(sk); - if (res) - goto exit; } /* Look at first message in receive queue */ @@ -991,11 +991,10 @@ restart: sz = buf_len; m->msg_flags |= MSG_TRUNC; } - if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg), - sz))) { - res = -EFAULT; + res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg), + m->msg_iov, sz); + if (res) goto exit; - } res = sz; } else { if ((sock->state == SS_READY) || @@ -1038,19 +1037,15 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; + long timeout; unsigned int sz; int sz_to_copy, target, needed; int sz_copied = 0; - char __user *crs = m->msg_iov->iov_base; - unsigned char *buf_crs; u32 err; int res = 0; /* Catch invalid receive attempts */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */ - if (unlikely(!buf_len)) return -EINVAL; @@ -1063,7 +1058,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, } target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); - + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ @@ -1073,17 +1068,15 @@ restart: res = -ENOTCONN; goto exit; } - if (flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout <= 0L) { + res = timeout ? timeout : -EWOULDBLOCK; goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue) || - (sock->state == SS_DISCONNECTING))); + timeout = wait_event_interruptible_timeout(*sk_sleep(sk), + tipc_rx_ready(sock), + timeout); lock_sock(sk); - if (res) - goto exit; } /* Look at first message in receive queue */ @@ -1112,24 +1105,25 @@ restart: /* Capture message data (if valid) & compute return value (always) */ if (!err) { - buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); - sz = (unsigned char *)msg + msg_size(msg) - buf_crs; + u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + sz -= offset; needed = (buf_len - sz_copied); sz_to_copy = (sz <= needed) ? sz : needed; - if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) { - res = -EFAULT; + + res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset, + m->msg_iov, sz_to_copy); + if (res) goto exit; - } + sz_copied += sz_to_copy; if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy; + TIPC_SKB_CB(buf)->handle = + (void *)(unsigned long)(offset + sz_to_copy); goto exit; } - - crs += sz_to_copy; } else { if (sz_copied != 0) goto exit; /* can't add error msg to valid data */ @@ -1256,7 +1250,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) /* Enqueue message (finally!) */ - TIPC_SKB_CB(buf)->handle = msg_data(msg); + TIPC_SKB_CB(buf)->handle = 0; atomic_inc(&tipc_queue_size); __skb_queue_tail(&sk->sk_receive_queue, buf); @@ -1608,7 +1602,7 @@ restart: buf = __skb_dequeue(&sk->sk_receive_queue); if (buf) { atomic_dec(&tipc_queue_size); - if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) { + if (TIPC_SKB_CB(buf)->handle != 0) { buf_discard(buf); goto restart; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index df5997d25826..1663e1a2efdd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Get the parent directory, calculate the hash for last * component. */ - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); + err = kern_path_parent(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; @@ -1124,7 +1124,7 @@ restart: /* Latch our state. - It is tricky place. We need to grab write lock and cannot + It is tricky place. We need to grab our state lock and cannot drop lock on peer. It is dangerous because deadlock is possible. Connect to self case and simultaneous attempt to connect are eliminated by checking socket @@ -1728,7 +1728,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + goto out; + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { @@ -1868,7 +1872,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(timeo); + goto out; + } do { int chunk; @@ -1899,11 +1907,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, timeo = unix_stream_data_wait(sk, timeo); - if (signal_pending(current)) { + if (signal_pending(current) + || mutex_lock_interruptible(&u->readlock)) { err = sock_intr_errno(timeo); goto out; } - mutex_lock(&u->readlock); + continue; unlock: unix_state_unlock(sk); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index f89f83bf828e..b6f4b994eb35 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp) /* * Socket ? */ - if (S_ISSOCK(inode->i_mode)) { + if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { struct socket *sock = SOCKET_I(inode); struct sock *s = sock->sk; diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index ca4c825be93d..9bde4d1d3e9b 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -1,5 +1,6 @@ #include <linux/utsname.h> #include <net/cfg80211.h> +#include "core.h" #include "ethtool.h" static void cfg80211_get_drvinfo(struct net_device *dev, @@ -37,9 +38,41 @@ static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs, regs->len = 0; } +static void cfg80211_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *rp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + memset(rp, 0, sizeof(*rp)); + + if (rdev->ops->get_ringparam) + rdev->ops->get_ringparam(wdev->wiphy, + &rp->tx_pending, &rp->tx_max_pending, + &rp->rx_pending, &rp->rx_max_pending); +} + +static int cfg80211_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *rp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) + return -EINVAL; + + if (rdev->ops->set_ringparam) + return rdev->ops->set_ringparam(wdev->wiphy, + rp->tx_pending, rp->rx_pending); + + return -ENOTSUPP; +} + const struct ethtool_ops cfg80211_ethtool_ops = { .get_drvinfo = cfg80211_get_drvinfo, .get_regs_len = cfg80211_get_regs_len, .get_regs = cfg80211_get_regs, .get_link = ethtool_op_get_link, + .get_ringparam = cfg80211_get_ringparam, + .set_ringparam = cfg80211_set_ringparam, }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c565689f0b9f..3332d5bce317 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -63,6 +63,10 @@ static struct regulatory_request *last_request; /* To trigger userspace events */ static struct platform_device *reg_pdev; +static struct device_type reg_device_type = { + .uevent = reg_device_uevent, +}; + /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no @@ -362,16 +366,11 @@ static inline void reg_regdb_query(const char *alpha2) {} /* * This lets us keep regulatory code which is updated on a regulatory - * basis in userspace. + * basis in userspace. Country information is filled in by + * reg_device_uevent */ static int call_crda(const char *alpha2) { - char country_env[9 + 2] = "COUNTRY="; - char *envp[] = { - country_env, - NULL - }; - if (!is_world_regdom((char *) alpha2)) pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); @@ -381,10 +380,7 @@ static int call_crda(const char *alpha2) /* query internal regulatory database (if it exists) */ reg_regdb_query(alpha2); - country_env[8] = alpha2[0]; - country_env[9] = alpha2[1]; - - return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, envp); + return kobject_uevent(®_pdev->dev.kobj, KOBJ_CHANGE); } /* Used by nl80211 before kmalloc'ing our regulatory domain */ @@ -2087,6 +2083,25 @@ int set_regdom(const struct ieee80211_regdomain *rd) return r; } +#ifdef CONFIG_HOTPLUG +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + if (last_request && !last_request->processed) { + if (add_uevent_var(env, "COUNTRY=%c%c", + last_request->alpha2[0], + last_request->alpha2[1])) + return -ENOMEM; + } + + return 0; +} +#else +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return -ENODEV; +} +#endif /* CONFIG_HOTPLUG */ + /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { @@ -2118,6 +2133,8 @@ int __init regulatory_init(void) if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); + reg_pdev->dev.type = ®_device_type; + spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index c4695d07af23..b67d1c3a2fb9 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -8,6 +8,7 @@ bool reg_is_valid_request(const char *alpha2); int regulatory_hint_user(const char *alpha2); +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env); void reg_device_remove(struct wiphy *wiphy); int __init regulatory_init(void); diff --git a/net/x25/Kconfig b/net/x25/Kconfig index 2196e55e4f61..e6759c9660bb 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -5,7 +5,6 @@ config X25 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on BKL # should be fixable ---help--- X.25 is a set of standardized network protocols, similar in scope to frame relay; the one physical line from your box to the X.25 network diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ad96ee90fe27..4680b1e4c79c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -40,7 +40,6 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/smp_lock.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/net.h> @@ -432,15 +431,6 @@ void x25_destroy_socket_from_timer(struct sock *sk) sock_put(sk); } -static void x25_destroy_socket(struct sock *sk) -{ - sock_hold(sk); - lock_sock(sk); - __x25_destroy_socket(sk); - release_sock(sk); - sock_put(sk); -} - /* * Handling for system calls applied via the various interfaces to a * X.25 socket object. @@ -647,18 +637,19 @@ static int x25_release(struct socket *sock) struct sock *sk = sock->sk; struct x25_sock *x25; - lock_kernel(); if (!sk) - goto out; + return 0; x25 = x25_sk(sk); + sock_hold(sk); + lock_sock(sk); switch (x25->state) { case X25_STATE_0: case X25_STATE_2: x25_disconnect(sk, 0, 0, 0); - x25_destroy_socket(sk); + __x25_destroy_socket(sk); goto out; case X25_STATE_1: @@ -678,7 +669,8 @@ static int x25_release(struct socket *sock) sock_orphan(sk); out: - unlock_kernel(); + release_sock(sk); + sock_put(sk); return 0; } @@ -1085,7 +1077,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, size_t size; int qbit = 0, rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT)) goto out; @@ -1148,7 +1140,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN; + release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); + lock_sock(sk); if (!skb) goto out; X25_SKB_CB(skb)->flags = msg->msg_flags; @@ -1231,26 +1225,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, len++; } - /* - * lock_sock() is currently only used to serialize this x25_kick() - * against input-driven x25_kick() calls. It currently only blocks - * incoming packets for this socket and does not protect against - * any other socket state changes and is not called from anywhere - * else. As x25_kick() cannot block and as long as all socket - * operations are BKL-wrapped, we don't need take to care about - * purging the backlog queue in x25_release(). - * - * Using lock_sock() to protect all socket operations entirely - * (and making the whole x25 stack SMP aware) unfortunately would - * require major changes to {send,recv}msg and skb allocation methods. - * -> 2.5 ;) - */ - lock_sock(sk); x25_kick(sk); - release_sock(sk); rc = len; out: - unlock_kernel(); + release_sock(sk); return rc; out_kfree_skb: kfree_skb(skb); @@ -1271,7 +1249,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int rc = -ENOTCONN; - lock_kernel(); + lock_sock(sk); /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though @@ -1300,8 +1278,10 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_OOB; } else { /* Now we can treat all alike */ + release_sock(sk); skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); + lock_sock(sk); if (!skb) goto out; @@ -1338,14 +1318,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = sizeof(struct sockaddr_x25); - lock_sock(sk); x25_check_rbuf(sk); - release_sock(sk); rc = copied; out_free_dgram: skb_free_datagram(sk, skb); out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1581,18 +1559,18 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (sk->sk_state != TCP_CLOSE) break; clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); - unlock_kernel(); + release_sock(sk); rc = 0; break; } case SIOCX25SENDCALLACCPT: { rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) break; /* must call accptapprv above */ @@ -1600,7 +1578,7 @@ out_cud_release: break; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - unlock_kernel(); + release_sock(sk); rc = 0; break; } diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index d00649fb251d..0144271d2184 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -68,8 +68,11 @@ int x25_output(struct sock *sk, struct sk_buff *skb) frontlen = skb_headroom(skb); while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + max_len, - noblock, &err)) == NULL){ + release_sock(sk); + skbn = sock_alloc_send_skb(sk, frontlen + max_len, + noblock, &err); + lock_sock(sk); + if (!skbn) { if (err == -EWOULDBLOCK && noblock){ kfree_skb(skb); return sent; diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index c631047e1b27..aa429eefe919 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o xfrm_algo.o \ - xfrm_sysctl.o + xfrm_sysctl.o xfrm_replay.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 45f1c98d4fce..872065ca7f8c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -107,6 +107,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct net *net = dev_net(skb->dev); int err; __be32 seq; + __be32 seq_hi; struct xfrm_state *x; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; @@ -118,7 +119,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input; + seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } @@ -172,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop_unlock; } - if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { + if (x->props.replay_window && x->repl->check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } @@ -184,7 +185,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); - XFRM_SKB_CB(skb)->seq.input = seq; + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); + + XFRM_SKB_CB(skb)->seq.input.low = seq; + XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; nexthdr = x->type->input(x, skb); @@ -206,8 +210,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (x->props.replay_window) - xfrm_replay_advance(x, seq); + x->repl->advance(x, seq); x->curlft.bytes += skb->len; x->curlft.packets++; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 64f2ae1fdc15..1aba03f449cc 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -67,17 +67,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error; } - if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { - XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; - if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); - x->replay.oseq--; - xfrm_audit_state_replay_overflow(x, skb); - err = -EOVERFLOW; - goto error; - } - if (xfrm_aevent_is_on(net)) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + err = x->repl->overflow(x, skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); + goto error; } x->curlft.bytes += skb->len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b1932a629ef8..15792d8b6272 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,8 +50,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - const struct flowi *fl, int family); +static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -60,23 +59,27 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, static inline int __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + const struct flowi4 *fl4 = &fl->u.ip4; + + return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && + (fl4->flowi4_proto == sel->proto || !sel->proto) && + (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); } static inline int __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + const struct flowi6 *fl6 = &fl->u.ip6; + + return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && + (fl6->flowi6_proto == sel->proto || !sel->proto) && + (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); } int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, @@ -877,13 +880,13 @@ static int xfrm_policy_match(const struct xfrm_policy *pol, int match, ret = -ESRCH; if (pol->family != family || - (fl->mark & pol->mark.m) != pol->mark.v || + (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol->security, fl->secid, + ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, dir); return ret; @@ -1013,7 +1016,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, goto out; } err = security_xfrm_policy_lookup(pol->security, - fl->secid, + fl->flowi_secid, policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); @@ -1801,6 +1804,8 @@ restart: goto no_transform; } + dst_hold(&xdst->u.dst); + spin_lock_bh(&xfrm_policy_sk_bundle_lock); xdst->u.dst.next = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = &xdst->u.dst; @@ -1849,7 +1854,7 @@ restart: return make_blackhole(net, family, dst_orig); } - if (fl->flags & FLOWI_FLAG_CAN_SLEEP) { + if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(&net->xfrm.km_waitq, &wait); @@ -1991,7 +1996,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, return -EAFNOSUPPORT; afinfo->decode_session(skb, fl, reverse); - err = security_xfrm_decode_session(skb, &fl->secid); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); xfrm_policy_put_afinfo(afinfo); return err; } @@ -2172,7 +2177,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct net *net = dev_net(skb->dev); struct flowi fl; struct dst_entry *dst; - int res = 0; + int res = 1; if (xfrm_decode_session(skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); @@ -2183,7 +2188,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); if (IS_ERR(dst)) { - res = 1; + res = 0; dst = NULL; } skb_dst_set(skb, dst); @@ -2223,7 +2228,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2295,8 +2300,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, - const struct flowi *fl, int family) +static int xfrm_bundle_ok(struct xfrm_dst *first, int family) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -2305,26 +2309,12 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; -#ifdef CONFIG_XFRM_SUB_POLICY - if (fl) { - if (first->origin && !flow_cache_uli_match(first->origin, fl)) - return 0; - if (first->partner && - !xfrm_selector_match(first->partner, fl, family)) - return 0; - } -#endif last = NULL; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) - return 0; - if (fl && pol && - !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) - return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; if (xdst->xfrm_genid != dst->xfrm->genid) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c new file mode 100644 index 000000000000..2f5be5b15740 --- /dev/null +++ b/net/xfrm/xfrm_replay.c @@ -0,0 +1,534 @@ +/* + * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. + * + * Copyright (C) 2010 secunet Security Networks AG + * Copyright (C) 2010 Steffen Klassert <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <net/xfrm.h> + +u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) +{ + u32 seq, seq_hi, bottom; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + + seq = ntohl(net_seq); + seq_hi = replay_esn->seq_hi; + bottom = replay_esn->seq - replay_esn->replay_window + 1; + + if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { + /* A. same subspace */ + if (unlikely(seq < bottom)) + seq_hi++; + } else { + /* B. window spans two subspaces */ + if (unlikely(seq >= bottom)) + seq_hi--; + } + + return seq_hi; +} + +static void xfrm_replay_notify(struct xfrm_state *x, int event) +{ + struct km_event c; + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff && + (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(&x->replay, &x->preplay, + sizeof(struct xfrm_replay_state)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + if (unlikely(x->replay.oseq == 0)) { + x->replay.oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= min_t(unsigned int, x->props.replay_window, + sizeof(x->replay.bitmap) * 8)) { + x->stats.replay_window++; + goto err; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + goto err; + } + return 0; + +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return; + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + if (unlikely(replay_esn->oseq == 0)) { + replay_esn->oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_bmp(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 diff = replay_esn->seq - seq; + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > replay_esn->seq)) + return 0; + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (!replay_esn->replay_window) + return; + + if (seq > replay_esn->seq) { + diff = seq - replay_esn->seq; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + nr = replay_esn->replay_window >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + + replay_esn->seq = seq; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) +{ + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff && + (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; + + if (unlikely(replay_esn->oseq == 0)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + u32 wsize = replay_esn->replay_window; + u32 top = replay_esn->seq; + u32 bottom = top - wsize + 1; + + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && + (replay_esn->seq < replay_esn->replay_window - 1))) + goto err; + + diff = top - seq; + + if (likely(top >= wsize - 1)) { + /* A. same subspace */ + if (likely(seq > top) || seq < bottom) + return 0; + } else { + /* B. window spans two subspaces */ + if (likely(seq > top && seq < bottom)) + return 0; + if (seq >= bottom) + diff = ~seq + top + 1; + } + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + int wrap; + u32 diff, pos, seq, seq_hi; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!replay_esn->replay_window) + return; + + seq = ntohl(net_seq); + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + seq_hi = xfrm_replay_seqhi(x, net_seq); + wrap = seq_hi - replay_esn->seq_hi; + + if ((!wrap && seq > replay_esn->seq) || wrap > 0) { + if (likely(!wrap)) + diff = seq - replay_esn->seq; + else + diff = ~replay_esn->seq + seq + 1; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + nr = replay_esn->replay_window >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + + replay_esn->seq = seq; + + if (unlikely(wrap > 0)) + replay_esn->seq_hi++; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow, +}; + +static struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_bmp, +}; + +static struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_esn, +}; + +int xfrm_init_replay(struct xfrm_state *x) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (replay_esn) { + if (replay_esn->replay_window > + replay_esn->bmp_len * sizeof(__u32)) + return -EINVAL; + + if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) + x->repl = &xfrm_replay_esn; + else + x->repl = &xfrm_replay_bmp; + } else + x->repl = &xfrm_replay_legacy; + + return 0; +} +EXPORT_SYMBOL(xfrm_init_replay); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 81221d9cbf06..d575f0534868 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -42,13 +42,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); -#ifdef CONFIG_AUDITSYSCALL -static void xfrm_audit_state_replay(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq); -#else -#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0) -#endif /* CONFIG_AUDITSYSCALL */ - static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -363,6 +356,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); + kfree(x->replay_esn); + kfree(x->preplay_esn); if (x->inner_mode) xfrm_put_mode(x->inner_mode); if (x->inner_mode_iaf) @@ -859,7 +854,7 @@ found: xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); - error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -1619,54 +1614,6 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk) } EXPORT_SYMBOL(xfrm_state_walk_done); - -void xfrm_replay_notify(struct xfrm_state *x, int event) -{ - struct km_event c; - /* we send notify messages in case - * 1. we updated on of the sequence numbers, and the seqno difference - * is at least x->replay_maxdiff, in this case we also update the - * timeout of our timer function - * 2. if x->replay_maxage has elapsed since last update, - * and there were changes - * - * The state structure must be locked! - */ - - switch (event) { - case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && - (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { - if (x->xflags & XFRM_TIME_DEFER) - event = XFRM_REPLAY_TIMEOUT; - else - return; - } - - break; - - case XFRM_REPLAY_TIMEOUT: - if ((x->replay.seq == x->preplay.seq) && - (x->replay.bitmap == x->preplay.bitmap) && - (x->replay.oseq == x->preplay.oseq)) { - x->xflags |= XFRM_TIME_DEFER; - return; - } - - break; - } - - memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); - c.event = XFRM_MSG_NEWAE; - c.data.aevent = event; - km_state_notify(x, &c); - - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - x->xflags &= ~XFRM_TIME_DEFER; -} - static void xfrm_replay_timer_handler(unsigned long data) { struct xfrm_state *x = (struct xfrm_state*)data; @@ -1675,7 +1622,7 @@ static void xfrm_replay_timer_handler(unsigned long data) if (x->km.state == XFRM_STATE_VALID) { if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); + x->repl->notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; } @@ -1683,57 +1630,6 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_unlock(&x->lock); } -int xfrm_replay_check(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (unlikely(seq == 0)) - goto err; - - if (likely(seq > x->replay.seq)) - return 0; - - diff = x->replay.seq - seq; - if (diff >= min_t(unsigned int, x->props.replay_window, - sizeof(x->replay.bitmap) * 8)) { - x->stats.replay_window++; - goto err; - } - - if (x->replay.bitmap & (1U << diff)) { - x->stats.replay++; - goto err; - } - return 0; - -err: - xfrm_audit_state_replay(x, skb, net_seq); - return -EINVAL; -} - -void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (seq > x->replay.seq) { - diff = seq - x->replay.seq; - if (diff < x->props.replay_window) - x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; - else - x->replay.bitmap = 1; - x->replay.seq = seq; - } else { - diff = x->replay.seq - seq; - x->replay.bitmap |= (1U << diff); - } - - if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); -} - static LIST_HEAD(xfrm_km_list); static DEFINE_RWLOCK(xfrm_km_lock); @@ -2246,7 +2142,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x, } EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); -static void xfrm_audit_state_replay(struct xfrm_state *x, +void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { struct audit_buffer *audit_buf; @@ -2261,6 +2157,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, spi, spi, ntohl(net_seq)); audit_log_end(audit_buf); } +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay); void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 468ab60d3dc0..706385ae3e4b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -119,6 +119,19 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) return 0; } +static inline int verify_replay(struct xfrm_usersa_info *p, + struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + + if (!rt) + return 0; + + if (p->replay_window != 0) + return -EINVAL; + + return 0; +} static int verify_newsa_info(struct xfrm_usersa_info *p, struct nlattr **attrs) @@ -214,6 +227,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(attrs))) goto out; + if ((err = verify_replay(p, attrs))) + goto out; err = -EINVAL; switch (p->mode) { @@ -345,6 +360,33 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, return 0; } +static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, + struct xfrm_replay_state_esn **preplay_esn, + struct nlattr *rta) +{ + struct xfrm_replay_state_esn *p, *pp, *up; + + if (!rta) + return 0; + + up = nla_data(rta); + + p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + if (!p) + return -ENOMEM; + + pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + if (!pp) { + kfree(p); + return -ENOMEM; + } + + *replay_esn = p; + *preplay_esn = pp; + + return 0; +} + static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -380,10 +422,20 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + if (re) { + struct xfrm_replay_state_esn *replay_esn; + replay_esn = nla_data(re); + memcpy(x->replay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + memcpy(x->preplay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + } + if (rp) { struct xfrm_replay_state *replay; replay = nla_data(rp); @@ -467,16 +519,19 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, + attrs[XFRMA_REPLAY_ESN_VAL]))) + goto error; + x->km.seq = p->seq; x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; - x->preplay.bitmap = 0; - x->preplay.seq = x->replay.seq+x->replay_maxdiff; - x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; - /* override default values from above */ + if ((err = xfrm_init_replay(x))) + goto error; + /* override default values from above */ xfrm_update_ae_params(x, attrs); return x; @@ -707,6 +762,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (xfrm_mark_put(skb, &x->mark)) goto nla_put_failure; + if (x->replay_esn) + NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn); + if (x->security && copy_sec_ctx(x->security, skb) < 0) goto nla_put_failure; @@ -1576,10 +1635,14 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } -static inline size_t xfrm_aevent_msgsize(void) +static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) { + size_t replay_size = x->replay_esn ? + xfrm_replay_state_esn_len(x->replay_esn) : + sizeof(struct xfrm_replay_state); + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) - + nla_total_size(sizeof(struct xfrm_replay_state)) + + nla_total_size(replay_size) + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ @@ -1604,7 +1667,13 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct id->reqid = x->props.reqid; id->flags = c->data.aevent; - NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + if (x->replay_esn) + NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + else + NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); if (id->flags & XFRM_AE_RTHR) @@ -1637,16 +1706,16 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; - r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); - if (r_skb == NULL) - return -ENOMEM; - mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); - if (x == NULL) { - kfree_skb(r_skb); + if (x == NULL) return -ESRCH; + + r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); + if (r_skb == NULL) { + xfrm_state_put(x); + return -ENOMEM; } /* @@ -1678,9 +1747,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; - if (!lt && !rp) + if (!lt && !rp && !re) return err; /* pedantic mode - thou shalt sayeth replaceth */ @@ -2145,6 +2215,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, + [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, }; static struct xfrm_link { @@ -2272,7 +2343,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event struct net *net = xs_net(x); struct sk_buff *skb; - skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -2326,6 +2397,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->encap)); if (x->tfcpad) l += nla_total_size(sizeof(x->tfcpad)); + if (x->replay_esn) + l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn)); if (x->security) l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + x->security->ctx_len); |