diff options
Diffstat (limited to 'net')
91 files changed, 1645 insertions, 1128 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 0505a03c374c..854ca7a911c4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -38,6 +38,9 @@ #include <net/9p/transport.h> #include "protocol.h" +#define CREATE_TRACE_POINTS +#include <trace/events/9p.h> + /* * Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -123,21 +126,19 @@ static int parse_opts(char *opts, struct p9_client *clnt) options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { - int token; + int token, r; if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_trans) { - int r = match_int(&args[0], &option); + switch (token) { + case Opt_msize: + r = match_int(&args[0], &option); if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + "integer field, but no integer?\n"); ret = r; continue; } - } - switch (token) { - case Opt_msize: clnt->msize = option; break; case Opt_trans: @@ -203,11 +204,13 @@ free_and_return: * */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) { unsigned long flags; int row, col; struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); /* This looks up the original request by tag so we know which * buffer to read the data into */ @@ -245,23 +248,10 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - int alloc_msize = min(c->msize, 4096); - req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->tc->capacity = alloc_msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->rc->capacity = alloc_msize; - } else { - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->tc->capacity = c->msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->rc->capacity = c->msize; - } + req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); + req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -271,6 +261,8 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) req->wq = NULL; return ERR_PTR(-ENOMEM); } + req->tc->capacity = alloc_msize; + req->rc->capacity = alloc_msize; req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } @@ -475,37 +467,22 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after check errors which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } - if (type != P9_RERROR && type != P9_RLERROR) return 0; if (!p9_is_proto_dotl(c)) { char *ename; - - if (req->tc->pbuf_size) { - /* Handle user buffers */ - size_t len = req->rc->size - req->rc->offset; - if (req->tc->pubuf) { - /* User Buffer */ - err = copy_from_user( - &req->rc->sdata[req->rc->offset], - req->tc->pubuf, len); - if (err) { - err = -EFAULT; - goto out_err; - } - } else { - /* Kernel Buffer */ - memmove(&req->rc->sdata[req->rc->offset], - req->tc->pkbuf, len); - } - } err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); + &ename, &ecode); if (err) goto out_err; @@ -515,11 +492,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, - ename); - - kfree(ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } + kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; @@ -527,7 +503,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } - return err; out_err: @@ -536,6 +511,115 @@ out_err: return err; } +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after parse_header which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + uint16_t len; + /* 7 = header size for RERROR, 2 is the size of string len; */ + int inline_len = in_hdrlen - (7 + 2); + + /* Read the size of error string */ + err = p9pdu_readf(req->rc, c->proto_version, "w", &len); + if (err) + goto out_err; + + ename = kmalloc(len + 1, GFP_NOFS); + if (!ename) { + err = -ENOMEM; + goto out_err; + } + if (len <= inline_len) { + /* We have error in protocol buffer itself */ + if (pdu_read(req->rc, ename, len)) { + err = -EFAULT; + goto out_free; + + } + } else { + /* + * Part of the data is in user space buffer. + */ + if (pdu_read(req->rc, ename, inline_len)) { + err = -EFAULT; + goto out_free; + + } + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_free; + } + } + } + ename[len] = 0; + if (p9_is_proto_dotu(c)) { + /* For dotu we also have error code */ + err = p9pdu_readf(req->rc, + c->proto_version, "d", &ecode); + if (err) + goto out_free; + err = -ecode; + } + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + return err; + +out_free: + kfree(ename); +out_err: + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + return err; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -579,23 +663,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return 0; } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) { - va_list ap; int tag, err; struct p9_req_t *req; - unsigned long flags; - int sigpending; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -607,12 +680,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } else - sigpending = 0; - tag = P9_NOTAG; if (type != P9_TVERSION) { tag = p9_idpool_get(c->tagpool); @@ -620,18 +687,51 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return ERR_PTR(-ENOMEM); } - req = p9_tag_alloc(c, tag); + req = p9_tag_alloc(c, tag, req_size); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(req->tc, tag, type); - va_start(ap, fmt); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); - va_end(ap); if (err) goto reterr; - p9pdu_finalize(req->tc); + p9pdu_finalize(c, req->tc); + trace_9p_client_req(c, type, tag); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; err = c->trans_mod->request(c, req); if (err < 0) { @@ -639,18 +739,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } - - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + /* Wait for the response */ err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", - req->wq, tag, err); + req->status >= REQ_STATUS_RCVD); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; @@ -663,25 +759,102 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (req->status == REQ_STATUS_RCVD) err = 0; } - if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (err < 0) goto reterr; err = p9_check_errors(c, req); - if (!err) { - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + goto reterr; + } + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } + if (err < 0) + goto reterr; + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; reterr: - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, - err); p9_free_req(c, req); return ERR_PTR(err); } @@ -769,7 +942,7 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(c, req->rc); goto error; } @@ -906,15 +1079,14 @@ EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname) { - int err; + int err = 0; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", - afid ? afid->fid : -1, uname, aname); - err = 0; + P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -931,7 +1103,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -991,7 +1163,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1058,7 +1230,7 @@ int p9_client_open(struct p9_fid *fid, int mode) err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1101,7 +1273,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1146,7 +1318,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1185,7 +1357,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1330,13 +1502,15 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) { - int err, rsize; - struct p9_client *clnt; - struct p9_req_t *req; char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; + struct p9_client *clnt; + int err, rsize, non_zc = 0; + - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1348,13 +1522,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, - rsize, data, udata); + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, - rsize); + rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1363,14 +1548,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - P9_DUMP_PKT(1, req->rc); - if (!req->tc->pbuf_size) { + if (non_zc) { if (data) { memmove(data, dataptr, count); } else { @@ -1396,6 +1580,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count) { int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; struct p9_req_t *req; @@ -1411,19 +1596,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, - rsize, data, udata); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); } else { - if (data) req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, - offset, rsize, data); + offset, rsize, data); else req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, - offset, rsize, udata); + offset, rsize, udata); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1432,7 +1622,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1472,7 +1662,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1523,7 +1713,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1671,7 +1861,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1778,7 +1968,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1824,7 +2014,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize; + int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -1842,13 +2032,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, - offset, rsize, data); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, - offset, rsize); + offset, rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1857,13 +2052,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (!req->tc->pbuf_size && data) + if (non_zc) memmove(data, dataptr, count); p9_free_req(clnt, req); @@ -1894,7 +2089,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, @@ -1925,7 +2120,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, @@ -1960,7 +2155,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); @@ -1993,7 +2188,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " @@ -2021,7 +2216,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index df58375ea6b3..55e10a96c902 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -37,40 +37,11 @@ #include <net/9p/client.h> #include "protocol.h" +#include <trace/events/9p.h> + static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); -#ifdef CONFIG_NET_9P_DEBUG -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ - int len = pdu->size; - - if ((p9_debug_level & P9_DEBUG_VPKT) != P9_DEBUG_VPKT) { - if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) { - if (len > 32) - len = 32; - } else { - /* shouldn't happen */ - return; - } - } - - if (way) - print_hex_dump_bytes("[9P] ", DUMP_PREFIX_OFFSET, pdu->sdata, - len); - else - print_hex_dump_bytes("]9P[ ", DUMP_PREFIX_OFFSET, pdu->sdata, - len); -} -#else -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -} -#endif -EXPORT_SYMBOL(p9pdu_dump); - void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); @@ -81,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf) } EXPORT_SYMBOL(p9stat_free); -static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); @@ -108,26 +79,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } -static size_t -pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, - size_t size) -{ - BUG_ON(pdu->size > P9_IOHDRSZ); - pdu->pubuf = (char __user *)udata; - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - -static size_t -pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) -{ - BUG_ON(pdu->size > P9_READDIRHDRSZ); - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - /* b - int8_t w - int16_t @@ -459,26 +410,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; - case 'E':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - const char __user *u = va_arg(ap, - const void __user *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_urw(pdu, k, u, cnt)) - errcode = -EFAULT; - } - break; - case 'F':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_readdir(pdu, k, cnt)) - errcode = -EFAULT; - } - break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -591,7 +522,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) { struct p9_fcall fake_pdu; int ret; @@ -601,10 +532,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); - P9_DUMP_PKT(0, &fake_pdu); + trace_9p_protocol_dump(clnt, &fake_pdu); } return ret; @@ -617,7 +548,7 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } -int p9pdu_finalize(struct p9_fcall *pdu) +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) { int size = pdu->size; int err; @@ -626,7 +557,7 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; - P9_DUMP_PKT(0, pdu); + trace_9p_protocol_dump(clnt, pdu); P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); @@ -637,14 +568,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; - pdu->private = NULL; - pdu->pubuf = NULL; - pdu->pkbuf = NULL; - pdu->pbuf_size = 0; } -int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, - int proto_version) +int p9dirent_read(struct p9_client *clnt, char *buf, int len, + struct p9_dirent *dirent) { struct p9_fcall fake_pdu; int ret; @@ -655,11 +582,11 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, - &dirent->d_off, &dirent->d_type, &nameptr); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); - P9_DUMP_PKT(1, &fake_pdu); + trace_9p_protocol_dump(clnt, &fake_pdu); goto out; } diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f38d56..2cc525fa49fa 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); -int p9pdu_finalize(struct p9_fcall *pdu); -void p9pdu_dump(int, struct p9_fcall *); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9a70ebdec56e..de8df957867d 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -21,30 +21,25 @@ /** * p9_release_req_pages - Release pages after the transaction. - * @*private: PDU's private page of struct trans_rpage_info */ -void -p9_release_req_pages(struct trans_rpage_info *rpinfo) +void p9_release_pages(struct page **pages, int nr_pages) { int i = 0; - - while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { - put_page(rpinfo->rp_data[i]); + while (pages[i] && nr_pages--) { + put_page(pages[i]); i++; } } -EXPORT_SYMBOL(p9_release_req_pages); +EXPORT_SYMBOL(p9_release_pages); /** * p9_nr_pages - Return number of pages needed to accommodate the payload. */ -int -p9_nr_pages(struct p9_req_t *req) +int p9_nr_pages(char *data, int len) { unsigned long start_page, end_page; - start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; - end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + - PAGE_SIZE - 1) >> PAGE_SHIFT; + start_page = (unsigned long)data >> PAGE_SHIFT; + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; return end_page - start_page; } EXPORT_SYMBOL(p9_nr_pages); @@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages); * @nr_pages: number of pages to accommodate the payload * @rw: Indicates if the pages are for read or write. */ -int -p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, - int nr_pages, u8 rw) -{ - uint32_t first_page_bytes = 0; - int32_t pdata_mapped_pages; - struct trans_rpage_info *rpinfo; - - *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); - if (*pdata_off) - first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), - req->tc->pbuf_size); +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ + int nr_mapped_pages; - rpinfo = req->tc->private; - pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, - nr_pages, rw, &rpinfo->rp_data[0]); - if (pdata_mapped_pages <= 0) - return pdata_mapped_pages; + nr_mapped_pages = get_user_pages_fast((unsigned long)data, + *nr_pages, write, pages); + if (nr_mapped_pages <= 0) + return nr_mapped_pages; - rpinfo->rp_nr_pages = pdata_mapped_pages; - if (*pdata_off) { - *pdata_len = first_page_bytes; - *pdata_len += min((req->tc->pbuf_size - *pdata_len), - ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); - } else { - *pdata_len = min(req->tc->pbuf_size, - (size_t)pdata_mapped_pages << PAGE_SHIFT); - } + *nr_pages = nr_mapped_pages; return 0; } EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 76309223bb02..173bb550a9eb 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h @@ -12,21 +12,6 @@ * */ -/* TRUE if it is user context */ -#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) - -/** - * struct trans_rpage_info - To store mapped page information in PDU. - * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. - * @rp_nr_pages: Number of mapped pages - * @rp_data: Array of page pointers - */ -struct trans_rpage_info { - u8 rp_alloc; - int rp_nr_pages; - struct page *rp_data[0]; -}; - -void p9_release_req_pages(struct trans_rpage_info *); -int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); -int p9_nr_pages(struct p9_req_t *); +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e317583fcc73..32aa9834229c 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq) while (1) { spin_lock_irqsave(&chan->lock, flags); rc = virtqueue_get_buf(chan->vq, &len); - if (rc == NULL) { spin_unlock_irqrestore(&chan->lock, flags); break; } - chan->ring_bufs_avail = 1; spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ @@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); - if (req->tc->private) { - struct trans_rpage_info *rp = req->tc->private; - int p = rp->rp_nr_pages; - /*Release pages */ - p9_release_req_pages(rp); - atomic_sub(p, &vp_pinned); - wake_up(&vp_wq); - if (rp->rp_alloc) - kfree(rp); - req->tc->private = NULL; - } req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } @@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq) * */ -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, - int count) +static int pack_sg_list(struct scatterlist *sg, int start, + int limit, char *data, int count) { int s; int index = start; @@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) * this takes a list of pages. * @sg: scatter/gather list to pack into * @start: which segment of the sg_list to start at - * @pdata_off: Offset into the first page * @**pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list * @count: amount of data to pack into the scatter/gather list */ static int -pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, - struct page **pdata, int count) +pack_sg_list_p(struct scatterlist *sg, int start, int limit, + struct page **pdata, int nr_pages, char *data, int count) { - int s; - int i = 0; + int i = 0, s; + int data_off; int index = start; - if (pdata_off) { - s = min((int)(PAGE_SIZE - pdata_off), count); - sg_set_page(&sg[index++], pdata[i++], s, pdata_off); - count -= s; - } - - while (count) { - BUG_ON(index > limit); - s = min((int)PAGE_SIZE, count); - sg_set_page(&sg[index++], pdata[i++], s, 0); + BUG_ON(nr_pages > (limit - start)); + /* + * if the first page doesn't start at + * page boundary find the offset + */ + data_off = offset_in_page(data); + while (nr_pages) { + s = rest_of_page(data); + if (s > count) + s = count; + sg_set_page(&sg[index++], pdata[i++], s, data_off); + data_off = 0; + data += s; count -= s; + nr_pages--; } - return index-start; + return index - start; } /** @@ -261,114 +252,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out, inp, outp; - struct virtio_chan *chan = client->trans; + int err; + int in, out; unsigned long flags; - size_t pdata_off = 0; - struct trans_rpage_info *rpinfo = NULL; - int err, pdata_len = 0; + struct virtio_chan *chan = client->trans; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req->status = REQ_STATUS_SENT; +req_retry: + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { - int nr_pages = p9_nr_pages(req); - int rpinfo_size = sizeof(struct trans_rpage_info) + - sizeof(struct page *) * nr_pages; + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_interruptible(vp_wq, - atomic_read(&vp_pinned) < chan->p9_max_pages); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; - P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); - } - if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { - /* We can use sdata */ - req->tc->private = req->tc->sdata + req->tc->size; - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 0; + P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + goto req_retry; } else { - req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); - if (!req->tc->private) { - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " - "private kmalloc returned NULL"); - return -ENOMEM; - } - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 1; + spin_unlock_irqrestore(&chan->lock, flags); + P9_DPRINTK(P9_DEBUG_TRANS, + "9p debug: " + "virtio rpc add_buf returned failure"); + return -EIO; } + } + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); - err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, - req->tc->id == P9_TREAD ? 1 : 0); - if (err < 0) { - if (rpinfo->rp_alloc) - kfree(rpinfo); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + return 0; +} + +static int p9_get_mapped_pages(struct virtio_chan *chan, + struct page **pages, char *data, + int nr_pages, int write, int kern_buf) +{ + int err; + if (!kern_buf) { + /* + * We allow only p9_max_pages pinned. We wait for the + * Other zc request to finish here + */ + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + (atomic_read(&vp_pinned) < chan->p9_max_pages)); + if (err == -ERESTARTSYS) + return err; + } + err = p9_payload_gup(data, &nr_pages, pages, write); + if (err < 0) return err; - } else { - atomic_add(rpinfo->rp_nr_pages, &vp_pinned); + atomic_add(nr_pages, &vp_pinned); + } else { + /* kernel buffer, no need to pin pages */ + int s, index = 0; + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); + pages[index++] = virt_to_page(data); + data += s; + nr_pages--; } + nr_pages = count; } + return nr_pages; +} -req_retry_pinned: - spin_lock_irqsave(&chan->lock, flags); +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + char *uidata, char *uodata, int inlen, + int outlen, int in_hdr_len, int kern_buf) +{ + int in, out, err; + unsigned long flags; + int in_nr_pages = 0, out_nr_pages = 0; + struct page **in_pages = NULL, **out_pages = NULL; + struct virtio_chan *chan = client->trans; - /* Handle out VirtIO ring buffers */ - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - - if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { - /* We have additional write payload buffer to take care */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, - req->tc->pbuf_size); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + + if (uodata) { + out_nr_pages = p9_nr_pages(uodata, outlen); + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, + GFP_NOFS); + if (!out_pages) { + err = -ENOMEM; + goto err_out; + } + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, + out_nr_pages, 0, kern_buf); + if (out_nr_pages < 0) { + err = out_nr_pages; + kfree(out_pages); + out_pages = NULL; + goto err_out; } - out += outp; } - - /* Handle in VirtIO ring buffers */ - if (req->tc->pbuf_size && - ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { - /* - * Take care of additional Read payload. - * 11 is the read/write header = PDU Header(7) + IO Size (4). - * Arrange in such a way that server places header in the - * alloced memory and payload onto the user buffer. - */ - inp = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc->sdata, 11); - /* - * Running executables in the filesystem may result in - * a read request with kernel buffer as opposed to user buffer. - */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - - in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, - pbuf, req->tc->pbuf_size); + if (uidata) { + in_nr_pages = p9_nr_pages(uidata, inlen); + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, + GFP_NOFS); + if (!in_pages) { + err = -ENOMEM; + goto err_out; + } + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, + in_nr_pages, 1, kern_buf); + if (in_nr_pages < 0) { + err = in_nr_pages; + kfree(in_pages); + in_pages = NULL; + goto err_out; } - in += inp; - } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, - req->rc->sdata, req->rc->capacity); } + req->status = REQ_STATUS_SENT; +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + /* out data */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + + if (out_pages) + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out_pages, out_nr_pages, uodata, outlen); + /* + * Take care of in data + * For example TREAD have 11. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + if (in_pages) + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in_pages, in_nr_pages, uidata, inlen); err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -376,28 +419,45 @@ req_retry_pinned: chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + chan->ring_bufs_avail); if (err == -ERESTARTSYS) - return err; + goto err_out; P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); goto req_retry_pinned; } else { spin_unlock_irqrestore(&chan->lock, flags); P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); - if (rpinfo && rpinfo->rp_alloc) - kfree(rpinfo); - return -EIO; + "9p debug: " + "virtio rpc add_buf returned failure"); + err = -EIO; + goto err_out; } } - virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); - return 0; + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + /* + * Non kernel buffers are pinned, unpin them + */ +err_out: + if (!kern_buf) { + if (in_pages) { + p9_release_pages(in_pages, in_nr_pages); + atomic_sub(in_nr_pages, &vp_pinned); + } + if (out_pages) { + p9_release_pages(out_pages, out_nr_pages); + atomic_sub(out_nr_pages, &vp_pinned); + } + /* wakeup anybody waiting for slots to pin pages */ + wake_up(&vp_wq); + } + kfree(in_pages); + kfree(out_pages); + return err; } static ssize_t p9_mount_tag_show(struct device *dev, @@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, - /* * We leave one entry for input and one entry for response * headers. We also skip one more entry to accomodate, address @@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = { * page in zero copy. */ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), - .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b1fe7c35e8d1..bfa9ab93eda5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -951,13 +951,12 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, /* checksum stuff in frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index cc53f78e448c..873fb3d8e56f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1049,7 +1049,6 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_response = (struct tt_query_packet *)skb_put(skb, tt_query_size + tt_len); tt_response->ttvn = ttvn; - tt_response->tt_data = htons(tt_tot); tt_change = (struct tt_change *)(skb->data + tt_query_size); tt_count = 0; @@ -1075,6 +1074,10 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, } rcu_read_unlock(); + /* store in the message the number of entries we have successfully + * copied */ + tt_response->tt_data = htons(tt_count); + out: return skb; } @@ -1722,6 +1725,8 @@ static void tt_local_reset_flags(struct bat_priv *bat_priv, uint16_t flags) rcu_read_lock(); hlist_for_each_entry_rcu(tt_local_entry, node, head, hash_entry) { + if (!(tt_local_entry->flags & flags)) + continue; tt_local_entry->flags &= ~flags; atomic_inc(&bat_priv->num_local_tt); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 61f1f623091d..e8292369cdcf 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -26,6 +26,8 @@ /* Bluetooth L2CAP sockets. */ +#include <linux/security.h> + #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> @@ -933,6 +935,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->force_reliable = pchan->force_reliable; chan->flushable = pchan->flushable; chan->force_active = pchan->force_active; + + security_sk_clone(parent, sk); } else { switch (sk->sk_type) { diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 482722bbc7a0..5417f6127323 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -42,6 +42,7 @@ #include <linux/device.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/security.h> #include <net/sock.h> #include <asm/system.h> @@ -264,6 +265,8 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent) pi->sec_level = rfcomm_pi(parent)->sec_level; pi->role_switch = rfcomm_pi(parent)->role_switch; + + security_sk_clone(parent, sk); } else { pi->dlc->defer_setup = 0; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 8270f05e3f1f..a324b009e34b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -41,6 +41,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/list.h> +#include <linux/security.h> #include <net/sock.h> #include <asm/system.h> @@ -403,8 +404,10 @@ static void sco_sock_init(struct sock *sk, struct sock *parent) { BT_DBG("sk %p", sk); - if (parent) + if (parent) { sk->sk_type = parent->sk_type; + security_sk_clone(parent, sk); + } } static struct proto sco_proto = { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index c3b77dceb937..f603e5b0b930 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -160,9 +160,10 @@ static void del_nbp(struct net_bridge_port *p) call_rcu(&p->rcu, destroy_nbp_rcu); } -/* called with RTNL */ -static void del_br(struct net_bridge *br, struct list_head *head) +/* Delete bridge device */ +void br_dev_delete(struct net_device *dev, struct list_head *head) { + struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; list_for_each_entry_safe(p, n, &br->port_list, list) { @@ -267,7 +268,7 @@ int br_del_bridge(struct net *net, const char *name) } else - del_br(netdev_priv(dev), NULL); + br_dev_delete(dev, NULL); rtnl_unlock(); return ret; @@ -446,7 +447,7 @@ void __net_exit br_net_exit(struct net *net) rtnl_lock(); for_each_netdev(net, dev) if (dev->priv_flags & IFF_EBRIDGE) - del_br(netdev_priv(dev), &list); + br_dev_delete(dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5b1ed1ba9aa7..e5f9ece3c9a0 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -210,6 +210,7 @@ static struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .dellink = br_dev_delete, }; int __init br_netlink_init(void) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a248fe65b29a..d7d6fb05411f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -301,6 +301,7 @@ static inline int br_is_root_bridge(const struct net_bridge *br) /* br_device.c */ extern void br_dev_setup(struct net_device *dev); +extern void br_dev_delete(struct net_device *dev, struct list_head *list); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7f9ac0742d19..47fc8f3a47cf 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -212,8 +212,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, enum cfcnfg_phy_preference pref; enum cfcnfg_phy_type phy_type; struct cfcnfg *cfg; - struct caif_device_entry_list *caifdevs = - caif_device_list(dev_net(dev)); + struct caif_device_entry_list *caifdevs; if (dev->type != ARPHRD_CAIF) return 0; @@ -222,6 +221,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, if (cfg == NULL) return 0; + caifdevs = caif_device_list(dev_net(dev)); + switch (what) { case NETDEV_REGISTER: caifd = caif_device_alloc(dev); diff --git a/net/can/af_can.c b/net/can/af_can.c index d1ff5152c657..0ce2ad0696da 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -38,8 +38,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <[email protected]> - * */ #include <linux/module.h> diff --git a/net/can/af_can.h b/net/can/af_can.h index 34253b84e30f..fd882dbadad3 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -35,8 +35,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <[email protected]> - * */ #ifndef AF_CAN_H diff --git a/net/can/bcm.c b/net/can/bcm.c index c84963d2dee6..151b7730c12c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <[email protected]> - * */ #include <linux/module.h> diff --git a/net/can/gw.c b/net/can/gw.c index ac11407d3b54..3d79b127881e 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <[email protected]> - * */ #include <linux/module.h> diff --git a/net/can/proc.c b/net/can/proc.c index 0016f7339699..ba873c36d2fd 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <[email protected]> - * */ #include <linux/module.h> diff --git a/net/can/raw.c b/net/can/raw.c index dea99a6e596c..cde1b4a20f75 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <[email protected]> - * */ #include <linux/module.h> diff --git a/net/core/datagram.c b/net/core/datagram.c index 6449bed457d4..68bbf9f65cb0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -324,14 +324,14 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) @@ -410,14 +410,14 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) @@ -500,14 +500,14 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) @@ -585,15 +585,15 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { __wsum csum2; int err = 0; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) diff --git a/net/core/dev.c b/net/core/dev.c index 70ecb86439ca..edcf019c056d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -136,6 +136,7 @@ #include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> +#include <linux/net_tstamp.h> #include "net-sysfs.h" @@ -1477,6 +1478,57 @@ static inline void net_timestamp_check(struct sk_buff *skb) __net_timestamp(skb); } +static int net_hwtstamp_validate(struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return -EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; +} + static inline bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { @@ -3231,6 +3283,17 @@ another_round: ncls: #endif + if (vlan_tx_tag_present(skb)) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) + goto out; + } + rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { @@ -3251,17 +3314,6 @@ ncls: } } - if (vlan_tx_tag_present(skb)) { - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; - } - if (vlan_do_receive(&skb)) - goto another_round; - else if (unlikely(!skb)) - goto out; - } - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -3489,9 +3541,9 @@ pull: skb->data_len -= grow; skb_shinfo(skb)->frags[0].page_offset += grow; - skb_shinfo(skb)->frags[0].size -= grow; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - if (unlikely(!skb_shinfo(skb)->frags[0].size)) { + if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, @@ -3559,7 +3611,7 @@ void skb_gro_reset_offset(struct sk_buff *skb) !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(&skb_shinfo(skb)->frags[0]); - NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); } } EXPORT_SYMBOL(skb_gro_reset_offset); @@ -4041,6 +4093,60 @@ static int dev_ifconf(struct net *net, char __user *arg) } #ifdef CONFIG_PROC_FS + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS) + +struct dev_iter_state { + struct seq_net_private p; + unsigned int pos; /* bucket << BUCKET_SPACE + offset */ +}; + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count, bucket, offset; + + bucket = get_bucket(state->pos); + offset = get_offset(state->pos); + h = &net->dev_name_head[bucket]; + count = 0; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (count++ == offset) { + state->pos = set_bucket_offset(bucket, count); + return dev; + } + } + + return NULL; +} + +static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net_device *dev; + unsigned int bucket; + + bucket = get_bucket(state->pos); + do { + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + bucket++; + state->pos = set_bucket_offset(bucket, 0); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + /* * This is invoked by the /proc filesystem handler to display a device * in detail. @@ -4048,33 +4154,33 @@ static int dev_ifconf(struct net *net, char __user *arg) void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; + struct dev_iter_state *state = seq->private; rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; - off = 1; - for_each_netdev_rcu(net, dev) - if (off++ == *pos) - return dev; + /* check for end of the hash */ + if (state->pos == 0 && *pos > 1) + return NULL; - return NULL; + return dev_from_new_bucket(seq); } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; + struct net_device *dev; + + ++*pos; if (v == SEQ_START_TOKEN) - dev = first_net_device_rcu(seq_file_net(seq)); - else - dev = next_net_device_rcu(dev); + return dev_from_new_bucket(seq); - ++*pos; - return dev; + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + return dev_from_new_bucket(seq); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4173,7 +4279,7 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); + sizeof(struct dev_iter_state)); } static const struct file_operations dev_seq_fops = { @@ -4921,6 +5027,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + /* * Unknown or private ioctl */ @@ -5235,7 +5347,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - rcu_barrier(); + synchronize_net(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); @@ -5748,6 +5860,12 @@ void netdev_run_todo(void) __rtnl_unlock(); + /* Wait for rcu callbacks to finish before attempting to drain + * the device list. This usually avoids a 250ms wait. + */ + if (!list_empty(&list)) + rcu_barrier(); + while (!list_empty(&list)) { struct net_device *dev = list_first_entry(&list, struct net_device, todo_list); @@ -6148,6 +6266,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains @@ -6331,7 +6450,7 @@ const char *netdev_drivername(const struct net_device *dev) return empty; } -static int __netdev_printk(const char *level, const struct net_device *dev, +int __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf) { int r; @@ -6346,6 +6465,7 @@ static int __netdev_printk(const char *level, const struct net_device *dev, return r; } +EXPORT_SYMBOL(__netdev_printk); int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 38be4744133f..57e8f95110e6 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -475,8 +475,11 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_del_rcu(&rule->list); - if (rule->action == FR_ACT_GOTO) + if (rule->action == FR_ACT_GOTO) { ops->nr_goto_rules--; + if (rtnl_dereference(rule->ctarget) == NULL) + ops->unresolved_rules--; + } /* * Check if this rule is a target to any of them. If so, diff --git a/net/core/filter.c b/net/core/filter.c index 8fcc2d776e09..5dea45279215 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -436,7 +436,7 @@ error: * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, int flen) +int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { /* * Valid instructions are initialized to non-0. diff --git a/net/core/flow.c b/net/core/flow.c index 555a456efb07..8ae42de9c79e 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -413,7 +413,7 @@ static int __init flow_cache_init(struct flow_cache *fc) for_each_online_cpu(i) { if (flow_cache_cpu_prepare(fc, i)) - return -ENOMEM; + goto err; } fc->hotcpu_notifier = (struct notifier_block){ .notifier_call = flow_cache_cpu, @@ -426,6 +426,18 @@ static int __init flow_cache_init(struct flow_cache *fc) add_timer(&fc->rnd_timer); return 0; + +err: + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + free_percpu(fc->percpu); + fc->percpu = NULL; + + return -ENOMEM; } static int __init flow_cache_init_global(void) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 43449649cf73..909ecb3c2a33 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1168,10 +1168,14 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, struct dst_entry *dst = skb_dst(skb); struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); + + rcu_read_lock(); /* On shaper/eql skb->dst->neighbour != neigh :( */ if (dst && (n2 = dst_get_neighbour(dst)) != NULL) n1 = n2; n1->output(n1, skb); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 796044ac0bf3..0001c243b35c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2145,9 +2145,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) } start_time = ktime_now(); - if (remaining < 100000) - ndelay(remaining); /* really small just spin */ - else { + if (remaining < 100000) { + /* for small delays (<100us), just loop until limit is reached */ + do { + end_time = ktime_now(); + } while (ktime_lt(end_time, spin_until)); + } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); do { @@ -2162,8 +2165,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); + end_time = ktime_now(); } - end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); @@ -2602,17 +2605,18 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (!pkt_dev->page) break; } + get_page(pkt_dev->page); skb_frag_set_page(skb, i, pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + skb_frag_size_set(&skb_shinfo(skb)->frags[i], + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE)); else - skb_shinfo(skb)->frags[i].size = frag_len; - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; + skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); + skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); + skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); i++; skb_shinfo(skb)->nr_frags = i; } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 45329d7c9dd9..025233de25f9 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq) } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, +__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) { u32 secret[MD5_MESSAGE_BYTES / 4]; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a7f855dca922..ca4db40e75b8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -485,6 +485,30 @@ void consume_skb(struct sk_buff *skb) EXPORT_SYMBOL(consume_skb); /** + * skb_recycle - clean up an skb for reuse + * @skb: buffer + * + * Recycles the skb to be reused as a receive buffer. This + * function does any necessary reference count dropping, and + * cleans up the skbuff as if it just came from __alloc_skb(). + */ +void skb_recycle(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + + skb_release_head_state(skb); + + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = skb->head + NET_SKB_PAD; + skb_reset_tail_pointer(skb); +} +EXPORT_SYMBOL(skb_recycle); + +/** * skb_recycle_check - check if skb can be reused for receive * @skb: buffer * @skb_size: minimum receive buffer size @@ -498,33 +522,10 @@ EXPORT_SYMBOL(consume_skb); */ bool skb_recycle_check(struct sk_buff *skb, int skb_size) { - struct skb_shared_info *shinfo; - - if (irqs_disabled()) + if (!skb_is_recycleable(skb, skb_size)) return false; - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) - return false; - - if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return false; - - skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); - if (skb_end_pointer(skb) - skb->head < skb_size) - return false; - - if (skb_shared(skb) || skb_cloned(skb)) - return false; - - skb_release_head_state(skb); - - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); + skb_recycle(skb); return true; } @@ -659,7 +660,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); memcpy(page_address(page), - vaddr + f->page_offset, f->size); + vaddr + f->page_offset, skb_frag_size(f)); kunmap_skb_frag(vaddr); page->private = (unsigned long)head; head = page; @@ -667,14 +668,14 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) /* skb frags release userspace buffers */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); uarg->callback(uarg); /* skb frags point to kernel buffers */ for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) { - skb_shinfo(skb)->frags[i - 1].page_offset = 0; - skb_shinfo(skb)->frags[i - 1].page = head; + __skb_fill_page_desc(skb, i-1, head, 0, + skb_shinfo(skb)->frags[i - 1].size); head = (struct page *)head->private; } @@ -1190,14 +1191,14 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) goto drop_pages; for (; i < nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; + int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); if (end < len) { offset = end; continue; } - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); drop_pages: skb_shinfo(skb)->nr_frags = i; @@ -1306,9 +1307,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Estimate size of pulled pages. */ eat = delta; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size >= eat) goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } /* If we need update frag list, we are in troubles. @@ -1371,14 +1374,16 @@ pull_pages: eat = delta; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size <= eat) { skb_frag_unref(skb, i); - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; + skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); eat = 0; } k++; @@ -1433,7 +1438,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { u8 *vaddr; @@ -1632,7 +1637,7 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), - f->page_offset, f->size, + f->page_offset, skb_frag_size(f), offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -1742,7 +1747,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) WARN_ON(start > offset + len); - end = start + frag->size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; @@ -1815,7 +1820,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; @@ -1890,7 +1895,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; @@ -2163,7 +2168,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { - int size = skb_shinfo(skb)->frags[i].size; + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -2179,8 +2184,8 @@ static inline void skb_split_no_header(struct sk_buff *skb, */ skb_frag_ref(skb, i); skb_shinfo(skb1)->frags[0].page_offset += len - pos; - skb_shinfo(skb1)->frags[0].size -= len - pos; - skb_shinfo(skb)->frags[i].size = len - pos; + skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); + skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; @@ -2258,7 +2263,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) } else { merge = to - 1; - todo -= fragfrom->size; + todo -= skb_frag_size(fragfrom); if (todo < 0) { if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) @@ -2268,8 +2273,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; - fragto->size += shiftlen; - fragfrom->size -= shiftlen; + skb_frag_size_add(fragto, shiftlen); + skb_frag_size_sub(fragfrom, shiftlen); fragfrom->page_offset += shiftlen; goto onlymerged; @@ -2293,9 +2298,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[to]; - if (todo >= fragfrom->size) { + if (todo >= skb_frag_size(fragfrom)) { *fragto = *fragfrom; - todo -= fragfrom->size; + todo -= skb_frag_size(fragfrom); from++; to++; @@ -2303,10 +2308,10 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) __skb_frag_ref(fragfrom); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; - fragto->size = todo; + skb_frag_size_set(fragto, todo); fragfrom->page_offset += todo; - fragfrom->size -= todo; + skb_frag_size_sub(fragfrom, todo); todo = 0; to++; @@ -2321,7 +2326,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[0]; fragto = &skb_shinfo(tgt)->frags[merge]; - fragto->size += fragfrom->size; + skb_frag_size_add(fragto, skb_frag_size(fragfrom)); __skb_frag_unref(fragfrom); } @@ -2419,7 +2424,7 @@ next_skb: while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = frag->size + st->stepped_offset; + block_limit = skb_frag_size(frag) + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) @@ -2437,7 +2442,7 @@ next_skb: } st->frag_idx++; - st->stepped_offset += frag->size; + st->stepped_offset += skb_frag_size(frag); } if (st->frag_data) { @@ -2567,13 +2572,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, left = PAGE_SIZE - frag->page_offset; copy = (length > left)? left : length; - ret = getfrag(from, skb_frag_address(frag) + frag->size, + ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; offset += copy; @@ -2720,11 +2725,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); - size = frag->size; + size = skb_frag_size(frag); if (pos < offset) { frag->page_offset += offset - pos; - frag->size -= offset - pos; + skb_frag_size_sub(frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; @@ -2733,7 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) i++; pos += size; } else { - frag->size -= pos + size - (offset + len); + skb_frag_size_sub(frag, pos + size - (offset + len)); goto skip_fraglist; } @@ -2813,7 +2818,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } while (--i); frag->page_offset += offset; - frag->size -= offset; + skb_frag_size_sub(frag, offset); skb->truesize -= skb->data_len; skb->len -= skb->data_len; @@ -2865,7 +2870,7 @@ merge: unsigned int eat = offset - headlen; skbinfo->frags[0].page_offset += eat; - skbinfo->frags[0].size -= eat; + skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; offset = headlen; @@ -2936,7 +2941,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; diff --git a/net/core/sock.c b/net/core/sock.c index 5a087626bb3a..4ed7b1d12f5e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1257,6 +1257,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); newsk = NULL; goto out; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 98a52640e7cd..82fb28857b64 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) case PTP_CLASS_V2_VLAN: phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) + if (!clone) { + sock_put(sk); return; + } clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } @@ -77,8 +81,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock_exterr_skb *serr; int err; - if (!hwtstamps) + if (!hwtstamps) { + sock_put(sk); + kfree_skb(skb); return; + } *skb_hwtstamps(skb) = *hwtstamps; serr = SKB_EXT_ERR(skb); @@ -87,6 +94,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; skb->sk = NULL; err = sock_queue_err_skb(sk, skb); + sock_put(sk); if (err) kfree_skb(skb); } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 34e9664cae3b..2d7cf3d52b4c 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -71,13 +71,13 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); copy = end - offset; if (copy > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dd2b9478ddd1..1b5096a9875a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -893,7 +893,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT -int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = -ENOIOCTLCMD; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 8e6be5aad115..cc280a3f4f96 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -244,11 +244,11 @@ static void lro_add_frags(struct net_lro_desc *lro_desc, skb->truesize += truesize; skb_frags[0].page_offset += hlen; - skb_frags[0].size -= hlen; + skb_frag_size_sub(&skb_frags[0], hlen); while (tcp_data_len > 0) { *(lro_desc->next_frag) = *skb_frags; - tcp_data_len -= skb_frags->size; + tcp_data_len -= skb_frag_size(skb_frags); lro_desc->next_frag++; skb_frags++; skb_shinfo(skb)->nr_frags++; @@ -400,14 +400,14 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, skb_frags = skb_shinfo(skb)->frags; while (data_len > 0) { *skb_frags = *frags; - data_len -= frags->size; + data_len -= skb_frag_size(frags); skb_frags++; frags++; skb_shinfo(skb)->nr_frags++; } skb_shinfo(skb)->frags[0].page_offset += hdr_len; - skb_shinfo(skb)->frags[0].size -= hdr_len; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len); skb->ip_summed = ip_summed; skb->csum = sum; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 3c8dfa16614d..44d65d546e30 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; + tw->tw_tos = inet->tos; tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0e0ab98abc6f..fdaabf2f2b68 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -599,8 +599,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; @@ -682,6 +682,42 @@ int ip_defrag(struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_defrag); +struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) +{ + const struct iphdr *iph; + u32 len; + + if (skb->protocol != htons(ETH_P_IP)) + return skb; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return skb; + if (!pskb_may_pull(skb, iph->ihl*4)) + return skb; + iph = ip_hdr(skb); + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + return skb; + + if (ip_is_fragment(ip_hdr(skb))) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb) { + if (pskb_trim_rcsum(skb, len)) + return skb; + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (ip_defrag(skb, user)) + return NULL; + skb->rxhash = 0; + } + } + return skb; +} +EXPORT_SYMBOL(ip_check_defrag); + #ifdef CONFIG_SYSCTL static int zero; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7bb94c48345..d55110e93120 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -835,8 +835,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ae3bb147affd..0bc95f3977d2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1015,13 +1015,13 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, skb_frag_address(frag)+frag->size, + if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } cork->off += copy; - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1230,7 +1230,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (len > size) len = size; if (skb_can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += len; + skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len); } else if (i < MAX_SKB_FRAGS) { get_page(page); skb_fill_page_desc(skb, i, page, offset, len); @@ -1466,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, * structure to pass arguments. */ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - struct ip_reply_arg *arg, unsigned int len) + const struct ip_reply_arg *arg, unsigned int len) { struct inet_sock *inet = inet_sk(sk); struct ip_options_data replyopts; @@ -1489,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, } flowi4_init_output(&fl4, arg->bound_dev_if, 0, - RT_TOS(ip_hdr(skb)->tos), + RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, @@ -1506,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, with locally disabled BH and that sk cannot be already spinlocked. */ bh_lock_sock(sk); - inet->tos = ip_hdr(skb)->tos; + inet->tos = arg->tos; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8905e92f896a..09ff51bf16a4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -33,6 +33,7 @@ #include <linux/netfilter.h> #include <linux/route.h> #include <linux/mroute.h> +#include <net/inet_ecn.h> #include <net/route.h> #include <net/xfrm.h> #include <net/compat.h> @@ -578,8 +579,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { - val &= ~3; - val |= inet->tos & 3; + val &= ~INET_ECN_MASK; + val |= inet->tos & INET_ECN_MASK; } if (inet->tos != val) { inet->tos = val; @@ -961,7 +962,7 @@ mc_msf_out: break; case IP_TRANSPARENT: - if (!capable(CAP_NET_ADMIN)) { + if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) { err = -EPERM; break; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 26c77e14395f..155138d8ec8b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1308,7 +1308,12 @@ static void rt_del(unsigned hash, struct rtable *rt) void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { + int s, i; struct in_device *in_dev = __in_dev_get_rcu(dev); + struct rtable *rt; + __be32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; + struct flowi4 fl4; struct inet_peer *peer; struct net *net; @@ -1331,13 +1336,34 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - peer = inet_getpeer_v4(daddr, 1); - if (peer) { - peer->redirect_learned.a4 = new_gw; + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + for (s = 0; s < 2; s++) { + for (i = 0; i < 2; i++) { + fl4.flowi4_oif = ikeys[i]; + fl4.saddr = skeys[s]; + rt = __ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + continue; - inet_putpeer(peer); + if (rt->dst.error || rt->dst.dev != dev || + rt->rt_gateway != old_gw) { + ip_rt_put(rt); + continue; + } - atomic_inc(&__rt_peer_genid); + if (!rt->peer) + rt_bind_peer(rt, rt->rt_dst, 1); + + peer = rt->peer; + if (peer) { + peer->redirect_learned.a4 = new_gw; + atomic_inc(&__rt_peer_genid); + } + + ip_rt_put(rt); + return; + } } return; @@ -1567,11 +1593,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, est_mtu = mtu; peer->pmtu_learned = mtu; peer->pmtu_expires = pmtu_expires; + atomic_inc(&__rt_peer_genid); } inet_putpeer(peer); - - atomic_inc(&__rt_peer_genid); } return est_mtu ? : new_mtu; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7c71b..d7b89b12f6d8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -265,7 +265,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - u8 *hash_location; + const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4c0da24fb649..34f5db1e1c8b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -374,7 +374,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) @@ -528,7 +528,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) tp->pushed_seq = tp->write_seq; } -static inline int forced_push(struct tcp_sock *tp) +static inline int forced_push(const struct tcp_sock *tp) { return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } @@ -813,7 +813,7 @@ new_segment: goto wait_for_memory; if (can_coalesce) { - skb_shinfo(skb)->frags[i - 1].size += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); @@ -891,9 +891,9 @@ EXPORT_SYMBOL(tcp_sendpage); #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk, int sg) +static inline int select_size(const struct sock *sk, int sg) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { @@ -1058,8 +1058,7 @@ new_segment: /* Update the skb. */ if (merge) { - skb_shinfo(skb)->frags[i - 1].size += - copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); if (TCP_PAGE(sk)) { @@ -1194,13 +1193,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct tcp_sock *tp = tcp_sk(sk); int time_to_ack = 0; -#if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); -#endif if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2409,7 +2406,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, @@ -2431,9 +2428,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(struct sock *sk, struct tcp_info *info) +void tcp_get_info(const struct sock *sk, struct tcp_info *info) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; @@ -2995,23 +2992,25 @@ void tcp_put_md5sig_pool(void) EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - struct tcphdr *th) + const struct tcphdr *th) { struct scatterlist sg; + struct tcphdr hdr; int err; - __sum16 old_checksum = th->check; - th->check = 0; + /* We are not allowed to change tcphdr, make a local copy */ + memcpy(&hdr, th, sizeof(hdr)); + hdr.check = 0; + /* options aren't included in the hash */ - sg_init_one(&sg, th, sizeof(struct tcphdr)); - err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); - th->check = old_checksum; + sg_init_one(&sg, &hdr, sizeof(hdr)); + err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); return err; } EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - struct sk_buff *skb, unsigned header_len) + const struct sk_buff *skb, unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); @@ -3031,8 +3030,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; struct page *page = skb_frag_page(f); - sg_set_page(&sg, page, f->size, f->page_offset); - if (crypto_hash_update(desc, &sg, f->size)) + sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); + if (crypto_hash_update(desc, &sg, skb_frag_size(f))) return 1; } @@ -3044,7 +3043,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { struct scatterlist sg; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c1653fe47255..52b5c2d0ecd0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } -static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) { if (tcp_hdr(skb)->cwr) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; @@ -239,19 +239,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s } } -static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } -static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } -static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) { if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) return 1; @@ -267,11 +267,9 @@ static void tcp_fixup_sndbuf(struct sock *sk) { int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); - if (sk->sk_sndbuf < 3 * sndmem) { - sk->sk_sndbuf = 3 * sndmem; - if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) - sk->sk_sndbuf = sysctl_tcp_wmem[2]; - } + sndmem *= TCP_INIT_CWND; + if (sk->sk_sndbuf < sndmem) + sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -317,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) return 0; } -static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -347,17 +345,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) static void tcp_fixup_rcvbuf(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); + u32 mss = tcp_sk(sk)->advmss; + u32 icwnd = TCP_DEFAULT_INIT_RCVWND; + int rcvmem; - /* Try to select rcvbuf so that 4 mss-sized segments - * will fit to window and corresponding skbs will fit to our rcvbuf. - * (was 3; 4 is minimum to allow fast retransmit to work.) + /* Limit to 10 segments if mss <= 1460, + * or 14600/mss segments, with a minimum of two segments. */ - while (tcp_win_from_space(rcvmem) < tp->advmss) + if (mss > 1460) + icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); + + rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); + while (tcp_win_from_space(rcvmem) < mss) rcvmem += 128; - if (sk->sk_rcvbuf < 4 * rcvmem) - sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); + + rcvmem *= icwnd; + + if (sk->sk_rcvbuf < rcvmem) + sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. It is made immediately after connection enters @@ -424,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk) */ void tcp_initialize_rcv_mss(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); @@ -819,7 +824,7 @@ void tcp_update_metrics(struct sock *sk) } } -__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) +__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -1211,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) tp->lost_retrans_low = new_low_seq; } -static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, +static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) { @@ -1305,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, +static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, struct tcp_sacktag_state *state, int dup_sack, int pcount) { @@ -1460,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, /* I wish gso_size would have a bit more sane initialization than * something-or-zero which complicates things */ -static int tcp_skb_seglen(struct sk_buff *skb) +static int tcp_skb_seglen(const struct sk_buff *skb) { return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); } /* Shifting pages past head area doesn't work */ -static int skb_can_shift(struct sk_buff *skb) +static int skb_can_shift(const struct sk_buff *skb) { return !skb_headlen(skb) && skb_is_nonlinear(skb); } @@ -1715,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, return skb; } -static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) +static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache) { return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); } static int -tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, +tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - unsigned char *ptr = (skb_transport_header(ack_skb) + - TCP_SKB_CB(ack_skb)->sacked); + const unsigned char *ptr = (skb_transport_header(ack_skb) + + TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; @@ -2291,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) return 0; } -static inline int tcp_fackets_out(struct tcp_sock *tp) +static inline int tcp_fackets_out(const struct tcp_sock *tp) { return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; } @@ -2311,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) * they differ. Since neither occurs due to loss, TCP should really * ignore them. */ -static inline int tcp_dupack_heuristics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) { return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } -static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) +static inline int tcp_skb_timedout(const struct sock *sk, + const struct sk_buff *skb) { return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; } -static inline int tcp_head_timedout(struct sock *sk) +static inline int tcp_head_timedout(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); return tp->packets_out && tcp_skb_timedout(sk, tcp_write_queue_head(sk)); @@ -2634,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ -static inline int tcp_packet_delayed(struct tcp_sock *tp) +static inline int tcp_packet_delayed(const struct tcp_sock *tp) { return !tp->retrans_stamp || (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2695,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_may_undo(struct tcp_sock *tp) +static inline int tcp_may_undo(const struct tcp_sock *tp) { return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } @@ -2759,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk) * that successive retransmissions of a segment must not advance * retrans_stamp under any conditions. */ -static int tcp_any_retrans_done(struct sock *sk) +static int tcp_any_retrans_done(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; if (tp->retrans_out) @@ -3240,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ static void tcp_rearm_rto(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -3492,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 * and in FreeBSD. NetBSD's one is even worse.) is wrong. */ -static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, +static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack, u32 ack_seq) { struct tcp_sock *tp = tcp_sk(sk); @@ -3668,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag) } /* This routine deals with incoming acks, but not outgoing ones. */ -static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) +static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3805,14 +3811,14 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - u8 **hvpp, int estab) +void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, + const u8 **hvpp, int estab) { - unsigned char *ptr; - struct tcphdr *th = tcp_hdr(skb); + const unsigned char *ptr; + const struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); - ptr = (unsigned char *)(th + 1); + ptr = (const unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; while (length > 0) { @@ -3923,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } EXPORT_SYMBOL(tcp_parse_options); -static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) { - __be32 *ptr = (__be32 *)(th + 1); + const __be32 *ptr = (const __be32 *)(th + 1); if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { @@ -3942,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ -static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp, u8 **hvpp) +static int tcp_fast_parse_options(const struct sk_buff *skb, + const struct tcphdr *th, + struct tcp_sock *tp, const u8 **hvpp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3964,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, /* * Parse MD5 Signature option */ -u8 *tcp_parse_md5sig_option(struct tcphdr *th) +const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) { - int length = (th->doff << 2) - sizeof (*th); - u8 *ptr = (u8*)(th + 1); + int length = (th->doff << 2) - sizeof(*th); + const u8 *ptr = (const u8 *)(th + 1); /* If the TCP option is too short, we can short cut */ if (length < TCPOLEN_MD5SIG) @@ -4044,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th = tcp_hdr(skb); + const struct tcp_sock *tp = tcp_sk(sk); + const struct tcphdr *th = tcp_hdr(skb); u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -4084,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk, * (borrowed from freebsd) */ -static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq) +static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) { return !before(end_seq, tp->rcv_wup) && !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); @@ -4129,7 +4136,7 @@ static void tcp_reset(struct sock *sk) * * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. */ -static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) +static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -4241,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } -static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) +static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4400,7 +4407,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tcp_hdr(skb)->fin) - tcp_fin(skb, sk, tcp_hdr(skb)); + tcp_fin(sk); } } @@ -4428,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); int eaten = -1; @@ -4482,7 +4489,7 @@ queue_and_out: if (skb->len) tcp_event_data_recv(sk, skb); if (th->fin) - tcp_fin(skb, sk, th); + tcp_fin(sk); if (!skb_queue_empty(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); @@ -4912,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static int tcp_should_expand_sndbuf(struct sock *sk) +static int tcp_should_expand_sndbuf(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* If the user specified a specific send buffer setting, do * not modify it. @@ -5023,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk) * either form (or just set the sysctl tcp_stdurg). */ -static void tcp_check_urg(struct sock *sk, struct tcphdr *th) +static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); @@ -5089,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th) } /* This is the 'fast' part of urgent handling. */ -static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) +static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); @@ -5210,9 +5217,9 @@ out: * play significant role here. */ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, int syn_inerr) + const struct tcphdr *th, int syn_inerr) { - u8 *hash_location; + const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ @@ -5293,7 +5300,7 @@ discard: * tcp_data_queue when everything is OK. */ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); int res; @@ -5504,9 +5511,9 @@ discard: EXPORT_SYMBOL(tcp_rcv_established); static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { - u8 *hash_location; + const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; @@ -5781,7 +5788,7 @@ reset_and_undo: */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 48da7cc41e23..0ea10eefa60f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency); static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr); static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, - __be32 daddr, __be32 saddr, struct tcphdr *th); + __be32 daddr, __be32 saddr, const struct tcphdr *th); #else static inline struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) @@ -104,7 +104,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) +static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -552,7 +552,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb, /* This routine computes an IPv4 TCP checksum. */ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } @@ -590,7 +590,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; #ifdef CONFIG_TCP_MD5SIG @@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; net = dev_net(skb_dst(skb)->dev); + arg.tos = ip_hdr(skb)->tos; ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, &arg, arg.iov[0].iov_len); @@ -666,9 +667,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif, struct tcp_md5sig_key *key, - int reply_flags) + int reply_flags, u8 tos) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) @@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, arg.csumoffset = offsetof(struct tcphdr, check) / 2; if (oif) arg.bound_dev_if = oif; - + arg.tos = tos; ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, &arg, arg.iov[0].iov_len); @@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), - tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, + tw->tw_tos ); inet_twsk_put(tw); @@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, req->ts_recent, 0, tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), - inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, + ip_hdr(skb)->tos); } /* @@ -1090,7 +1093,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, } static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, - __be32 daddr, __be32 saddr, struct tcphdr *th) + __be32 daddr, __be32 saddr, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; struct hash_desc *desc; @@ -1122,12 +1125,12 @@ clear_hash_noput: } int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, struct request_sock *req, - struct sk_buff *skb) + const struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb) { struct tcp_md5sig_pool *hp; struct hash_desc *desc; - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; if (sk) { @@ -1172,7 +1175,7 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) +static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1182,10 +1185,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. */ - __u8 *hash_location = NULL; + const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); int genhash; unsigned char newhash[16]; @@ -1248,7 +1251,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1645,7 +1648,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); int tcp_v4_rcv(struct sk_buff *skb) { const struct iphdr *iph; - struct tcphdr *th; + const struct tcphdr *th; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1809,7 +1812,7 @@ EXPORT_SYMBOL(tcp_v4_get_peer); void *tcp_v4_tw_get_peer(struct sock *sk) { - struct inet_timewait_sock *tw = inet_twsk(sk); + const struct inet_timewait_sock *tw = inet_twsk(sk); return inet_getpeer_v4(tw->tw_daddr, 1); } @@ -2381,7 +2384,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) } EXPORT_SYMBOL(tcp_proc_unregister); -static void get_openreq4(struct sock *sk, struct request_sock *req, +static void get_openreq4(const struct sock *sk, const struct request_sock *req, struct seq_file *f, int i, int uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -2411,9 +2414,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) { int timer_active; unsigned long timer_expires; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); @@ -2462,7 +2465,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) len); } -static void get_timewait4_sock(struct inet_timewait_sock *tw, +static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i, int *len) { __be32 dest, src; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d2fe4e06b472..85a2fbebd7ef 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -141,7 +141,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; @@ -328,6 +328,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + tw->tw_transparent = inet_sk(sk)->transparent; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; @@ -566,7 +567,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock **prev) { struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dde6b5768316..980b98f6288c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); /* Account for new data that has been sent to the network. */ -static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) +static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; @@ -89,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) * Anything in between SND.UNA...SND.UNA+SND.WND also can be already * invalid. OK, let's make this for now: */ -static inline __u32 tcp_acceptable_seq(struct sock *sk) +static inline __u32 tcp_acceptable_seq(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!before(tcp_wnd_end(tp), tp->snd_nxt)) return tp->snd_nxt; @@ -116,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk) static __u16 tcp_advertise_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; if (dst) { @@ -133,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk) /* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. */ -static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) +static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) { struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; @@ -154,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) /* Congestion state accounting after a packet has been sent. */ static void tcp_event_data_sent(struct tcp_sock *tp, - struct sk_buff *skb, struct sock *sk) + struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; @@ -295,7 +295,7 @@ static u16 tcp_select_window(struct sock *sk) } /* Packet ECN state for a SYN-ACK */ -static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) @@ -315,7 +315,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) } static __inline__ void -TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) +TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) { if (inet_rsk(req)->ecn_ok) th->ece = 1; @@ -565,7 +565,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, */ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { + struct tcp_md5sig_key **md5) +{ struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; unsigned remaining = MAX_TCP_OPTION_SPACE; @@ -743,7 +744,8 @@ static unsigned tcp_synack_options(struct sock *sk, */ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { + struct tcp_md5sig_key **md5) +{ struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; @@ -893,7 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) - tcp_event_data_sent(tp, skb, sk); + tcp_event_data_sent(tp, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, @@ -926,7 +928,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) } /* Initialize TSO segments for a packet. */ -static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, +static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { if (skb->len <= mss_now || !sk_can_gso(sk) || @@ -947,7 +949,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, /* When a modification to fackets out becomes necessary, we need to check * skb is counted to fackets_out or not. */ -static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, +static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -962,7 +964,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ -static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) +static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1094,14 +1096,16 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = len; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size <= eat) { skb_frag_unref(skb, i); - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; + skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); eat = 0; } k++; @@ -1144,10 +1148,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) } /* Calculate MSS. Not accounting for SACKs here. */ -int tcp_mtu_to_mss(struct sock *sk, int pmtu) +int tcp_mtu_to_mss(const struct sock *sk, int pmtu) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int mss_now; /* Calculate base mss without TCP options: @@ -1173,10 +1177,10 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) } /* Inverse of above */ -int tcp_mss_to_mtu(struct sock *sk, int mss) +int tcp_mss_to_mtu(const struct sock *sk, int mss) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int mtu; mtu = mss + @@ -1250,8 +1254,8 @@ EXPORT_SYMBOL(tcp_sync_mss); */ unsigned int tcp_current_mss(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst = __sk_dst_get(sk); u32 mss_now; unsigned header_len; struct tcp_out_options opts; @@ -1311,10 +1315,10 @@ static void tcp_cwnd_validate(struct sock *sk) * modulo only when the receiver window alone is the limiting factor or * when we would be allowed to send the split-due-to-Nagle skb fully. */ -static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); u32 needed, window, cwnd_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; @@ -1334,8 +1338,8 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ -static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, - struct sk_buff *skb) +static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, + const struct sk_buff *skb) { u32 in_flight, cwnd; @@ -1356,7 +1360,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, * This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, +static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1394,7 +1398,7 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp, /* Return non-zero if the Nagle test allows this packet to be * sent now. */ -static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, +static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss, int nonagle) { /* Nagle rule does not apply to frames, which sit in the middle of the @@ -1420,7 +1424,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, } /* Does at least the first segment of SKB fit into the send window? */ -static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, +static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -1435,10 +1439,10 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, * should be put on the wire right now. If so, it returns the number of * packets allowed by the congestion window. */ -static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, unsigned int cur_mss, int nonagle) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); unsigned int cwnd_quota; tcp_init_tso_segs(sk, skb, cur_mss); @@ -1456,7 +1460,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, /* Test if sending is allowed right now. */ int tcp_may_send_now(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); return skb && @@ -2006,7 +2010,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) } /* Check if coalescing SKBs is legal. */ -static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) return 0; @@ -2182,7 +2186,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Forward retransmissions are possible only during Recovery. */ if (icsk->icsk_ca_state != TCP_CA_Recovery) @@ -2548,7 +2552,7 @@ EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ecd44b0c45f1..2e0f0af76c19 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -334,7 +334,6 @@ void tcp_retransmit_timer(struct sock *sk) * connection. If the socket is an orphan, time it out, * we cannot allow such beasts to hang infinitely. */ -#ifdef TCP_DEBUG struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", @@ -349,7 +348,6 @@ void tcp_retransmit_timer(struct sock *sk) inet->inet_num, tp->snd_una, tp->snd_nxt); } #endif -#endif if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { tcp_write_err(sk); goto out; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fc5368ad2b0d..a0b4c5da8d43 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,13 +79,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - rt->rt_key_dst = fl4->daddr; - rt->rt_key_src = fl4->saddr; - rt->rt_key_tos = fl4->flowi4_tos; - rt->rt_route_iif = fl4->flowi4_iif; - rt->rt_iif = fl4->flowi4_iif; - rt->rt_oif = fl4->flowi4_oif; - rt->rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_key_dst = fl4->daddr; + xdst->u.rt.rt_key_src = fl4->saddr; + xdst->u.rt.rt_key_tos = fl4->flowi4_tos; + xdst->u.rt.rt_route_iif = fl4->flowi4_iif; + xdst->u.rt.rt_iif = fl4->flowi4_iif; + xdst->u.rt.rt_oif = fl4->flowi4_oif; + xdst->u.rt.rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3b5669a2582d..d27c797f9f05 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -875,6 +875,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_reset_transport_header(skb); __skb_push(skb, skb_gro_offset(skb)); + ops = rcu_dereference(inet6_protos[proto]); if (!ops || !ops->gro_receive) goto out_unlock; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b46e9f88ce37..e2480691c220 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -297,10 +297,6 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) ipv6_addr_copy(&iph->daddr, &fl6->daddr); mtu_info = IP6CBMTU(skb); - if (!mtu_info) { - kfree_skb(skb); - return; - } mtu_info->ip6m_mtu = mtu; mtu_info->ip6m_addr.sin6_family = AF_INET6; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 320d91d20ad7..93718f3db79b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -28,10 +28,6 @@ #include <linux/list.h> #include <linux/slab.h> -#ifdef CONFIG_PROC_FS -#include <linux/proc_fs.h> -#endif - #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 835c04b5239f..1c9bf8b5c30a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1193,6 +1193,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct sk_buff *skb; unsigned int maxfraglen, fragheaderlen; int exthdrlen; + int dst_exthdrlen; int hh_len; int mtu; int copy; @@ -1248,7 +1249,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(&rt->dst); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; @@ -1259,16 +1260,17 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, cork->length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - - rt->rt6i_nfheader_len; + exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; + dst_exthdrlen = rt->dst.header_len; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; + dst_exthdrlen = 0; mtu = cork->fragsize; } @@ -1368,6 +1370,8 @@ alloc_new_skb: else alloclen = datalen + fragheaderlen; + alloclen += dst_exthdrlen; + /* * The last fragment gets additional space at tail. * Note: we overallocate on fragments with MSG_MODE @@ -1419,9 +1423,9 @@ alloc_new_skb: /* * Find where to start putting bytes */ - data = skb_put(skb, fraglen); - skb_set_network_header(skb, exthdrlen); - data += fragheaderlen; + data = skb_put(skb, fraglen + dst_exthdrlen); + skb_set_network_header(skb, exthdrlen + dst_exthdrlen); + data += fragheaderlen + dst_exthdrlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { @@ -1434,6 +1438,7 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; + if (copy < 0) { err = -EINVAL; kfree_skb(skb); @@ -1448,6 +1453,7 @@ alloc_new_skb: length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; + dst_exthdrlen = 0; csummode = CHECKSUM_NONE; /* @@ -1506,13 +1512,14 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, skb_frag_address(frag)+frag->size, + if (getfrag(from, + skb_frag_address(frag) + skb_frag_size(frag), offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } sk->sk_sndmsg_off += copy; - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2fbda5fc4cc4..c99e3ee9781f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -343,7 +343,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_TRANSPARENT: - if (!capable(CAP_NET_ADMIN)) { + if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) { retv = -EPERM; break; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1f52dd257631..44e5b7f2a6c1 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -370,17 +370,14 @@ static int ndisc_constructor(struct neighbour *neigh) struct neigh_parms *parms; int is_multicast = ipv6_addr_is_multicast(addr); - rcu_read_lock(); in6_dev = in6_dev_get(dev); if (in6_dev == NULL) { - rcu_read_unlock(); return -EINVAL; } parms = in6_dev->nd_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (!dev->header_ops) { @@ -1224,6 +1221,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (!in6_dev->cnf.accept_ra_defrtr) goto skip_defrtr; + if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + goto skip_defrtr; + lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); #ifdef CONFIG_IPV6_ROUTER_PREF @@ -1346,6 +1346,9 @@ skip_linkparms: goto out; #ifdef CONFIG_IPV6_ROUTE_INFO + if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + goto skip_routeinfo; + if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; @@ -1363,6 +1366,8 @@ skip_linkparms: &ipv6_hdr(skb)->saddr); } } + +skip_routeinfo: #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 085727263812..e8762c73b170 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -378,8 +378,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 3486f62befa3..6f7824e1cea4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -542,8 +542,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, goto out; offset = rp->offset; - total_len = inet_sk(sk)->cork.base.length - (skb_network_header(skb) - - skb->data); + total_len = inet_sk(sk)->cork.base.length; if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7b954e2539d0..cc22099ac8b6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -464,8 +464,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965ff34..5a0d6648bbbc 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -115,7 +115,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, & COOKIEMASK; } -__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) jiffies / (HZ * 60), mssind); } -static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -152,7 +152,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - u8 *hash_location; + const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5357902c7978..c8683fcc487a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -114,7 +114,7 @@ static __inline__ __sum16 tcp_v6_check(int len, return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } -static __u32 tcp_v6_init_sequence(struct sk_buff *skb) +static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, @@ -761,7 +761,7 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, - struct tcphdr *th) + const struct tcphdr *th) { struct tcp_md5sig_pool *hp; struct hash_desc *desc; @@ -793,13 +793,14 @@ clear_hash_noput: } static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, struct request_sock *req, - struct sk_buff *skb) + const struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb) { const struct in6_addr *saddr, *daddr; struct tcp_md5sig_pool *hp; struct hash_desc *desc; - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); if (sk) { saddr = &inet6_sk(sk)->saddr; @@ -842,12 +843,12 @@ clear_hash_noput: return 1; } -static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) +static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { - __u8 *hash_location = NULL; + const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); int genhash; u8 newhash[16]; @@ -980,7 +981,8 @@ static int tcp6_gro_complete(struct sk_buff *skb) static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, struct tcp_md5sig_key *key, int rst) { - struct tcphdr *th = tcp_hdr(skb), *t1; + const struct tcphdr *th = tcp_hdr(skb); + struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; struct net *net = dev_net(skb_dst(skb)->dev); @@ -1070,7 +1072,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; @@ -1160,7 +1162,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -1688,7 +1690,7 @@ ipv6_pktoptions: static int tcp_v6_rcv(struct sk_buff *skb) { - struct tcphdr *th; + const struct tcphdr *th; const struct ipv6hdr *hdr; struct sock *sk; int ret; @@ -1856,8 +1858,8 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) static void *tcp_v6_tw_get_peer(struct sock *sk) { - struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - struct inet_timewait_sock *tw = inet_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); + const struct inet_timewait_sock *tw = inet_twsk(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); @@ -2012,7 +2014,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - struct sock *sk, struct request_sock *req, int i, int uid) + const struct sock *sk, struct request_sock *req, int i, int uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet6_rsk(req)->loc_addr; @@ -2048,10 +2050,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) __u16 destp, srcp; int timer_active; unsigned long timer_expires; - struct inet_sock *inet = inet_sk(sp); - struct tcp_sock *tp = tcp_sk(sp); + const struct inet_sock *inet = inet_sk(sp); + const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); + const struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; @@ -2103,7 +2105,7 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); + const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 49a91c5f5623..faae41737fca 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -28,6 +28,43 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, EXPORT_SYMBOL(xfrm6_find_1stfragopt); +static int xfrm6_local_dontfrag(struct sk_buff *skb) +{ + int proto; + struct sock *sk = skb->sk; + + if (sk) { + proto = sk->sk_protocol; + + if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) + return inet6_sk(sk)->dontfrag; + } + + return 0; +} + +static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +{ + struct flowi6 fl6; + struct sock *sk = skb->sk; + + fl6.flowi6_oif = sk->sk_bound_dev_if; + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr); + + ipv6_local_rxpmtu(sk, &fl6, mtu); +} + +static void xfrm6_local_error(struct sk_buff *skb, u32 mtu) +{ + struct flowi6 fl6; + struct sock *sk = skb->sk; + + fl6.fl6_dport = inet_sk(sk)->inet_dport; + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr); + + ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); +} + static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -39,7 +76,13 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->local_df && skb->len > mtu) { skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + + if (xfrm6_local_dontfrag(skb)) + xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm6_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } @@ -93,9 +136,18 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; + int mtu = ip6_skb_dst_mtu(skb); + + if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { + xfrm6_local_rxpmtu(skb, mtu); + return -EMSGSIZE; + } else if (!skb->local_df && skb->len > mtu && skb->sk) { + xfrm6_local_error(skb, mtu); + return -EMSGSIZE; + } if ((x && x->props.mode == XFRM_MODE_TUNNEL) && - ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); } diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index b3cc8b3989a9..253695d43fd9 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -551,7 +551,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) */ tty->closing = 1; if (self->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, self->closing_wait); + tty_wait_until_sent_from_close(tty, self->closing_wait); ircomm_tty_shutdown(self); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ad4ac2601a56..34b2ddeacb67 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1045,8 +1045,10 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + hdr_len; old_headroom = skb_headroom(skb); - if (skb_cow_head(skb, headroom)) + if (skb_cow_head(skb, headroom)) { + dev_kfree_skb(skb); goto abort; + } new_headroom = skb_headroom(skb); skb_orphan(skb); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f42cd0915966..8a90d756c904 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -395,6 +395,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) struct pppol2tp_session *ps; int old_headroom; int new_headroom; + int uhlen, headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; @@ -413,7 +414,13 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) goto abort_put_sess; old_headroom = skb_headroom(skb); - if (skb_cow_head(skb, sizeof(ppph))) + uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; + headroom = NET_SKB_PAD + + sizeof(struct iphdr) + /* IP header */ + uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */ + session->hdr_len + /* L2TP header */ + sizeof(ppph); /* PPP header */ + if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; new_headroom = skb_headroom(skb); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d4ee6d234a78..d999bf3b84e1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -19,7 +19,7 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/bitmap.h> -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0e5d8daba1ee..ba2da11a997b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -17,7 +17,7 @@ #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <linux/crc32.h> #include <linux/slab.h> #include <net/mac80211.h> diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 397343a59275..83a0b050b374 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,7 +14,7 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <net/sch_generic.h> #include <linux/slab.h> #include <net/mac80211.h> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 32bff6d86cb2..8260b13d93c9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -505,7 +505,7 @@ config NETFILTER_XT_TARGET_LED echo netfilter-ssh > /sys/class/leds/<ledname>/trigger For more information on the LEDs available on your system, see - Documentation/leds-class.txt + Documentation/leds/leds-class.txt config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5290ac353a5e..e3be48bf4dcd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2283,6 +2283,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) struct ip_vs_service *svc; struct ip_vs_dest_user *udest_compat; struct ip_vs_dest_user_kern udest; + struct netns_ipvs *ipvs = net_ipvs(net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2303,6 +2304,24 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* increase the module use count */ ip_vs_use_count_inc(); + /* Handle daemons since they have another lock */ + if (cmd == IP_VS_SO_SET_STARTDAEMON || + cmd == IP_VS_SO_SET_STOPDAEMON) { + struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; + + if (mutex_lock_interruptible(&ipvs->sync_mutex)) { + ret = -ERESTARTSYS; + goto out_dec; + } + if (cmd == IP_VS_SO_SET_STARTDAEMON) + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, + dm->syncid); + else + ret = stop_sync_thread(net, dm->state); + mutex_unlock(&ipvs->sync_mutex); + goto out_dec; + } + if (mutex_lock_interruptible(&__ip_vs_mutex)) { ret = -ERESTARTSYS; goto out_dec; @@ -2316,15 +2335,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Set timeout values for (tcp tcpfin udp) */ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); goto out_unlock; - } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { - struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = start_sync_thread(net, dm->state, dm->mcast_ifn, - dm->syncid); - goto out_unlock; - } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { - struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = stop_sync_thread(net, dm->state); - goto out_unlock; } usvc_compat = (struct ip_vs_service_user *)arg; @@ -2584,6 +2594,33 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(arg, user, copylen) != 0) return -EFAULT; + /* + * Handle daemons first since it has its own locking + */ + if (cmd == IP_VS_SO_GET_DAEMON) { + struct ip_vs_daemon_user d[2]; + + memset(&d, 0, sizeof(d)); + if (mutex_lock_interruptible(&ipvs->sync_mutex)) + return -ERESTARTSYS; + + if (ipvs->sync_state & IP_VS_STATE_MASTER) { + d[0].state = IP_VS_STATE_MASTER; + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + sizeof(d[0].mcast_ifn)); + d[0].syncid = ipvs->master_syncid; + } + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { + d[1].state = IP_VS_STATE_BACKUP; + strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + sizeof(d[1].mcast_ifn)); + d[1].syncid = ipvs->backup_syncid; + } + if (copy_to_user(user, &d, sizeof(d)) != 0) + ret = -EFAULT; + mutex_unlock(&ipvs->sync_mutex); + return ret; + } if (mutex_lock_interruptible(&__ip_vs_mutex)) return -ERESTARTSYS; @@ -2681,28 +2718,6 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } break; - case IP_VS_SO_GET_DAEMON: - { - struct ip_vs_daemon_user d[2]; - - memset(&d, 0, sizeof(d)); - if (ipvs->sync_state & IP_VS_STATE_MASTER) { - d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, - sizeof(d[0].mcast_ifn)); - d[0].syncid = ipvs->master_syncid; - } - if (ipvs->sync_state & IP_VS_STATE_BACKUP) { - d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, - sizeof(d[1].mcast_ifn)); - d[1].syncid = ipvs->backup_syncid; - } - if (copy_to_user(user, &d, sizeof(d)) != 0) - ret = -EFAULT; - } - break; - default: ret = -EINVAL; } @@ -3205,7 +3220,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->sync_mutex); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, ipvs->master_mcast_ifn, @@ -3225,7 +3240,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->sync_mutex); return skb->len; } @@ -3271,13 +3286,9 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) return ip_vs_set_timeout(net, &t); } -static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { - struct ip_vs_service *svc = NULL; - struct ip_vs_service_user_kern usvc; - struct ip_vs_dest_user_kern udest; int ret = 0, cmd; - int need_full_svc = 0, need_full_dest = 0; struct net *net; struct netns_ipvs *ipvs; @@ -3285,19 +3296,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; - mutex_lock(&__ip_vs_mutex); - - if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); - goto out; - } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(net, info->attrs); - goto out; - } else if (cmd == IPVS_CMD_NEW_DAEMON || - cmd == IPVS_CMD_DEL_DAEMON) { - + if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + mutex_lock(&ipvs->sync_mutex); if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], @@ -3310,6 +3312,33 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_genl_new_daemon(net, daemon_attrs); else ret = ip_vs_genl_del_daemon(net, daemon_attrs); +out: + mutex_unlock(&ipvs->sync_mutex); + } + return ret; +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + struct net *net; + struct netns_ipvs *ipvs; + + net = skb_sknet(skb); + ipvs = net_ipvs(net); + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(net); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { @@ -3536,13 +3565,13 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = { .cmd = IPVS_CMD_NEW_DAEMON, .flags = GENL_ADMIN_PERM, .policy = ip_vs_cmd_policy, - .doit = ip_vs_genl_set_cmd, + .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, .flags = GENL_ADMIN_PERM, .policy = ip_vs_cmd_policy, - .doit = ip_vs_genl_set_cmd, + .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_GET_DAEMON, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7ee7215b8ba0..3cdd479f9b5d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -61,6 +61,7 @@ #define SYNC_PROTO_VER 1 /* Protocol version in header */ +static struct lock_class_key __ipvs_sync_key; /* * IPVS sync connection entry * Version 0, i.e. original version. @@ -1545,6 +1546,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + if (state == IP_VS_STATE_MASTER) { if (ipvs->master_thread) return -EEXIST; @@ -1667,6 +1669,7 @@ int __net_init ip_vs_sync_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); INIT_LIST_HEAD(&ipvs->sync_queue); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); @@ -1680,7 +1683,9 @@ int __net_init ip_vs_sync_net_init(struct net *net) void ip_vs_sync_net_cleanup(struct net *net) { int retc; + struct netns_ipvs *ipvs = net_ipvs(net); + mutex_lock(&ipvs->sync_mutex); retc = stop_sync_thread(net, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); @@ -1688,4 +1693,5 @@ void ip_vs_sync_net_cleanup(struct net *net) retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); + mutex_unlock(&ipvs->sync_mutex); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index cf616e55ca41..d69facdd9a7a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -241,8 +241,8 @@ static int gre_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ - set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_ASSURED, ct); + if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b0869fe3633b..71441b934ffd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -776,12 +776,11 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) - i->jumpstack = vmalloc(size); + i->jumpstack = vzalloc(size); else - i->jumpstack = kmalloc(size, GFP_KERNEL); + i->jumpstack = kzalloc(size, GFP_KERNEL); if (i->jumpstack == NULL) return -ENOMEM; - memset(i->jumpstack, 0, size); i->stacksize *= xt_jumpstack_multiplier; size = sizeof(void *) * i->stacksize; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7b5f03253016..03bb45adf2fc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1213,43 +1213,6 @@ static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *sk return f->arr[cpu % num]; } -static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - const struct iphdr *iph; - u32 len; - - if (skb->protocol != htons(ETH_P_IP)) - return skb; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return skb; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return skb; - if (!pskb_may_pull(skb, iph->ihl*4)) - return skb; - iph = ip_hdr(skb); - len = ntohs(iph->tot_len); - if (skb->len < len || len < (iph->ihl * 4)) - return skb; - - if (ip_is_fragment(ip_hdr(skb))) { - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb) { - if (pskb_trim_rcsum(skb, len)) - return skb; - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (ip_defrag(skb, IP_DEFRAG_AF_PACKET)) - return NULL; - skb->rxhash = 0; - } - } -#endif - return skb; -} - static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1268,7 +1231,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, case PACKET_FANOUT_HASH: default: if (f->defrag) { - skb = fanout_check_defrag(skb); + skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index cd67026be2d5..51c868923f64 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -375,23 +375,21 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto out; } - ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), + ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } - memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); - ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), + ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), ibdev_to_node(dev)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } - memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); rds_ib_recv_init_ack(ic); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6994214db8f8..9e087d885675 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -65,132 +65,134 @@ static inline u32 addr_fold(void *addr) return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); } -static u32 flow_get_src(struct sk_buff *skb) +static u32 flow_get_src(const struct sk_buff *skb, int nhoff) { + __be32 *data = NULL, hdata; + switch (skb->protocol) { case htons(ETH_P_IP): - if (pskb_network_may_pull(skb, sizeof(struct iphdr))) - return ntohl(ip_hdr(skb)->saddr); + data = skb_header_pointer(skb, + nhoff + offsetof(struct iphdr, + saddr), + 4, &hdata); break; case htons(ETH_P_IPV6): - if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) - return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); + data = skb_header_pointer(skb, + nhoff + offsetof(struct ipv6hdr, + saddr.s6_addr32[3]), + 4, &hdata); break; } + if (data) + return ntohl(*data); return addr_fold(skb->sk); } -static u32 flow_get_dst(struct sk_buff *skb) +static u32 flow_get_dst(const struct sk_buff *skb, int nhoff) { + __be32 *data = NULL, hdata; + switch (skb->protocol) { case htons(ETH_P_IP): - if (pskb_network_may_pull(skb, sizeof(struct iphdr))) - return ntohl(ip_hdr(skb)->daddr); + data = skb_header_pointer(skb, + nhoff + offsetof(struct iphdr, + daddr), + 4, &hdata); break; case htons(ETH_P_IPV6): - if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) - return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); + data = skb_header_pointer(skb, + nhoff + offsetof(struct ipv6hdr, + daddr.s6_addr32[3]), + 4, &hdata); break; } + if (data) + return ntohl(*data); return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; } -static u32 flow_get_proto(struct sk_buff *skb) +static u32 flow_get_proto(const struct sk_buff *skb, int nhoff) { + __u8 *data = NULL, hdata; + switch (skb->protocol) { case htons(ETH_P_IP): - return pskb_network_may_pull(skb, sizeof(struct iphdr)) ? - ip_hdr(skb)->protocol : 0; + data = skb_header_pointer(skb, + nhoff + offsetof(struct iphdr, + protocol), + 1, &hdata); + break; case htons(ETH_P_IPV6): - return pskb_network_may_pull(skb, sizeof(struct ipv6hdr)) ? - ipv6_hdr(skb)->nexthdr : 0; - default: - return 0; + data = skb_header_pointer(skb, + nhoff + offsetof(struct ipv6hdr, + nexthdr), + 1, &hdata); + break; } + if (data) + return *data; + return 0; } -static u32 flow_get_proto_src(struct sk_buff *skb) +/* helper function to get either src or dst port */ +static __be16 *flow_get_proto_common(const struct sk_buff *skb, int nhoff, + __be16 *_port, int dst) { + __be16 *port = NULL; + int poff; + switch (skb->protocol) { case htons(ETH_P_IP): { - struct iphdr *iph; - int poff; + struct iphdr *iph, _iph; - if (!pskb_network_may_pull(skb, sizeof(*iph))) + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (!iph) break; - iph = ip_hdr(skb); if (ip_is_fragment(iph)) break; poff = proto_ports_offset(iph->protocol); - if (poff >= 0 && - pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) { - iph = ip_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + - poff)); - } + if (poff >= 0) + port = skb_header_pointer(skb, + nhoff + iph->ihl * 4 + poff + dst, + sizeof(*_port), _port); break; } case htons(ETH_P_IPV6): { - struct ipv6hdr *iph; - int poff; + struct ipv6hdr *iph, _iph; - if (!pskb_network_may_pull(skb, sizeof(*iph))) + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (!iph) break; - iph = ipv6_hdr(skb); poff = proto_ports_offset(iph->nexthdr); - if (poff >= 0 && - pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) { - iph = ipv6_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) + - poff)); - } + if (poff >= 0) + port = skb_header_pointer(skb, + nhoff + sizeof(*iph) + poff + dst, + sizeof(*_port), _port); break; } } - return addr_fold(skb->sk); + return port; } -static u32 flow_get_proto_dst(struct sk_buff *skb) +static u32 flow_get_proto_src(const struct sk_buff *skb, int nhoff) { - switch (skb->protocol) { - case htons(ETH_P_IP): { - struct iphdr *iph; - int poff; + __be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 0); - if (!pskb_network_may_pull(skb, sizeof(*iph))) - break; - iph = ip_hdr(skb); - if (ip_is_fragment(iph)) - break; - poff = proto_ports_offset(iph->protocol); - if (poff >= 0 && - pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { - iph = ip_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + - 2 + poff)); - } - break; - } - case htons(ETH_P_IPV6): { - struct ipv6hdr *iph; - int poff; + if (port) + return ntohs(*port); - if (!pskb_network_may_pull(skb, sizeof(*iph))) - break; - iph = ipv6_hdr(skb); - poff = proto_ports_offset(iph->nexthdr); - if (poff >= 0 && - pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) { - iph = ipv6_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) + - poff + 2)); - } - break; - } - } + return addr_fold(skb->sk); +} + +static u32 flow_get_proto_dst(const struct sk_buff *skb, int nhoff) +{ + __be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 2); + + if (port) + return ntohs(*port); return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; } @@ -223,7 +225,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ if (ct == NULL) \ goto fallback; \ ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ @@ -236,7 +238,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(struct sk_buff *skb) +static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff) { switch (skb->protocol) { case htons(ETH_P_IP): @@ -245,10 +247,10 @@ static u32 flow_get_nfct_src(struct sk_buff *skb) return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: - return flow_get_src(skb); + return flow_get_src(skb, nhoff); } -static u32 flow_get_nfct_dst(struct sk_buff *skb) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff) { switch (skb->protocol) { case htons(ETH_P_IP): @@ -257,21 +259,21 @@ static u32 flow_get_nfct_dst(struct sk_buff *skb) return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: - return flow_get_dst(skb); + return flow_get_dst(skb, nhoff); } -static u32 flow_get_nfct_proto_src(struct sk_buff *skb) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, int nhoff) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: - return flow_get_proto_src(skb); + return flow_get_proto_src(skb, nhoff); } -static u32 flow_get_nfct_proto_dst(struct sk_buff *skb) +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, int nhoff) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: - return flow_get_proto_dst(skb); + return flow_get_proto_dst(skb, nhoff); } static u32 flow_get_rtclassid(const struct sk_buff *skb) @@ -313,17 +315,19 @@ static u32 flow_get_rxhash(struct sk_buff *skb) static u32 flow_key_get(struct sk_buff *skb, int key) { + int nhoff = skb_network_offset(skb); + switch (key) { case FLOW_KEY_SRC: - return flow_get_src(skb); + return flow_get_src(skb, nhoff); case FLOW_KEY_DST: - return flow_get_dst(skb); + return flow_get_dst(skb, nhoff); case FLOW_KEY_PROTO: - return flow_get_proto(skb); + return flow_get_proto(skb, nhoff); case FLOW_KEY_PROTO_SRC: - return flow_get_proto_src(skb); + return flow_get_proto_src(skb, nhoff); case FLOW_KEY_PROTO_DST: - return flow_get_proto_dst(skb); + return flow_get_proto_dst(skb, nhoff); case FLOW_KEY_IIF: return flow_get_iif(skb); case FLOW_KEY_PRIORITY: @@ -333,13 +337,13 @@ static u32 flow_key_get(struct sk_buff *skb, int key) case FLOW_KEY_NFCT: return flow_get_nfct(skb); case FLOW_KEY_NFCT_SRC: - return flow_get_nfct_src(skb); + return flow_get_nfct_src(skb, nhoff); case FLOW_KEY_NFCT_DST: - return flow_get_nfct_dst(skb); + return flow_get_nfct_dst(skb, nhoff); case FLOW_KEY_NFCT_PROTO_SRC: - return flow_get_nfct_proto_src(skb); + return flow_get_nfct_proto_src(skb, nhoff); case FLOW_KEY_NFCT_PROTO_DST: - return flow_get_nfct_proto_dst(skb); + return flow_get_nfct_proto_dst(skb, nhoff); case FLOW_KEY_RTCLASSID: return flow_get_rtclassid(skb); case FLOW_KEY_SKUID: diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 4195233c4914..4548757c9871 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -255,12 +255,13 @@ EXPORT_SYMBOL_GPL(rpc_pton); /** * rpc_sockaddr2uaddr - Construct a universal address string from @sap. * @sap: socket address + * @gfp_flags: allocation mode * * Returns a %NUL-terminated string in dynamically allocated memory; * otherwise NULL is returned if an error occurred. Caller must * free the returned string. */ -char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) { char portbuf[RPCBIND_MAXUADDRPLEN]; char addrbuf[RPCBIND_MAXUADDRLEN]; @@ -288,9 +289,8 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap) if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) return NULL; - return kstrdup(addrbuf, GFP_KERNEL); + return kstrdup(addrbuf, gfp_flags); } -EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); /** * rpc_uaddr2sockaddr - convert a universal address to a socket address. diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d4132754cbe1..afb56553dfe7 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -603,26 +603,6 @@ out: return err; } -static ssize_t -gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, - char __user *dst, size_t buflen) -{ - char *data = (char *)msg->data + msg->copied; - size_t mlen = min(msg->len, buflen); - unsigned long left; - - left = copy_to_user(dst, data, mlen); - if (left == mlen) { - msg->errno = -EFAULT; - return -EFAULT; - } - - mlen -= left; - msg->copied += mlen; - msg->errno = 0; - return mlen; -} - #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -1590,7 +1570,7 @@ static const struct rpc_credops gss_nullops = { }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { - .upcall = gss_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v0, @@ -1598,7 +1578,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = { }; static const struct rpc_pipe_ops gss_upcall_ops_v1 = { - .upcall = gss_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v1, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c5347d29cfb7..f0268ea7e711 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -850,7 +850,9 @@ rpc_restart_call_prepare(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) return 0; - task->tk_action = rpc_prepare_task; + task->tk_action = call_start; + if (task->tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b181e3441323..bfddd68b31d3 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -77,6 +77,26 @@ rpc_timeout_upcall_queue(struct work_struct *work) rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } +ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len - msg->copied, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; + } + + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} +EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); + /** * rpc_queue_upcall - queue an upcall message to userspace * @inode: inode of upcall pipe on which to queue given message @@ -1084,3 +1104,6 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } + +/* Make 'mount -t rpc_pipefs ...' autoload this module. */ +MODULE_ALIAS("rpc_pipefs"); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e45d2fbbe5a8..f588b852d41c 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -410,7 +410,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, unsigned short port = ntohs(sin->sin_port); int result; - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -437,7 +437,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, unsigned short port = ntohs(sin6->sin6_port); int result; - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -686,7 +686,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); map->r_owner = ""; break; case RPCBVERS_2: diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6a69a1131fb7..30d70abb4e2c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -295,6 +295,18 @@ svc_pool_map_put(void) } +static int svc_pool_map_get_node(unsigned int pidx) +{ + const struct svc_pool_map *m = &svc_pool_map; + + if (m->count) { + if (m->mode == SVC_POOL_PERCPU) + return cpu_to_node(m->pool_to[pidx]); + if (m->mode == SVC_POOL_PERNODE) + return m->pool_to[pidx]; + } + return NUMA_NO_NODE; +} /* * Set the given thread's cpus_allowed mask so that it * will only run on cpus in the given pool. @@ -499,7 +511,7 @@ EXPORT_SYMBOL_GPL(svc_destroy); * We allocate pages and place them in rq_argpages. */ static int -svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) +svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) { unsigned int pages, arghi; @@ -513,7 +525,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) arghi = 0; BUG_ON(pages > RPCSVC_MAXPAGES); while (pages) { - struct page *p = alloc_page(GFP_KERNEL); + struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); if (!p) break; rqstp->rq_pages[arghi++] = p; @@ -536,11 +548,11 @@ svc_release_buffer(struct svc_rqst *rqstp) } struct svc_rqst * -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; - rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); + rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); if (!rqstp) goto out_enomem; @@ -554,15 +566,15 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) rqstp->rq_server = serv; rqstp->rq_pool = pool; - rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); + rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_argp) goto out_thread; - rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); + rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_resp) goto out_thread; - if (!svc_init_buffer(rqstp, serv->sv_max_mesg)) + if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) goto out_thread; return rqstp; @@ -647,6 +659,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct svc_pool *chosen_pool; int error = 0; unsigned int state = serv->sv_nrthreads-1; + int node; if (pool == NULL) { /* The -1 assumes caller has done a svc_get() */ @@ -662,14 +675,16 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) nrservs--; chosen_pool = choose_pool(serv, pool, &state); - rqstp = svc_prepare_thread(serv, chosen_pool); + node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) { error = PTR_ERR(rqstp); break; } __module_get(serv->sv_module); - task = kthread_create(serv->sv_function, rqstp, serv->sv_name); + task = kthread_create_on_node(serv->sv_function, rqstp, + node, serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); module_put(serv->sv_module); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bd31208bbb61..d86bb673e1f6 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -254,8 +254,6 @@ EXPORT_SYMBOL_GPL(svc_create_xprt); */ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) { - struct sockaddr *sin; - memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen); rqstp->rq_addrlen = xprt->xpt_remotelen; @@ -263,15 +261,8 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) * Destination address in request is needed for binding the * source address in RPC replies/callbacks later. */ - sin = (struct sockaddr *)&xprt->xpt_local; - switch (sin->sa_family) { - case AF_INET: - rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; - break; - case AF_INET6: - rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; - break; - } + memcpy(&rqstp->rq_daddr, &xprt->xpt_local, xprt->xpt_locallen); + rqstp->rq_daddrlen = xprt->xpt_locallen; } EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 767d494de7a2..dfd686eb0b7f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -143,19 +143,20 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) cmh->cmsg_level = SOL_IP; cmh->cmsg_type = IP_PKTINFO; pki->ipi_ifindex = 0; - pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr; + pki->ipi_spec_dst.s_addr = + svc_daddr_in(rqstp)->sin_addr.s_addr; cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); } break; case AF_INET6: { struct in6_pktinfo *pki = CMSG_DATA(cmh); + struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp); cmh->cmsg_level = SOL_IPV6; cmh->cmsg_type = IPV6_PKTINFO; - pki->ipi6_ifindex = 0; - ipv6_addr_copy(&pki->ipi6_addr, - &rqstp->rq_daddr.addr6); + pki->ipi6_ifindex = daddr->sin6_scope_id; + ipv6_addr_copy(&pki->ipi6_addr, &daddr->sin6_addr); cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); } break; @@ -498,9 +499,13 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, struct cmsghdr *cmh) { struct in_pktinfo *pki = CMSG_DATA(cmh); + struct sockaddr_in *daddr = svc_daddr_in(rqstp); + if (cmh->cmsg_type != IP_PKTINFO) return 0; - rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; + + daddr->sin_family = AF_INET; + daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr; return 1; } @@ -511,9 +516,14 @@ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, struct cmsghdr *cmh) { struct in6_pktinfo *pki = CMSG_DATA(cmh); + struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp); + if (cmh->cmsg_type != IPV6_PKTINFO) return 0; - ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); + + daddr->sin6_family = AF_INET6; + ipv6_addr_copy(&daddr->sin6_addr, &pki->ipi6_addr); + daddr->sin6_scope_id = pki->ipi6_ifindex; return 1; } @@ -614,6 +624,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } + rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); if (skb_is_nonlinear(skb)) { /* we have to copy */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d30615419b4d..5f03e4ea65bf 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -91,7 +91,7 @@ int x25_parse_address_block(struct sk_buff *skb, int needed; int rc; - if (skb->len < 1) { + if (!pskb_may_pull(skb, 1)) { /* packet has no address block */ rc = 0; goto empty; @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb, len = *skb->data; needed = 1 + (len >> 4) + (len & 0x0f); - if (skb->len < needed) { + if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims to hold */ rc = -1; @@ -295,7 +295,8 @@ static struct sock *x25_find_listener(struct x25_address *addr, * Found a listening socket, now check the incoming * call user data vs this sockets call user data */ - if(skb->len > 0 && x25_sk(s)->cudmatchlength > 0) { + if (x25_sk(s)->cudmatchlength > 0 && + skb->len >= x25_sk(s)->cudmatchlength) { if((memcmp(x25_sk(s)->calluserdata.cuddata, skb->data, x25_sk(s)->cudmatchlength)) == 0) { @@ -951,14 +952,27 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, * * Facilities length is mandatory in call request packets */ - if (skb->len < 1) + if (!pskb_may_pull(skb, 1)) goto out_clear_request; len = skb->data[0] + 1; - if (skb->len < len) + if (!pskb_may_pull(skb, len)) goto out_clear_request; skb_pull(skb,len); /* + * Ensure that the amount of call user data is valid. + */ + if (skb->len > X25_MAX_CUD_LEN) + goto out_clear_request; + + /* + * Get all the call user data so it can be used in + * x25_find_listener and skb_copy_from_linear_data up ahead. + */ + if (!pskb_may_pull(skb, skb->len)) + goto out_clear_request; + + /* * Find a listener for the particular address/cud pair. */ sk = x25_find_listener(&source_addr,skb); @@ -1166,6 +1180,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, * byte of the user data is the logical value of the Q Bit. */ if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { + if (!pskb_may_pull(skb, 1)) + goto out_kfree_skb; + qbit = skb->data[0]; skb_pull(skb, 1); } @@ -1244,7 +1261,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, struct x25_sock *x25 = x25_sk(sk); struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name; size_t copied; - int qbit; + int qbit, header_len = x25->neighbour->extended ? + X25_EXT_MIN_LEN : X25_STD_MIN_LEN; + struct sk_buff *skb; unsigned char *asmptr; int rc = -ENOTCONN; @@ -1265,6 +1284,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_dequeue(&x25->interrupt_in_queue); + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + goto out_free_dgram; + skb_pull(skb, X25_STD_MIN_LEN); /* @@ -1285,10 +1307,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto out; + if (!pskb_may_pull(skb, header_len)) + goto out_free_dgram; + qbit = (skb->data[0] & X25_Q_BIT) == X25_Q_BIT; - skb_pull(skb, x25->neighbour->extended ? - X25_EXT_MIN_LEN : X25_STD_MIN_LEN); + skb_pull(skb, header_len); if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) { asmptr = skb_push(skb, 1); diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index e547ca1578c3..fa2b41888bd9 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -32,6 +32,9 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) unsigned short frametype; unsigned int lci; + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + return 0; + frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); @@ -115,6 +118,9 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, goto drop; } + if (!pskb_may_pull(skb, 1)) + return 0; + switch (skb->data[0]) { case X25_IFACE_DATA: diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index f77e4e75f914..36384a1fa9f2 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -44,7 +44,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) { - unsigned char *p = skb->data; + unsigned char *p; unsigned int len; *vc_fac_mask = 0; @@ -60,14 +60,16 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, memset(dte_facs->called_ae, '\0', sizeof(dte_facs->called_ae)); memset(dte_facs->calling_ae, '\0', sizeof(dte_facs->calling_ae)); - if (skb->len < 1) + if (!pskb_may_pull(skb, 1)) return 0; - len = *p++; + len = skb->data[0]; - if (len >= skb->len) + if (!pskb_may_pull(skb, 1 + len)) return -1; + p = skb->data + 1; + while (len > 0) { switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 0b073b51b183..a49cd4ec551a 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -107,6 +107,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp /* * Parse the data in the frame. */ + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + goto out_clear; skb_pull(skb, X25_STD_MIN_LEN); len = x25_parse_address_block(skb, &source_addr, @@ -127,9 +129,11 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp * Copy any Call User Data. */ if (skb->len > 0) { - skb_copy_from_linear_data(skb, - x25->calluserdata.cuddata, - skb->len); + if (skb->len > X25_MAX_CUD_LEN) + goto out_clear; + + skb_copy_bits(skb, 0, x25->calluserdata.cuddata, + skb->len); x25->calluserdata.cudlength = skb->len; } if (!sock_flag(sk, SOCK_DEAD)) @@ -137,6 +141,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; } case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); break; @@ -164,6 +171,9 @@ static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametyp switch (frametype) { case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, 0, skb->data[3], skb->data[4]); break; @@ -177,6 +187,11 @@ static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25_start_t23timer(sk); + return 0; } /* @@ -206,6 +221,9 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, 0, skb->data[3], skb->data[4]); break; @@ -304,6 +322,12 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return queued; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* @@ -313,13 +337,13 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp */ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype) { + struct x25_sock *x25 = x25_sk(sk); + switch (frametype) { case X25_RESET_REQUEST: x25_write_internal(sk, X25_RESET_CONFIRMATION); case X25_RESET_CONFIRMATION: { - struct x25_sock *x25 = x25_sk(sk); - x25_stop_timer(sk); x25->condition = 0x00; x25->va = 0; @@ -331,6 +355,9 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; } case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); x25_disconnect(sk, 0, skb->data[3], skb->data[4]); break; @@ -340,6 +367,12 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* Higher level upcall for a LAPB frame */ diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 037958ff8eed..4acacf3c6617 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -90,6 +90,9 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, break; case X25_DIAGNOSTIC: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4)) + break; + printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 24a342ebc7f5..5170d52bfd96 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -269,7 +269,11 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, int *d, int *m) { struct x25_sock *x25 = x25_sk(sk); - unsigned char *frame = skb->data; + unsigned char *frame; + + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + return X25_ILLEGAL; + frame = skb->data; *ns = *nr = *q = *d = *m = 0; @@ -294,6 +298,10 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, if (frame[2] == X25_RR || frame[2] == X25_RNR || frame[2] == X25_REJ) { + if (!pskb_may_pull(skb, X25_EXT_MIN_LEN)) + return X25_ILLEGAL; + frame = skb->data; + *nr = (frame[3] >> 1) & 0x7F; return frame[2]; } @@ -308,6 +316,10 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, if (x25->neighbour->extended) { if ((frame[2] & 0x01) == X25_DATA) { + if (!pskb_may_pull(skb, X25_EXT_MIN_LEN)) + return X25_ILLEGAL; + frame = skb->data; + *q = (frame[0] & X25_Q_BIT) == X25_Q_BIT; *d = (frame[0] & X25_D_BIT) == X25_D_BIT; *m = (frame[3] & X25_EXT_M_BIT) == X25_EXT_M_BIT; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index f781b9ab8a54..e5246fbe36c4 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -90,7 +90,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) len = dlen; frag->page_offset = 0; - frag->size = len; + skb_frag_size_set(frag, len); memcpy(skb_frag_address(frag), scratch, len); skb->truesize += len; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index b11ea692bd7d..6ca357406ea8 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -203,8 +203,6 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x, if (!replay_esn->replay_window) return 0; - pos = (replay_esn->seq - 1) % replay_esn->replay_window; - if (unlikely(seq == 0)) goto err; @@ -216,19 +214,18 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x, goto err; } - if (pos >= diff) { + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + return 0; err_replay: @@ -259,39 +256,27 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] &= ~(1U << bitnr); } - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } else { nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } + bitnr = (pos + diff) % replay_esn->replay_window; replay_esn->seq = seq; } else { diff = replay_esn->seq - seq; - if (pos >= diff) { + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } } + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } @@ -390,8 +375,6 @@ static int xfrm_replay_check_esn(struct xfrm_state *x, if (!wsize) return 0; - pos = (replay_esn->seq - 1) % replay_esn->replay_window; - if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && (replay_esn->seq < replay_esn->replay_window - 1))) goto err; @@ -415,19 +398,18 @@ static int xfrm_replay_check_esn(struct xfrm_state *x, goto err; } - if (pos >= diff) { + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + return 0; err_replay: @@ -465,22 +447,13 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] &= ~(1U << bitnr); } - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } else { nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } + bitnr = (pos + diff) % replay_esn->replay_window; replay_esn->seq = seq; if (unlikely(wrap > 0)) @@ -488,19 +461,16 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) } else { diff = replay_esn->seq - seq; - if (pos >= diff) { + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } } + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } |