diff options
Diffstat (limited to 'net')
56 files changed, 782 insertions, 288 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 02f43f3e2c56..3e49d28824ed 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,13 +86,16 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { + sk = s->sk; + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); - lock_sock(s->sk); + lock_sock(sk); s->ax25_dev = NULL; - release_sock(s->sk); + ax25_dev_put(ax25_dev); + release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart @@ -355,21 +359,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; - if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) - return -ENODEV; - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); + if (!ax25_dev) + return -ENODEV; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) + ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); + if (!ax25) { + ax25_dev_put(ax25_dev); return -ENOTCONN; + } switch (ax25_ctl.cmd) { case AX25_KILL: @@ -436,6 +444,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: + ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 256fadb94df3..d2a244e1c260 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; + ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev) return; } + refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); @@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) + fwd_dev = ax25_addr_ax25dev(&fwd->port_to); + if (!fwd_dev) { + ax25_dev_put(ax25_dev); return -EINVAL; - if (ax25_dev->forward != NULL) + } + if (ax25_dev->forward) { + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = fwd_dev->dev; + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (ax25_dev->forward == NULL) + if (!ax25_dev->forward) { + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = NULL; + ax25_dev_put(ax25_dev); break; default: + ax25_dev_put(ax25_dev); return -EINVAL; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index d0b2e094bd55..9751207f7757 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev *ax25_dev; int i; - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&route->port_addr); + if (!ax25_dev) + return -EINVAL; + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; @@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) out: write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return err; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 84ba456a78cc..1402d5ca242d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br, !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); - return br_vlan_state_allowed(*state, true); - } else { - return true; + if (!br_vlan_state_allowed(*state, true)) + goto drop; } + return true; } } v = br_vlan_find(vg, *vid); @@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } err = br_vlan_dump_dev(dev, skb, cb, dump_flags); - if (err && err != -EMSGSIZE) + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index eba0efe64d05..fbf858ddec35 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; @@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ecc400a0b7bb..4c6441536d55 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -246,6 +246,7 @@ enum { Opt_cephx_sign_messages, Opt_tcp_nodelay, Opt_abort_on_full, + Opt_rxbounce, }; enum { @@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = { fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), fsparam_enum ("read_from_replica", Opt_read_from_replica, ceph_param_read_from_replica), + fsparam_flag ("rxbounce", Opt_rxbounce), fsparam_enum ("ms_mode", Opt_ms_mode, ceph_param_ms_mode), fsparam_string ("secret", Opt_secret), @@ -584,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_abort_on_full: opt->flags |= CEPH_OPT_ABORT_ON_FULL; break; + case Opt_rxbounce: + opt->flags |= CEPH_OPT_RXBOUNCE; + break; default: BUG(); @@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, seq_puts(m, "notcp_nodelay,"); if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) seq_puts(m, "abort_on_full,"); + if (opt->flags & CEPH_OPT_RXBOUNCE) + seq_puts(m, "rxbounce,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) seq_printf(m, "mount_timeout=%d,", diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 45eba2dcb67a..d3bb656308b4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } + if (con->bounce_page) { + __free_page(con->bounce_page); + con->bounce_page = NULL; + } if (ceph_msgr2(from_msgr(con->msgr))) ceph_con_v2_reset_protocol(con); diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 2cb5ffdf071a..6b014eca3a13 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con, static int read_partial_msg_data(struct ceph_connection *con) { - struct ceph_msg *msg = con->in_msg; - struct ceph_msg_data_cursor *cursor = &msg->cursor; + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; @@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con) u32 crc = 0; int ret; - if (!msg->num_data_items) - return -EIO; - if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { @@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con) return 1; /* must return > 0 to indicate success */ } +static int read_partial_msg_data_bounce(struct ceph_connection *con) +{ + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + struct page *page; + size_t off, len; + u32 crc; + int ret; + + if (unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + crc = con->in_data_crc; + while (cursor->total_resid) { + if (!cursor->resid) { + ceph_msg_data_advance(cursor, 0); + continue; + } + + page = ceph_msg_data_next(cursor, &off, &len, NULL); + ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); + if (ret <= 0) { + con->in_data_crc = crc; + return ret; + } + + crc = crc32c(crc, page_address(con->bounce_page), ret); + memcpy_to_page(page, off, page_address(con->bounce_page), ret); + + ceph_msg_data_advance(cursor, ret); + } + con->in_data_crc = crc; + + return 1; /* must return > 0 to indicate success */ +} + /* * read (part of) a message. */ @@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con) /* (page) data */ if (data_len) { - ret = read_partial_msg_data(con); + if (!m->num_data_items) + return -EIO; + + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) + ret = read_partial_msg_data_bounce(con); + else + ret = read_partial_msg_data(con); if (ret <= 0) return ret; } diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c4099b641b38..c81379f93ad5 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -57,8 +57,9 @@ #define IN_S_HANDLE_CONTROL_REMAINDER 3 #define IN_S_PREPARE_READ_DATA 4 #define IN_S_PREPARE_READ_DATA_CONT 5 -#define IN_S_HANDLE_EPILOGUE 6 -#define IN_S_FINISH_SKIP 7 +#define IN_S_PREPARE_READ_ENC_PAGE 6 +#define IN_S_HANDLE_EPILOGUE 7 +#define IN_S_FINISH_SKIP 8 #define OUT_S_QUEUE_DATA 1 #define OUT_S_QUEUE_DATA_CONT 2 @@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con) padded_len(rem_len) + CEPH_GCM_TAG_LEN); } -static int decrypt_message(struct ceph_connection *con) +static int decrypt_tail(struct ceph_connection *con) { + struct sg_table enc_sgt = {}; struct sg_table sgt = {}; + int tail_len; int ret; + tail_len = tail_onwire_len(con->in_msg, true); + ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages, + con->v2.in_enc_page_cnt, 0, tail_len, + GFP_NOIO); + if (ret) + goto out; + ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf), MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf), con->v2.in_buf, true); if (ret) goto out; - ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl, - tail_onwire_len(con->in_msg, true)); + dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con, + con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents); + ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len); + if (ret) + goto out; + + WARN_ON(!con->v2.in_enc_page_cnt); + ceph_release_page_vector(con->v2.in_enc_pages, + con->v2.in_enc_page_cnt); + con->v2.in_enc_pages = NULL; + con->v2.in_enc_page_cnt = 0; out: sg_free_table(&sgt); + sg_free_table(&enc_sgt); return ret; } @@ -1733,54 +1753,157 @@ static int prepare_read_control_remainder(struct ceph_connection *con) return 0; } -static void prepare_read_data(struct ceph_connection *con) +static int prepare_read_data(struct ceph_connection *con) { struct bio_vec bv; - if (!con_secure(con)) - con->in_data_crc = -1; + con->in_data_crc = -1; ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg, data_len(con->in_msg)); get_bvec_at(&con->v2.in_cursor, &bv); - set_in_bvec(con, &bv); + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + if (unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + set_in_bvec(con, &bv); + } else { + set_in_bvec(con, &bv); + } con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT; + return 0; } static void prepare_read_data_cont(struct ceph_connection *con) { struct bio_vec bv; - if (!con_secure(con)) + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + con->in_data_crc = crc32c(con->in_data_crc, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + + get_bvec_at(&con->v2.in_cursor, &bv); + memcpy_to_page(bv.bv_page, bv.bv_offset, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + } else { con->in_data_crc = ceph_crc32c_page(con->in_data_crc, con->v2.in_bvec.bv_page, con->v2.in_bvec.bv_offset, con->v2.in_bvec.bv_len); + } ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len); if (con->v2.in_cursor.total_resid) { get_bvec_at(&con->v2.in_cursor, &bv); - set_in_bvec(con, &bv); + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + set_in_bvec(con, &bv); + } else { + set_in_bvec(con, &bv); + } WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT); return; } /* - * We've read all data. Prepare to read data padding (if any) - * and epilogue. + * We've read all data. Prepare to read epilogue. */ reset_in_kvecs(con); - if (con_secure(con)) { - if (need_padding(data_len(con->in_msg))) - add_in_kvec(con, DATA_PAD(con->v2.in_buf), - padding_len(data_len(con->in_msg))); - add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN); + add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; +} + +static int prepare_read_tail_plain(struct ceph_connection *con) +{ + struct ceph_msg *msg = con->in_msg; + + if (!front_len(msg) && !middle_len(msg)) { + WARN_ON(!data_len(msg)); + return prepare_read_data(con); + } + + reset_in_kvecs(con); + if (front_len(msg)) { + add_in_kvec(con, msg->front.iov_base, front_len(msg)); + WARN_ON(msg->front.iov_len != front_len(msg)); + } + if (middle_len(msg)) { + add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg)); + WARN_ON(msg->middle->vec.iov_len != middle_len(msg)); + } + + if (data_len(msg)) { + con->v2.in_state = IN_S_PREPARE_READ_DATA; } else { add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; } + return 0; +} + +static void prepare_read_enc_page(struct ceph_connection *con) +{ + struct bio_vec bv; + + dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i, + con->v2.in_enc_resid); + WARN_ON(!con->v2.in_enc_resid); + + bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i]; + bv.bv_offset = 0; + bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE); + + set_in_bvec(con, &bv); + con->v2.in_enc_i++; + con->v2.in_enc_resid -= bv.bv_len; + + if (con->v2.in_enc_resid) { + con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE; + return; + } + + /* + * We are set to read the last piece of ciphertext (ending + * with epilogue) + auth tag. + */ + WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt); con->v2.in_state = IN_S_HANDLE_EPILOGUE; } +static int prepare_read_tail_secure(struct ceph_connection *con) +{ + struct page **enc_pages; + int enc_page_cnt; + int tail_len; + + tail_len = tail_onwire_len(con->in_msg, true); + WARN_ON(!tail_len); + + enc_page_cnt = calc_pages_for(0, tail_len); + enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO); + if (IS_ERR(enc_pages)) + return PTR_ERR(enc_pages); + + WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt); + con->v2.in_enc_pages = enc_pages; + con->v2.in_enc_page_cnt = enc_page_cnt; + con->v2.in_enc_resid = tail_len; + con->v2.in_enc_i = 0; + + prepare_read_enc_page(con); + return 0; +} + static void __finish_skip(struct ceph_connection *con) { con->in_seq++; @@ -2589,47 +2712,26 @@ static int __handle_control(struct ceph_connection *con, void *p) } msg = con->in_msg; /* set in process_message_header() */ - if (!front_len(msg) && !middle_len(msg)) { - if (!data_len(msg)) - return process_message(con); - - prepare_read_data(con); - return 0; - } - - reset_in_kvecs(con); if (front_len(msg)) { WARN_ON(front_len(msg) > msg->front_alloc_len); - add_in_kvec(con, msg->front.iov_base, front_len(msg)); msg->front.iov_len = front_len(msg); - - if (con_secure(con) && need_padding(front_len(msg))) - add_in_kvec(con, FRONT_PAD(con->v2.in_buf), - padding_len(front_len(msg))); } else { msg->front.iov_len = 0; } if (middle_len(msg)) { WARN_ON(middle_len(msg) > msg->middle->alloc_len); - add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg)); msg->middle->vec.iov_len = middle_len(msg); - - if (con_secure(con) && need_padding(middle_len(msg))) - add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf), - padding_len(middle_len(msg))); } else if (msg->middle) { msg->middle->vec.iov_len = 0; } - if (data_len(msg)) { - con->v2.in_state = IN_S_PREPARE_READ_DATA; - } else { - add_in_kvec(con, con->v2.in_buf, - con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN : - CEPH_EPILOGUE_PLAIN_LEN); - con->v2.in_state = IN_S_HANDLE_EPILOGUE; - } - return 0; + if (!front_len(msg) && !middle_len(msg) && !data_len(msg)) + return process_message(con); + + if (con_secure(con)) + return prepare_read_tail_secure(con); + + return prepare_read_tail_plain(con); } static int handle_preamble(struct ceph_connection *con) @@ -2717,7 +2819,7 @@ static int handle_epilogue(struct ceph_connection *con) int ret; if (con_secure(con)) { - ret = decrypt_message(con); + ret = decrypt_tail(con); if (ret) { if (ret == -EBADMSG) con->error_msg = "integrity error, bad epilogue auth tag"; @@ -2785,13 +2887,16 @@ static int populate_in_iter(struct ceph_connection *con) ret = handle_control_remainder(con); break; case IN_S_PREPARE_READ_DATA: - prepare_read_data(con); - ret = 0; + ret = prepare_read_data(con); break; case IN_S_PREPARE_READ_DATA_CONT: prepare_read_data_cont(con); ret = 0; break; + case IN_S_PREPARE_READ_ENC_PAGE: + prepare_read_enc_page(con); + ret = 0; + break; case IN_S_HANDLE_EPILOGUE: ret = handle_epilogue(con); break; @@ -3326,20 +3431,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con) static void revoke_at_prepare_read_data(struct ceph_connection *con) { - int remaining; /* data + [data padding] + epilogue */ + int remaining; int resid; + WARN_ON(con_secure(con)); WARN_ON(!data_len(con->in_msg)); WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); WARN_ON(!resid); - if (con_secure(con)) - remaining = padded_len(data_len(con->in_msg)) + - CEPH_EPILOGUE_SECURE_LEN; - else - remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; - + remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; dout("%s con %p resid %d remaining %d\n", __func__, con, resid, remaining); con->v2.in_iter.count -= resid; @@ -3350,8 +3451,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con) static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) { int recved, resid; /* current piece of data */ - int remaining; /* [data padding] + epilogue */ + int remaining; + WARN_ON(con_secure(con)); WARN_ON(!data_len(con->in_msg)); WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); @@ -3363,12 +3465,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) ceph_msg_data_advance(&con->v2.in_cursor, recved); WARN_ON(resid > con->v2.in_cursor.total_resid); - if (con_secure(con)) - remaining = padding_len(data_len(con->in_msg)) + - CEPH_EPILOGUE_SECURE_LEN; - else - remaining = CEPH_EPILOGUE_PLAIN_LEN; - + remaining = CEPH_EPILOGUE_PLAIN_LEN; dout("%s con %p total_resid %zu remaining %d\n", __func__, con, con->v2.in_cursor.total_resid, remaining); con->v2.in_iter.count -= resid; @@ -3376,11 +3473,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) con->v2.in_state = IN_S_FINISH_SKIP; } +static void revoke_at_prepare_read_enc_page(struct ceph_connection *con) +{ + int resid; /* current enc page (not necessarily data) */ + + WARN_ON(!con_secure(con)); + WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); + resid = iov_iter_count(&con->v2.in_iter); + WARN_ON(!resid || resid > con->v2.in_bvec.bv_len); + + dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid, + con->v2.in_enc_resid); + con->v2.in_iter.count -= resid; + set_in_skip(con, resid + con->v2.in_enc_resid); + con->v2.in_state = IN_S_FINISH_SKIP; +} + static void revoke_at_handle_epilogue(struct ceph_connection *con) { int resid; - WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); WARN_ON(!resid); @@ -3399,6 +3511,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con) case IN_S_PREPARE_READ_DATA_CONT: revoke_at_prepare_read_data_cont(con); break; + case IN_S_PREPARE_READ_ENC_PAGE: + revoke_at_prepare_read_enc_page(con); + break; case IN_S_HANDLE_EPILOGUE: revoke_at_handle_epilogue(con); break; @@ -3432,6 +3547,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con) clear_out_sign_kvecs(con); free_conn_bufs(con); + if (con->v2.in_enc_pages) { + WARN_ON(!con->v2.in_enc_page_cnt); + ceph_release_page_vector(con->v2.in_enc_pages, + con->v2.in_enc_page_cnt); + con->v2.in_enc_pages = NULL; + con->v2.in_enc_page_cnt = 0; + } if (con->v2.out_enc_pages) { WARN_ON(!con->v2.out_enc_page_cnt); ceph_release_page_vector(con->v2.out_enc_pages, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6c2016f7f3d1..ec0bf737b076 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1133,7 +1133,8 @@ out: neigh_release(neigh); } -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok) { int rc; bool immediate_probe = false; @@ -1154,12 +1155,17 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; - next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), - HZ/100); + if (!immediate_ok) { + next = now + 1; + } else { + immediate_probe = true; + next = now + max(NEIGH_VAR(neigh->parms, + RETRANS_TIME), + HZ / 100); + } neigh_add_timer(neigh, next); - immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -1571,7 +1577,7 @@ static void neigh_managed_work(struct work_struct *work) write_lock_bh(&tbl->lock); list_for_each_entry(neigh, &tbl->managed_list, managed_list) - neigh_event_send(neigh, NULL); + neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); write_unlock_bh(&tbl->lock); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a4..88cc0ad7d386 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e476403231f0..710da8a36729 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3275,8 +3275,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3314,6 +3314,8 @@ replay: else dev = NULL; + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 277124f206e0..e0b072aecf0f 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 57c1d8431386..139cec29ed06 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { @@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, /* Everything is OK. Generate! */ ip_fraglist_init(skb, iph, hlen, &iter); - if (iter.frag) - ip_options_fragment(iter.frag); - for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) { + bool first_frag = (iter.offset == 0); + IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); + if (first_frag && IPCB(skb)->opt.optlen) { + /* ipcb->opt is not populated for frags + * coming from __ip_make_skb(), + * ip_options_fragment() needs optlen + */ + IPCB(iter.frag)->opt.optlen = + IPCB(skb)->opt.optlen; + ip_options_fragment(iter.frag); + ip_send_check(iter.iph); + } } skb->tstamp = tstamp; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 67087f95579f..aab384126f61 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,10 +58,6 @@ config NF_TABLES_ARP endif # NF_TABLES -config NF_FLOW_TABLE_IPV4 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 0e56df3a45e2..bcf7bc71cb56 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -220,7 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != inet_sdif(skb)) continue; sock_hold(sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9eb5fc247868..9f97b9cbf7b3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b75836db19b..bdf108f544a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); + sk_defer_free_flush(sk); if (spliced) return spliced; @@ -1321,10 +1322,13 @@ new_segment: /* skb changing from pure zc to mixed, must charge zc */ if (unlikely(skb_zcopy_pure(skb))) { - if (!sk_wmem_schedule(sk, skb->data_len)) + u32 extra = skb->truesize - + SKB_TRUESIZE(skb_end_offset(skb)); + + if (!sk_wmem_schedule(sk, extra)) goto wait_for_space; - sk_mem_charge(sk, skb->data_len); + sk_mem_charge(sk, extra); skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc49a3d551eb..bfe4112e000c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, (mss != tcp_skb_seglen(skb))) goto out; + if (!tcp_skb_can_collapse(prev, skb)) + goto out; len = skb->len; pcount = tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b53476e78c84..fec656f5a39e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2095,7 +2095,7 @@ process: nf_reset_ct(skb); if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_TCP_FILTER; + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; } th = (const struct tcphdr *)skb->data; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3eee17790a82..f927c199a93c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); - int create = 0; + int create = 0, update_lft = 0; if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2633,19 +2633,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, unsigned long now; u32 stored_lft; - /* Update lifetime (RFC4862 5.5.3 e) - * We deviate from RFC4862 by honoring all Valid Lifetimes to - * improve the reaction of SLAAC to renumbering events - * (draft-gont-6man-slaac-renum-06, Section 4.2) - */ + /* update lifetime (RFC2462 5.5.3 e) */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; + } + + if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 463c37dea449..413f66781e50 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fe786df4f849..97ade833f58c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 97d3d1b36dbc..0ba62f4868f9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -47,10 +47,6 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES -config NF_FLOW_TABLE_IPV6 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b85383606df7..b8d6dc9aeeb6 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o -# flow table support -obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o - # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ /dev/null diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e6de94203c13..f4884cda13b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 75af1f701e1d..782b1d452269 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +static struct mptcp_pm_addr_entry * +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, + bool lookup_by_id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) || + (lookup_by_id && entry->addr.id == info->id)) + return entry; + } + return NULL; +} + static int lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) { @@ -777,7 +791,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, removed = true; __MPTCP_INC_STATS(sock_net(sk), rm_type); } - __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap); + __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); if (!removed) continue; @@ -1763,18 +1777,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) || - (lookup_by_id && entry->addr.id == addr.addr.id)) { - mptcp_nl_addr_backup(net, &entry->addr, bkup); - - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; - } + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; } + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_addr_backup(net, &addr.addr, bkup); return 0; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0e6b42c76ea0..85317ce38e3f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ - char reset_start[0]; + struct_group(reset, unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 local_key; @@ -458,7 +458,7 @@ struct mptcp_subflow_context { long delegated_status; - char reset_end[0]; + ); struct list_head delegated_node; /* link into delegated_action, protected by local BH */ @@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) static inline void mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { - memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start); + memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 894a325d39f2..d6aa5b47031e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1924,15 +1924,17 @@ repeat: pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_ct_put(ct); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 7f19ee259609..55415f011943 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -20,13 +20,14 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#define HELPER_NAME "netbios-ns" #define NMBD_PORT 137 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); -MODULE_ALIAS_NFCT_HELPER("netbios_ns"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); @@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper helper __read_mostly = { - .name = "netbios-ns", + .name = HELPER_NAME, .tuple.src.l3num = NFPROTO_IPV4, .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 77938b1042f3..5fa16990da95 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) prule = (struct nft_rule_dp *)ptr; prule->is_last = 1; - ptr += offsetof(struct nft_rule_dp, data); /* blob size does not include the trailer rule */ } @@ -8264,14 +8263,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha void *data, *data_boundary; struct nft_rule_dp *prule; struct nft_rule *rule; - int i; /* already handled or inactive chain? */ if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; rule = list_entry(&chain->rules, struct nft_rule, list); - i = 0; data_size = 0; list_for_each_entry_continue(rule, &chain->rules, list) { @@ -8301,7 +8298,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; size = 0; - track.last = last; + track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 9d5947ab8d4e..e646e9ee4a98 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -167,12 +167,24 @@ nla_put_failure: return -1; } +static bool nft_byteorder_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + + track->regs[priv->dreg].selector = NULL; + track->regs[priv->dreg].bitwise = NULL; + + return false; +} + static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, + .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 7d00a1452b1d..3362417ebfdb 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, { bool invert = false; u32 flags, limit; + int err; if (!tb[NFTA_CONNLIMIT_COUNT]) return -EINVAL; @@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, priv->limit = limit; priv->invert = invert; - return nf_ct_netns_get(ctx->net, ctx->family); + err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + goto err_netns; + + return 0; +err_netns: + kfree(priv->list); + + return err; } static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 518d96c8c247..5adf8bb628a8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); if (likely(refcount_read(&ct->ct_general.use) == 1)) { + refcount_inc(&ct->ct_general.use); nf_ct_zone_add(ct, &zone); } else { - /* previous skb got queued to userspace */ + /* previous skb got queued to userspace, allocate temporary + * one until percpu template can be reused. + */ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); if (!ct) { regs->verdict.code = NF_DROP; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5bd409ab4cc2..ab87f22cc7ec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); @@ -1788,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -3353,6 +3357,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; @@ -3932,7 +3937,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6be2672a65ea..df864e692267 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 10f2bf2e9068..a45c83f22236 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -468,7 +468,7 @@ done: if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d4e27c679123..5f0f346b576f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1976,6 +1976,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; flags = 0; if (prio == 0) { @@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cb496c84878..179825a3b2fd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9267922ea9c3..23a9d6242429 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + if (q->offload) { + /* Options not supported by the offload. */ + if (hopt->rate.overhead || hopt->ceil.overhead) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); + goto failure; + } + if (hopt->rate.mpu || hopt->ceil.mpu) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); + goto failure; + } + if (hopt->quantum) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); + goto failure; + } + if (hopt->prio) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); + goto failure; + } + } + /* Keeping backward compatible with rate_table based iproute2 tc */ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 961854e56736..8c89d0b0ca18 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -566,11 +566,114 @@ static void smc_stat_fallback(struct smc_sock *smc) mutex_unlock(&net->smc.mutex_fback_rsn); } -static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +/* must be called under rcu read lock */ +static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key) { - wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); - wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); - unsigned long flags; + struct socket_wq *wq; + __poll_t flags; + + wq = rcu_dereference(smc->sk.sk_wq); + if (!skwq_has_sleeper(wq)) + return; + + /* wake up smc sk->sk_wq */ + if (!key) { + /* sk_state_change */ + wake_up_interruptible_all(&wq->wait); + } else { + flags = key_to_poll(key); + if (flags & (EPOLLIN | EPOLLOUT)) + /* sk_data_ready or sk_write_space */ + wake_up_interruptible_sync_poll(&wq->wait, flags); + else if (flags & EPOLLERR) + /* sk_error_report */ + wake_up_interruptible_poll(&wq->wait, flags); + } +} + +static int smc_fback_mark_woken(wait_queue_entry_t *wait, + unsigned int mode, int sync, void *key) +{ + struct smc_mark_woken *mark = + container_of(wait, struct smc_mark_woken, wait_entry); + + mark->woken = true; + mark->key = key; + return 0; +} + +static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk, + void (*clcsock_callback)(struct sock *sk)) +{ + struct smc_mark_woken mark = { .woken = false }; + struct socket_wq *wq; + + init_waitqueue_func_entry(&mark.wait_entry, + smc_fback_mark_woken); + rcu_read_lock(); + wq = rcu_dereference(clcsk->sk_wq); + if (!wq) + goto out; + add_wait_queue(sk_sleep(clcsk), &mark.wait_entry); + clcsock_callback(clcsk); + remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry); + + if (mark.woken) + smc_fback_wakeup_waitqueue(smc, mark.key); +out: + rcu_read_unlock(); +} + +static void smc_fback_state_change(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); +} + +static void smc_fback_data_ready(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); +} + +static void smc_fback_write_space(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); +} + +static void smc_fback_error_report(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); +} + +static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +{ + struct sock *clcsk; + + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } + clcsk = smc->clcsock->sk; smc->use_fallback = true; smc->fallback_rsn = reason_code; @@ -582,22 +685,40 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; - /* There may be some entries remaining in - * smc socket->wq, which should be removed - * to clcsocket->wq during the fallback. + /* There might be some wait entries remaining + * in smc sk->sk_wq and they should be woken up + * as clcsock's wait queue is woken up. */ - spin_lock_irqsave(&smc_wait->lock, flags); - spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); - list_splice_init(&smc_wait->head, &clc_wait->head); - spin_unlock(&clc_wait->lock); - spin_unlock_irqrestore(&smc_wait->lock, flags); + smc->clcsk_state_change = clcsk->sk_state_change; + smc->clcsk_data_ready = clcsk->sk_data_ready; + smc->clcsk_write_space = clcsk->sk_write_space; + smc->clcsk_error_report = clcsk->sk_error_report; + + clcsk->sk_state_change = smc_fback_state_change; + clcsk->sk_data_ready = smc_fback_data_ready; + clcsk->sk_write_space = smc_fback_write_space; + clcsk->sk_error_report = smc_fback_error_report; + + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); } + mutex_unlock(&smc->clcsock_release_lock); + return 0; } /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc_switch_to_fallback(smc, reason_code); + struct net *net = sock_net(&smc->sk); + int rc = 0; + + rc = smc_switch_to_fallback(smc, reason_code); + if (rc) { /* fallback fails */ + this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt); + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ + return rc; + } smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) @@ -1518,11 +1639,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, { /* RDMA setup failed, switch back to TCP */ smc_conn_abort(new_smc, local_first); - if (reason_code < 0) { /* error, no fallback possible */ + if (reason_code < 0 || + smc_switch_to_fallback(new_smc, reason_code)) { + /* error, no fallback possible */ smc_listen_out_err(new_smc); return; } - smc_switch_to_fallback(new_smc, reason_code); if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); @@ -1964,8 +2086,11 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); - smc_listen_out_connected(new_smc); + rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); + if (rc) + smc_listen_out_err(new_smc); + else + smc_listen_out_connected(new_smc); return; } @@ -2094,10 +2219,9 @@ out: static void smc_clcsock_data_ready(struct sock *listen_clcsock) { - struct smc_sock *lsmc; + struct smc_sock *lsmc = + smc_clcsock_user_data(listen_clcsock); - lsmc = (struct smc_sock *) - ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); if (!lsmc) return; lsmc->clcsk_data_ready(listen_clcsock); @@ -2254,7 +2378,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + if (rc) + goto out; } else { rc = -EINVAL; goto out; @@ -2447,6 +2573,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* generic setsockopts reaching us here always apply to the * CLC socket */ + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } if (unlikely(!smc->clcsock->ops->setsockopt)) rc = -EOPNOTSUPP; else @@ -2456,6 +2587,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_err = smc->clcsock->sk->sk_err; sk_error_report(sk); } + mutex_unlock(&smc->clcsock_release_lock); if (optlen < sizeof(int)) return -EINVAL; @@ -2472,7 +2604,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); } else { rc = -EINVAL; } @@ -2515,13 +2647,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct smc_sock *smc; + int rc; smc = smc_sk(sock->sk); + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } /* socket options apply to the CLC socket */ - if (unlikely(!smc->clcsock->ops->getsockopt)) + if (unlikely(!smc->clcsock->ops->getsockopt)) { + mutex_unlock(&smc->clcsock_release_lock); return -EOPNOTSUPP; - return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, - optval, optlen); + } + rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, + optval, optlen); + mutex_unlock(&smc->clcsock_release_lock); + return rc; } static int smc_ioctl(struct socket *sock, unsigned int cmd, diff --git a/net/smc/smc.h b/net/smc/smc.h index 3d0b8e300deb..37b2001a0255 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -139,6 +139,12 @@ enum smc_urg_state { SMC_URG_READ = 3, /* data was already read */ }; +struct smc_mark_woken { + bool woken; + void *key; + wait_queue_entry_t wait_entry; +}; + struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ @@ -228,8 +234,14 @@ struct smc_connection { struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ + void (*clcsk_state_change)(struct sock *sk); + /* original stat_change fct. */ void (*clcsk_data_ready)(struct sock *sk); - /* original data_ready fct. **/ + /* original data_ready fct. */ + void (*clcsk_write_space)(struct sock *sk); + /* original write_space fct. */ + void (*clcsk_error_report)(struct sock *sk); + /* original error_report fct. */ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ @@ -264,6 +276,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk) +{ + return (struct smc_sock *) + ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); +} + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index b8898c787d23..1fca2f90a9c7 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; - struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net); struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = link->ibport, .lnk[0].link_id = link->link_id, - .lnk[0].net_cookie = net->net_cookie, }; memcpy(linfo.lnk[0].ibname, diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index fe97f3106536..4a4082bb22ad 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, if (ret) return ret; - if (!ret) { - *buf_in = buf; - *body_size = toksize; - } + *buf_in = buf; + *body_size = toksize; return ret; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a312ea2bc440..c83fe618767c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2900,7 +2900,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, unsigned long connect_timeout; unsigned long reconnect_timeout; unsigned char resvport, reuseport; - int ret = 0; + int ret = 0, ident; rcu_read_lock(); xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); @@ -2914,8 +2914,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, reuseport = xprt->reuseport; connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; + ident = xprt->xprt_class->ident; rcu_read_unlock(); + if (!xprtargs->ident) + xprtargs->ident = ident; xprt = xprt_create_transport(xprtargs); if (IS_ERR(xprt)) { ret = PTR_ERR(xprt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ee5336d73fdd..35588f0afa86 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 2766dd21935b..05c758da6a92 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -115,11 +115,14 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, } sock = container_of(xprt, struct sock_xprt, xprt); - if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) + mutex_lock(&sock->recv_mutex); + if (sock->sock == NULL || + kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) goto out; ret = sprintf(buf, "%pISc\n", &saddr); out: + mutex_unlock(&sock->recv_mutex); xprt_put(xprt); return ret + 1; } @@ -295,8 +298,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, online = 1; else if (!strncmp(buf, "remove", 6)) remove = 1; - else - return -EINVAL; + else { + count = -EINVAL; + goto out_put; + } if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { count = -EINTR; @@ -307,25 +312,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, goto release_tasks; } if (offline) { - set_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive--; - spin_unlock(&xps->xps_lock); + if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive--; + spin_unlock(&xps->xps_lock); + } } else if (online) { - clear_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive++; - spin_unlock(&xps->xps_lock); + if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive++; + spin_unlock(&xps->xps_lock); + } } else if (remove) { if (test_bit(XPRT_OFFLINE, &xprt->state)) { - set_bit(XPRT_REMOVE, &xprt->state); - xprt_force_disconnect(xprt); - if (test_bit(XPRT_CONNECTED, &xprt->state)) { - if (!xprt->sending.qlen && - !xprt->pending.qlen && - !xprt->backlog.qlen && - !atomic_long_read(&xprt->queuelen)) - rpc_xprt_switch_remove_xprt(xps, xprt); + if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) { + xprt_force_disconnect(xprt); + if (test_bit(XPRT_CONNECTED, &xprt->state)) { + if (!xprt->sending.qlen && + !xprt->pending.qlen && + !xprt->backlog.qlen && + !atomic_long_read(&xprt->queuelen)) + rpc_xprt_switch_remove_xprt(xps, xprt); + } } } else { count = -EINVAL; @@ -422,6 +430,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_change_state.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); @@ -430,6 +439,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, @@ -439,14 +449,14 @@ static struct kobj_type rpc_sysfs_client_type = { static struct kobj_type rpc_sysfs_xprt_switch_type = { .release = rpc_sysfs_xprt_switch_release, - .default_attrs = rpc_sysfs_xprt_switch_attrs, + .default_groups = rpc_sysfs_xprt_switch_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_switch_namespace, }; static struct kobj_type rpc_sysfs_xprt_type = { .release = rpc_sysfs_xprt_release, - .default_attrs = rpc_sysfs_xprt_attrs, + .default_groups = rpc_sysfs_xprt_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_namespace, }; diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 17f174d6ea3b..faba7136dd9a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -13,10 +13,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - #undef RPCRDMA_BACKCHANNEL_DEBUG /** diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ff699307e820..515dd7a66a04 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -45,10 +45,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - static void frwr_cid_init(struct rpcrdma_ep *ep, struct rpcrdma_mr *mr) { diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8035a983c8ce..281ddb87ac8d 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -54,10 +54,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* Returns size of largest RPC-over-RDMA header in a Call message * * The largest Call header contains a full-size Read list and a diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 16e5696314a4..42e375dbdadb 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -60,10 +60,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* * tunables */ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3d3673ba9e1e..7b5fce2faa10 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -63,17 +63,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -/* - * Globals/Macros - */ - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -/* - * internal functions - */ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, @@ -274,8 +263,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ENETUNREACH; goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: - dprintk("rpcrdma: connection to %pISpc rejected: %s\n", - sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) ep->re_connect_status = -ENOTCONN; @@ -291,8 +278,6 @@ disconnected: break; } - dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, - ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } @@ -419,14 +404,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_attr.qp_type = IB_QPT_RC; ep->re_attr.port_num = ~0; - dprintk("RPC: %s: requested max: dtos: send %d recv %d; " - "iovs: send %d recv %d\n", - __func__, - ep->re_attr.cap.max_send_wr, - ep->re_attr.cap.max_recv_wr, - ep->re_attr.cap.max_send_sge, - ep->re_attr.cap.max_recv_sge); - ep->re_send_batch = ep->re_max_requests >> 3; ep->re_send_count = ep->re_send_batch; init_waitqueue_head(&ep->re_connect_wait); @@ -436,6 +413,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.send_cq)) { rc = PTR_ERR(ep->re_attr.send_cq); + ep->re_attr.send_cq = NULL; goto out_destroy; } @@ -444,6 +422,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.recv_cq)) { rc = PTR_ERR(ep->re_attr.recv_cq); + ep->re_attr.recv_cq = NULL; goto out_destroy; } ep->re_receive_count = 0; @@ -482,6 +461,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_pd = ib_alloc_pd(device, 0); if (IS_ERR(ep->re_pd)) { rc = PTR_ERR(ep->re_pd); + ep->re_pd = NULL; goto out_destroy; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d8ee06a9650a..0f39e08ee580 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1641,7 +1641,12 @@ static int xs_get_srcport(struct sock_xprt *transport) unsigned short get_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); - return xs_sock_getport(sock->sock); + unsigned short ret = 0; + mutex_lock(&sock->recv_mutex); + if (sock->sock) + ret = xs_sock_getport(sock->sock); + mutex_unlock(&sock->recv_mutex); + return ret; } EXPORT_SYMBOL(get_srcport); @@ -1910,7 +1915,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; - if (RPC_IS_ASYNC(task)) { + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the * filesystem namespace of the process making the rpc diff --git a/net/tipc/link.c b/net/tipc/link.c index 8d9e09f48f4c..1e14d7f8f28f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct tipc_gap_ack_blks *ga = NULL; bool reply = msg_probe(hdr), retransmitted = false; - u16 dlen = msg_data_sz(hdr), glen = 0; + u32 dlen = msg_data_sz(hdr), glen = 0; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); @@ -2214,6 +2214,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, void *data; trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; + if (tipc_link_is_blocked(l) || !xmitq) goto exit; @@ -2309,7 +2313,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* Receive Gap ACK blocks from peer if any */ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); - + if(glen > dlen) + break; tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 407619697292..2f4d23238a7e 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, state->probing = false; /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) |