From b6b50a21625bbf59a89b807dd0fc1eb5412aeff3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 9 Jan 2009 15:25:09 -0800 Subject: mac80211: more kernel-doc fixes Fix (delete) more mac80211 kernel-doc: Warning(linux-2.6.28-git13//include/net/mac80211.h:375): Excess struct/union/enum/typedef member 'retry_count' description in 'ieee80211_tx_info' Warning(linux-2.6.28-git13//net/mac80211/sta_info.h:308): Excess struct/union/enum/typedef member 'last_txrate' description in 'sta_info' Signed-off-by: Randy Dunlap Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index dc2606d0ae77..e49a5b99cf10 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -195,7 +195,6 @@ struct sta_ampdu_mlme { * @tx_packets: number of RX/TX MSDUs * @tx_bytes: number of bytes transmitted to this STA * @tx_fragments: number of transmitted MPDUs - * @last_txrate: description of the last used transmit rate * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers -- cgit From a92a3ce72483d7f0902dff8a3be8cdcee215a37c Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 7 Jan 2009 17:43:33 -0800 Subject: cfg80211: make handle_band() and handle_channel() wiphy specific This allows us to make more wiphy specific judgements when handling the channels later on. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4f877535e666..af805b0e1579 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -778,13 +778,22 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth, return !max_bandwidth; } -static void handle_channel(struct ieee80211_channel *chan) +static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, + unsigned int chan_idx) { int r; - u32 flags = chan->orig_flags; + u32 flags; u32 max_bandwidth = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + + sband = wiphy->bands[band]; + BUG_ON(chan_idx >= sband->n_channels); + chan = &sband->channels[chan_idx]; + + flags = chan->orig_flags; r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), &max_bandwidth, ®_rule); @@ -808,12 +817,16 @@ static void handle_channel(struct ieee80211_channel *chan) chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct ieee80211_supported_band *sband) +static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) { - int i; + unsigned int i; + struct ieee80211_supported_band *sband; + + BUG_ON(!wiphy->bands[band]); + sband = wiphy->bands[band]; for (i = 0; i < sband->n_channels; i++) - handle_channel(&sband->channels[i]); + handle_channel(wiphy, band, i); } static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) @@ -840,7 +853,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) enum ieee80211_band band; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy->bands[band]); + handle_band(wiphy, band); if (wiphy->reg_notifier) wiphy->reg_notifier(wiphy, setby); } -- cgit From 0c7dc45d21de6ae212b5ccb7cdff5beff795ccf0 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 7 Jan 2009 17:43:36 -0800 Subject: cfg80211: Fix regression with 11d on bands This fixes a regression on disallowing bands introduced with the new 802.11d support. The issue is that IEEE-802.11 allows APs to send a subset of what a country regulatory domain defines. This was clarified in this document: http://tinyurl.com/11d-clarification As such it is possible, and this is what is done in practice, that a single band 2.4 GHz AP will only send 2.4 GHz band regulatory information through the 802.11 country information element and then the current intersection with what CRDA provided yields a regulatory domain with no 5 GHz information -- even though that country may actually allow 5 GHz operation. We correct this by only applying the intersection rules on a channel if the the intersection yields a regulatory rule on the same band the channel is on. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index af805b0e1579..5f6d20d98eeb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -421,6 +421,31 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, return 0; } +/** + * freq_in_rule_band - tells us if a frequency is in a frequency band + * @freq_range: frequency rule we want to query + * @freq_khz: frequency we are inquiring about + * + * This lets us know if a specific frequency rule is or is not relevant to + * a specific frequency's band. Bands are device specific and artificial + * definitions (the "2.4 GHz band" and the "5 GHz band"), however it is + * safe for now to assume that a frequency rule should not be part of a + * frequency's band if the start freq or end freq are off by more than 2 GHz. + * This resolution can be lowered and should be considered as we add + * regulatory rule support for other "bands". + **/ +static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, + u32 freq_khz) +{ +#define ONE_GHZ_IN_KHZ 1000000 + if (abs(freq_khz - freq_range->start_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + return true; + if (abs(freq_khz - freq_range->end_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + return true; + return false; +#undef ONE_GHZ_IN_KHZ +} + /* Converts a country IE to a regulatory domain. A regulatory domain * structure has a lot of information which the IE doesn't yet have, * so for the other values we use upper max values as we will intersect @@ -748,12 +773,23 @@ static u32 map_regdom_flags(u32 rd_flags) * this value to the maximum allowed bandwidth. * @reg_rule: the regulatory rule which we have for this frequency * - * Use this function to get the regulatory rule for a specific frequency. + * Use this function to get the regulatory rule for a specific frequency on + * a given wireless device. If the device has a specific regulatory domain + * it wants to follow we respect that unless a country IE has been received + * and processed already. + * + * Returns 0 if it was able to find a valid regulatory rule which does + * apply to the given center_freq otherwise it returns non-zero. It will + * also return -ERANGE if we determine the given center_freq does not even have + * a regulatory rule for a frequency range in the center_freq's band. See + * freq_in_rule_band() for our current definition of a band -- this is purely + * subjective and right now its 802.11 specific. */ static int freq_reg_info(u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { int i; + bool band_rule_found = false; u32 max_bandwidth = 0; if (!cfg80211_regdomain) @@ -767,7 +803,15 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth, rr = &cfg80211_regdomain->reg_rules[i]; fr = &rr->freq_range; pr = &rr->power_rule; + + /* We only need to know if one frequency rule was + * was in center_freq's band, that's enough, so lets + * not overwrite it once found */ + if (!band_rule_found) + band_rule_found = freq_in_rule_band(fr, center_freq); + max_bandwidth = freq_max_bandwidth(fr, center_freq); + if (max_bandwidth && *bandwidth <= max_bandwidth) { *reg_rule = rr; *bandwidth = max_bandwidth; @@ -775,6 +819,9 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth, } } + if (!band_rule_found) + return -ERANGE; + return !max_bandwidth; } @@ -799,8 +846,37 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, &max_bandwidth, ®_rule); if (r) { - flags |= IEEE80211_CHAN_DISABLED; - chan->flags = flags; + /* This means no regulatory rule was found in the country IE + * with a frequency range on the center_freq's band, since + * IEEE-802.11 allows for a country IE to have a subset of the + * regulatory information provided in a country we ignore + * disabling the channel unless at least one reg rule was + * found on the center_freq's band. For details see this + * clarification: + * + * http://tinyurl.com/11d-clarification + */ + if (r == -ERANGE && + last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { +#ifdef CONFIG_CFG80211_REG_DEBUG + printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " + "intact on %s - no rule found in band on " + "Country IE\n", + chan->center_freq, wiphy_name(wiphy)); +#endif + } else { + /* In this case we know the country IE has at least one reg rule + * for the band so we respect its band definitions */ +#ifdef CONFIG_CFG80211_REG_DEBUG + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + printk(KERN_DEBUG "cfg80211: Disabling " + "channel %d MHz on %s due to " + "Country IE\n", + chan->center_freq, wiphy_name(wiphy)); +#endif + flags |= IEEE80211_CHAN_DISABLED; + chan->flags = flags; + } return; } -- cgit From 02e68a3da0fbdb178cdec54b7db48edeefd1691d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 7 Jan 2009 17:43:37 -0800 Subject: cfg80211: Fix parsed country IE info for 5 GHz The country IE number of channels on 5 GHz specifies the number of 5 GHz channels, not the number of sequential channel numbers. For example, if in a country IEs if the first channel given is 36 and the number of channels passed is 4 then the individual channel numbers defined for the 5 GHz PHY by these parameters are: 36, 40, 44, 48 not: 36, 37, 38, 39 See: http://tinyurl.com/11d-clarification Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5f6d20d98eeb..bc494cef2102 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -563,6 +563,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( /* This time around we fill in the rd */ while (country_ie_len >= 3) { + int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = (struct ieee80211_country_ie_triplet *) country_ie; struct ieee80211_reg_rule *reg_rule = NULL; @@ -584,6 +585,23 @@ static struct ieee80211_regdomain *country_ie_2_rd( reg_rule->flags = flags; + /* 2 GHz */ + if (triplet->chans.first_channel <= 14) + end_channel = triplet->chans.first_channel + + triplet->chans.num_channels; + else + /* + * 5 GHz -- For example in country IEs if the first + * channel given is 36 and the number of channels is 4 + * then the individual channel numbers defined for the + * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 + * and not 36, 37, 38, 39. + * + * See: http://tinyurl.com/11d-clarification + */ + end_channel = triplet->chans.first_channel + + (4 * (triplet->chans.num_channels - 1)); + /* The +10 is since the regulatory domain expects * the actual band edge, not the center of freq for * its start and end freqs, assuming 20 MHz bandwidth on @@ -593,8 +611,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( triplet->chans.first_channel) - 10); freq_range->end_freq_khz = MHZ_TO_KHZ(ieee80211_channel_to_frequency( - triplet->chans.first_channel + - triplet->chans.num_channels) + 10); + end_channel) + 10); /* Large arbitrary values, we intersect later */ /* Increment this if we ever support >= 40 MHz channels -- cgit From 24e94de41e76134fad05552588fe01af2cab1494 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 18 Jan 2009 21:32:11 -0800 Subject: net/9p: fid->fid is used uninitialized Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- net/9p/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 821f1ec0b2c3..1eb580c38fbb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -618,7 +618,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) return ERR_PTR(-ENOMEM); ret = p9_idpool_get(clnt->fidpool); - if (fid->fid < 0) { + if (ret < 0) { ret = -ENOSPC; goto error; } -- cgit From 67fd1a731ff1a990d4da7689909317756e50cb4d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Jan 2009 16:26:44 -0800 Subject: net: Add debug info to track down GSO checksum bug I'm trying to track down why people're hitting the checksum warning in skb_gso_segment. As the problem seems to be hitting lots of people and I can't reproduce it or locate the bug, here is a patch to print out more details which hopefully should help us to track this down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8d675975d85b..6e44c3277101 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1534,7 +1534,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + struct net_device *dev = skb->dev; + struct ethtool_drvinfo info = {}; + + if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) + dev->ethtool_ops->get_drvinfo(dev, &info); + + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " + "ip_summed=%d", + info.driver, dev ? dev->features : 0L, + skb->sk ? skb->sk->sk_route_caps : 0L, + skb->len, skb->data_len, skb->ip_summed); + if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); -- cgit From 8b9d3728977760f6bd1317c4420890f73695354e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 19 Jan 2009 17:03:56 -0800 Subject: net: Fix data corruption when splicing from sockets. The trick in socket splicing where we try to convert the skb->data into a page based reference using virt_to_page() does not work so well. The idea is to pass the virt_to_page() reference via the pipe buffer, and refcount the buffer using a SKB reference. But if we are splicing from a socket to a socket (via sendpage) this doesn't work. The from side processing will grab the page (and SKB) references. The sendpage() calls will grab page references only, return, and then the from side processing completes and drops the SKB ref. The page based reference to skb->data is not enough to keep the kmalloc() buffer backing it from being reused. Yet, that is all that the socket send side has at this point. This leads to data corruption if the skb->data buffer is reused by SLAB before the send side socket actually gets the TX packet out to the device. The fix employed here is to simply allocate a page and copy the skb->data bytes into that page. This will hurt performance, but there is no clear way to fix this properly without a copy at the present time, and it is important to get rid of the data corruption. With fixes from Herbert Xu. Tested-by: Willy Tarreau Foreseen-by: Changli Gao Diagnosed-by: Willy Tarreau Reported-by: Willy Tarreau Fixed-by: Jens Axboe Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/skbuff.c | 61 ++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 65eac7739033..56272ac6dfd8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -73,17 +73,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly; static void sock_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct sk_buff *skb = (struct sk_buff *) buf->private; - - kfree_skb(skb); + put_page(buf->page); } static void sock_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct sk_buff *skb = (struct sk_buff *) buf->private; - - skb_get(skb); + get_page(buf->page); } static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -1334,9 +1330,19 @@ fault: */ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) { - struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private; + put_page(spd->pages[i]); +} - kfree_skb(skb); +static inline struct page *linear_to_page(struct page *page, unsigned int len, + unsigned int offset) +{ + struct page *p = alloc_pages(GFP_KERNEL, 0); + + if (!p) + return NULL; + memcpy(page_address(p) + offset, page_address(page) + offset, len); + + return p; } /* @@ -1344,16 +1350,23 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, unsigned int len, unsigned int offset, - struct sk_buff *skb) + struct sk_buff *skb, int linear) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; + if (linear) { + page = linear_to_page(page, len, offset); + if (!page) + return 1; + } else + get_page(page); + spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = len; spd->partial[spd->nr_pages].offset = offset; - spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb); spd->nr_pages++; + return 0; } @@ -1369,7 +1382,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd) + struct splice_pipe_desc *spd, int linear) { if (!*len) return 1; @@ -1392,7 +1405,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, flen, poff, skb)) + if (spd_fill_page(spd, page, flen, poff, skb, linear)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1419,7 +1432,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd)) + offset, len, skb, spd, 1)) return 1; /* @@ -1429,7 +1442,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd)) + offset, len, skb, spd, 0)) return 1; } @@ -1442,7 +1455,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, * the frag list, if such a thing exists. We'd probably need to recurse to * handle that cleanly. */ -int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, +int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { @@ -1455,16 +1468,6 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; - struct sk_buff *skb; - - /* - * I'd love to avoid the clone here, but tcp_read_sock() - * ignores reference counts and unconditonally kills the sk_buff - * on return from the actor. - */ - skb = skb_clone(__skb, GFP_KERNEL); - if (unlikely(!skb)) - return -ENOMEM; /* * __skb_splice_bits() only fails if the output has no room left, @@ -1488,15 +1491,9 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, } done: - /* - * drop our reference to the clone, the pipe consumption will - * drop the rest. - */ - kfree_skb(skb); - if (spd.nr_pages) { + struct sock *sk = skb->sk; int ret; - struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse -- cgit From 66f9a2590aa87dc77cddaeaf46177de76edd2339 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 20 Jan 2009 09:49:51 -0800 Subject: Revert "xfrm: For 32/64 compatability wrt. xfrm_usersa_info" This reverts commit fc8c7dc1b29560c016a67a34ccff32a712b5aa86. As indicated by Jiri Klimes, this won't work. These numbers are not only used the size validation, they are also used to locate attributes sitting after the message. Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7877e7975dae..b95a2d64eb59 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1914,17 +1914,10 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } #endif -/* For the xfrm_usersa_info cases we have to work around some 32-bit vs. - * 64-bit compatability issues. On 32-bit the structure is 220 bytes, but - * for 64-bit it gets padded out to 224 bytes. Those bytes are just - * padding and don't have any content we care about. Therefore as long - * as we have enough bytes for the content we can make both cases work. - */ - #define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { - [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = 220, /* see above */ + [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), @@ -1934,7 +1927,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire), [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire), [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), - [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = 220, /* see above */ + [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, -- cgit From 357f5b0b91054ae23385ea4b0634bb8b43736e83 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 17 Jan 2009 06:47:12 +0000 Subject: NET: net_namespace, fix lock imbalance register_pernet_gen_subsys omits mutex_unlock in one fail path. Fix it. Signed-off-by: Jiri Slaby Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 55cffad2f328..55151faaf90c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -341,8 +341,8 @@ again: rv = register_pernet_operations(first_device, ops); if (rv < 0) ida_remove(&net_generic_ids, *id); - mutex_unlock(&net_mutex); out: + mutex_unlock(&net_mutex); return rv; } EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); -- cgit From ebad18e93fbc6bc63ee734edbc0eb38ac6b919c0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jan 2009 19:46:16 +0000 Subject: gro: Fix handling of complete checksums in IPv6 We need to perform skb_postpull_rcsum after pulling the IPv6 header in order to maintain the correctness of the complete checksum. This patch also adds a missing iph reload after pulling. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 94f74f5b0cbf..c802bc1658a8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -797,6 +797,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int nlen; int flush = 1; int proto; + __wsum csum; if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; @@ -808,6 +809,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, rcu_read_lock(); proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr); + iph = ipv6_hdr(skb); IPV6_GRO_CB(skb)->proto = proto; ops = rcu_dereference(inet6_protos[proto]); if (!ops || !ops->gro_receive) @@ -839,8 +841,13 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(skb)->flush |= flush; + csum = skb->csum; + skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + pp = ops->gro_receive(head, skb); + skb->csum = csum; + out_unlock: rcu_read_unlock(); -- cgit From 9a8e47ffd95608f0768e1a8a0225c822aa53aa9b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jan 2009 19:47:18 +0000 Subject: gro: Fix error handling on extremely short frags When a frag is shorter than an Ethernet header, we'd return a zeroed packet instead of aborting. This patch fixes that. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6e44c3277101..5379b0c1190a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2536,6 +2536,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, if (!pskb_may_pull(skb, ETH_HLEN)) { napi_reuse_skb(napi, skb); + skb = NULL; goto out; } -- cgit From 37fe4732b978eb02e5433387a40f2b61706cebe3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jan 2009 19:48:13 +0000 Subject: gro: Fix merging of paged packets The previous fix to paged packets broke the merging because it reset the skb->len before we added it to the merged packet. This wasn't detected because it simply resulted in the truncation of the packet while the missing bit is subsequently retransmitted. The fix is to store skb->len before we clobber it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 56272ac6dfd8..2e5f2ca3bdcd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2585,8 +2585,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *nskb; unsigned int headroom; unsigned int hlen = p->data - skb_mac_header(p); + unsigned int len = skb->len; - if (hlen + p->len + skb->len >= 65536) + if (hlen + p->len + len >= 65536) return -E2BIG; if (skb_shinfo(p)->frag_list) @@ -2648,9 +2649,9 @@ merge: done: NAPI_GRO_CB(p)->count++; - p->data_len += skb->len; - p->truesize += skb->len; - p->len += skb->len; + p->data_len += len; + p->truesize += len; + p->len += len; NAPI_GRO_CB(skb)->same_flow = 1; return 0; -- cgit From 748085fcbedbf7b0f38d95e178265d7b13360b44 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 21 Jan 2009 12:19:49 -0800 Subject: netfilter: ctnetlink: fix scheduling while atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Caused by call to request_module() while holding nf_conntrack_lock. Reported-and-tested-by: Kövesdi György Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3dddec6d2f7e..c32a7e8e3a1b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -831,13 +831,16 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); nfnl_unlock(); if (request_module("nf-nat-ipv4") < 0) { nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); return -EOPNOTSUPP; } nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); if (nfnetlink_parse_nat_setup_hook) return -EAGAIN; -- cgit From 9098c24f35f7da6c89a83420acf21e3d7b35151d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 22 Jan 2009 11:11:56 +0300 Subject: fs/Kconfig: move sunrpc out Signed-off-by: Alexey Dobriyan --- fs/Kconfig | 80 +----------------------------------------------------- net/sunrpc/Kconfig | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 79 deletions(-) create mode 100644 net/sunrpc/Kconfig (limited to 'net') diff --git a/fs/Kconfig b/fs/Kconfig index acceb6e62bff..1d7c0f6fade4 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -264,85 +264,7 @@ config NFS_COMMON depends on NFSD || NFS_FS default y -config SUNRPC - tristate - -config SUNRPC_GSS - tristate - -config SUNRPC_XPRT_RDMA - tristate - depends on SUNRPC && INFINIBAND && EXPERIMENTAL - default SUNRPC && INFINIBAND - help - This option enables an RPC client transport capability that - allows the NFS client to mount servers via an RDMA-enabled - transport. - - To compile RPC client RDMA transport support as a module, - choose M here: the module will be called xprtrdma. - - If unsure, say N. - -config SUNRPC_REGISTER_V4 - bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - default n - help - Sun added support for registering RPC services at an IPv6 - address by creating two new versions of the rpcbind protocol - (RFC 1833). - - This option enables support in the kernel RPC server for - registering kernel RPC services via version 4 of the rpcbind - protocol. If you enable this option, you must run a portmapper - daemon that supports rpcbind protocol version 4. - - Serving NFS over IPv6 from knfsd (the kernel's NFS server) - requires that you enable this option and use a portmapper that - supports rpcbind version 4. - - If unsure, say N to get traditional behavior (register kernel - RPC services using only rpcbind version 2). Distributions - using the legacy Linux portmapper daemon must say N here. - -config RPCSEC_GSS_KRB5 - tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - select SUNRPC_GSS - select CRYPTO - select CRYPTO_MD5 - select CRYPTO_DES - select CRYPTO_CBC - help - Choose Y here to enable Secure RPC using the Kerberos version 5 - GSS-API mechanism (RFC 1964). - - Secure RPC calls with Kerberos require an auxiliary user-space - daemon which may be found in the Linux nfs-utils package - available from http://linux-nfs.org/. In addition, user-space - Kerberos support should be installed. - - If unsure, say N. - -config RPCSEC_GSS_SPKM3 - tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - select SUNRPC_GSS - select CRYPTO - select CRYPTO_MD5 - select CRYPTO_DES - select CRYPTO_CAST5 - select CRYPTO_CBC - help - Choose Y here to enable Secure RPC using the SPKM3 public key - GSS-API mechansim (RFC 2025). - - Secure RPC calls with SPKM3 require an auxiliary userspace - daemon which may be found in the Linux nfs-utils package - available from http://linux-nfs.org/. - - If unsure, say N. +source "net/sunrpc/Kconfig" config SMB_FS tristate "SMB file system support (OBSOLETE, please use CIFS)" diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig new file mode 100644 index 000000000000..eda4a7aee596 --- /dev/null +++ b/net/sunrpc/Kconfig @@ -0,0 +1,79 @@ +config SUNRPC + tristate + +config SUNRPC_GSS + tristate + +config SUNRPC_XPRT_RDMA + tristate + depends on SUNRPC && INFINIBAND && EXPERIMENTAL + default SUNRPC && INFINIBAND + help + This option enables an RPC client transport capability that + allows the NFS client to mount servers via an RDMA-enabled + transport. + + To compile RPC client RDMA transport support as a module, + choose M here: the module will be called xprtrdma. + + If unsure, say N. + +config SUNRPC_REGISTER_V4 + bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + default n + help + Sun added support for registering RPC services at an IPv6 + address by creating two new versions of the rpcbind protocol + (RFC 1833). + + This option enables support in the kernel RPC server for + registering kernel RPC services via version 4 of the rpcbind + protocol. If you enable this option, you must run a portmapper + daemon that supports rpcbind protocol version 4. + + Serving NFS over IPv6 from knfsd (the kernel's NFS server) + requires that you enable this option and use a portmapper that + supports rpcbind version 4. + + If unsure, say N to get traditional behavior (register kernel + RPC services using only rpcbind version 2). Distributions + using the legacy Linux portmapper daemon must say N here. + +config RPCSEC_GSS_KRB5 + tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES + select CRYPTO_CBC + help + Choose Y here to enable Secure RPC using the Kerberos version 5 + GSS-API mechanism (RFC 1964). + + Secure RPC calls with Kerberos require an auxiliary user-space + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. In addition, user-space + Kerberos support should be installed. + + If unsure, say N. + +config RPCSEC_GSS_SPKM3 + tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES + select CRYPTO_CAST5 + select CRYPTO_CBC + help + Choose Y here to enable Secure RPC using the SPKM3 public key + GSS-API mechansim (RFC 2025). + + Secure RPC calls with SPKM3 require an auxiliary userspace + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. + + If unsure, say N. -- cgit From 5dc306f3bd1d4cfdf79df39221b3036eab1ddcf3 Mon Sep 17 00:00:00 2001 From: Brian Cavagnolo Date: Fri, 16 Jan 2009 19:04:49 -0800 Subject: mac80211: decrement ref count to netdev after launching mesh discovery After launching mesh discovery in tx path, reference count was not being decremented. This was preventing module unload. Signed-off-by: Brian Cavagnolo Signed-off-by: Andrey Yurovsky Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a4af3a124cce..4278e545638f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1307,8 +1307,10 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) if (is_multicast_ether_addr(hdr->addr3)) memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); else - if (mesh_nexthop_lookup(skb, osdata)) - return 0; + if (mesh_nexthop_lookup(skb, osdata)) { + dev_put(odev); + return 0; + } if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, fwded_frames); -- cgit From 391429c18f58ae37cc2e254e408bff847f4beb21 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 18 Jan 2009 02:24:15 +0100 Subject: mac80211: fix slot time debug message wlan0: switched to short barker preamble (BSSID=00:01:aa:bb:cc:dd) wlan0: switched to short slot (BSSID=) should be: wlan0: switched to short barker preamble (BSSID=00:01:aa:bb:cc:dd) wlan0: switched to short slot (BSSID=00:01:aa:bb:cc:dd) Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5ba721b6a399..2b890af01ba4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -620,8 +620,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, if (use_short_slot != bss_conf->use_short_slot) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { - printk(KERN_DEBUG "%s: switched to %s slot" - " (BSSID=%s)\n", + printk(KERN_DEBUG "%s: switched to %s slot time" + " (BSSID=%pM)\n", sdata->dev->name, use_short_slot ? "short" : "long", ifsta->bssid); -- cgit From 6574df9a89f9f7da3a4e5cee7633d430319d3350 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 22 Jan 2009 14:52:43 -0800 Subject: sctp: Correctly start rtx timer on new packet transmissions. Commit 62aeaff5ccd96462b7077046357a6d7886175a57 (sctp: Start T3-RTX timer when fast retransmitting lowest TSN) introduced a regression where it was possible to forcibly restart the sctp retransmit timer at the transmission of any new chunk. This resulted in much longer timeout times and sometimes hung sctp connections. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 247ebc95c1e5..bc411c896216 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -929,7 +929,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } /* Finally, transmit new packets. */ - start_timer = 0; while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. @@ -1028,7 +1027,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport, start_timer-1); + sctp_transport_reset_timers(transport, 0); q->empty = 0; -- cgit From 759af00ebef858015eb68876ac1f383bcb6a1774 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 22 Jan 2009 14:53:01 -0800 Subject: sctp: Properly timestamp outgoing data chunks for rtx purposes Recent changes to the retransmit code exposed a long standing bug where it was possible for a chunk to be time stamped after the retransmit timer was reset. This caused a rare situation where the retrnamist timer has expired, but nothing was marked for retrnasmission because all of timesamps on data were less then 1 rto ago. As result, the timer was never restarted since nothing was retransmitted, and this resulted in a hung association that did couldn't complete the data transfer. The solution is to timestamp the chunk when it's added to the packet for transmission purposes. After the packet is trsnmitted the rtx timer is restarted. This guarantees that when the timer expires, there will be data to retransmit. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index c3f417f7ec6e..73639355157e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -324,14 +324,16 @@ append: switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: retval = sctp_packet_append_data(packet, chunk); + if (SCTP_XMIT_OK != retval) + goto finish; /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; /* Disallow AUTH bundling after DATA */ packet->has_auth = 1; /* Let it be knows that packet has DATA in it */ packet->has_data = 1; - if (SCTP_XMIT_OK != retval) - goto finish; + /* timestamp the chunk for rtx purposes */ + chunk->sent_at = jiffies; break; case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; @@ -470,7 +472,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) } else chunk->resent = 1; - chunk->sent_at = jiffies; has_data = 1; } -- cgit From ae53b5bd77719fed58086c5be60ce4f22bffe1c6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 22 Jan 2009 14:53:23 -0800 Subject: sctp: Fix another socket race during accept/peeloff There is a race between sctp_rcv() and sctp_accept() where we have moved the association from the listening socket to the accepted socket, but sctp_rcv() processing cached the old socket and continues to use it. The easy solution is to check for the socket mismatch once we've grabed the socket lock. If we hit a mis-match, that means that were are currently holding the lock on the listening socket, but the association is refrencing a newly accepted socket. We need to drop the lock on the old socket and grab the lock on the new one. A more proper solution might be to create accepted sockets when the new association is established, similar to TCP. That would eliminate the race for 1-to-1 style sockets, but it would still existing for 1-to-many sockets where a user wished to peeloff an association. For now, we'll live with this easy solution as it addresses the problem. Reported-by: Michal Hocko Reported-by: Karsten Keil Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index bf612d954d41..2e4a8646dbc3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -249,6 +249,19 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); + if (sk != rcvr->sk) { + /* Our cached sk is different from the rcvr->sk. This is + * because migrate()/accept() may have moved the association + * to a new socket and released all the sockets. So now we + * are holding a lock on the old socket while the user may + * be doing something with the new socket. Switch our veiw + * of the current sk. + */ + sctp_bh_unlock_sock(sk); + sk = rcvr->sk; + sctp_bh_lock_sock(sk); + } + if (sock_owned_by_user(sk)) { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); sctp_add_backlog(sk, skb); -- cgit From a8d694c651356ec89452e15b0189c061fb7e1cf1 Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Sun, 25 Jan 2009 20:49:14 -0800 Subject: af_key: initialize xfrm encap_oa Currently encap_oa is left uninitialized, so it contains garbage data which is visible to userland via Netlink. Initialize it by zeroing it out. Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index f8bd8df5e257..7dcbde3ea7d9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1285,6 +1285,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } + memset(&natt->encap_oa, 0, sizeof(natt->encap_oa)); } err = xfrm_init_state(x); -- cgit From d6eb633fe680c18119346a364acff7723245e278 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 26 Jan 2009 12:25:55 -0800 Subject: net: Move config NET_NS to from net/Kconfig to init/Kconfig Make NET_NS available underneath the generic Namespaces config option since all of the other namespace options are there. Signed-off-by: Matt Helsley Acked-by: Serge Hallyn Signed-off-by: David S. Miller --- init/Kconfig | 8 ++++++++ net/Kconfig | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/init/Kconfig b/init/Kconfig index 2af83825634e..1e948825974c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -541,6 +541,14 @@ config PID_NS Unless you want to work with an experimental feature say N here. +config NET_NS + bool "Network namespace" + default n + depends on NAMESPACES && EXPERIMENTAL && NET + help + Allow user space to create what appear to be multiple instances + of the network stack. + config BLK_DEV_INITRD bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" depends on BROKEN || !FRV diff --git a/net/Kconfig b/net/Kconfig index bf2776018f71..cdb8fdef6c4a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -24,14 +24,6 @@ if NET menu "Networking options" -config NET_NS - bool "Network namespace support" - default n - depends on EXPERIMENTAL && NAMESPACES - help - Allow user space to create what appear to be multiple instances - of the network stack. - config COMPAT_NET_DEV_OPS def_bool y -- cgit From 116cb42855fbd052fc8cd2ca3e06050bff762673 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 26 Jan 2009 12:37:53 -0800 Subject: vlan: Export symbols as non GPL symbols. In previous kernels, any kernel module could get access to the 'real-device' and the VLAN-ID for a particular VLAN. In more recent kernels, the code was restructured such that this is hard to do without accessing private .h files for any module that cannot use GPL-only symbols. Attached is a patch to once again allow non-GPL modules the ability to access the real-device and VLAN id for VLANs. Signed-off-by: Ben Greear Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 6c1323940263..e9db889d6222 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -62,13 +62,13 @@ struct net_device *vlan_dev_real_dev(const struct net_device *dev) { return vlan_dev_info(dev)->real_dev; } -EXPORT_SYMBOL_GPL(vlan_dev_real_dev); +EXPORT_SYMBOL(vlan_dev_real_dev); u16 vlan_dev_vlan_id(const struct net_device *dev) { return vlan_dev_info(dev)->vlan_id; } -EXPORT_SYMBOL_GPL(vlan_dev_vlan_id); +EXPORT_SYMBOL(vlan_dev_vlan_id); static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) -- cgit From 98322f22eca889478045cf896b572250d03dc45f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Jan 2009 21:35:35 -0800 Subject: udp: optimize bind(0) if many ports are in use commit 9088c5609584684149f3fb5b065aa7f18dcb03ff (udp: Improve port randomization) introduced a regression for UDP bind() syscall to null port (getting a random port) in case lot of ports are already in use. This is because we do about 28000 scans of very long chains (220 sockets per chain), with many spin_lock_bh()/spin_unlock_bh() calls. Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE) so that we scan chains at most once. Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms Based on a report from Vitaly Mayatskikh Reported-by: Vitaly Mayatskikh Signed-off-by: Eric Dumazet Tested-by: Vitaly Mayatskikh Signed-off-by: David S. Miller --- net/ipv4/udp.c | 55 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab0581eba..b7faffe5c029 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, + unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - sk2->sk_hash == num && + (bitmap || sk2->sk_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - return 1; + (*saddr_comp)(sk, sk2)) { + if (bitmap) + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, + bitmap); + else + return 1; + } return 0; } @@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; unsigned rand; - unsigned short first; + unsigned short first, last; + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rand = net_random(); - snum = first = rand % remaining + low; - rand |= 1; - for (;;) { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + first = (((u64)rand * remaining) >> 32) + low; + /* + * force rand to be an odd multiple of UDP_HTABLE_SIZE + */ + rand = (rand | 1) * UDP_HTABLE_SIZE; + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { + hslot = &udptable->hash[udp_hashfn(net, first)]; + bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) - break; - spin_unlock_bh(&hslot->lock); + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, + saddr_comp); + + snum = first; + /* + * Iterate on all possible values of snum for this hash. + * Using steps of an odd multiple of UDP_HTABLE_SIZE + * give us randomization and full range coverage. + */ do { - snum = snum + rand; - } while (snum < low || snum > high); - if (snum == first) - goto fail; + if (low <= snum && snum <= high && + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + goto found; + snum += rand; + } while (snum != first); + spin_unlock_bh(&hslot->lock); } + goto fail; } else { hslot = &udptable->hash[udp_hashfn(net, snum)]; spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) goto fail_unlock; } +found: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { -- cgit From 9fa5fdf291c9b58b1cb8b4bb2a0ee57efa21d635 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Mon, 26 Jan 2009 22:15:31 -0800 Subject: tcp: Fix length tcp_splice_data_recv passes to skb_splice_bits. tcp_splice_data_recv has two lengths to consider: the len parameter it gets from tcp_read_sock, which specifies the amount of data in the skb, and rd_desc->count, which is the amount of data the splice caller still wants. Currently it passes just the latter to skb_splice_bits, which then splices min(rd_desc->count, skb->len - offset) bytes. Most of the time this is fine, except when the skb contains urgent data. In that case len goes only up to the urgent byte and is less than skb->len - offset. By ignoring len tcp_splice_data_recv may a) splice data tcp_read_sock told it not to, b) return to tcp_read_sock a value > len. Now, tcp_read_sock doesn't handle used > len and leaves the socket in a bad state (both sk_receive_queue and copied_seq are bad at that point) resulting in duplicated data and corruption. Fix by passing min(rd_desc->count, len) to skb_splice_bits. Signed-off-by: Dimitris Michailidis Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0cd71b84e483..76b148bcb0dc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -524,7 +524,8 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, struct tcp_splice_state *tss = rd_desc->arg.data; int ret; - ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags); + ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), + tss->flags); if (ret > 0) rd_desc->count -= ret; return ret; -- cgit From ce0cf6622c9a6f18c2723ea4bef7616799a1ca39 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Nov 2008 16:18:08 -0500 Subject: nfs: note that CONFIG_SUNRPC_XPRT_RDMA turns on server side support too We forgot to update this when adding server-side support. Signed-off-by: J. Bruce Fields --- net/sunrpc/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index eda4a7aee596..dcef600d0bf5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -9,9 +9,8 @@ config SUNRPC_XPRT_RDMA depends on SUNRPC && INFINIBAND && EXPERIMENTAL default SUNRPC && INFINIBAND help - This option enables an RPC client transport capability that - allows the NFS client to mount servers via an RDMA-enabled - transport. + This option allows the NFS client and server to support + an RDMA-enabled transport. To compile RPC client RDMA transport support as a module, choose M here: the module will be called xprtrdma. -- cgit From 6c06a478c9e59d1584a5dc1b2b3519bae5d6546a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 27 Jan 2009 22:30:19 -0800 Subject: net: fix xfrm reverse flow lookup for icmp6 This patch fixes the xfrm reverse flow lookup for icmp6 so that icmp6 packets don't get lost over ipsec tunnels. Similar patch is in RHEL5 kernel for a quite long time and I do not see why it isn't in mainline. Signed-off-by: Jiri Pirko Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4f433847d95f..36dff8807183 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -443,10 +443,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) goto relookup_failed; - if (ip6_dst_lookup(sk, &dst2, &fl)) + if (ip6_dst_lookup(sk, &dst2, &fl2)) goto relookup_failed; - err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); -- cgit From 1d6e55f195128813f96458203a9fa14204f9251e Mon Sep 17 00:00:00 2001 From: Thomas Goff Date: Tue, 27 Jan 2009 22:39:59 -0800 Subject: IPv6: Fix multicast routing bugs. This patch addresses the IPv6 multicast routing issues described below. It was tested with XORP 1.4/1.5 as the IPv6 PIM-SM routing daemon against FreeBSD peers. net/ipv6/ip6_input.c: - Don't try to forward link-local multicast packets. - Don't reset skb2->dev before calling ip6_mr_input() so packets can be identified as coming from the PIM register vif properly. net/ipv6/ip6mr.c: - Fix incoming PIM register messages processing: * The IPv6 pseudo-header should be included when checksumming PIM messages (RFC 4601 section 4.9; RFC 3973 section 4.7.1). * Packets decapsulated from PIM register messages should have skb->protocol ETH_P_IPV6. - Enable/disable IPv6 multicast forwarding on the corresponding interface when a routing daemon adds/removes a multicast virtual interface. - Remove incorrect skb_pull() to fix userspace signaling. - Enable/disable global IPv6 multicast forwarding when an IPv6 multicast routing socket is opened/closed. net/ipv6/route.c: - Don't use strict routing logic for packets decapsulated from PIM register messages (similar to disabling rp_filter for the IPv4 case). Signed-off-by: Thomas Goff Reviewed-by: Fred Templin Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 2 +- net/ipv6/ip6mr.c | 23 ++++++++++++++++++----- net/ipv6/route.c | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 936f48946e20..f171e8dbac91 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -255,6 +255,7 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate @@ -316,7 +317,6 @@ int ip6_mc_input(struct sk_buff *skb) } if (skb2) { - skb2->dev = skb2->dst->dev; ip6_mr_input(skb2); } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 3c51b2d827f4..d19a84b79503 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -365,7 +365,9 @@ static int pim6_rcv(struct sk_buff *skb) pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || - (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && + (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + sizeof(*pim), IPPROTO_PIM, + csum_partial((void *)pim, sizeof(*pim), 0)) && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; @@ -392,7 +394,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->dev = reg_dev; - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); @@ -481,6 +483,7 @@ static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; + struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; @@ -513,6 +516,10 @@ static int mif6_delete(struct net *net, int vifi) dev_set_allmulti(dev, -1); + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding--; + if (v->flags & MIFF_REGISTER) unregister_netdevice(dev); @@ -622,6 +629,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) int vifi = vifc->mif6c_mifi; struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; + struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ @@ -662,6 +670,10 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) return -EINVAL; } + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding++; + /* * Fill in the VIF structures */ @@ -838,8 +850,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, skb->dst = dst_clone(pkt->dst); skb->ip_summed = CHECKSUM_UNNECESSARY; - - skb_pull(skb, sizeof(struct ipv6hdr)); } if (net->ipv6.mroute6_sk == NULL) { @@ -1222,8 +1232,10 @@ static int ip6mr_sk_init(struct sock *sk) rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(net->ipv6.mroute6_sk == NULL)) + if (likely(net->ipv6.mroute6_sk == NULL)) { net->ipv6.mroute6_sk = sk; + net->ipv6.devconf_all->mc_forwarding++; + } else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1242,6 +1254,7 @@ int ip6mr_sk_done(struct sock *sk) if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); net->ipv6.mroute6_sk = NULL; + net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); mroute_clean_tables(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c4a59824ac2c..9c574235c905 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -794,7 +794,7 @@ void ip6_route_input(struct sk_buff *skb) .proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr)) + if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); -- cgit From a4e6db07984529847c6ad8bc616485e721dcb809 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Jan 2009 22:41:03 -0800 Subject: ipv6: Make mc_forwarding sysctl read-only. The kernel manages this value internally, as necessary, as VIFs are added/removed and as multicast routers are registered and deregistered. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e92ad8455c63..f9afb452249c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4250,7 +4250,7 @@ static struct addrconf_sysctl_table .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = proc_dointvec, }, #endif -- cgit From 615aab4b75dfa77b00c372330d6f70edd2458bf9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:46 -0800 Subject: cfg80211: Fix sanity check on 5 GHz when processing country IE This fixes two issues with the sanity check loop when processing the country IE: 1. Do not use frequency for the current subband channel check, this was a big fat typo. 2. Apply the 5 GHz 4-channel steps when considering max channel on each subband as was done with a recent patch. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bc494cef2102..61698095e1ad 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -498,6 +498,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( * calculate the number of reg rules we will need. We will need one * for each channel subband */ while (country_ie_len >= 3) { + int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = (struct ieee80211_country_ie_triplet *) country_ie; int cur_sub_max_channel = 0, cur_channel = 0; @@ -509,9 +510,25 @@ static struct ieee80211_regdomain *country_ie_2_rd( continue; } + /* 2 GHz */ + if (triplet->chans.first_channel <= 14) + end_channel = triplet->chans.first_channel + + triplet->chans.num_channels; + else + /* + * 5 GHz -- For example in country IEs if the first + * channel given is 36 and the number of channels is 4 + * then the individual channel numbers defined for the + * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 + * and not 36, 37, 38, 39. + * + * See: http://tinyurl.com/11d-clarification + */ + end_channel = triplet->chans.first_channel + + (4 * (triplet->chans.num_channels - 1)); + cur_channel = triplet->chans.first_channel; - cur_sub_max_channel = ieee80211_channel_to_frequency( - cur_channel + triplet->chans.num_channels); + cur_sub_max_channel = end_channel; /* Basic sanity check */ if (cur_sub_max_channel < cur_channel) @@ -590,15 +607,6 @@ static struct ieee80211_regdomain *country_ie_2_rd( end_channel = triplet->chans.first_channel + triplet->chans.num_channels; else - /* - * 5 GHz -- For example in country IEs if the first - * channel given is 36 and the number of channels is 4 - * then the individual channel numbers defined for the - * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 - * and not 36, 37, 38, 39. - * - * See: http://tinyurl.com/11d-clarification - */ end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); -- cgit From 667ecd010d870f861a9e276aaaca8cb443ded8b3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:43 -0800 Subject: cfg80211: print correct intersected regulatory domain When CONFIG_CFG80211_REG_DEBUG is enabled and an intersection occurs we are printing the regulatory domain passed by CRDA and indicating its the intersected regulatory domain. Lets fix this and print the intersection as originally intended. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 61698095e1ad..85c9034c59b2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1284,7 +1284,7 @@ static void reg_country_ie_process_debug( if (intersected_rd) { printk(KERN_DEBUG "cfg80211: We intersect both of these " "and get:\n"); - print_regdomain_info(rd); + print_regdomain_info(intersected_rd); return; } printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); -- cgit From 95e3b24cfb4ec0479d2c42f7a1780d68063a542a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 16:07:52 -0800 Subject: net: Fix frag_list handling in skb_seq_read The frag_list handling was broken in skb_seq_read: 1) We didn't add the stepped offset when looking at the head are of fragments other than the first. 2) We didn't take the stepped offset away when setting the data pointer in the head area. 3) The frag index wasn't reset. This patch fixes both issues. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2e5f2ca3bdcd..f23fd43539ed 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2212,10 +2212,10 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, return 0; next_skb: - block_limit = skb_headlen(st->cur_skb); + block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; if (abs_offset < block_limit) { - *data = st->cur_skb->data + abs_offset; + *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } @@ -2257,6 +2257,7 @@ next_skb: } else if (st->root_skb == st->cur_skb && skb_shinfo(st->root_skb)->frag_list) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; + st->frag_idx = 0; goto next_skb; } -- cgit From 71b3346d182355f19509fadb8fe45114a35cc499 Mon Sep 17 00:00:00 2001 From: Shyam Iyer Date: Thu, 29 Jan 2009 16:12:42 -0800 Subject: net: Fix OOPS in skb_seq_read(). It oopsd for me in skb_seq_read. addr2line said it was linux-2.6/net/core/skbuff.c:2228, which is this line: while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { I added some printks in there and it looks like we hit this: } else if (st->root_skb == st->cur_skb && skb_shinfo(st->root_skb)->frag_list) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; } Actually I did some testing and added a few printks and found that the st->cur_skb->data was 0 and hence the ptr used by iscsi_tcp was null. This caused the kernel panic. if (abs_offset < block_limit) { - *data = st->cur_skb->data + abs_offset; + *data = st->cur_skb->data + (abs_offset - st->stepped_offset); I enabled the debug_tcp and with a few printks found that the code did not go to the next_skb label and could find that the sequence being followed was this - It hit this if condition - if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; And so, now the st pointer is shifted to the next skb whereas actually it should have hit the second else if first since the data is in the frag_list. else if (st->root_skb == st->cur_skb && skb_shinfo(st->root_skb)->frag_list) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; goto next_skb; } Reversing the two conditions the attached patch fixes the issue for me on top of Herbert's patches. Signed-off-by: Shyam Iyer Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f23fd43539ed..da74b844f4ea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2250,13 +2250,13 @@ next_skb: st->frag_data = NULL; } - if (st->cur_skb->next) { - st->cur_skb = st->cur_skb->next; + if (st->root_skb == st->cur_skb && + skb_shinfo(st->root_skb)->frag_list) { + st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; - } else if (st->root_skb == st->cur_skb && - skb_shinfo(st->root_skb)->frag_list) { - st->cur_skb = skb_shinfo(st->root_skb)->frag_list; + } else if (st->cur_skb->next) { + st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; } -- cgit From 9d8dba6c979fa99c96938c869611b9a23b73efa9 Mon Sep 17 00:00:00 2001 From: Benjamin Zores Date: Thu, 29 Jan 2009 16:19:13 -0800 Subject: ipv4: fix infinite retry loop in IP-Config Signed-off-by: Benjamin Zores Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42a0f3dd3fd6..d722013c1cae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1268,6 +1268,9 @@ __be32 __init root_nfs_parse_addr(char *name) static int __init ip_auto_config(void) { __be32 addr; +#ifdef IPCONFIG_DYNAMIC + int retries = CONF_OPEN_RETRIES; +#endif #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1304,9 +1307,6 @@ static int __init ip_auto_config(void) #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC - - int retries = CONF_OPEN_RETRIES; - if (ic_dynamic() < 0) { ic_close_devs(); -- cgit From 1af7ad51049d6a310a19d497960597198290ddfa Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 29 Jan 2009 17:18:31 -0800 Subject: wimax: fix build issue when debugfs is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported by Toralf Förster and Randy Dunlap. - http://linuxwimax.org/pipermail/wimax/2009-January/000460.html - http://lkml.org/lkml/2009/1/29/279 The definitions needed for the wimax stack and i2400m driver debug infrastructure was, by mistake, compiled depending on CONFIG_DEBUG_FS (by them being placed in the debugfs.c files); thus the build broke in 2.6.29-rc3 when debugging was enabled (CONFIG_WIMAX_DEBUG) and DEBUG_FS was disabled. These definitions are always needed if debug is enabled at compile time (independently of DEBUG_FS being or not enabled), so moving them to a file that is always compiled fixes the issue. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/debugfs.c | 14 -------------- drivers/net/wimax/i2400m/driver.c | 16 ++++++++++++++++ net/wimax/debugfs.c | 11 ----------- net/wimax/stack.c | 13 +++++++++++++ 4 files changed, 29 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/drivers/net/wimax/i2400m/debugfs.c b/drivers/net/wimax/i2400m/debugfs.c index 626632985977..9b81af3f80a9 100644 --- a/drivers/net/wimax/i2400m/debugfs.c +++ b/drivers/net/wimax/i2400m/debugfs.c @@ -234,20 +234,6 @@ struct dentry *debugfs_create_i2400m_reset( &fops_i2400m_reset); } -/* - * Debug levels control; see debug.h - */ -struct d_level D_LEVEL[] = { - D_SUBMODULE_DEFINE(control), - D_SUBMODULE_DEFINE(driver), - D_SUBMODULE_DEFINE(debugfs), - D_SUBMODULE_DEFINE(fw), - D_SUBMODULE_DEFINE(netdev), - D_SUBMODULE_DEFINE(rfkill), - D_SUBMODULE_DEFINE(rx), - D_SUBMODULE_DEFINE(tx), -}; -size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); #define __debugfs_register(prefix, name, parent) \ do { \ diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c index 5f98047e18cf..e80a0b65a754 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/net/wimax/i2400m/driver.c @@ -707,6 +707,22 @@ void i2400m_release(struct i2400m *i2400m) EXPORT_SYMBOL_GPL(i2400m_release); +/* + * Debug levels control; see debug.h + */ +struct d_level D_LEVEL[] = { + D_SUBMODULE_DEFINE(control), + D_SUBMODULE_DEFINE(driver), + D_SUBMODULE_DEFINE(debugfs), + D_SUBMODULE_DEFINE(fw), + D_SUBMODULE_DEFINE(netdev), + D_SUBMODULE_DEFINE(rfkill), + D_SUBMODULE_DEFINE(rx), + D_SUBMODULE_DEFINE(tx), +}; +size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + + static int __init i2400m_driver_init(void) { diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c index 87cf4430079c..94d216a46407 100644 --- a/net/wimax/debugfs.c +++ b/net/wimax/debugfs.c @@ -28,17 +28,6 @@ #include "debug-levels.h" -/* Debug framework control of debug levels */ -struct d_level D_LEVEL[] = { - D_SUBMODULE_DEFINE(debugfs), - D_SUBMODULE_DEFINE(id_table), - D_SUBMODULE_DEFINE(op_msg), - D_SUBMODULE_DEFINE(op_reset), - D_SUBMODULE_DEFINE(op_rfkill), - D_SUBMODULE_DEFINE(stack), -}; -size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); - #define __debugfs_register(prefix, name, parent) \ do { \ result = d_level_register_debugfs(prefix, name, parent); \ diff --git a/net/wimax/stack.c b/net/wimax/stack.c index d4da92f8981a..3869c0327882 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -516,6 +516,19 @@ void wimax_dev_rm(struct wimax_dev *wimax_dev) } EXPORT_SYMBOL_GPL(wimax_dev_rm); + +/* Debug framework control of debug levels */ +struct d_level D_LEVEL[] = { + D_SUBMODULE_DEFINE(debugfs), + D_SUBMODULE_DEFINE(id_table), + D_SUBMODULE_DEFINE(op_msg), + D_SUBMODULE_DEFINE(op_reset), + D_SUBMODULE_DEFINE(op_rfkill), + D_SUBMODULE_DEFINE(stack), +}; +size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + + struct genl_family wimax_gnl_family = { .id = GENL_ID_GENERATE, .name = "WiMAX", -- cgit From 905db44087855e3c1709f538ecdc22fd149cadd8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Jan 2009 14:12:06 -0800 Subject: packet: Avoid lock_sock in mmap handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the mmap handler gets called under mmap_sem, and we may grab mmap_sem elsewhere under the socket lock to access user data, we should avoid grabbing the socket lock in the mmap handler. Since the only thing we care about in the mmap handler is for pg_vec* to be invariant, i.e., to exclude packet_set_ring, we can achieve this by simply using a new mutex. Signed-off-by: Herbert Xu Tested-by: Martin MOKREJŠ Signed-off-by: David S. Miller --- net/packet/af_packet.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5f94db2f3e9e..9454d4ae46df 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -77,6 +77,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -175,6 +176,7 @@ struct packet_sock { #endif struct packet_type prot_hook; spinlock_t bind_lock; + struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ auxdata:1, origdev:1; @@ -1069,6 +1071,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) */ spin_lock_init(&po->bind_lock); + mutex_init(&po->pg_vec_lock); po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) @@ -1865,6 +1868,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing synchronize_net(); err = -EBUSY; + mutex_lock(&po->pg_vec_lock); if (closing || atomic_read(&po->mapped) == 0) { err = 0; #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) @@ -1886,6 +1890,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (atomic_read(&po->mapped)) printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); } + mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); if (was_running && !po->running) { @@ -1918,7 +1923,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st size = vma->vm_end - vma->vm_start; - lock_sock(sk); + mutex_lock(&po->pg_vec_lock); if (po->pg_vec == NULL) goto out; if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) @@ -1941,7 +1946,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st err = 0; out: - release_sock(sk); + mutex_unlock(&po->pg_vec_lock); return err; } #endif -- cgit From 5d6e430d3bafe743b18dc443189093bf532e91ed Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 31 Jan 2009 00:51:49 -0800 Subject: ipv6: compile fix for ip6mr.c net/ipv6/ip6mr.c: In function 'pim6_rcv': net/ipv6/ip6mr.c:368: error: implicit declaration of function 'csum_ipv6_magic' Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d19a84b79503..228be551e9c1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -48,6 +48,7 @@ #include #include #include +#include /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. -- cgit