aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ipvs-sysctl.txt68
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h58
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c65
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c514
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--include/net/ip_vs.h16
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h4
-rw-r--r--include/net/netfilter/nf_conntrack_timeout.h3
-rw-r--r--include/net/netfilter/nf_tables.h12
-rw-r--r--include/net/netfilter/nft_fib.h2
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h26
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/netfilter/ebt_log.c34
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c6
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c19
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c15
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c57
-rw-r--r--net/netfilter/nf_conntrack_expect.c10
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_tables_api.c49
-rw-r--r--net/netfilter/nfnetlink_acct.c15
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c12
-rw-r--r--net/netfilter/nfnetlink_log.c14
-rw-r--r--net/netfilter/nft_compat.c8
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c171
-rw-r--r--net/netfilter/nft_dynset.c14
-rw-r--r--net/netfilter/nft_exthdr.c13
-rw-r--r--net/netfilter/nft_fib.c16
-rw-r--r--net/netfilter/nft_hash.c133
-rw-r--r--net/netfilter/nft_limit.c10
-rw-r--r--net/netfilter/nft_lookup.c14
-rw-r--r--net/netfilter/nft_masq.c4
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_nat.c4
-rw-r--r--net/netfilter/nft_objref.c14
-rw-r--r--net/netfilter/nft_quota.c3
-rw-r--r--net/netfilter/nft_redir.c4
-rw-r--r--net/netfilter/nft_reject.c5
-rw-r--r--net/netfilter/nft_reject_inet.c6
-rw-r--r--net/netfilter/nft_set_rbtree.c31
-rw-r--r--net/netfilter/xt_limit.c11
59 files changed, 1054 insertions, 500 deletions
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
index e6b1c025fdd8..056898685d40 100644
--- a/Documentation/networking/ipvs-sysctl.txt
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -175,6 +175,14 @@ nat_icmp_send - BOOLEAN
for VS/NAT when the load balancer receives packets from real
servers but the connection entries don't exist.
+pmtu_disc - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ By default, reject with FRAG_NEEDED all DF packets that exceed
+ the PMTU, irrespective of the forwarding method. For TUN method
+ the flag can be disabled to fragment such packets.
+
secure_tcp - INTEGER
0 - disabled (default)
@@ -185,15 +193,59 @@ secure_tcp - INTEGER
The value definition is the same as that of drop_entry and
drop_packet.
-sync_threshold - INTEGER
- default 3
+sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period
+ default 3 50
+
+ It sets synchronization threshold, which is the minimum number
+ of incoming packets that a connection needs to receive before
+ the connection will be synchronized. A connection will be
+ synchronized, every time the number of its incoming packets
+ modulus sync_period equals the threshold. The range of the
+ threshold is from 0 to sync_period.
+
+ When sync_period and sync_refresh_period are 0, send sync only
+ for state changes or only once when pkts matches sync_threshold
+
+sync_refresh_period - UNSIGNED INTEGER
+ default 0
+
+ In seconds, difference in reported connection timer that triggers
+ new sync message. It can be used to avoid sync messages for the
+ specified period (or half of the connection timeout if it is lower)
+ if connection state is not changed since last sync.
+
+ This is useful for normal connections with high traffic to reduce
+ sync rate. Additionally, retry sync_retries times with period of
+ sync_refresh_period/8.
+
+sync_retries - INTEGER
+ default 0
+
+ Defines sync retries with period of sync_refresh_period/8. Useful
+ to protect against loss of sync messages. The range of the
+ sync_retries is from 0 to 3.
+
+sync_qlen_max - UNSIGNED LONG
+
+ Hard limit for queued sync messages that are not sent yet. It
+ defaults to 1/32 of the memory pages but actually represents
+ number of messages. It will protect us from allocating large
+ parts of memory when the sending rate is lower than the queuing
+ rate.
+
+sync_sock_size - INTEGER
+ default 0
+
+ Configuration of SNDBUF (master) or RCVBUF (slave) socket limit.
+ Default value is 0 (preserve system defaults).
+
+sync_ports - INTEGER
+ default 1
- It sets synchronization threshold, which is the minimum number
- of incoming packets that a connection needs to receive before
- the connection will be synchronized. A connection will be
- synchronized, every time the number of its incoming packets
- modulus 50 equals the threshold. The range of the threshold is
- from 0 to 49.
+ The number of threads that master and backup servers can use for
+ sync traffic. Every thread will use single UDP port, thread 0 will
+ use the default port 8848 while last thread will use port
+ 8848+sync_ports-1.
snat_reroute - BOOLEAN
0 - disabled
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index acbc3abe2ddd..dc6e2980718f 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -142,12 +142,24 @@ struct vf_data_storage {
/* Supported Rx Buffer Sizes */
#define IGB_RXBUFFER_256 256
#define IGB_RXBUFFER_2048 2048
+#define IGB_RXBUFFER_3072 3072
#define IGB_RX_HDR_LEN IGB_RXBUFFER_256
-#define IGB_RX_BUFSZ IGB_RXBUFFER_2048
+#define IGB_TS_HDR_LEN 16
+
+#define IGB_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IGB_MAX_FRAME_BUILD_SKB \
+ (SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048) - IGB_SKB_PAD - IGB_TS_HDR_LEN)
+#else
+#define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_2048 - IGB_TS_HDR_LEN)
+#endif
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+#define IGB_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
#define AUTO_ALL_MODES 0
#define IGB_EEPROM_APME 0x0400
@@ -301,12 +313,51 @@ struct igb_q_vector {
};
enum e1000_ring_flags_t {
+ IGB_RING_FLAG_RX_3K_BUFFER,
+ IGB_RING_FLAG_RX_BUILD_SKB_ENABLED,
IGB_RING_FLAG_RX_SCTP_CSUM,
IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
IGB_RING_FLAG_TX_CTX_IDX,
IGB_RING_FLAG_TX_DETECT_HANG
};
+#define ring_uses_large_buffer(ring) \
+ test_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define set_ring_uses_large_buffer(ring) \
+ set_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define clear_ring_uses_large_buffer(ring) \
+ clear_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+
+#define ring_uses_build_skb(ring) \
+ test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define set_ring_build_skb_enabled(ring) \
+ set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define clear_ring_build_skb_enabled(ring) \
+ clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
+static inline unsigned int igb_rx_bufsz(struct igb_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return IGB_RXBUFFER_3072;
+
+ if (ring_uses_build_skb(ring))
+ return IGB_MAX_FRAME_BUILD_SKB + IGB_TS_HDR_LEN;
+#endif
+ return IGB_RXBUFFER_2048;
+}
+
+static inline unsigned int igb_rx_pg_order(struct igb_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return 1;
+#endif
+ return 0;
+}
+
+#define igb_rx_pg_size(_ring) (PAGE_SIZE << igb_rx_pg_order(_ring))
+
#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
#define IGB_RX_DESC(R, i) \
@@ -545,6 +596,7 @@ struct igb_adapter {
#define IGB_FLAG_HAS_MSIX BIT(13)
#define IGB_FLAG_EEE BIT(14)
#define IGB_FLAG_VLAN_PROMISC BIT(15)
+#define IGB_FLAG_RX_LEGACY BIT(16)
/* Media Auto Sense */
#define IGB_MAS_ENABLE_0 0X0001
@@ -558,7 +610,6 @@ struct igb_adapter {
#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */
#define IGB_82576_TSYNC_SHIFT 19
-#define IGB_TS_HDR_LEN 16
enum e1000_state_t {
__IGB_TESTING,
__IGB_RESETTING,
@@ -591,7 +642,6 @@ void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
void igb_setup_tctl(struct igb_adapter *);
void igb_setup_rctl(struct igb_adapter *);
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
-void igb_unmap_and_free_tx_resource(struct igb_ring *, struct igb_tx_buffer *);
void igb_alloc_rx_buffers(struct igb_ring *, u16);
void igb_update_stats(struct igb_adapter *, struct rtnl_link_stats64 *);
bool igb_has_link(struct igb_adapter *adapter);
@@ -604,7 +654,7 @@ void igb_ptp_reset(struct igb_adapter *adapter);
void igb_ptp_suspend(struct igb_adapter *adapter);
void igb_ptp_rx_hang(struct igb_adapter *adapter);
void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va,
+void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
struct sk_buff *skb);
int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 737b664d004c..797b9daba224 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -144,6 +144,13 @@ static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
};
#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
+static const char igb_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IGB_PRIV_FLAGS_LEGACY_RX BIT(0)
+ "legacy-rx",
+};
+
+#define IGB_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igb_priv_flags_strings)
+
static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
{
struct igb_adapter *adapter = netdev_priv(netdev);
@@ -852,6 +859,8 @@ static void igb_get_drvinfo(struct net_device *netdev,
sizeof(drvinfo->fw_version));
strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
sizeof(drvinfo->bus_info));
+
+ drvinfo->n_priv_flags = IGB_PRIV_FLAGS_STR_LEN;
}
static void igb_get_ringparam(struct net_device *netdev,
@@ -1811,14 +1820,14 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
tx_ntc = tx_ring->next_to_clean;
rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
- while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
+ while (rx_desc->wb.upper.length) {
/* check Rx buffer */
rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
/* sync Rx buffer for CPU read */
dma_sync_single_for_cpu(rx_ring->dev,
rx_buffer_info->dma,
- IGB_RX_BUFSZ,
+ size,
DMA_FROM_DEVICE);
/* verify contents of skb */
@@ -1828,12 +1837,21 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
/* sync Rx buffer for device write */
dma_sync_single_for_device(rx_ring->dev,
rx_buffer_info->dma,
- IGB_RX_BUFSZ,
+ size,
DMA_FROM_DEVICE);
/* unmap buffer on Tx side */
tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
- igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer_info->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer_info, dma),
+ dma_unmap_len(tx_buffer_info, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer_info, len, 0);
/* increment Rx/Tx next to clean counters */
rx_ntc++;
@@ -2271,6 +2289,8 @@ static int igb_get_sset_count(struct net_device *netdev, int sset)
return IGB_STATS_LEN;
case ETH_SS_TEST:
return IGB_TEST_LEN;
+ case ETH_SS_PRIV_FLAGS:
+ return IGB_PRIV_FLAGS_STR_LEN;
default:
return -ENOTSUPP;
}
@@ -2376,6 +2396,10 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
}
/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
break;
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(data, igb_priv_flags_strings,
+ IGB_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+ break;
}
}
@@ -3388,6 +3412,37 @@ static int igb_set_channels(struct net_device *netdev,
return 0;
}
+static u32 igb_get_priv_flags(struct net_device *netdev)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ u32 priv_flags = 0;
+
+ if (adapter->flags & IGB_FLAG_RX_LEGACY)
+ priv_flags |= IGB_PRIV_FLAGS_LEGACY_RX;
+
+ return priv_flags;
+}
+
+static int igb_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ unsigned int flags = adapter->flags;
+
+ flags &= ~IGB_FLAG_RX_LEGACY;
+ if (priv_flags & IGB_PRIV_FLAGS_LEGACY_RX)
+ flags |= IGB_FLAG_RX_LEGACY;
+
+ if (flags != adapter->flags) {
+ adapter->flags = flags;
+
+ /* reset interface to repopulate queues */
+ if (netif_running(netdev))
+ igb_reinit_locked(adapter);
+ }
+
+ return 0;
+}
+
static const struct ethtool_ops igb_ethtool_ops = {
.get_settings = igb_get_settings,
.set_settings = igb_set_settings,
@@ -3426,6 +3481,8 @@ static const struct ethtool_ops igb_ethtool_ops = {
.set_rxfh = igb_set_rxfh,
.get_channels = igb_get_channels,
.set_channels = igb_set_channels,
+ .get_priv_flags = igb_get_priv_flags,
+ .set_priv_flags = igb_set_priv_flags,
.begin = igb_ethtool_begin,
.complete = igb_ethtool_complete,
};
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index be456bae8169..26a821fcd220 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -554,7 +554,7 @@ rx_ring_summary:
16, 1,
page_address(buffer_info->page) +
buffer_info->page_offset,
- IGB_RX_BUFSZ, true);
+ igb_rx_bufsz(rx_ring), true);
}
}
}
@@ -3293,7 +3293,7 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
size = sizeof(struct igb_tx_buffer) * tx_ring->count;
- tx_ring->tx_buffer_info = vzalloc(size);
+ tx_ring->tx_buffer_info = vmalloc(size);
if (!tx_ring->tx_buffer_info)
goto err;
@@ -3404,6 +3404,10 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= IGB_TX_HTHRESH << 8;
txdctl |= IGB_TX_WTHRESH << 16;
+ /* reinitialize tx_buffer_info */
+ memset(ring->tx_buffer_info, 0,
+ sizeof(struct igb_tx_buffer) * ring->count);
+
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
}
@@ -3435,7 +3439,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
size = sizeof(struct igb_rx_buffer) * rx_ring->count;
- rx_ring->rx_buffer_info = vzalloc(size);
+ rx_ring->rx_buffer_info = vmalloc(size);
if (!rx_ring->rx_buffer_info)
goto err;
@@ -3720,6 +3724,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
struct igb_ring *ring)
{
struct e1000_hw *hw = &adapter->hw;
+ union e1000_adv_rx_desc *rx_desc;
u64 rdba = ring->dma;
int reg_idx = ring->reg_idx;
u32 srrctl = 0, rxdctl = 0;
@@ -3741,7 +3746,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
/* set descriptor configuration */
srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
if (hw->mac.type >= e1000_82580)
srrctl |= E1000_SRRCTL_TIMESTAMP;
@@ -3758,11 +3766,39 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
rxdctl |= IGB_RX_HTHRESH << 8;
rxdctl |= IGB_RX_WTHRESH << 16;
+ /* initialize rx_buffer_info */
+ memset(ring->rx_buffer_info, 0,
+ sizeof(struct igb_rx_buffer) * ring->count);
+
+ /* initialize Rx descriptor 0 */
+ rx_desc = IGB_RX_DESC(ring, 0);
+ rx_desc->wb.upper.length = 0;
+
/* enable receive descriptor fetching */
rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
wr32(E1000_RXDCTL(reg_idx), rxdctl);
}
+static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
+ struct igb_ring *rx_ring)
+{
+ /* set build_skb and buffer size flags */
+ clear_ring_build_skb_enabled(rx_ring);
+ clear_ring_uses_large_buffer(rx_ring);
+
+ if (adapter->flags & IGB_FLAG_RX_LEGACY)
+ return;
+
+ set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+ if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+ return;
+
+ set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
/**
* igb_configure_rx - Configure receive Unit after Reset
* @adapter: board private structure
@@ -3780,8 +3816,12 @@ static void igb_configure_rx(struct igb_adapter *adapter)
/* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring
*/
- for (i = 0; i < adapter->num_rx_queues; i++)
- igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igb_ring *rx_ring = adapter->rx_ring[i];
+
+ igb_set_rx_buffer_len(adapter, rx_ring);
+ igb_configure_rx_ring(adapter, rx_ring);
+ }
}
/**
@@ -3822,55 +3862,63 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
igb_free_tx_resources(adapter->tx_ring[i]);
}
-void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
- struct igb_tx_buffer *tx_buffer)
-{
- if (tx_buffer->skb) {
- dev_kfree_skb_any(tx_buffer->skb);
- if (dma_unmap_len(tx_buffer, len))
- dma_unmap_single(ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- } else if (dma_unmap_len(tx_buffer, len)) {
- dma_unmap_page(ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- }
- tx_buffer->next_to_watch = NULL;
- tx_buffer->skb = NULL;
- dma_unmap_len_set(tx_buffer, len, 0);
- /* buffer_info must be completely set up in the transmit path */
-}
-
/**
* igb_clean_tx_ring - Free Tx Buffers
* @tx_ring: ring to be cleaned
**/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
- struct igb_tx_buffer *buffer_info;
- unsigned long size;
- u16 i;
+ u16 i = tx_ring->next_to_clean;
+ struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
- if (!tx_ring->tx_buffer_info)
- return;
- /* Free all the Tx ring sk_buffs */
+ while (i != tx_ring->next_to_use) {
+ union e1000_adv_tx_desc *eop_desc, *tx_desc;
- for (i = 0; i < tx_ring->count; i++) {
- buffer_info = &tx_ring->tx_buffer_info[i];
- igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
- }
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer->skb);
- netdev_tx_reset_queue(txring_txq(tx_ring));
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
- size = sizeof(struct igb_tx_buffer) * tx_ring->count;
- memset(tx_ring->tx_buffer_info, 0, size);
+ /* check for eop_desc to determine the end of the packet */
+ eop_desc = tx_buffer->next_to_watch;
+ tx_desc = IGB_TX_DESC(tx_ring, i);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IGB_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
- /* Zero out the descriptor ring */
- memset(tx_ring->desc, 0, tx_ring->size);
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ }
+ }
+ /* reset BQL for queue */
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
+ /* reset next_to_use and next_to_clean */
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
}
@@ -3932,50 +3980,39 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter)
**/
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
- unsigned long size;
- u16 i;
+ u16 i = rx_ring->next_to_clean;
if (rx_ring->skb)
dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
- if (!rx_ring->rx_buffer_info)
- return;
-
/* Free all the Rx ring sk_buffs */
- for (i = 0; i < rx_ring->count; i++) {
+ while (i != rx_ring->next_to_alloc) {
struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
- if (!buffer_info->page)
- continue;
-
/* Invalidate cache lines that may have been written to by
* device so that we avoid corrupting memory.
*/
dma_sync_single_range_for_cpu(rx_ring->dev,
buffer_info->dma,
buffer_info->page_offset,
- IGB_RX_BUFSZ,
+ igb_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev,
buffer_info->dma,
- PAGE_SIZE,
+ igb_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
+ IGB_RX_DMA_ATTR);
__page_frag_cache_drain(buffer_info->page,
buffer_info->pagecnt_bias);
- buffer_info->page = NULL;
+ i++;
+ if (i == rx_ring->count)
+ i = 0;
}
- size = sizeof(struct igb_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_buffer_info, 0, size);
-
- /* Zero out the descriptor ring */
- memset(rx_ring->desc, 0, rx_ring->size);
-
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -4240,7 +4277,7 @@ static void igb_set_rx_mode(struct net_device *netdev)
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
unsigned int vfn = adapter->vfs_allocated_count;
- u32 rctl = 0, vmolr = 0;
+ u32 rctl = 0, vmolr = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
int count;
/* Check for Promiscuous and All Multicast modes */
@@ -4298,6 +4335,14 @@ static void igb_set_rx_mode(struct net_device *netdev)
E1000_RCTL_VFE);
wr32(E1000_RCTL, rctl);
+#if (PAGE_SIZE < 8192)
+ if (!adapter->vfs_allocated_count) {
+ if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+ rlpml = IGB_MAX_FRAME_BUILD_SKB;
+ }
+#endif
+ wr32(E1000_RLPML, rlpml);
+
/* In order to support SR-IOV and eventually VMDq it is necessary to set
* the VMOLR to enable the appropriate modes. Without this workaround
* we will have issues with VLAN tag stripping not being done for frames
@@ -4312,12 +4357,17 @@ static void igb_set_rx_mode(struct net_device *netdev)
vmolr |= rd32(E1000_VMOLR(vfn)) &
~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
- /* enable Rx jumbo frames, no need for restriction */
+ /* enable Rx jumbo frames, restrict as needed to support build_skb */
vmolr &= ~E1000_VMOLR_RLPML_MASK;
- vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE;
+#if (PAGE_SIZE < 8192)
+ if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+ vmolr |= IGB_MAX_FRAME_BUILD_SKB;
+ else
+#endif
+ vmolr |= MAX_JUMBO_FRAME_SIZE;
+ vmolr |= E1000_VMOLR_LPE;
wr32(E1000_VMOLR(vfn), vmolr);
- wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE);
igb_restore_vf_multicasts(adapter);
}
@@ -5256,18 +5306,32 @@ static void igb_tx_map(struct igb_ring *tx_ring,
dma_error:
dev_err(tx_ring->dev, "TX DMA map failed\n");
+ tx_buffer = &tx_ring->tx_buffer_info[i];
/* clear dma mappings for failed tx_buffer_info map */
- for (;;) {
+ while (tx_buffer != first) {
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ if (i--)
+ i += tx_ring->count;
tx_buffer = &tx_ring->tx_buffer_info[i];
- igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
- if (tx_buffer == first)
- break;
- if (i == 0)
- i = tx_ring->count;
- i--;
}
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ dev_kfree_skb_any(tx_buffer->skb);
+ tx_buffer->skb = NULL;
+
tx_ring->next_to_use = i;
}
@@ -5339,7 +5403,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_OK;
out_drop:
- igb_unmap_and_free_tx_resource(tx_ring, first);
+ dev_kfree_skb_any(first->skb);
+ first->skb = NULL;
return NETDEV_TX_OK;
}
@@ -6686,7 +6751,6 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
DMA_TO_DEVICE);
/* clear tx_buffer data */
- tx_buffer->skb = NULL;
dma_unmap_len_set(tx_buffer, len, 0);
/* clear last DMA location and unmap remaining buffers */
@@ -6822,8 +6886,14 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring,
nta++;
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
- /* transfer page from old buffer to new buffer */
- *new_buff = *old_buff;
+ /* Transfer page from old buffer to new buffer.
+ * Move each member individually to avoid possible store
+ * forwarding stalls.
+ */
+ new_buff->dma = old_buff->dma;
+ new_buff->page = old_buff->page;
+ new_buff->page_offset = old_buff->page_offset;
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
}
static inline bool igb_page_is_reserved(struct page *page)
@@ -6831,11 +6901,10 @@ static inline bool igb_page_is_reserved(struct page *page)
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
- struct page *page,
- unsigned int truesize)
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer)
{
- unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
/* avoid re-using remote pages */
if (unlikely(igb_page_is_reserved(page)))
@@ -6843,16 +6912,13 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely(page_ref_count(page) != pagecnt_bias))
+ if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IGB_RX_BUFSZ;
#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
+#define IGB_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGB_RXBUFFER_2048)
- if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
+ if (rx_buffer->page_offset > IGB_LAST_OFFSET)
return false;
#endif
@@ -6860,7 +6926,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(pagecnt_bias == 1)) {
+ if (unlikely(!pagecnt_bias)) {
page_ref_add(page, USHRT_MAX);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -6872,34 +6938,56 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
* igb_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
* @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
*
* This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
**/
-static bool igb_add_rx_frag(struct igb_ring *rx_ring,
+static void igb_add_rx_frag(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer,
- unsigned int size,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int size)
{
- struct page *page = rx_buffer->page;
- unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
- unsigned int truesize = IGB_RX_BUFSZ;
+ unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IGB_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+#endif
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+}
+
+static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *rx_buffer,
+ union e1000_adv_rx_desc *rx_desc,
+ unsigned int size)
+{
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
- unsigned int pull_len;
+ unsigned int headlen;
+ struct sk_buff *skb;
- if (unlikely(skb_is_nonlinear(skb)))
- goto add_tail_frag;
+ /* prefetch first cache line of first page */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
+ if (unlikely(!skb))
+ return NULL;
if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
@@ -6907,95 +6995,73 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring,
size -= IGB_TS_HDR_LEN;
}
- if (likely(size <= IGB_RX_HDR_LEN)) {
- memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
- /* page is not reserved, we can reuse buffer as-is */
- if (likely(!igb_page_is_reserved(page)))
- return true;
-
- /* this page cannot be reused so discard it */
- return false;
- }
-
- /* we need the header to contain the greater of either ETH_HLEN or
- * 60 bytes if the skb->len is less than 60 for skb_pad.
- */
- pull_len = eth_get_headlen(va, IGB_RX_HDR_LEN);
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IGB_RX_HDR_LEN)
+ headlen = eth_get_headlen(va, IGB_RX_HDR_LEN);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
/* update all of the pointers */
- va += pull_len;
- size -= pull_len;
-
-add_tail_frag:
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- (unsigned long)va & ~PAGE_MASK, size, truesize);
+ size -= headlen;
+ if (size) {
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ (va + headlen) - page_address(rx_buffer->page),
+ size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+ } else {
+ rx_buffer->pagecnt_bias++;
+ }
- return igb_can_reuse_rx_page(rx_buffer, page, truesize);
+ return skb;
}
-static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *rx_buffer,
+ union e1000_adv_rx_desc *rx_desc,
+ unsigned int size)
{
- unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
- struct igb_rx_buffer *rx_buffer;
- struct page *page;
-
- rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
- page = rx_buffer->page;
- prefetchw(page);
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- size,
- DMA_FROM_DEVICE);
-
- if (likely(!skb)) {
- void *page_addr = page_address(page) +
- rx_buffer->page_offset;
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(IGB_SKB_PAD + size);
+#endif
+ struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(page_addr);
+ /* prefetch first cache line of first page */
+ prefetch(va);
#if L1_CACHE_BYTES < 128
- prefetch(page_addr + L1_CACHE_BYTES);
+ prefetch(va + L1_CACHE_BYTES);
#endif
- /* allocate a skb to store the frags */
- skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_failed++;
- return NULL;
- }
+ /* build an skb around the page buffer */
+ skb = build_skb(va - IGB_SKB_PAD, truesize);
+ if (unlikely(!skb))
+ return NULL;
- /* we will be copying header into skb->data in
- * pskb_may_pull so it is in our interest to prefetch
- * it now to avoid a possible cache miss
- */
- prefetchw(skb->data);
- }
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, IGB_SKB_PAD);
+ __skb_put(skb, size);
- /* pull page into skb */
- if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
- /* hand second half of page back to the ring */
- igb_reuse_rx_page(rx_ring, rx_buffer);
- } else {
- /* We are not reusing the buffer so unmap it and free
- * any references we are holding to it
- */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
- __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+ /* pull timestamp out of packet data */
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+ igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
+ __skb_pull(skb, IGB_TS_HDR_LEN);
}
- /* clear contents of rx_buffer */
- rx_buffer->page = NULL;
+ /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
return skb;
}
@@ -7154,6 +7220,47 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring,
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
+static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
+ const unsigned int size)
+{
+ struct igb_rx_buffer *rx_buffer;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+static void igb_put_rx_buffer(struct igb_ring *rx_ring,
+ struct igb_rx_buffer *rx_buffer)
+{
+ if (igb_can_reuse_rx_page(rx_buffer)) {
+ /* hand second half of page back to the ring */
+ igb_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ igb_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+ IGB_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+}
+
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
struct igb_ring *rx_ring = q_vector->rx.ring;
@@ -7163,6 +7270,8 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
while (likely(total_packets < budget)) {
union e1000_adv_rx_desc *rx_desc;
+ struct igb_rx_buffer *rx_buffer;
+ unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
@@ -7171,8 +7280,8 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
}
rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
- if (!rx_desc->wb.upper.status_error)
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
break;
/* This memory barrier is needed to keep us from reading
@@ -7181,13 +7290,25 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
*/
dma_rmb();
+ rx_buffer = igb_get_rx_buffer(rx_ring, size);
+
/* retrieve a buffer from the ring */
- skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
+ if (skb)
+ igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ else if (ring_uses_build_skb(rx_ring))
+ skb = igb_build_skb(rx_ring, rx_buffer, rx_desc, size);
+ else
+ skb = igb_construct_skb(rx_ring, rx_buffer,
+ rx_desc, size);
/* exit if we failed to retrieve a buffer */
- if (!skb)
+ if (!skb) {
+ rx_ring->rx_stats.alloc_failed++;
+ rx_buffer->pagecnt_bias++;
break;
+ }
+ igb_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -7231,6 +7352,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
return total_packets;
}
+static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
+}
+
static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
struct igb_rx_buffer *bi)
{
@@ -7242,21 +7368,23 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
return true;
/* alloc new page for storage */
- page = dev_alloc_page();
+ page = dev_alloc_pages(igb_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
- DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ igb_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IGB_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
+ __free_pages(page, igb_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_failed++;
return false;
@@ -7264,7 +7392,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = igb_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
return true;
@@ -7279,6 +7407,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
union e1000_adv_rx_desc *rx_desc;
struct igb_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+ u16 bufsz;
/* nothing to do */
if (!cleaned_count)
@@ -7288,14 +7417,15 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
bi = &rx_ring->rx_buffer_info[i];
i -= rx_ring->count;
+ bufsz = igb_rx_bufsz(rx_ring);
+
do {
if (!igb_alloc_mapped_page(rx_ring, bi))
break;
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
- bi->page_offset,
- IGB_RX_BUFSZ,
+ bi->page_offset, bufsz,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
@@ -7312,8 +7442,8 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
i -= rx_ring->count;
}
- /* clear the status bits for the next_to_use descriptor */
- rx_desc->wb.upper.status_error = 0;
+ /* clear the length for the next_to_use descriptor */
+ rx_desc->wb.upper.length = 0;
cleaned_count--;
} while (cleaned_count);
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index c4477552ce9e..7a3fd4d74592 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -764,8 +764,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
* incoming frame. The value is stored in little endian format starting on
* byte 8.
**/
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
- unsigned char *va,
+void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
struct sk_buff *skb)
{
__le64 *regval = (__le64 *)va;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index d45477db0227..852a2e7e25ed 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2122,7 +2122,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
prefetch(va + L1_CACHE_BYTES);
#endif
- /* build an skb to around the page buffer */
+ /* build an skb around the page buffer */
skb = build_skb(va - IXGBE_SKB_PAD, truesize);
if (unlikely(!skb))
return NULL;
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7bdfa7d78363..8a4a57b887fb 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -12,6 +12,8 @@
#include <linux/list.h> /* for struct list_head */
#include <linux/spinlock.h> /* for struct rwlock_t */
#include <linux/atomic.h> /* for struct atomic_t */
+#include <linux/refcount.h> /* for struct refcount_t */
+
#include <linux/compiler.h>
#include <linux/timer.h>
#include <linux/bug.h>
@@ -525,7 +527,7 @@ struct ip_vs_conn {
struct netns_ipvs *ipvs;
/* counter and timer */
- atomic_t refcnt; /* reference count */
+ refcount_t refcnt; /* reference count */
struct timer_list timer; /* Expiration timer */
volatile unsigned long timeout; /* timeout */
@@ -667,7 +669,7 @@ struct ip_vs_dest {
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
- atomic_t refcnt; /* reference counter */
+ refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
unsigned long idle_start; /* start time, jiffies */
@@ -1211,14 +1213,14 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
*/
static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
{
- return atomic_inc_not_zero(&cp->refcnt);
+ return refcount_inc_not_zero(&cp->refcnt);
}
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
{
smp_mb__before_atomic();
- atomic_dec(&cp->refcnt);
+ refcount_dec(&cp->refcnt);
}
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
@@ -1410,18 +1412,18 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
{
- atomic_inc(&dest->refcnt);
+ refcount_inc(&dest->refcnt);
}
static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
{
smp_mb__before_atomic();
- atomic_dec(&dest->refcnt);
+ refcount_dec(&dest->refcnt);
}
static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
{
- if (atomic_dec_and_test(&dest->refcnt))
+ if (refcount_dec_and_test(&dest->refcnt))
kfree(dest);
}
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 5ed33ea4718e..65cc2cb005d9 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -5,6 +5,8 @@
#ifndef _NF_CONNTRACK_EXPECT_H
#define _NF_CONNTRACK_EXPECT_H
+#include <linux/refcount.h>
+
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_zones.h>
@@ -37,7 +39,7 @@ struct nf_conntrack_expect {
struct timer_list timeout;
/* Usage count. */
- atomic_t use;
+ refcount_t use;
/* Flags */
unsigned int flags;
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 5cc5e9e6171a..d40b89355fdd 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -4,6 +4,7 @@
#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <linux/refcount.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
@@ -12,7 +13,7 @@
struct ctnl_timeout {
struct list_head head;
struct rcu_head rcu_head;
- atomic_t refcnt;
+ refcount_t refcnt;
char name[CTNL_TIMEOUT_NAME_MAX];
__u16 l3num;
struct nf_conntrack_l4proto *l4proto;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2aa8a9d80fbe..49436849d7d7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -385,10 +385,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv)
return (void *)priv - offsetof(struct nft_set, data);
}
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla, u8 genmask);
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla, u8 genmask);
+struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -1016,7 +1017,8 @@ struct nft_object_type {
unsigned int maxattr;
struct module *owner;
const struct nla_policy *policy;
- int (*init)(const struct nlattr * const tb[],
+ int (*init)(const struct nft_ctx *ctx,
+ const struct nlattr *const tb[],
struct nft_object *obj);
void (*destroy)(struct nft_object *obj);
int (*dump)(struct sk_buff *skb,
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 5ceb2205e4e3..381af9469e6a 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -32,6 +32,6 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt);
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct nft_pktinfo *pkt, int index);
#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 05215d30fe5c..8f3842690d17 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -816,6 +816,17 @@ enum nft_rt_keys {
};
/**
+ * enum nft_hash_types - nf_tables hash expression types
+ *
+ * @NFT_HASH_JENKINS: Jenkins Hash
+ * @NFT_HASH_SYM: Symmetric Hash
+ */
+enum nft_hash_types {
+ NFT_HASH_JENKINS,
+ NFT_HASH_SYM,
+};
+
+/**
* enum nft_hash_attributes - nf_tables hash expression netlink attributes
*
* @NFTA_HASH_SREG: source register (NLA_U32)
@@ -824,6 +835,7 @@ enum nft_rt_keys {
* @NFTA_HASH_MODULUS: modulus value (NLA_U32)
* @NFTA_HASH_SEED: seed value (NLA_U32)
* @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
+ * @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types)
*/
enum nft_hash_attributes {
NFTA_HASH_UNSPEC,
@@ -833,6 +845,7 @@ enum nft_hash_attributes {
NFTA_HASH_MODULUS,
NFTA_HASH_SEED,
NFTA_HASH_OFFSET,
+ NFTA_HASH_TYPE,
__NFTA_HASH_MAX,
};
#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1)
@@ -1244,12 +1257,23 @@ enum nft_fib_flags {
NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */
NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */
NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */
+ NFTA_FIB_F_PRESENT = 1 << 5, /* check existence only */
+};
+
+enum nft_ct_helper_attributes {
+ NFTA_CT_HELPER_UNSPEC,
+ NFTA_CT_HELPER_NAME,
+ NFTA_CT_HELPER_L3PROTO,
+ NFTA_CT_HELPER_L4PROTO,
+ __NFTA_CT_HELPER_MAX,
};
+#define NFTA_CT_HELPER_MAX (__NFTA_CT_HELPER_MAX - 1)
#define NFT_OBJECT_UNSPEC 0
#define NFT_OBJECT_COUNTER 1
#define NFT_OBJECT_QUOTA 2
-#define __NFT_OBJECT_MAX 3
+#define NFT_OBJECT_CT_HELPER 3
+#define __NFT_OBJECT_MAX 4
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/**
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index fa87fbd62bb7..d20b01b8d103 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -995,13 +995,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
if (!elem)
return okfn(net, sk, skb);
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, elem);
- rcu_read_unlock();
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 98b9c8e8615e..707caea39743 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
pptr = skb_header_pointer(skb, offset,
sizeof(_ports), &_ports);
if (pptr == NULL) {
- printk(" INCOMPLETE TCP/UDP header");
+ pr_cont(" INCOMPLETE TCP/UDP header");
return;
}
- printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+ pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
}
}
@@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IP header");
+ pr_cont(" INCOMPLETE IP header");
goto out;
}
- printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
- &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
+ pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
+ &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
print_ports(skb, ih->protocol, ih->ihl*4);
goto out;
}
@@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IPv6 header");
+ pr_cont(" INCOMPLETE IPv6 header");
goto out;
}
- printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
- &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
+ pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
+ &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
nexthdr = ih->nexthdr;
offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off);
if (offset_ph == -1)
@@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
- printk(" INCOMPLETE ARP header");
+ pr_cont(" INCOMPLETE ARP header");
goto out;
}
- printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
- ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
- ntohs(ah->ar_op));
+ pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+ ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
* then log the ARP payload
@@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ap = skb_header_pointer(skb, sizeof(_arph),
sizeof(_arpp), &_arpp);
if (ap == NULL) {
- printk(" INCOMPLETE ARP payload");
+ pr_cont(" INCOMPLETE ARP payload");
goto out;
}
- printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
- ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+ pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
+ ap->mac_src, ap->ip_src,
+ ap->mac_dst, ap->ip_dst);
}
}
out:
- printk("\n");
+ pr_cont("\n");
spin_unlock_bh(&ebt_log_lock);
-
}
static unsigned int
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 206dc266ecd2..346ef6b00b8f 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_bridge_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 6241a81fd7f5..f17dab1dee6e 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -562,8 +562,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- if (ret != 0)
- goto out_free;
ret = -EINVAL;
if (i != repl->num_entries)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 52f26459efc3..fcbdc0c49b0e 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -22,6 +22,7 @@
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/seq_file.h>
+#include <linux/refcount.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
struct clusterip_config {
struct list_head list; /* list of all configs */
- atomic_t refcount; /* reference count */
- atomic_t entries; /* number of entries/rules
+ refcount_t refcount; /* reference count */
+ refcount_t entries; /* number of entries/rules
* referencing us */
__be32 clusterip; /* the IP address */
@@ -77,7 +78,7 @@ struct clusterip_net {
static inline void
clusterip_config_get(struct clusterip_config *c)
{
- atomic_inc(&c->refcount);
+ refcount_inc(&c->refcount);
}
@@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head)
static inline void
clusterip_config_put(struct clusterip_config *c)
{
- if (atomic_dec_and_test(&c->refcount))
+ if (refcount_dec_and_test(&c->refcount))
call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
}
@@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
local_bh_disable();
- if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
spin_unlock(&cn->lock);
local_bh_enable();
@@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
c = NULL;
else
#endif
- if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ if (unlikely(!refcount_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
- atomic_inc(&c->entries);
+ refcount_inc(&c->entries);
}
rcu_read_unlock_bh();
@@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
- atomic_set(&c->refcount, 1);
- atomic_set(&c->entries, 1);
+ refcount_set(&c->refcount, 1);
+ refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c361da2..ef49989c93b1 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -998,18 +998,6 @@ err_id_free:
*
*****************************************************************************/
-static void hex_dump(const unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
/*
* Parse and mangle SNMP message according to mapping.
* (And this is the fucking 'basic' method).
@@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg,
struct snmp_object *obj;
if (debug > 1)
- hex_dump(msg, len);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
+ msg, len, 0);
asn1_open(&ctx, msg, len);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 146d86105183..7cd8d0d918f8 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- const struct iphdr *oiph;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
@@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
- oiph = ip_hdr(oldskb);
-
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 2981291910dd..f4e4462cb5bb 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
@@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
get_ifindex(pkt->skb->dev));
return;
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 765facf03d45..e8d88d82636b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e6a2753dff9e..3d2ac71a83ec 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
cp->flags |= IP_VS_CONN_F_HASHED;
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
@@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
- atomic_dec(&cp->refcnt);
+ refcount_dec(&cp->refcnt);
ret = 1;
} else
ret = 0;
@@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
ret = false;
/* Decrease refcnt and unlink conn only if we are last user */
- if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cp->refcnt)) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
ret = true;
}
} else
- ret = atomic_read(&cp->refcnt) ? false : true;
+ ret = refcount_read(&cp->refcnt) ? false : true;
spin_unlock(&cp->lock);
ct_write_unlock_bh(hash);
@@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp)
{
if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
- (atomic_read(&cp->refcnt) == 1) &&
+ (refcount_read(&cp->refcnt) == 1) &&
!timer_pending(&cp->timer))
/* expire connection immediately */
__ip_vs_conn_put_notimer(cp);
@@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data)
expire_later:
IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
- atomic_read(&cp->refcnt),
+ refcount_read(&cp->refcnt),
atomic_read(&cp->n_control));
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
@@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
* it in the table, so that other thread run ip_vs_random_dropentry
* but cannot drop this entry.
*/
- atomic_set(&cp->refcnt, 1);
+ refcount_set(&cp->refcnt, 1);
cp->control = NULL;
atomic_set(&cp->n_control, 0);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index db40050f8785..b4a746d0e39b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
return cp;
@@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
LeaveFunction(12);
return cp;
}
@@ -2231,8 +2231,6 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
- printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
- sizeof(struct netns_ipvs), ipvs->gen);
return 0;
/*
* Error handling
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5aeb0dde6ccc..541aa7694775 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
dest->vfwmark,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (dest->af == dest_af &&
ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->activeconns, 0);
atomic_set(&dest->inactconns, 0);
atomic_set(&dest->persistconns, 0);
- atomic_set(&dest->refcnt, 1);
+ refcount_set(&dest->refcnt, 1);
INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
@@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
@@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
@@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data)
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- if (atomic_read(&dest->refcnt) > 1)
+ if (refcount_read(&dest->refcnt) > 1)
continue;
if (dest->idle_start) {
if (time_before(now, dest->idle_start +
@@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
__ip_vs_dst_cache_reset(dest);
}
spin_unlock_bh(&dest->dst_lock);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 5824927cf8e0..b6aa4a970c6e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 703f11877bee..c13ff575f9f7 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
@@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
__func__,
IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
atomic_read(&most->activeconns),
- atomic_read(&most->refcnt),
+ refcount_read(&most->refcnt),
atomic_read(&most->weight), moh);
return most;
}
@@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index a8b63401e773..7d9d4ac596ca 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index d952d67f904d..56f8e4b204ff 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
sctp_state_name(cp->state),
sctp_state_name(next_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 5117bcb7d2f0..12dc8d5bc37d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 58bacfc461ee..ee0530d14c5f 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -97,7 +97,7 @@ stop:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+ refcount_read(&dest->refcnt), atomic_read(&dest->weight));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index f8e2d00f528b..ab23cf203437 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6b366fd90554..6add39e0ec20 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 17e6d4406ca7..62258dd457ac 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -218,7 +218,7 @@ found:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
atomic_read(&dest->weight));
mark->cl = dest;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 071b97fcbefb..b0f2e8e65084 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1129,7 +1129,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
-static struct nf_conntrack_tuple_hash *
+static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
@@ -1237,21 +1237,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
-/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
-static inline struct nf_conn *
+/* On success, returns 0, sets skb->_nfct | ctinfo */
+static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
- int *set_reply,
- enum ip_conntrack_info *ctinfo)
+ struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
+ enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
struct nf_conn *ct;
u32 hash;
@@ -1260,7 +1259,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("Can't get tuple\n");
- return NULL;
+ return 0;
}
/* look for tuple match */
@@ -1271,33 +1270,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
skb, dataoff, hash);
if (!h)
- return NULL;
+ return 0;
if (IS_ERR(h))
- return (void *)h;
+ return PTR_ERR(h);
}
ct = nf_ct_tuplehash_to_ctrack(h);
/* It exists; we have (non-exclusive) reference. */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("normal packet for %p\n", ct);
- *ctinfo = IP_CT_ESTABLISHED;
+ ctinfo = IP_CT_ESTABLISHED;
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("related packet for %p\n", ct);
- *ctinfo = IP_CT_RELATED;
+ ctinfo = IP_CT_RELATED;
} else {
pr_debug("new packet for %p\n", ct);
- *ctinfo = IP_CT_NEW;
+ ctinfo = IP_CT_NEW;
}
- *set_reply = 0;
}
- nf_ct_set(skb, ct, *ctinfo);
- return ct;
+ nf_ct_set(skb, ct, ctinfo);
+ return 0;
}
unsigned int
@@ -1311,7 +1307,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
- int set_reply = 0;
int ret;
tmpl = nf_ct_get(skb, &ctinfo);
@@ -1354,23 +1349,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
- l3proto, l4proto, &set_reply, &ctinfo);
- if (!ct) {
- /* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(net, invalid);
- ret = NF_ACCEPT;
- goto out;
- }
-
- if (IS_ERR(ct)) {
+ ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
+ l3proto, l4proto);
+ if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
ret = NF_DROP;
goto out;
}
- NF_CT_ASSERT(skb_nfct(skb));
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) {
+ /* Not valid part of a connection */
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
+ ret = NF_ACCEPT;
+ goto out;
+ }
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
@@ -1395,7 +1389,8 @@ repeat:
goto out;
}
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
if (tmpl)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4b2e1fb28bb4..cb29e598605f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -133,7 +133,7 @@ nf_ct_expect_find_get(struct net *net,
rcu_read_lock();
i = __nf_ct_expect_find(net, zone, tuple);
- if (i && !atomic_inc_not_zero(&i->use))
+ if (i && !refcount_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -186,7 +186,7 @@ nf_ct_find_expectation(struct net *net,
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
- atomic_inc(&exp->use);
+ refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -275,7 +275,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
return NULL;
new->master = me;
- atomic_set(&new->use, 1);
+ refcount_set(&new->use, 1);
return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
@@ -348,7 +348,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head)
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
- if (atomic_dec_and_test(&exp->use))
+ if (refcount_dec_and_test(&exp->use))
call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
@@ -361,7 +361,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
/* two references : one for hash insert, one for the timer */
- atomic_add(2, &exp->use);
+ refcount_add(2, &exp->use);
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6806b5e73567..d49cc1e03c5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2693,7 +2693,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
@@ -2739,7 +2739,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5e0ccfd5bb37..12cc5218de96 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1772,8 +1772,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
goto err1;
}
+ if (ops->validate) {
+ const struct nft_data *data = NULL;
+
+ err = ops->validate(ctx, expr, &data);
+ if (err < 0)
+ goto err2;
+ }
+
return 0;
+err2:
+ if (ops->destroy)
+ ops->destroy(ctx, expr);
err1:
expr->ops = NULL;
return err;
@@ -2523,8 +2534,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
return 0;
}
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla, u8 genmask)
+static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -2538,11 +2549,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla,
- u8 genmask)
+static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
@@ -2557,7 +2567,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
+
+struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask)
+{
+ struct nft_set *set;
+
+ set = nf_tables_set_lookup(table, nla_set_name, genmask);
+ if (IS_ERR(set)) {
+ if (!nla_set_id)
+ return set;
+
+ set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+ }
+ return set;
+}
+EXPORT_SYMBOL_GPL(nft_set_lookup);
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -4067,7 +4095,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
};
-static struct nft_object *nft_obj_init(const struct nft_object_type *type,
+static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
@@ -4087,7 +4116,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
if (obj == NULL)
goto err1;
- err = type->init((const struct nlattr * const *)tb, obj);
+ err = type->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
@@ -4195,7 +4224,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (IS_ERR(type))
return PTR_ERR(type);
- obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err1;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index d44d89b56127..c86da174a5fc 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/slab.h>
@@ -32,7 +33,7 @@ struct nf_acct {
atomic64_t bytes;
unsigned long flags;
struct list_head head;
- atomic_t refcnt;
+ refcount_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
char data[0];
@@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
atomic64_set(&nfacct->pkts,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
- atomic_set(&nfacct->refcnt, 1);
+ refcount_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
@@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NFACCT_PAD) ||
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
NFACCT_PAD) ||
- nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
+ nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
goto nla_put_failure;
if (acct->flags & NFACCT_F_QUOTA) {
u64 *quota = (u64 *)acct->data;
@@ -329,7 +330,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
/* We want to avoid races with nfnl_acct_put. So only when the current
* refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cur->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&cur->head);
kfree_rcu(cur, rcu_head);
@@ -413,7 +414,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&cur->refcnt)) {
+ if (!refcount_inc_not_zero(&cur->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -429,7 +430,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
- if (atomic_dec_and_test(&acct->refcnt))
+ if (refcount_dec_and_test(&acct->refcnt))
kfree_rcu(acct, rcu_head);
module_put(THIS_MODULE);
@@ -502,7 +503,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
list_del_rcu(&cur->head);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 139e0867e56e..baa75f3ab7e7 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -138,7 +138,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
timeout->l3num = l3num;
timeout->l4proto = l4proto;
- atomic_set(&timeout->refcnt, 1);
+ refcount_set(&timeout->refcnt, 1);
list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
@@ -172,7 +172,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
nla_put_be32(skb, CTA_TIMEOUT_USE,
- htonl(atomic_read(&timeout->refcnt))))
+ htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
@@ -339,7 +339,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
/* We want to avoid races with ctnl_timeout_put. So only when the
* current refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&timeout->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
@@ -536,7 +536,7 @@ ctnl_timeout_find_get(struct net *net, const char *name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&timeout->refcnt)) {
+ if (!refcount_inc_not_zero(&timeout->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -550,7 +550,7 @@ err:
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
- if (atomic_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt))
kfree_rcu(timeout, rcu_head);
module_put(THIS_MODULE);
@@ -601,7 +601,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
list_del_rcu(&cur->head);
nf_ct_l4proto_put(cur->l4proto);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 08247bf7d7b8..ecd857b75ffe 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -40,6 +40,8 @@
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
+
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
@@ -57,7 +59,7 @@
struct nfulnl_instance {
struct hlist_node hlist; /* global list of instances */
spinlock_t lock;
- atomic_t use; /* use count */
+ refcount_t use; /* use count */
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
@@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
static inline void
instance_get(struct nfulnl_instance *inst)
{
- atomic_inc(&inst->use);
+ refcount_inc(&inst->use);
}
static struct nfulnl_instance *
@@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
rcu_read_lock_bh();
inst = __instance_lookup(log, group_num);
- if (inst && !atomic_inc_not_zero(&inst->use))
+ if (inst && !refcount_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
@@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
static void
instance_put(struct nfulnl_instance *inst)
{
- if (inst && atomic_dec_and_test(&inst->use))
+ if (inst && refcount_dec_and_test(&inst->use))
call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
}
@@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num,
INIT_HLIST_NODE(&inst->hlist);
spin_lock_init(&inst->lock);
/* needs to be two, since we _put() after creation */
- atomic_set(&inst->use, 2);
+ refcount_set(&inst->use, 2);
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
@@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
- inst->flushtimeout, atomic_read(&inst->use));
+ inst->flushtimeout, refcount_read(&inst->use));
return 0;
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index c21e7eb8dce0..fab6bf3f955e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -230,10 +230,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
- if (ret < 0)
- goto err;
-
target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -419,10 +415,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(match->table, ctx->chain);
- if (ret < 0)
- goto err;
-
match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 7f8422213341..67a710ebde09 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_counter_obj_init(const struct nlattr * const tb[],
+static int nft_counter_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bf548a7a71ec..4144ae845bdd 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,12 @@ struct nft_ct {
};
};
+struct nft_ct_helper_obj {
+ struct nf_conntrack_helper *helper4;
+ struct nf_conntrack_helper *helper6;
+ u8 l4proto;
+};
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
@@ -730,6 +736,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = {
.owner = THIS_MODULE,
};
+static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_helper *help4, *help6;
+ char name[NF_CT_HELPER_NAME_LEN];
+ int family = ctx->afi->family;
+
+ if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+ return -EINVAL;
+
+ priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
+ if (!priv->l4proto)
+ return -ENOENT;
+
+ nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
+
+ if (tb[NFTA_CT_HELPER_L3PROTO])
+ family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
+
+ help4 = NULL;
+ help6 = NULL;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (ctx->afi->family == NFPROTO_IPV6)
+ return -EINVAL;
+
+ help4 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_IPV6:
+ if (ctx->afi->family == NFPROTO_IPV4)
+ return -EINVAL;
+
+ help6 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_NETDEV: /* fallthrough */
+ case NFPROTO_BRIDGE: /* same */
+ case NFPROTO_INET:
+ help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
+ priv->l4proto);
+ help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
+ priv->l4proto);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ /* && is intentional; only error if INET found neither ipv4 or ipv6 */
+ if (!help4 && !help6)
+ return -ENOENT;
+
+ priv->helper4 = help4;
+ priv->helper6 = help6;
+
+ return 0;
+}
+
+static void nft_ct_helper_obj_destroy(struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+
+ if (priv->helper4)
+ module_put(priv->helper4->me);
+ if (priv->helper6)
+ module_put(priv->helper6->me);
+}
+
+static void nft_ct_helper_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+ struct nf_conntrack_helper *to_assign = NULL;
+ struct nf_conn_help *help;
+
+ if (!ct ||
+ nf_ct_is_confirmed(ct) ||
+ nf_ct_is_template(ct) ||
+ priv->l4proto != nf_ct_protonum(ct))
+ return;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ to_assign = priv->helper4;
+ break;
+ case NFPROTO_IPV6:
+ to_assign = priv->helper6;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (!to_assign)
+ return;
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return;
+
+ help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+ if (help) {
+ rcu_assign_pointer(help->helper, to_assign);
+ set_bit(IPS_HELPER_BIT, &ct->status);
+ }
+}
+
+static int nft_ct_helper_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ const struct nf_conntrack_helper *helper = priv->helper4;
+ u16 family;
+
+ if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
+ return -1;
+
+ if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
+ return -1;
+
+ if (priv->helper4 && priv->helper6)
+ family = NFPROTO_INET;
+ else if (priv->helper6)
+ family = NFPROTO_IPV6;
+ else
+ family = NFPROTO_IPV4;
+
+ if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
+ return -1;
+
+ return 0;
+}
+
+static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
+ [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_helper_obj __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .size = sizeof(struct nft_ct_helper_obj),
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
+ .eval = nft_ct_helper_obj_eval,
+ .init = nft_ct_helper_obj_init,
+ .destroy = nft_ct_helper_obj_destroy,
+ .dump = nft_ct_helper_obj_dump,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_ct_module_init(void)
{
int err;
@@ -744,7 +906,14 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
+ err = nft_register_obj(&nft_ct_helper_obj);
+ if (err < 0)
+ goto err2;
+
return 0;
+
+err2:
+ nft_unregister_expr(&nft_notrack_type);
err1:
nft_unregister_expr(&nft_ct_type);
return err;
@@ -752,6 +921,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
+ nft_unregister_obj(&nft_ct_helper_obj);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
@@ -763,3 +933,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <[email protected]>");
MODULE_ALIAS_NFT_EXPR("ct");
MODULE_ALIAS_NFT_EXPR("notrack");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 049ad2d9ee66..3948da380259 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -133,16 +133,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->invert = true;
}
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
- genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_DYNSET_SET_ID])
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_DYNSET_SET_ID],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
+ tb[NFTA_DYNSET_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index c308920b194c..d212a85d2f33 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
goto err;
offset = i + priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- memcpy(dest, opt + offset, priv->len);
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = 1;
+ } else {
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+ }
return;
}
err:
- regs->verdict.code = NFT_BREAK;
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ *dest = 0;
+ else
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 29a4906adc27..21df8cccea65 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
EXPORT_SYMBOL(nft_fib_policy);
#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
@@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- return nft_fib_validate(ctx, expr, NULL);
+ return 0;
}
EXPORT_SYMBOL_GPL(nft_fib_init);
@@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
EXPORT_SYMBOL_GPL(nft_fib_dump);
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct nft_pktinfo *pkt, int index)
{
struct net_device *dev;
u32 *dreg = reg;
- switch (r) {
+ switch (priv->result) {
case NFT_FIB_RESULT_OIF:
- *dreg = index;
+ *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
break;
case NFT_FIB_RESULT_OIFNAME:
dev = dev_get_by_index_rcu(nft_net(pkt), index);
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ *dreg = !!dev;
+ else
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index eb2721af898d..a6a4633725bb 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -17,7 +17,7 @@
#include <net/netfilter/nf_tables_core.h>
#include <linux/jhash.h>
-struct nft_hash {
+struct nft_jhash {
enum nft_registers sreg:8;
enum nft_registers dreg:8;
u8 len;
@@ -26,11 +26,11 @@ struct nft_hash {
u32 offset;
};
-static void nft_hash_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_jhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
const void *data = &regs->data[priv->sreg];
u32 h;
@@ -38,6 +38,25 @@ static void nft_hash_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = h + priv->offset;
}
+struct nft_symhash {
+ enum nft_registers dreg:8;
+ u32 modulus;
+ u32 offset;
+};
+
+static void nft_symhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 h;
+
+ h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+
+ regs->data[priv->dreg] = h + priv->offset;
+}
+
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SREG] = { .type = NLA_U32 },
[NFTA_HASH_DREG] = { .type = NLA_U32 },
@@ -45,13 +64,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_MODULUS] = { .type = NLA_U32 },
[NFTA_HASH_SEED] = { .type = NLA_U32 },
[NFTA_HASH_OFFSET] = { .type = NLA_U32 },
+ [NFTA_HASH_TYPE] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_jhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
u32 len;
int err;
@@ -92,10 +112,36 @@ static int nft_hash_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_hash_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_symhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (!tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
+
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_jhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_jhash *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
goto nla_put_failure;
@@ -110,6 +156,28 @@ static int nft_hash_dump(struct sk_buff *skb,
if (priv->offset != 0)
if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_symhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -117,17 +185,46 @@ nla_put_failure:
}
static struct nft_expr_type nft_hash_type;
-static const struct nft_expr_ops nft_hash_ops = {
+static const struct nft_expr_ops nft_jhash_ops = {
.type = &nft_hash_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
- .eval = nft_hash_eval,
- .init = nft_hash_init,
- .dump = nft_hash_dump,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
+ .eval = nft_jhash_eval,
+ .init = nft_jhash_init,
+ .dump = nft_jhash_dump,
};
+static const struct nft_expr_ops nft_symhash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
+ .eval = nft_symhash_eval,
+ .init = nft_symhash_init,
+ .dump = nft_symhash_dump,
+};
+
+static const struct nft_expr_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_HASH_TYPE])
+ return &nft_jhash_ops;
+
+ type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
+ switch (type) {
+ case NFT_HASH_SYM:
+ return &nft_symhash_ops;
+ case NFT_HASH_JENKINS:
+ return &nft_jhash_ops;
+ default:
+ break;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_hash_type __read_mostly = {
.name = "hash",
- .ops = &nft_hash_ops,
+ .select_ops = &nft_hash_select_ops,
.policy = nft_hash_policy,
.maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c6baf412236d..18dd57a52651 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -17,9 +17,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(limit_lock);
-
struct nft_limit {
+ spinlock_t lock;
u64 last;
u64 tokens;
u64 tokens_max;
@@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
u64 now, tokens;
s64 delta;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&limit->lock);
now = ktime_get_ns();
tokens = limit->tokens + now - limit->last;
if (tokens > limit->tokens_max)
@@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
delta = tokens - cost;
if (delta >= 0) {
limit->tokens = delta;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return limit->invert;
}
limit->tokens = tokens;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return !limit->invert;
}
@@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit,
limit->invert = true;
}
limit->last = ktime_get_ns();
+ spin_lock_init(&limit->lock);
return 0;
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index e21aea7e5ec8..475570e89ede 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_LOOKUP_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_LOOKUP_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+ tb[NFTA_LOOKUP_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->flags & NFT_SET_EVAL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 11ce016cd479..6ac03d4266c9 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- err = nft_masq_validate(ctx, expr, NULL);
- if (err)
- return err;
-
if (tb[NFTA_MASQ_FLAGS]) {
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (priv->flags & ~NF_NAT_RANGE_MASK)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e1f5ca9b423b..d14417aaf5d4 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -370,10 +370,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_meta_set_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 19a7bf3236f9..26a74dfb3b7a 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- err = nft_nat_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1ae8c49ca4a1..1dd428fbaaa3 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
struct nft_set *set;
int err;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_OBJREF_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_OBJREF_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
+ tb[NFTA_OBJREF_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 2d6fe3559912..25e33159be57 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_quota_obj_init(const struct nlattr * const tb[],
+static int nft_quota_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_quota *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 40dcd05146d5..1e66538bf0ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
unsigned int plen;
int err;
- err = nft_redir_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index c64de3f7379d..29f5bd2377b0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 9e90a02cb104..5a7fb5ff867d 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 78dfbf9588b3..e97e2fb53f0a 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,9 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(nft_rbtree_lock);
-
struct nft_rbtree {
+ rwlock_t lock;
struct rb_root root;
};
@@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
const void *this;
int d;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
parent = priv->root.rb_node;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
}
if (nft_rbtree_interval_end(rbe))
goto out;
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return false;
}
@@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
int err;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
err = __nft_rbtree_insert(net, set, rbe, ext);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
return err;
}
@@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
rb_erase(&rbe->node, &priv->root);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
}
static void nft_rbtree_activate(const struct net *net,
@@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return;
}
cont:
iter->count++;
}
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
+ rwlock_init(&priv->lock);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index dab962df1787..d27b5f1ea619 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -18,6 +18,7 @@
#include <linux/netfilter/xt_limit.h>
struct xt_limit_priv {
+ spinlock_t lock;
unsigned long prev;
uint32_t credit;
};
@@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit");
* see net/sched/sch_tbf.c in the linux source tree
*/
-static DEFINE_SPINLOCK(limit_lock);
-
/* Rusty: This is my (non-mathematically-inclined) understanding of
this algorithm. The `average rate' in jiffies becomes your initial
amount of credit `credit' and the most credit you can ever have
@@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct xt_limit_priv *priv = r->master;
unsigned long now = jiffies;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&priv->lock);
priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
if (priv->credit > r->credit_cap)
priv->credit = r->credit_cap;
@@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (priv->credit >= r->cost) {
/* We're not limited. */
priv->credit -= r->cost;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return true;
}
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return false;
}
@@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
r->credit_cap = priv->credit; /* Credits full. */
r->cost = user2credits(r->avg);
}
+ spin_lock_init(&priv->lock);
+
return 0;
}