aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/sparc/net/bpf_jit_comp.c9
-rw-r--r--drivers/net/dsa/bcm_sf2.c16
-rw-r--r--drivers/net/dsa/bcm_sf2.h1
-rw-r--r--drivers/net/dsa/bcm_sf2_regs.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h53
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h14
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h28
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c204
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c41
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h23
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c7
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h1
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c18
-rw-r--r--drivers/net/ethernet/freescale/fec.h17
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c173
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h6
-rw-r--r--drivers/net/phy/bcm7xxx.c25
-rw-r--r--drivers/net/vxlan.c105
-rw-r--r--drivers/of/of_mdio.c2
-rw-r--r--include/linux/brcmphy.h3
-rw-r--r--include/linux/mlx4/device.h11
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/skbuff.h6
-rw-r--r--include/net/dsa.h1
-rw-r--r--include/net/ip_tunnels.h19
-rw-r--r--include/net/snmp.h8
-rw-r--r--include/net/udp_tunnel.h85
-rw-r--r--include/uapi/linux/fou.h32
-rw-r--r--include/uapi/linux/if_tunnel.h16
-rw-r--r--net/core/skbuff.c78
-rw-r--r--net/core/sock.c78
-rw-r--r--net/dsa/slave.c9
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/fou.c368
-rw-r--r--net/ipv4/ip_gre.c90
-rw-r--r--net/ipv4/ip_tunnel.c91
-rw-r--r--net/ipv4/ipip.c78
-rw-r--r--net/ipv4/protocol.c1
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/udp_offload.c5
-rw-r--r--net/ipv4/udp_tunnel.c138
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/ip6_udp_tunnel.c105
-rw-r--r--net/ipv6/protocol.c1
-rw-r--r--net/ipv6/sit.c107
-rw-r--r--net/l2tp/l2tp_core.c24
-rw-r--r--net/sched/cls_u32.c5
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_htb.c2
60 files changed, 1882 insertions, 410 deletions
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index b2ad9dc5425e..2dde48bdcc42 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -579,16 +579,11 @@ void bpf_jit_compile(struct bpf_prog *fp)
case BPF_ANC | SKF_AD_PROTOCOL:
emit_skb_load16(protocol, r_A);
break;
-#if 0
- /* GCC won't let us take the address of
- * a bit field even though we very much
- * know what we are doing here.
- */
case BPF_ANC | SKF_AD_PKTTYPE:
- __emit_skb_load8(pkt_type, r_A);
+ __emit_skb_load8(__pkt_type_offset, r_A);
+ emit_andi(r_A, PKT_TYPE_MAX, r_A);
emit_alu_K(SRL, 5);
break;
-#endif
case BPF_ANC | SKF_AD_IFINDEX:
emit_skb_loadptr(dev, r_A);
emit_cmpi(r_A, 0);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 02d7db320d90..a97ba2548ea5 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -376,6 +376,9 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
SWITCH_TOP_REV_MASK;
priv->hw_params.core_rev = (rev & SF2_REV_MASK);
+ rev = reg_readl(priv, REG_PHY_REVISION);
+ priv->hw_params.gphy_rev = rev & PHY_REVISION_MASK;
+
pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n",
priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff,
priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff,
@@ -399,6 +402,18 @@ static int bcm_sf2_sw_set_addr(struct dsa_switch *ds, u8 *addr)
return 0;
}
+static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
+{
+ struct bcm_sf2_priv *priv = ds_to_priv(ds);
+
+ /* The BCM7xxx PHY driver expects to find the integrated PHY revision
+ * in bits 15:8 and the patch level in bits 7:0 which is exactly what
+ * the REG_PHY_REVISION register layout is.
+ */
+
+ return priv->hw_params.gphy_rev;
+}
+
static int bcm_sf2_sw_indir_rw(struct dsa_switch *ds, int op, int addr,
int regnum, u16 val)
{
@@ -597,6 +612,7 @@ static struct dsa_switch_driver bcm_sf2_switch_driver = {
.probe = bcm_sf2_sw_probe,
.setup = bcm_sf2_sw_setup,
.set_addr = bcm_sf2_sw_set_addr,
+ .get_phy_flags = bcm_sf2_sw_get_phy_flags,
.phy_read = bcm_sf2_sw_phy_read,
.phy_write = bcm_sf2_sw_phy_write,
.get_strings = bcm_sf2_sw_get_strings,
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 260bab313e58..d3bd52dc40d2 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -26,6 +26,7 @@
struct bcm_sf2_hw_params {
u16 top_rev;
u16 core_rev;
+ u16 gphy_rev;
u32 num_gphy;
u8 num_acb_queue;
u8 num_rgmii;
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 885c231b03b5..c65f138c777f 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -25,6 +25,7 @@
#define SWITCH_TOP_REV_MASK 0xffff
#define REG_PHY_REVISION 0x1C
+#define PHY_REVISION_MASK 0xffff
#define REG_SPHY_CNTRL 0x2C
#define IDDQ_BIAS (1 << 0)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 86e94517a536..c3a6072134f5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1448,6 +1448,12 @@ struct bnx2x_fp_stats {
struct bnx2x_eth_q_stats_old eth_q_stats_old;
};
+enum {
+ SUB_MF_MODE_UNKNOWN = 0,
+ SUB_MF_MODE_UFP,
+ SUB_MF_MODE_NPAR1_DOT_5,
+};
+
struct bnx2x {
/* Fields used in the tx and intr/napi performance paths
* are grouped together in the beginning of the structure
@@ -1659,6 +1665,9 @@ struct bnx2x {
#define IS_MF_SI(bp) (bp->mf_mode == MULTI_FUNCTION_SI)
#define IS_MF_SD(bp) (bp->mf_mode == MULTI_FUNCTION_SD)
#define IS_MF_AFEX(bp) (bp->mf_mode == MULTI_FUNCTION_AFEX)
+ u8 mf_sub_mode;
+#define IS_MF_UFP(bp) (IS_MF_SD(bp) && \
+ bp->mf_sub_mode == SUB_MF_MODE_UFP)
u8 wol;
@@ -2361,7 +2370,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
#define ATTN_HARD_WIRED_MASK 0xff00
#define ATTENTION_ID 4
-#define IS_MF_STORAGE_ONLY(bp) (IS_MF_STORAGE_SD(bp) || \
+#define IS_MF_STORAGE_ONLY(bp) (IS_MF_STORAGE_PERSONALITY_ONLY(bp) || \
IS_MF_FCOE_AFEX(bp))
/* stuff added to make the code fit 80Col */
@@ -2537,14 +2546,44 @@ void bnx2x_notify_link_changed(struct bnx2x *bp);
#define IS_MF_ISCSI_SD(bp) (IS_MF_SD(bp) && BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp))
#define IS_MF_FCOE_SD(bp) (IS_MF_SD(bp) && BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp))
+#define IS_MF_ISCSI_SI(bp) (IS_MF_SI(bp) && BNX2X_IS_MF_EXT_PROTOCOL_ISCSI(bp))
+
+#define IS_MF_ISCSI_ONLY(bp) (IS_MF_ISCSI_SD(bp) || IS_MF_ISCSI_SI(bp))
+
+#define BNX2X_MF_EXT_PROTOCOL_MASK \
+ (MACP_FUNC_CFG_FLAGS_ETHERNET | \
+ MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD | \
+ MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD)
+
+#define BNX2X_MF_EXT_PROT(bp) ((bp)->mf_ext_config & \
+ BNX2X_MF_EXT_PROTOCOL_MASK)
+
+#define BNX2X_HAS_MF_EXT_PROTOCOL_FCOE(bp) \
+ (BNX2X_MF_EXT_PROT(bp) & MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD)
+
+#define BNX2X_IS_MF_EXT_PROTOCOL_FCOE(bp) \
+ (BNX2X_MF_EXT_PROT(bp) == MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD)
+
+#define BNX2X_IS_MF_EXT_PROTOCOL_ISCSI(bp) \
+ (BNX2X_MF_EXT_PROT(bp) == MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD)
+
+#define IS_MF_FCOE_AFEX(bp) \
+ (IS_MF_AFEX(bp) && BNX2X_IS_MF_EXT_PROTOCOL_FCOE(bp))
+
+#define IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp) \
+ (IS_MF_SD(bp) && \
+ (BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp) || \
+ BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp)))
+
+#define IS_MF_SI_STORAGE_PERSONALITY_ONLY(bp) \
+ (IS_MF_SI(bp) && \
+ (BNX2X_IS_MF_EXT_PROTOCOL_ISCSI(bp) || \
+ BNX2X_IS_MF_EXT_PROTOCOL_FCOE(bp)))
-#define BNX2X_MF_EXT_PROTOCOL_FCOE(bp) ((bp)->mf_ext_config & \
- MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD)
+#define IS_MF_STORAGE_PERSONALITY_ONLY(bp) \
+ (IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp) || \
+ IS_MF_SI_STORAGE_PERSONALITY_ONLY(bp))
-#define IS_MF_FCOE_AFEX(bp) (IS_MF_AFEX(bp) && BNX2X_MF_EXT_PROTOCOL_FCOE(bp))
-#define IS_MF_STORAGE_SD(bp) (IS_MF_SD(bp) && \
- (BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp) || \
- BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp)))
#define SET_FLAG(value, mask, flag) \
do {\
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 6dc32aee96bf..40beef5bca88 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1938,7 +1938,7 @@ void bnx2x_set_num_queues(struct bnx2x *bp)
bp->num_ethernet_queues = bnx2x_calc_num_queues(bp);
/* override in STORAGE SD modes */
- if (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))
+ if (IS_MF_STORAGE_ONLY(bp))
bp->num_ethernet_queues = 1;
/* Add special queues */
@@ -4231,14 +4231,13 @@ int bnx2x_change_mac_addr(struct net_device *dev, void *p)
struct bnx2x *bp = netdev_priv(dev);
int rc = 0;
- if (!bnx2x_is_valid_ether_addr(bp, addr->sa_data)) {
+ if (!is_valid_ether_addr(addr->sa_data)) {
BNX2X_ERR("Requested MAC address is not valid\n");
return -EINVAL;
}
- if ((IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp)) &&
- !is_zero_ether_addr(addr->sa_data)) {
- BNX2X_ERR("Can't configure non-zero address on iSCSI or FCoE functions in MF-SD mode\n");
+ if (IS_MF_STORAGE_ONLY(bp)) {
+ BNX2X_ERR("Can't change address on STORAGE ONLY function\n");
return -EINVAL;
}
@@ -4417,8 +4416,7 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
u8 cos;
int rx_ring_size = 0;
- if (!bp->rx_ring_size &&
- (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))) {
+ if (!bp->rx_ring_size && IS_MF_STORAGE_ONLY(bp)) {
rx_ring_size = MIN_RX_SIZE_NONTPA;
bp->rx_ring_size = rx_ring_size;
} else if (!bp->rx_ring_size) {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index ac63e16829ef..adcacda7af7b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -936,6 +936,12 @@ static inline int bnx2x_func_start(struct bnx2x *bp)
start_params->gre_tunnel_type = IPGRE_TUNNEL;
start_params->inner_gre_rss_en = 1;
+ if (IS_MF_UFP(bp) && BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp)) {
+ start_params->class_fail_ethtype = ETH_P_FIP;
+ start_params->class_fail = 1;
+ start_params->no_added_tags = 1;
+ }
+
return bnx2x_func_state_change(bp, &func_params);
}
@@ -1298,15 +1304,7 @@ static inline void bnx2x_update_drv_flags(struct bnx2x *bp, u32 flags, u32 set)
}
}
-static inline bool bnx2x_is_valid_ether_addr(struct bnx2x *bp, u8 *addr)
-{
- if (is_valid_ether_addr(addr) ||
- (is_zero_ether_addr(addr) &&
- (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))))
- return true;
- return false;
-}
/**
* bnx2x_fill_fw_str - Fill buffer with FW version string
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 0b173ed20ae9..1edc931b1458 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1852,7 +1852,7 @@ static int bnx2x_set_ringparam(struct net_device *dev,
if ((ering->rx_pending > MAX_RX_AVAIL) ||
(ering->rx_pending < (bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
MIN_RX_SIZE_TPA)) ||
- (ering->tx_pending > (IS_MF_FCOE_AFEX(bp) ? 0 : MAX_TX_AVAIL)) ||
+ (ering->tx_pending > (IS_MF_STORAGE_ONLY(bp) ? 0 : MAX_TX_AVAIL)) ||
(ering->tx_pending <= MAX_SKB_FRAGS + 4)) {
DP(BNX2X_MSG_ETHTOOL, "Command parameters not supported\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 3e0621acdf05..583591d52497 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -280,17 +280,11 @@ struct shared_hw_cfg { /* NVRAM Offset */
#define SHARED_HW_CFG_MDC_MDIO_ACCESS2_BOTH 0x60000000
#define SHARED_HW_CFG_MDC_MDIO_ACCESS2_SWAPPED 0x80000000
-
- u32 power_dissipated; /* 0x11c */
- #define SHARED_HW_CFG_POWER_MGNT_SCALE_MASK 0x00ff0000
- #define SHARED_HW_CFG_POWER_MGNT_SCALE_SHIFT 16
- #define SHARED_HW_CFG_POWER_MGNT_UNKNOWN_SCALE 0x00000000
- #define SHARED_HW_CFG_POWER_MGNT_DOT_1_WATT 0x00010000
- #define SHARED_HW_CFG_POWER_MGNT_DOT_01_WATT 0x00020000
- #define SHARED_HW_CFG_POWER_MGNT_DOT_001_WATT 0x00030000
-
- #define SHARED_HW_CFG_POWER_DIS_CMN_MASK 0xff000000
- #define SHARED_HW_CFG_POWER_DIS_CMN_SHIFT 24
+ u32 config_3; /* 0x11C */
+ #define SHARED_HW_CFG_EXTENDED_MF_MODE_MASK 0x00000F00
+ #define SHARED_HW_CFG_EXTENDED_MF_MODE_SHIFT 8
+ #define SHARED_HW_CFG_EXTENDED_MF_MODE_NPAR1_DOT_5 0x00000000
+ #define SHARED_HW_CFG_EXTENDED_MF_MODE_NPAR2_DOT_0 0x00000100
u32 ump_nc_si_config; /* 0x120 */
#define SHARED_HW_CFG_UMP_NC_SI_MII_MODE_MASK 0x00000003
@@ -859,6 +853,8 @@ struct shared_feat_cfg { /* NVRAM Offset */
#define SHARED_FEAT_CFG_FORCE_SF_MODE_SPIO4 0x00000200
#define SHARED_FEAT_CFG_FORCE_SF_MODE_SWITCH_INDEPT 0x00000300
#define SHARED_FEAT_CFG_FORCE_SF_MODE_AFEX_MODE 0x00000400
+ #define SHARED_FEAT_CFG_FORCE_SF_MODE_UFP_MODE 0x00000600
+ #define SHARED_FEAT_CFG_FORCE_SF_MODE_EXTENDED_MODE 0x00000700
/* The interval in seconds between sending LLDP packets. Set to zero
to disable the feature */
@@ -1268,6 +1264,10 @@ struct drv_func_mb {
#define DRV_MSG_CODE_GET_UPGRADE_KEY 0x81000000
#define DRV_MSG_CODE_GET_MANUF_KEY 0x82000000
#define DRV_MSG_CODE_LOAD_L2B_PRAM 0x90000000
+ #define DRV_MSG_CODE_OEM_OK 0x00010000
+ #define DRV_MSG_CODE_OEM_FAILURE 0x00020000
+ #define DRV_MSG_CODE_OEM_UPDATE_SVID_OK 0x00030000
+ #define DRV_MSG_CODE_OEM_UPDATE_SVID_FAILURE 0x00040000
/*
* The optic module verification command requires bootcode
* v5.0.6 or later, te specific optic module verification command
@@ -1423,6 +1423,12 @@ struct drv_func_mb {
#define DRV_STATUS_SET_MF_BW 0x00000004
#define DRV_STATUS_LINK_EVENT 0x00000008
+ #define DRV_STATUS_OEM_EVENT_MASK 0x00000070
+ #define DRV_STATUS_OEM_DISABLE_ENABLE_PF 0x00000010
+ #define DRV_STATUS_OEM_BANDWIDTH_ALLOCATION 0x00000020
+
+ #define DRV_STATUS_OEM_UPDATE_SVID 0x00000080
+
#define DRV_STATUS_DCC_EVENT_MASK 0x0000ff00
#define DRV_STATUS_DCC_DISABLE_ENABLE_PF 0x00000100
#define DRV_STATUS_DCC_BANDWIDTH_ALLOCATION 0x00000200
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 32e2444ab5e1..74fbf9ea7bd8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2905,6 +2905,57 @@ static void bnx2x_handle_afex_cmd(struct bnx2x *bp, u32 cmd)
}
}
+static void bnx2x_handle_update_svid_cmd(struct bnx2x *bp)
+{
+ struct bnx2x_func_switch_update_params *switch_update_params;
+ struct bnx2x_func_state_params func_params;
+
+ memset(&func_params, 0, sizeof(struct bnx2x_func_state_params));
+ switch_update_params = &func_params.params.switch_update;
+ func_params.f_obj = &bp->func_obj;
+ func_params.cmd = BNX2X_F_CMD_SWITCH_UPDATE;
+
+ if (IS_MF_UFP(bp)) {
+ int func = BP_ABS_FUNC(bp);
+ u32 val;
+
+ /* Re-learn the S-tag from shmem */
+ val = MF_CFG_RD(bp, func_mf_config[func].e1hov_tag) &
+ FUNC_MF_CFG_E1HOV_TAG_MASK;
+ if (val != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) {
+ bp->mf_ov = val;
+ } else {
+ BNX2X_ERR("Got an SVID event, but no tag is configured in shmem\n");
+ goto fail;
+ }
+
+ /* Configure new S-tag in LLH */
+ REG_WR(bp, NIG_REG_LLH0_FUNC_VLAN_ID + BP_PORT(bp) * 8,
+ bp->mf_ov);
+
+ /* Send Ramrod to update FW of change */
+ __set_bit(BNX2X_F_UPDATE_SD_VLAN_TAG_CHNG,
+ &switch_update_params->changes);
+ switch_update_params->vlan = bp->mf_ov;
+
+ if (bnx2x_func_state_change(bp, &func_params) < 0) {
+ BNX2X_ERR("Failed to configure FW of S-tag Change to %02x\n",
+ bp->mf_ov);
+ goto fail;
+ }
+
+ DP(BNX2X_MSG_MCP, "Configured S-tag %02x\n", bp->mf_ov);
+
+ bnx2x_fw_command(bp, DRV_MSG_CODE_OEM_UPDATE_SVID_OK, 0);
+
+ return;
+ }
+
+ /* not supported by SW yet */
+fail:
+ bnx2x_fw_command(bp, DRV_MSG_CODE_OEM_UPDATE_SVID_FAILURE, 0);
+}
+
static void bnx2x_pmf_update(struct bnx2x *bp)
{
int port = BP_PORT(bp);
@@ -3297,7 +3348,8 @@ static void bnx2x_e1h_enable(struct bnx2x *bp)
{
int port = BP_PORT(bp);
- REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 1);
+ if (!(IS_MF_UFP(bp) && BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp)))
+ REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port * 8, 1);
/* Tx queue should be only re-enabled */
netif_tx_wake_all_queues(bp->dev);
@@ -3652,14 +3704,30 @@ out:
ethver, iscsiver, fcoever);
}
-static void bnx2x_dcc_event(struct bnx2x *bp, u32 dcc_event)
+static void bnx2x_oem_event(struct bnx2x *bp, u32 event)
{
- DP(BNX2X_MSG_MCP, "dcc_event 0x%x\n", dcc_event);
+ u32 cmd_ok, cmd_fail;
- if (dcc_event & DRV_STATUS_DCC_DISABLE_ENABLE_PF) {
+ /* sanity */
+ if (event & DRV_STATUS_DCC_EVENT_MASK &&
+ event & DRV_STATUS_OEM_EVENT_MASK) {
+ BNX2X_ERR("Received simultaneous events %08x\n", event);
+ return;
+ }
- /*
- * This is the only place besides the function initialization
+ if (event & DRV_STATUS_DCC_EVENT_MASK) {
+ cmd_fail = DRV_MSG_CODE_DCC_FAILURE;
+ cmd_ok = DRV_MSG_CODE_DCC_OK;
+ } else /* if (event & DRV_STATUS_OEM_EVENT_MASK) */ {
+ cmd_fail = DRV_MSG_CODE_OEM_FAILURE;
+ cmd_ok = DRV_MSG_CODE_OEM_OK;
+ }
+
+ DP(BNX2X_MSG_MCP, "oem_event 0x%x\n", event);
+
+ if (event & (DRV_STATUS_DCC_DISABLE_ENABLE_PF |
+ DRV_STATUS_OEM_DISABLE_ENABLE_PF)) {
+ /* This is the only place besides the function initialization
* where the bp->flags can change so it is done without any
* locks
*/
@@ -3674,18 +3742,22 @@ static void bnx2x_dcc_event(struct bnx2x *bp, u32 dcc_event)
bnx2x_e1h_enable(bp);
}
- dcc_event &= ~DRV_STATUS_DCC_DISABLE_ENABLE_PF;
+ event &= ~(DRV_STATUS_DCC_DISABLE_ENABLE_PF |
+ DRV_STATUS_OEM_DISABLE_ENABLE_PF);
}
- if (dcc_event & DRV_STATUS_DCC_BANDWIDTH_ALLOCATION) {
+
+ if (event & (DRV_STATUS_DCC_BANDWIDTH_ALLOCATION |
+ DRV_STATUS_OEM_BANDWIDTH_ALLOCATION)) {
bnx2x_config_mf_bw(bp);
- dcc_event &= ~DRV_STATUS_DCC_BANDWIDTH_ALLOCATION;
+ event &= ~(DRV_STATUS_DCC_BANDWIDTH_ALLOCATION |
+ DRV_STATUS_OEM_BANDWIDTH_ALLOCATION);
}
/* Report results to MCP */
- if (dcc_event)
- bnx2x_fw_command(bp, DRV_MSG_CODE_DCC_FAILURE, 0);
+ if (event)
+ bnx2x_fw_command(bp, cmd_fail, 0);
else
- bnx2x_fw_command(bp, DRV_MSG_CODE_DCC_OK, 0);
+ bnx2x_fw_command(bp, cmd_ok, 0);
}
/* must be called under the spq lock */
@@ -4167,9 +4239,12 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn)
func_mf_config[BP_ABS_FUNC(bp)].config);
val = SHMEM_RD(bp,
func_mb[BP_FW_MB_IDX(bp)].drv_status);
- if (val & DRV_STATUS_DCC_EVENT_MASK)
- bnx2x_dcc_event(bp,
- (val & DRV_STATUS_DCC_EVENT_MASK));
+
+ if (val & (DRV_STATUS_DCC_EVENT_MASK |
+ DRV_STATUS_OEM_EVENT_MASK))
+ bnx2x_oem_event(bp,
+ (val & (DRV_STATUS_DCC_EVENT_MASK |
+ DRV_STATUS_OEM_EVENT_MASK)));
if (val & DRV_STATUS_SET_MF_BW)
bnx2x_set_mf_bw(bp);
@@ -4195,6 +4270,10 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn)
val & DRV_STATUS_AFEX_EVENT_MASK);
if (val & DRV_STATUS_EEE_NEGOTIATION_RESULTS)
bnx2x_handle_eee_event(bp);
+
+ if (val & DRV_STATUS_OEM_UPDATE_SVID)
+ bnx2x_handle_update_svid_cmd(bp);
+
if (bp->link_vars.periodic_flags &
PERIODIC_FLAGS_LINK_EVENT) {
/* sync with link */
@@ -7930,8 +8009,11 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
REG_WR(bp, CFC_REG_WEAK_ENABLE_PF, 1);
if (IS_MF(bp)) {
- REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 1);
- REG_WR(bp, NIG_REG_LLH0_FUNC_VLAN_ID + port*8, bp->mf_ov);
+ if (!(IS_MF_UFP(bp) && BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp))) {
+ REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port * 8, 1);
+ REG_WR(bp, NIG_REG_LLH0_FUNC_VLAN_ID + port * 8,
+ bp->mf_ov);
+ }
}
bnx2x_init_block(bp, BLOCK_MISC_AEU, init_phase);
@@ -8323,13 +8405,6 @@ int bnx2x_del_all_macs(struct bnx2x *bp,
int bnx2x_set_eth_mac(struct bnx2x *bp, bool set)
{
- if (is_zero_ether_addr(bp->dev->dev_addr) &&
- (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))) {
- DP(NETIF_MSG_IFUP | NETIF_MSG_IFDOWN,
- "Ignoring Zero MAC for STORAGE SD mode\n");
- return 0;
- }
-
if (IS_PF(bp)) {
unsigned long ramrod_flags = 0;
@@ -11355,15 +11430,14 @@ static void bnx2x_get_fcoe_info(struct bnx2x *bp)
dev_info.port_hw_config[port].
fcoe_wwn_node_name_lower);
} else if (!IS_MF_SD(bp)) {
- /*
- * Read the WWN info only if the FCoE feature is enabled for
+ /* Read the WWN info only if the FCoE feature is enabled for
* this function.
*/
- if (BNX2X_MF_EXT_PROTOCOL_FCOE(bp) && !CHIP_IS_E1x(bp))
+ if (BNX2X_HAS_MF_EXT_PROTOCOL_FCOE(bp))
+ bnx2x_get_ext_wwn_info(bp, func);
+ } else {
+ if (BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp) && !CHIP_IS_E1x(bp))
bnx2x_get_ext_wwn_info(bp, func);
-
- } else if (IS_MF_FCOE_SD(bp) && !CHIP_IS_E1x(bp)) {
- bnx2x_get_ext_wwn_info(bp, func);
}
BNX2X_DEV_INFO("max_fcoe_conn 0x%x\n", bp->cnic_eth_dev.max_fcoe_conn);
@@ -11401,7 +11475,7 @@ static void bnx2x_get_cnic_mac_hwinfo(struct bnx2x *bp)
* In non SD mode features configuration comes from struct
* func_ext_config.
*/
- if (!IS_MF_SD(bp) && !CHIP_IS_E1x(bp)) {
+ if (!IS_MF_SD(bp)) {
u32 cfg = MF_CFG_RD(bp, func_ext_config[func].func_cfg);
if (cfg & MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD) {
val2 = MF_CFG_RD(bp, func_ext_config[func].
@@ -11520,7 +11594,7 @@ static void bnx2x_get_mac_hwinfo(struct bnx2x *bp)
memcpy(bp->link_params.mac_addr, bp->dev->dev_addr, ETH_ALEN);
- if (!bnx2x_is_valid_ether_addr(bp, bp->dev->dev_addr))
+ if (!is_valid_ether_addr(bp->dev->dev_addr))
dev_err(&bp->pdev->dev,
"bad Ethernet MAC address configuration: %pM\n"
"change it manually before bringing up the appropriate network interface\n",
@@ -11550,11 +11624,27 @@ static bool bnx2x_get_dropless_info(struct bnx2x *bp)
return cfg;
}
+static void validate_set_si_mode(struct bnx2x *bp)
+{
+ u8 func = BP_ABS_FUNC(bp);
+ u32 val;
+
+ val = MF_CFG_RD(bp, func_mf_config[func].mac_upper);
+
+ /* check for legal mac (upper bytes) */
+ if (val != 0xffff) {
+ bp->mf_mode = MULTI_FUNCTION_SI;
+ bp->mf_config[BP_VN(bp)] =
+ MF_CFG_RD(bp, func_mf_config[func].config);
+ } else
+ BNX2X_DEV_INFO("illegal MAC address for SI\n");
+}
+
static int bnx2x_get_hwinfo(struct bnx2x *bp)
{
int /*abs*/func = BP_ABS_FUNC(bp);
int vn;
- u32 val = 0;
+ u32 val = 0, val2 = 0;
int rc = 0;
bnx2x_get_common_hwinfo(bp);
@@ -11634,6 +11724,7 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp)
bp->mf_ov = 0;
bp->mf_mode = 0;
+ bp->mf_sub_mode = 0;
vn = BP_VN(bp);
if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) {
@@ -11663,15 +11754,7 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp)
switch (val) {
case SHARED_FEAT_CFG_FORCE_SF_MODE_SWITCH_INDEPT:
- val = MF_CFG_RD(bp, func_mf_config[func].
- mac_upper);
- /* check for legal mac (upper bytes)*/
- if (val != 0xffff) {
- bp->mf_mode = MULTI_FUNCTION_SI;
- bp->mf_config[vn] = MF_CFG_RD(bp,
- func_mf_config[func].config);
- } else
- BNX2X_DEV_INFO("illegal MAC address for SI\n");
+ validate_set_si_mode(bp);
break;
case SHARED_FEAT_CFG_FORCE_SF_MODE_AFEX_MODE:
if ((!CHIP_IS_E1x(bp)) &&
@@ -11699,9 +11782,33 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp)
} else
BNX2X_DEV_INFO("illegal OV for SD\n");
break;
+ case SHARED_FEAT_CFG_FORCE_SF_MODE_UFP_MODE:
+ bp->mf_mode = MULTI_FUNCTION_SD;
+ bp->mf_sub_mode = SUB_MF_MODE_UFP;
+ bp->mf_config[vn] =
+ MF_CFG_RD(bp,
+ func_mf_config[func].config);
+ break;
case SHARED_FEAT_CFG_FORCE_SF_MODE_FORCED_SF:
bp->mf_config[vn] = 0;
break;
+ case SHARED_FEAT_CFG_FORCE_SF_MODE_EXTENDED_MODE:
+ val2 = SHMEM_RD(bp,
+ dev_info.shared_hw_config.config_3);
+ val2 &= SHARED_HW_CFG_EXTENDED_MF_MODE_MASK;
+ switch (val2) {
+ case SHARED_HW_CFG_EXTENDED_MF_MODE_NPAR1_DOT_5:
+ validate_set_si_mode(bp);
+ bp->mf_sub_mode =
+ SUB_MF_MODE_NPAR1_DOT_5;
+ break;
+ default:
+ /* Unknown configuration */
+ bp->mf_config[vn] = 0;
+ BNX2X_DEV_INFO("unknown extended MF mode 0x%x\n",
+ val);
+ }
+ break;
default:
/* Unknown configuration: reset mf_config */
bp->mf_config[vn] = 0;
@@ -11722,6 +11829,11 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp)
BNX2X_DEV_INFO("MF OV for func %d is %d (0x%04x)\n",
func, bp->mf_ov, bp->mf_ov);
+ } else if (bp->mf_sub_mode == SUB_MF_MODE_UFP) {
+ dev_err(&bp->pdev->dev,
+ "Unexpected - no valid MF OV for func %d in UFP mode\n",
+ func);
+ bp->path_has_ovlan = true;
} else {
dev_err(&bp->pdev->dev,
"No valid MF OV for func %d, aborting\n",
@@ -11970,7 +12082,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
dev_err(&bp->pdev->dev, "MCP disabled, must load devices in order!\n");
bp->disable_tpa = disable_tpa;
- bp->disable_tpa |= IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp);
+ bp->disable_tpa |= !!IS_MF_STORAGE_ONLY(bp);
/* Reduce memory usage in kdump environment by disabling TPA */
bp->disable_tpa |= is_kdump_kernel();
@@ -11990,7 +12102,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
bp->mrrs = mrrs;
- bp->tx_ring_size = IS_MF_FCOE_AFEX(bp) ? 0 : MAX_TX_AVAIL;
+ bp->tx_ring_size = IS_MF_STORAGE_ONLY(bp) ? 0 : MAX_TX_AVAIL;
if (IS_VF(bp))
bp->rx_ring_size = MAX_RX_AVAIL;
@@ -12310,7 +12422,7 @@ void bnx2x_set_rx_mode_inner(struct bnx2x *bp)
bp->rx_mode = rx_mode;
/* handle ISCSI SD mode */
- if (IS_MF_ISCSI_SD(bp))
+ if (IS_MF_ISCSI_ONLY(bp))
bp->rx_mode = BNX2X_RX_MODE_NONE;
/* Schedule the rx_mode command */
@@ -12417,7 +12529,7 @@ static int bnx2x_validate_addr(struct net_device *dev)
if (IS_VF(bp))
bnx2x_sample_bulletin(bp);
- if (!bnx2x_is_valid_ether_addr(bp, dev->dev_addr)) {
+ if (!is_valid_ether_addr(dev->dev_addr)) {
BNX2X_ERR("Non-valid Ethernet address\n");
return -EADDRNOTAVAIL;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 19d0c1152434..7bc2924a7e24 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -5673,8 +5673,23 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp,
rdata->gre_tunnel_type = start_params->gre_tunnel_type;
rdata->inner_gre_rss_en = start_params->inner_gre_rss_en;
rdata->vxlan_dst_port = cpu_to_le16(4789);
- rdata->sd_vlan_eth_type = cpu_to_le16(0x8100);
+ rdata->sd_accept_mf_clss_fail = start_params->class_fail;
+ if (start_params->class_fail_ethtype) {
+ rdata->sd_accept_mf_clss_fail_match_ethtype = 1;
+ rdata->sd_accept_mf_clss_fail_ethtype =
+ cpu_to_le16(start_params->class_fail_ethtype);
+ }
+
+ rdata->sd_vlan_force_pri_flg = start_params->sd_vlan_force_pri;
+ rdata->sd_vlan_force_pri_val = start_params->sd_vlan_force_pri_val;
+ if (start_params->sd_vlan_eth_type)
+ rdata->sd_vlan_eth_type =
+ cpu_to_le16(start_params->sd_vlan_eth_type);
+ else
+ rdata->sd_vlan_eth_type =
+ cpu_to_le16(0x8100);
+ rdata->no_added_tags = start_params->no_added_tags;
/* No need for an explicit memory barrier here as long we would
* need to ensure the ordering of writing to the SPQ element
* and updating of the SPQ producer which involves a memory
@@ -5708,6 +5723,30 @@ static inline int bnx2x_func_send_switch_update(struct bnx2x *bp,
&switch_update_params->changes);
}
+ if (test_bit(BNX2X_F_UPDATE_SD_VLAN_TAG_CHNG,
+ &switch_update_params->changes)) {
+ rdata->sd_vlan_tag_change_flg = 1;
+ rdata->sd_vlan_tag =
+ cpu_to_le16(switch_update_params->vlan);
+ }
+
+ if (test_bit(BNX2X_F_UPDATE_SD_VLAN_ETH_TYPE_CHNG,
+ &switch_update_params->changes)) {
+ rdata->sd_vlan_eth_type_change_flg = 1;
+ rdata->sd_vlan_eth_type =
+ cpu_to_le16(switch_update_params->vlan_eth_type);
+ }
+
+ if (test_bit(BNX2X_F_UPDATE_VLAN_FORCE_PRIO_CHNG,
+ &switch_update_params->changes)) {
+ rdata->sd_vlan_force_pri_change_flg = 1;
+ if (test_bit(BNX2X_F_UPDATE_VLAN_FORCE_PRIO_FLAG,
+ &switch_update_params->changes))
+ rdata->sd_vlan_force_pri_flg = 1;
+ rdata->sd_vlan_force_pri_flg =
+ switch_update_params->vlan_force_prio;
+ }
+
if (test_bit(BNX2X_F_UPDATE_TUNNEL_CFG_CHNG,
&switch_update_params->changes)) {
rdata->update_tunn_cfg_flg = 1;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 21c8f6fb89e5..e97275f456c0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -1098,6 +1098,10 @@ struct bnx2x_queue_sp_obj {
enum {
BNX2X_F_UPDATE_TX_SWITCH_SUSPEND_CHNG,
BNX2X_F_UPDATE_TX_SWITCH_SUSPEND,
+ BNX2X_F_UPDATE_SD_VLAN_TAG_CHNG,
+ BNX2X_F_UPDATE_SD_VLAN_ETH_TYPE_CHNG,
+ BNX2X_F_UPDATE_VLAN_FORCE_PRIO_CHNG,
+ BNX2X_F_UPDATE_VLAN_FORCE_PRIO_FLAG,
BNX2X_F_UPDATE_TUNNEL_CFG_CHNG,
BNX2X_F_UPDATE_TUNNEL_CLSS_EN,
BNX2X_F_UPDATE_TUNNEL_INNER_GRE_RSS_EN,
@@ -1178,10 +1182,29 @@ struct bnx2x_func_start_params {
* capailities
*/
u8 inner_gre_rss_en;
+
+ /* Allows accepting of packets failing MF classification, possibly
+ * only matching a given ethertype
+ */
+ u8 class_fail;
+ u16 class_fail_ethtype;
+
+ /* Override priority of output packets */
+ u8 sd_vlan_force_pri;
+ u8 sd_vlan_force_pri_val;
+
+ /* Replace vlan's ethertype */
+ u16 sd_vlan_eth_type;
+
+ /* Prevent inner vlans from being added by FW */
+ u8 no_added_tags;
};
struct bnx2x_func_switch_update_params {
unsigned long changes; /* BNX2X_F_UPDATE_XX bits */
+ u16 vlan;
+ u16 vlan_eth_type;
+ u8 vlan_force_prio;
u8 tunnel_mode;
u8 gre_tunnel_type;
};
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 3f9d4de8173c..a12c65604f9d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2432,6 +2432,13 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
dev_info(&priv->pdev->dev, "GENET " GENET_VER_FMT,
major, (reg >> 16) & 0x0f, reg & 0xffff);
+ /* Store the integrated PHY revision for the MDIO probing function
+ * to pass this information to the PHY driver. The PHY driver expects
+ * to find the PHY major revision in bits 15:8 while the GENET register
+ * stores that information in bits 7:0, account for that.
+ */
+ priv->gphy_rev = (reg & 0xffff) << 8;
+
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (!(params->flags & GENET_HAS_40BITS))
pr_warn("GENET does not support 40-bits PA\n");
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index c862d0666771..ad95fe57ebcd 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -545,6 +545,7 @@ struct bcmgenet_priv {
struct phy_device *phydev;
struct device_node *phy_dn;
struct mii_bus *mii_bus;
+ u16 gphy_rev;
/* PHY device variables */
int old_duplex;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index c88f7ae99636..75b26cbaa7c1 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -296,7 +296,7 @@ static int bcmgenet_mii_probe(struct net_device *dev)
struct bcmgenet_priv *priv = netdev_priv(dev);
struct device_node *dn = priv->pdev->dev.of_node;
struct phy_device *phydev;
- unsigned int phy_flags;
+ u32 phy_flags;
int ret;
if (priv->phydev) {
@@ -315,8 +315,11 @@ static int bcmgenet_mii_probe(struct net_device *dev)
priv->phy_dn = of_node_get(dn);
}
- phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, 0,
- priv->phy_interface);
+ /* Communicate the integrated PHY revision */
+ phy_flags = priv->gphy_rev;
+
+ phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
+ phy_flags, priv->phy_interface);
if (!phydev) {
pr_err("could not attach to PHY\n");
return -ENODEV;
@@ -338,15 +341,6 @@ static int bcmgenet_mii_probe(struct net_device *dev)
return ret;
}
- phy_flags = PHY_BRCM_100MBPS_WAR;
-
- /* workarounds are only needed for 100Mpbs PHYs, and
- * never on GENET V1 hardware
- */
- if ((phydev->supported & PHY_GBIT_FEATURES) || GENET_IS_V1(priv))
- phy_flags = 0;
-
- phydev->dev_flags |= phy_flags;
phydev->advertising = phydev->supported;
/* The internal PHY has its link interrupts routed to the
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e1bcb4fc244a..26fb1de98826 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -316,7 +316,7 @@ struct bufdesc_ex {
* the skbuffer directly.
*/
-#define FEC_ENET_RX_PAGES 8
+#define FEC_ENET_RX_PAGES 256
#define FEC_ENET_RX_FRSIZE 2048
#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
@@ -355,6 +355,14 @@ struct bufdesc_ex {
#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER)
#define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
+/* ENET interrupt coalescing macro define */
+#define FEC_ITR_CLK_SEL (0x1 << 30)
+#define FEC_ITR_EN (0x1 << 31)
+#define FEC_ITR_ICFT(X) ((X & 0xFF) << 20)
+#define FEC_ITR_ICTT(X) ((X) & 0xFFFF)
+#define FEC_ITR_ICFT_DEFAULT 200 /* Set 200 frame count threshold */
+#define FEC_ITR_ICTT_DEFAULT 1000 /* Set 1000us timer threshold */
+
#define FEC_VLAN_TAG_LEN 0x04
#define FEC_ETHTYPE_LEN 0x02
@@ -466,6 +474,13 @@ struct fec_enet_private {
unsigned int tx_align;
unsigned int rx_align;
+
+ /* hw interrupt coalesce */
+ unsigned int rx_pkts_itr;
+ unsigned int rx_time_itr;
+ unsigned int tx_pkts_itr;
+ unsigned int tx_time_itr;
+ unsigned int itr_clk_rate;
};
void fec_ptp_init(struct platform_device *pdev);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6e93336d1d20..09d7f5f667ae 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -63,6 +63,7 @@
#include "fec.h"
static void set_multicast_list(struct net_device *ndev);
+static void fec_enet_itr_coal_init(struct net_device *ndev);
#define DRIVER_NAME "fec"
@@ -110,6 +111,12 @@ static void set_multicast_list(struct net_device *ndev);
* independent rings
*/
#define FEC_QUIRK_HAS_AVB (1 << 8)
+/* There is a TDAR race condition for mutliQ when the software sets TDAR
+ * and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles).
+ * This will cause the udma_tx and udma_tx_arbiter state machines to hang.
+ * The issue exist at i.MX6SX enet IP.
+ */
+#define FEC_QUIRK_ERR007885 (1 << 9)
static struct platform_device_id fec_devtype[] = {
{
@@ -138,7 +145,7 @@ static struct platform_device_id fec_devtype[] = {
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
- FEC_QUIRK_HAS_AVB,
+ FEC_QUIRK_HAS_AVB | FEC_QUIRK_ERR007885,
}, {
/* sentinel */
}
@@ -708,6 +715,8 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
struct tso_t tso;
unsigned int index = 0;
int ret;
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep, txq)) {
dev_kfree_skb_any(skb);
@@ -769,7 +778,12 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
txq->cur_tx = bdp;
/* Trigger transmission start */
- writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue));
+ if (!(id_entry->driver_data & FEC_QUIRK_ERR007885) ||
+ !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) ||
+ !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) ||
+ !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) ||
+ !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)))
+ writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue));
return 0;
@@ -1095,6 +1109,10 @@ fec_restart(struct net_device *ndev)
/* Enable interrupts we wish to service */
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
+
+ /* Init the interrupt coalescing */
+ fec_enet_itr_coal_init(ndev);
+
}
static void
@@ -2234,12 +2252,141 @@ static int fec_enet_nway_reset(struct net_device *dev)
return genphy_restart_aneg(phydev);
}
+/* ITR clock source is enet system clock (clk_ahb).
+ * TCTT unit is cycle_ns * 64 cycle
+ * So, the ICTT value = X us / (cycle_ns * 64)
+ */
+static int fec_enet_us_to_itr_clock(struct net_device *ndev, int us)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+
+ return us * (fep->itr_clk_rate / 64000) / 1000;
+}
+
+/* Set threshold for interrupt coalescing */
+static void fec_enet_itr_coal_set(struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+ int rx_itr, tx_itr;
+
+ if (!(id_entry->driver_data & FEC_QUIRK_HAS_AVB))
+ return;
+
+ /* Must be greater than zero to avoid unpredictable behavior */
+ if (!fep->rx_time_itr || !fep->rx_pkts_itr ||
+ !fep->tx_time_itr || !fep->tx_pkts_itr)
+ return;
+
+ /* Select enet system clock as Interrupt Coalescing
+ * timer Clock Source
+ */
+ rx_itr = FEC_ITR_CLK_SEL;
+ tx_itr = FEC_ITR_CLK_SEL;
+
+ /* set ICFT and ICTT */
+ rx_itr |= FEC_ITR_ICFT(fep->rx_pkts_itr);
+ rx_itr |= FEC_ITR_ICTT(fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr));
+ tx_itr |= FEC_ITR_ICFT(fep->tx_pkts_itr);
+ tx_itr |= FEC_ITR_ICTT(fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr));
+
+ rx_itr |= FEC_ITR_EN;
+ tx_itr |= FEC_ITR_EN;
+
+ writel(tx_itr, fep->hwp + FEC_TXIC0);
+ writel(rx_itr, fep->hwp + FEC_RXIC0);
+ writel(tx_itr, fep->hwp + FEC_TXIC1);
+ writel(rx_itr, fep->hwp + FEC_RXIC1);
+ writel(tx_itr, fep->hwp + FEC_TXIC2);
+ writel(rx_itr, fep->hwp + FEC_RXIC2);
+}
+
+static int
+fec_enet_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+
+ if (!(id_entry->driver_data & FEC_QUIRK_HAS_AVB))
+ return -EOPNOTSUPP;
+
+ ec->rx_coalesce_usecs = fep->rx_time_itr;
+ ec->rx_max_coalesced_frames = fep->rx_pkts_itr;
+
+ ec->tx_coalesce_usecs = fep->tx_time_itr;
+ ec->tx_max_coalesced_frames = fep->tx_pkts_itr;
+
+ return 0;
+}
+
+static int
+fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+
+ unsigned int cycle;
+
+ if (!(id_entry->driver_data & FEC_QUIRK_HAS_AVB))
+ return -EOPNOTSUPP;
+
+ if (ec->rx_max_coalesced_frames > 255) {
+ pr_err("Rx coalesced frames exceed hardware limiation");
+ return -EINVAL;
+ }
+
+ if (ec->tx_max_coalesced_frames > 255) {
+ pr_err("Tx coalesced frame exceed hardware limiation");
+ return -EINVAL;
+ }
+
+ cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr);
+ if (cycle > 0xFFFF) {
+ pr_err("Rx coalesed usec exceeed hardware limiation");
+ return -EINVAL;
+ }
+
+ cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr);
+ if (cycle > 0xFFFF) {
+ pr_err("Rx coalesed usec exceeed hardware limiation");
+ return -EINVAL;
+ }
+
+ fep->rx_time_itr = ec->rx_coalesce_usecs;
+ fep->rx_pkts_itr = ec->rx_max_coalesced_frames;
+
+ fep->tx_time_itr = ec->tx_coalesce_usecs;
+ fep->tx_pkts_itr = ec->tx_max_coalesced_frames;
+
+ fec_enet_itr_coal_set(ndev);
+
+ return 0;
+}
+
+static void fec_enet_itr_coal_init(struct net_device *ndev)
+{
+ struct ethtool_coalesce ec;
+
+ ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
+ ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
+
+ ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
+ ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
+
+ fec_enet_set_coalesce(ndev, &ec);
+}
+
static const struct ethtool_ops fec_enet_ethtool_ops = {
.get_settings = fec_enet_get_settings,
.set_settings = fec_enet_set_settings,
.get_drvinfo = fec_enet_get_drvinfo,
.nway_reset = fec_enet_nway_reset,
.get_link = ethtool_op_get_link,
+ .get_coalesce = fec_enet_get_coalesce,
+ .set_coalesce = fec_enet_set_coalesce,
#ifndef CONFIG_M5272
.get_pauseparam = fec_enet_get_pauseparam,
.set_pauseparam = fec_enet_set_pauseparam,
@@ -2890,23 +3037,23 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx)
/* parse the num of tx and rx queues */
err = of_property_read_u32(np, "fsl,num-tx-queues", num_tx);
- err |= of_property_read_u32(np, "fsl,num-rx-queues", num_rx);
- if (err) {
+ if (err)
*num_tx = 1;
+
+ err = of_property_read_u32(np, "fsl,num-rx-queues", num_rx);
+ if (err)
*num_rx = 1;
- return;
- }
if (*num_tx < 1 || *num_tx > FEC_ENET_MAX_TX_QS) {
- dev_err(&pdev->dev, "Invalidate num_tx(=%d), fail back to 1\n",
- *num_tx);
+ dev_warn(&pdev->dev, "Invalid num_tx(=%d), fall back to 1\n",
+ *num_tx);
*num_tx = 1;
return;
}
if (*num_rx < 1 || *num_rx > FEC_ENET_MAX_RX_QS) {
- dev_err(&pdev->dev, "Invalidate num_rx(=%d), fail back to 1\n",
- *num_rx);
+ dev_warn(&pdev->dev, "Invalid num_rx(=%d), fall back to 1\n",
+ *num_rx);
*num_rx = 1;
return;
}
@@ -2924,8 +3071,8 @@ fec_probe(struct platform_device *pdev)
const struct of_device_id *of_id;
static int dev_id;
struct device_node *np = pdev->dev.of_node, *phy_node;
- int num_tx_qs = 1;
- int num_rx_qs = 1;
+ int num_tx_qs;
+ int num_rx_qs;
of_id = of_match_device(fec_dt_ids, &pdev->dev);
if (of_id)
@@ -3006,6 +3153,8 @@ fec_probe(struct platform_device *pdev)
goto failed_clk;
}
+ fep->itr_clk_rate = clk_get_rate(fep->clk_ahb);
+
/* enet_out is optional, depends on board */
fep->clk_enet_out = devm_clk_get(&pdev->dev, "enet_out");
if (IS_ERR(fep->clk_enet_out))
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index abddcf8c40aa..f3032fec8fce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2459,6 +2459,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
}
priv->rx_ring_num = prof->rx_ring_num;
priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
+ priv->cqe_size = mdev->dev->caps.cqe_size;
priv->mac_index = -1;
priv->msg_enable = MLX4_EN_MSG_LEVEL;
spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 14686b6f4bc5..a33048ee9621 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -671,7 +671,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* descriptor offset can be deduced from the CQE index instead of
* reading 'cqe->index' */
index = cq->mcq.cons_index & ring->size_mask;
- cqe = &cq->buf[(index << factor) + factor];
+ cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -858,7 +858,7 @@ next:
++cq->mcq.cons_index;
index = (cq->mcq.cons_index) & ring->size_mask;
- cqe = &cq->buf[(index << factor) + factor];
+ cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
if (++polled == budget)
goto out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index bc8f51c77d80..c44f4237b9be 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -382,7 +382,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
return true;
index = cons_index & size_mask;
- cqe = &buf[(index << factor) + factor];
+ cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
ring_index = ring->cons & size_mask;
stamp_index = ring_index;
@@ -430,7 +430,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
++cons_index;
index = cons_index & size_mask;
- cqe = &buf[(index << factor) + factor];
+ cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 2a004b347e1d..a49c9d11d8a5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -101,21 +101,24 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
mb();
}
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor,
+ u8 eqe_size)
{
/* (entry & (eq->nent - 1)) gives us a cyclic array */
- unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
- /* CX3 is capable of extending the EQE from 32 to 64 bytes.
- * When this feature is enabled, the first (in the lower addresses)
+ unsigned long offset = (entry & (eq->nent - 1)) * eqe_size;
+ /* CX3 is capable of extending the EQE from 32 to 64 bytes with
+ * strides of 64B,128B and 256B.
+ * When 64B EQE is used, the first (in the lower addresses)
* 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
* contain the legacy EQE information.
+ * In all other cases, the first 32B contains the legacy EQE info.
*/
return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
}
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor, u8 size)
{
- struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
+ struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor, size);
return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
@@ -459,8 +462,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
enum slave_port_gen_event gen_event;
unsigned long flags;
struct mlx4_vport_state *s_info;
+ int eqe_size = dev->caps.eqe_size;
- while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
+ while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor, eqe_size))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -894,8 +898,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
- /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
- npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
+ /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+ * strides of 64B,128B and 256B.
+ */
+ npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@@ -997,8 +1003,10 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
struct mlx4_cmd_mailbox *mailbox;
int err;
int i;
- /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
- int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
+ /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+ * strides of 64B,128B and 256B
+ */
+ int npages = PAGE_ALIGN(dev->caps.eqe_size * eq->nent) / PAGE_SIZE;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 494753e44ae3..13b2e4a51ef4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -137,7 +137,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[8] = "Dynamic QP updates support",
[9] = "Device managed flow steering IPoIB support",
[10] = "TCP/IP offloads/flow-steering for VXLAN support",
- [11] = "MAD DEMUX (Secure-Host) support"
+ [11] = "MAD DEMUX (Secure-Host) support",
+ [12] = "Large cache line (>64B) CQE stride support",
+ [13] = "Large cache line (>64B) EQE stride support"
};
int i;
@@ -557,6 +559,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74
#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
+#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
@@ -733,6 +736,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->max_rq_sg = field;
MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
dev_cap->max_rq_desc_sz = size;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+ if (field & (1 << 6))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ if (field & (1 << 7))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
@@ -1376,6 +1384,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
#define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38)
+#define INIT_HCA_EQE_CQE_STRIDE_OFFSET (INIT_HCA_QPC_OFFSET + 0x3b)
#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
@@ -1452,11 +1461,25 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
dev->caps.cqe_size = 64;
- dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
} else {
dev->caps.cqe_size = 32;
}
+ /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
+ dev->caps.eqe_size = cache_line_size();
+ dev->caps.cqe_size = cache_line_size();
+ dev->caps.eqe_factor = 0;
+ MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
+ (ilog2(dev->caps.eqe_size) - 5)),
+ INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+
+ /* User still need to know to support CQE > 32B */
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ }
+
/* QPC/EEC/CQC/EQC/RDMARC attributes */
MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
@@ -1616,6 +1639,17 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
if (byte_field & 0x40) /* 64-bytes cqe enabled */
param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+ /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+ if (byte_field) {
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+ param->cqe_size = 1 << ((byte_field &
+ MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+ param->eqe_size = 1 << (((byte_field &
+ MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);
+ }
+
/* TPT attributes */
MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 1fce03ebe5c4..9b835aecac96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -178,6 +178,8 @@ struct mlx4_init_hca_param {
u8 uar_page_sz; /* log pg sz in 4k chunks */
u8 steering_mode; /* for QUERY_HCA */
u64 dev_cap_enabled;
+ u16 cqe_size; /* For use only when CQE stride feature enabled */
+ u16 eqe_size; /* For use only when EQE stride feature enabled */
};
struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7e2d5d57c598..1f10023af1db 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -104,7 +104,8 @@ module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
"Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
-#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
+#define PF_CONTEXT_BEHAVIOUR_MASK (MLX4_FUNC_CAP_64B_EQE_CQE | \
+ MLX4_FUNC_CAP_EQE_CQE_STRIDE)
static char mlx4_version[] =
DRV_NAME ": Mellanox ConnectX core driver v"
@@ -196,6 +197,40 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
dev->caps.port_mask[i] = dev->caps.port_type[i];
}
+static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
+{
+ struct mlx4_caps *dev_cap = &dev->caps;
+
+ /* FW not supporting or cancelled by user */
+ if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
+ !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
+ return;
+
+ /* Must have 64B CQE_EQE enabled by FW to use bigger stride
+ * When FW has NCSI it may decide not to report 64B CQE/EQEs
+ */
+ if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
+ !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ return;
+ }
+
+ if (cache_line_size() == 128 || cache_line_size() == 256) {
+ mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
+ /* Changing the real data inside CQE size to 32B */
+ dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+ dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+
+ if (mlx4_is_master(dev))
+ dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
+ } else {
+ mlx4_dbg(dev, "Disabling CQE stride cacheLine unsupported\n");
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ }
+}
+
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
int err;
@@ -390,6 +425,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
}
+
+ if (dev_cap->flags2 &
+ (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
+ MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
+ mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ }
}
if ((dev->caps.flags &
@@ -397,6 +440,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
mlx4_is_master(dev))
dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+ if (!mlx4_is_slave(dev))
+ mlx4_enable_cqe_eqe_stride(dev);
+
return 0;
}
@@ -724,11 +770,22 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
dev->caps.cqe_size = 64;
- dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
} else {
dev->caps.cqe_size = 32;
}
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
+ dev->caps.eqe_size = hca_param.eqe_size;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
+ dev->caps.cqe_size = hca_param.cqe_size;
+ /* User still need to know when CQE > 32B */
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ }
+
dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index b508c7887ef8..de10dbb2e6ed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -285,6 +285,9 @@ struct mlx4_icm_table {
#define MLX4_MPT_STATUS_SW 0xF0
#define MLX4_MPT_STATUS_HW 0x00
+#define MLX4_CQE_SIZE_MASK_STRIDE 0x3
+#define MLX4_EQE_SIZE_MASK_STRIDE 0x30
+
/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 3de41be49425..e3d71c386da7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -542,6 +542,7 @@ struct mlx4_en_priv {
unsigned max_mtu;
int base_qpn;
int cqe_factor;
+ int cqe_size;
struct mlx4_en_rss_map rss_map;
__be32 ctrl_flags;
@@ -612,6 +613,11 @@ struct mlx4_mac_entry {
struct rcu_head rcu;
};
+static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz)
+{
+ return buf + idx * cqe_sz;
+}
+
#ifdef CONFIG_NET_RX_BUSY_POLL
static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
{
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 09dd6e1dc6e1..daae69950925 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -196,13 +196,22 @@ static int bcm7xxx_eee_enable(struct phy_device *phydev)
static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
{
- int ret;
-
- ret = bcm7445_config_init(phydev);
- if (ret)
- return ret;
+ u8 rev = PHY_BRCM_7XXX_REV(phydev->dev_flags);
+ u8 patch = PHY_BRCM_7XXX_PATCH(phydev->dev_flags);
+ int ret = 0;
+
+ dev_info(&phydev->dev, "PHY revision: 0x%02x, patch: %d\n", rev, patch);
+
+ switch (rev) {
+ case 0xa0:
+ case 0xb0:
+ ret = bcm7445_config_init(phydev);
+ break;
+ default:
+ ret = bcm7xxx_28nm_afe_config_init(phydev);
+ break;
+ }
- ret = bcm7xxx_28nm_afe_config_init(phydev);
if (ret)
return ret;
@@ -257,8 +266,8 @@ static int bcm7xxx_config_init(struct phy_device *phydev)
phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO);
phy_read(phydev, MII_BCM7XXX_AUX_MODE);
- /* Workaround only required for 100Mbits/sec */
- if (!(phydev->dev_flags & PHY_BRCM_100MBPS_WAR))
+ /* Workaround only required for 100Mbits/sec capable PHYs */
+ if (phydev->supported & PHY_GBIT_FEATURES)
return 0;
/* set shadow mode 2 */
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 53c3ec19807c..39c86536fb9b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
+#include <net/udp_tunnel.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -1062,7 +1063,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
- rcu_assign_sk_user_data(vs->sock->sk, NULL);
vxlan_notify_del_rx_port(vs);
spin_unlock(&vn->sock_lock);
@@ -1336,7 +1336,6 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
-
static struct sk_buff *vxlan_na_create(struct sk_buff *request,
struct neighbour *n, bool isrouter)
{
@@ -1570,13 +1569,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
- bool udp_csum)
-{
- int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- return iptunnel_handle_offloads(skb, udp_csum, type);
-}
-
#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
@@ -1585,13 +1577,12 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
{
- struct ipv6hdr *ip6h;
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
+ bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk);
- skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+ skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
return -EINVAL;
@@ -1619,38 +1610,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
- skb_dst_set(skb, dst);
-
- udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
- saddr, daddr, skb->len);
-
- __skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
- ip6h->version = 6;
- ip6h->priority = prio;
- ip6h->flow_lbl[0] = 0;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
- ip6h->payload_len = htons(skb->len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- ip6tunnel_xmit(skb, dev);
+ udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
+ ttl, src_port, dst_port);
return 0;
}
#endif
@@ -1661,11 +1622,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
{
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
+ bool udp_sum = !vs->sock->sk->sk_no_check_tx;
- skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+ skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
return -EINVAL;
@@ -1691,20 +1652,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
- src, dst, skb->len);
-
- return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
- tos, ttl, df, xnet);
+ return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
+ ttl, df, src_port, dst_port, xnet);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
@@ -1829,11 +1778,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
- fl4.saddr, dst->sin.sin_addr.s_addr,
- tos, ttl, df, src_port, dst_port,
- htonl(vni << 8),
- !net_eq(vxlan->net, dev_net(vxlan->dev)));
+ err = udp_tunnel_xmit_skb(vxlan->vn_sock->sock, rt, skb,
+ fl4.saddr, dst->sin.sin_addr.s_addr,
+ tos, ttl, df, src_port, dst_port,
+ !net_eq(vxlan->net,
+ dev_net(vxlan->dev)));
if (err < 0)
goto rt_tx_error;
@@ -2333,8 +2282,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
static void vxlan_del_work(struct work_struct *work)
{
struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-
- sk_release_kernel(vs->sock->sk);
+ udp_tunnel_sock_release(vs->sock);
kfree_rcu(vs, rcu);
}
@@ -2367,11 +2315,6 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
if (err < 0)
return ERR_PTR(err);
- /* Disable multicast loopback */
- inet_sk(sock->sk)->mc_loop = 0;
-
- udp_set_convert_csum(sock->sk, true);
-
return sock;
}
@@ -2383,9 +2326,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
- struct sock *sk;
unsigned int h;
bool ipv6 = !!(flags & VXLAN_F_IPV6);
+ struct udp_tunnel_sock_cfg tunnel_cfg;
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
@@ -2403,11 +2346,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
}
vs->sock = sock;
- sk = sock->sk;
atomic_set(&vs->refcnt, 1);
vs->rcv = rcv;
vs->data = data;
- rcu_assign_sk_user_data(vs->sock->sk, vs);
/* Initialize the vxlan udp offloads structure */
vs->udp_offloads.port = port;
@@ -2420,14 +2361,12 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */
- udp_sk(sk)->encap_type = 1;
- udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
- ipv6_stub->udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
+ tunnel_cfg.sk_user_data = vs;
+ tunnel_cfg.encap_type = 1;
+ tunnel_cfg.encap_rcv = vxlan_udp_encap_recv;
+ tunnel_cfg.encap_destroy = NULL;
+
+ setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
return vs;
}
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 401b2453da45..a85d80012993 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -224,6 +224,8 @@ struct phy_device *of_phy_connect(struct net_device *dev,
if (!phy)
return NULL;
+ phy->dev_flags = flags;
+
return phy_connect_direct(dev, phy, hndlr, iface) ? NULL : phy;
}
EXPORT_SYMBOL(of_phy_connect);
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 5bd35cc0d471..3f626fe48c9a 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -40,7 +40,8 @@
#define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000
#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000
/* Broadcom BCM7xxx specific workarounds */
-#define PHY_BRCM_100MBPS_WAR 0x00010000
+#define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff)
+#define PHY_BRCM_7XXX_PATCH(x) ((x) & 0xff)
#define PHY_BCM_FLAGS_VALID 0x80000000
/* Broadcom BCM54XX register definitions, common to most Broadcom PHYs */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1befd8df9cfc..7bcefe749a39 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -185,19 +185,24 @@ enum {
MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9,
MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10,
MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11,
+ MLX4_DEV_CAP_FLAG2_CQE_STRIDE = 1LL << 12,
+ MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13
};
enum {
MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0,
- MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1
+ MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1,
+ MLX4_DEV_CAP_CQE_STRIDE_ENABLED = 1LL << 2,
+ MLX4_DEV_CAP_EQE_STRIDE_ENABLED = 1LL << 3
};
enum {
- MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
+ MLX4_USER_DEV_CAP_LARGE_CQE = 1L << 0
};
enum {
- MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
+ MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0,
+ MLX4_FUNC_CAP_EQE_CQE_STRIDE = 1L << 1
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28d4378615e5..4354b4307e37 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1874,7 +1874,7 @@ struct napi_gro_cb {
/* jiffies when first packet was created/queued */
unsigned long age;
- /* Used in ipv6_gro_receive() */
+ /* Used in ipv6_gro_receive() and foo-over-udp */
u16 proto;
/* Used in udp_gro_receive */
@@ -1925,6 +1925,7 @@ struct packet_offload {
struct udp_offload {
__be16 port;
+ u8 ipproto;
struct offload_callbacks callbacks;
};
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 756e3d057e84..f1bfa3781c75 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -769,6 +769,12 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
}
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+ unsigned long data_len,
+ int max_page_order,
+ int *errcode,
+ gfp_t gfp_mask);
+
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
gfp_t priority)
{
diff --git a/include/net/dsa.h b/include/net/dsa.h
index c779e9bba1b3..e020b8a12b7d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -181,6 +181,7 @@ struct dsa_switch_driver {
char *(*probe)(struct device *host_dev, int sw_addr);
int (*setup)(struct dsa_switch *ds);
int (*set_addr)(struct dsa_switch *ds, u8 *addr);
+ u32 (*get_phy_flags)(struct dsa_switch *ds, int port);
/*
* Access to the switch's PHY registers.
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 8dd8cab88b87..7f538ba6e267 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -10,6 +10,7 @@
#include <net/gro_cells.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
+#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#if IS_ENABLED(CONFIG_IPV6)
@@ -31,6 +32,13 @@ struct ip_tunnel_6rd_parm {
};
#endif
+struct ip_tunnel_encap {
+ __u16 type;
+ __u16 flags;
+ __be16 sport;
+ __be16 dport;
+};
+
struct ip_tunnel_prl_entry {
struct ip_tunnel_prl_entry __rcu *next;
__be32 addr;
@@ -56,13 +64,18 @@ struct ip_tunnel {
/* These four fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
__u32 o_seqno; /* The last output seqno */
- int hlen; /* Precalculated header length */
+ int tun_hlen; /* Precalculated header length */
int mlink;
struct ip_tunnel_dst __percpu *dst_cache;
struct ip_tunnel_parm parms;
+ int encap_hlen; /* Encap header length (FOU,GUE) */
+ struct ip_tunnel_encap encap;
+
+ int hlen; /* tun_hlen + encap_hlen */
+
/* for SIT */
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd_parm ip6rd;
@@ -114,6 +127,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ u8 *protocol, struct flowi4 *fl4);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
@@ -131,6 +146,8 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
void ip_tunnel_setup(struct net_device *dev, int net_id);
void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+ struct ip_tunnel_encap *ipencap);
/* Extract dsfield from inner protocol */
static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
diff --git a/include/net/snmp.h b/include/net/snmp.h
index f1f27fdbb0d5..8fd2f498782e 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -146,19 +146,15 @@ struct linux_xfrm_mib {
#define SNMP_ADD_STATS(mib, field, addend) \
this_cpu_add(mib->mibs[field], addend)
-/*
- * Use "__typeof__(*mib) *ptr" instead of "__typeof__(mib) ptr"
- * to make @ptr a non-percpu pointer.
- */
#define SNMP_UPD_PO_STATS(mib, basefield, addend) \
do { \
- __typeof__(*mib->mibs) *ptr = mib->mibs; \
+ __typeof__((mib->mibs) + 0) ptr = mib->mibs; \
this_cpu_inc(ptr[basefield##PKTS]); \
this_cpu_add(ptr[basefield##OCTETS], addend); \
} while (0)
#define SNMP_UPD_PO_STATS_BH(mib, basefield, addend) \
do { \
- __typeof__(*mib->mibs) *ptr = mib->mibs; \
+ __typeof__((mib->mibs) + 0) ptr = mib->mibs; \
__this_cpu_inc(ptr[basefield##PKTS]); \
__this_cpu_add(ptr[basefield##OCTETS], addend); \
} while (0)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index ffd69cbded35..a47790bcaa38 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -1,6 +1,14 @@
#ifndef __NET_UDP_TUNNEL_H
#define __NET_UDP_TUNNEL_H
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
+
struct udp_port_cfg {
u8 family;
@@ -26,7 +34,80 @@ struct udp_port_cfg {
use_udp6_rx_checksums:1;
};
-int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp);
+int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp);
+#else
+static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp)
+{
+ return 0;
+}
+#endif
+
+static inline int udp_sock_create(struct net *net,
+ struct udp_port_cfg *cfg,
+ struct socket **sockp)
+{
+ if (cfg->family == AF_INET)
+ return udp_sock_create4(net, cfg, sockp);
+
+ if (cfg->family == AF_INET6)
+ return udp_sock_create6(net, cfg, sockp);
+
+ return -EPFNOSUPPORT;
+}
+
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+
+struct udp_tunnel_sock_cfg {
+ void *sk_user_data; /* user data used by encap_rcv call back */
+ /* Used for setting up udp_sock fields, see udp.h for details */
+ __u8 encap_type;
+ udp_tunnel_encap_rcv_t encap_rcv;
+ udp_tunnel_encap_destroy_t encap_destroy;
+};
+
+/* Setup the given (UDP) sock to receive UDP encapsulated packets */
+void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+ struct udp_tunnel_sock_cfg *sock_cfg);
+
+/* Transmit the skb using UDP encapsulation. */
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst,
+ __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+ __be16 dst_port, bool xnet);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port,
+ __be16 dst_port);
+#endif
+
+void udp_tunnel_sock_release(struct socket *sock);
+
+static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
+ bool udp_csum)
+{
+ int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+ return iptunnel_handle_offloads(skb, udp_csum, type);
+}
+
+static inline void udp_tunnel_encap_enable(struct socket *sock)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sock->sk->sk_family == PF_INET6)
+ ipv6_stub->udpv6_encap_enable();
+ else
+#endif
+ udp_encap_enable();
+}
#endif
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
new file mode 100644
index 000000000000..e03376de453d
--- /dev/null
+++ b/include/uapi/linux/fou.h
@@ -0,0 +1,32 @@
+/* fou.h - FOU Interface */
+
+#ifndef _UAPI_LINUX_FOU_H
+#define _UAPI_LINUX_FOU_H
+
+/* NETLINK_GENERIC related info
+ */
+#define FOU_GENL_NAME "fou"
+#define FOU_GENL_VERSION 0x1
+
+enum {
+ FOU_ATTR_UNSPEC,
+ FOU_ATTR_PORT, /* u16 */
+ FOU_ATTR_AF, /* u8 */
+ FOU_ATTR_IPPROTO, /* u8 */
+
+ __FOU_ATTR_MAX,
+};
+
+#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1)
+
+enum {
+ FOU_CMD_UNSPEC,
+ FOU_CMD_ADD,
+ FOU_CMD_DEL,
+
+ __FOU_CMD_MAX,
+};
+
+#define FOU_CMD_MAX (__FOU_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_FOU_H */
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 3bce9e9d9f7c..7c832afdfa94 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -53,10 +53,22 @@ enum {
IFLA_IPTUN_6RD_RELAY_PREFIX,
IFLA_IPTUN_6RD_PREFIXLEN,
IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+ IFLA_IPTUN_ENCAP_TYPE,
+ IFLA_IPTUN_ENCAP_FLAGS,
+ IFLA_IPTUN_ENCAP_SPORT,
+ IFLA_IPTUN_ENCAP_DPORT,
__IFLA_IPTUN_MAX,
};
#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
+enum tunnel_encap_types {
+ TUNNEL_ENCAP_NONE,
+ TUNNEL_ENCAP_FOU,
+};
+
+#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
+#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1)
+
/* SIT-mode i_flags */
#define SIT_ISATAP 0x0001
@@ -94,6 +106,10 @@ enum {
IFLA_GRE_ENCAP_LIMIT,
IFLA_GRE_FLOWINFO,
IFLA_GRE_FLAGS,
+ IFLA_GRE_ENCAP_TYPE,
+ IFLA_GRE_ENCAP_FLAGS,
+ IFLA_GRE_ENCAP_SPORT,
+ IFLA_GRE_ENCAP_DPORT,
__IFLA_GRE_MAX,
};
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 29f7f0121491..06a8feb10099 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4102,3 +4102,81 @@ err_free:
return NULL;
}
EXPORT_SYMBOL(skb_vlan_untag);
+
+/**
+ * alloc_skb_with_frags - allocate skb with page frags
+ *
+ * header_len: size of linear part
+ * data_len: needed length in frags
+ * max_page_order: max page order desired.
+ * errcode: pointer to error code if any
+ * gfp_mask: allocation mask
+ *
+ * This can be used to allocate a paged skb, given a maximal order for frags.
+ */
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+ unsigned long data_len,
+ int max_page_order,
+ int *errcode,
+ gfp_t gfp_mask)
+{
+ int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ unsigned long chunk;
+ struct sk_buff *skb;
+ struct page *page;
+ gfp_t gfp_head;
+ int i;
+
+ *errcode = -EMSGSIZE;
+ /* Note this test could be relaxed, if we succeed to allocate
+ * high order pages...
+ */
+ if (npages > MAX_SKB_FRAGS)
+ return NULL;
+
+ gfp_head = gfp_mask;
+ if (gfp_head & __GFP_WAIT)
+ gfp_head |= __GFP_REPEAT;
+
+ *errcode = -ENOBUFS;
+ skb = alloc_skb(header_len, gfp_head);
+ if (!skb)
+ return NULL;
+
+ skb->truesize += npages << PAGE_SHIFT;
+
+ for (i = 0; npages > 0; i++) {
+ int order = max_page_order;
+
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(gfp_mask |
+ __GFP_COMP |
+ __GFP_NOWARN |
+ __GFP_NORETRY,
+ order);
+ if (page)
+ goto fill_page;
+ /* Do not retry other high order allocations */
+ order = 1;
+ max_page_order = 0;
+ }
+ order--;
+ }
+ page = alloc_page(gfp_mask);
+ if (!page)
+ goto failure;
+fill_page:
+ chunk = min_t(unsigned long, data_len,
+ PAGE_SIZE << order);
+ skb_fill_page_desc(skb, i, page, 0, chunk);
+ data_len -= chunk;
+ npages -= 1 << order;
+ }
+ return skb;
+
+failure:
+ kfree_skb(skb);
+ return NULL;
+}
+EXPORT_SYMBOL(alloc_skb_with_frags);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f436b5e4961..de887c45c63b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1762,21 +1762,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
int *errcode, int max_page_order)
{
- struct sk_buff *skb = NULL;
- unsigned long chunk;
- gfp_t gfp_mask;
+ struct sk_buff *skb;
long timeo;
int err;
- int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
- struct page *page;
- int i;
-
- err = -EMSGSIZE;
- if (npages > MAX_SKB_FRAGS)
- goto failure;
timeo = sock_sndtimeo(sk, noblock);
- while (!skb) {
+ for (;;) {
err = sock_error(sk);
if (err != 0)
goto failure;
@@ -1785,66 +1776,27 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
- if (!timeo)
- goto failure;
- if (signal_pending(current))
- goto interrupted;
- timeo = sock_wait_for_wmem(sk, timeo);
- continue;
- }
-
- err = -ENOBUFS;
- gfp_mask = sk->sk_allocation;
- if (gfp_mask & __GFP_WAIT)
- gfp_mask |= __GFP_REPEAT;
+ if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+ break;
- skb = alloc_skb(header_len, gfp_mask);
- if (!skb)
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = -EAGAIN;
+ if (!timeo)
goto failure;
-
- skb->truesize += data_len;
-
- for (i = 0; npages > 0; i++) {
- int order = max_page_order;
-
- while (order) {
- if (npages >= 1 << order) {
- page = alloc_pages(sk->sk_allocation |
- __GFP_COMP |
- __GFP_NOWARN |
- __GFP_NORETRY,
- order);
- if (page)
- goto fill_page;
- /* Do not retry other high order allocations */
- order = 1;
- max_page_order = 0;
- }
- order--;
- }
- page = alloc_page(sk->sk_allocation);
- if (!page)
- goto failure;
-fill_page:
- chunk = min_t(unsigned long, data_len,
- PAGE_SIZE << order);
- skb_fill_page_desc(skb, i, page, 0, chunk);
- data_len -= chunk;
- npages -= 1 << order;
- }
+ if (signal_pending(current))
+ goto interrupted;
+ timeo = sock_wait_for_wmem(sk, timeo);
}
-
- skb_set_owner_w(skb, sk);
+ skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
+ errcode, sk->sk_allocation);
+ if (skb)
+ skb_set_owner_w(skb, sk);
return skb;
interrupted:
err = sock_intr_errno(timeo);
failure:
- kfree_skb(skb);
*errcode = err;
return NULL;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 90c9689ed362..a7997265019a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -371,6 +371,7 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
struct dsa_chip_data *cd = ds->pd;
struct device_node *phy_dn, *port_dn;
bool phy_is_fixed = false;
+ u32 phy_flags = 0;
int ret;
port_dn = cd->port_dn[p->port];
@@ -390,9 +391,12 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
phy_dn = port_dn;
}
+ if (ds->drv->get_phy_flags)
+ phy_flags = ds->drv->get_phy_flags(ds, p->port);
+
if (phy_dn)
p->phy = of_phy_connect(slave_dev, phy_dn,
- dsa_slave_adjust_link, 0,
+ dsa_slave_adjust_link, phy_flags,
p->phy_interface);
if (p->phy && phy_is_fixed)
@@ -480,6 +484,9 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
netif_carrier_off(slave_dev);
if (p->phy != NULL) {
+ if (ds->drv->get_phy_flags(ds, port))
+ p->phy->dev_flags |= ds->drv->get_phy_flags(ds, port);
+
phy_attach(slave_dev, dev_name(&p->phy->dev),
PHY_INTERFACE_MODE_GMII);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index dbc10d84161f..84f710b7472a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -311,6 +311,16 @@ config NET_UDP_TUNNEL
tristate
default n
+config NET_FOU
+ tristate "IP: Foo (IP protocols) over UDP"
+ select XFRM
+ select NET_UDP_TUNNEL
+ ---help---
+ Foo over UDP allows any IP protocol to be directly encapsulated
+ over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP
+ network mechanisms and optimizations for UDP (such as ECMP
+ and RSS) can be leveraged to provide better service.
+
config INET_AH
tristate "IP: AH transformation"
select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 8ee1cd4053ee..d78d404c596f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
+obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
new file mode 100644
index 000000000000..dced89fbe480
--- /dev/null
+++ b/net/ipv4/fou.c
@@ -0,0 +1,368 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/genetlink.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/xfrm.h>
+#include <uapi/linux/fou.h>
+#include <uapi/linux/genetlink.h>
+
+static DEFINE_SPINLOCK(fou_lock);
+static LIST_HEAD(fou_list);
+
+struct fou {
+ struct socket *sock;
+ u8 protocol;
+ u16 port;
+ struct udp_offload udp_offloads;
+ struct list_head list;
+};
+
+struct fou_cfg {
+ u8 protocol;
+ struct udp_port_cfg udp_config;
+};
+
+static inline struct fou *fou_from_sock(struct sock *sk)
+{
+ return sk->sk_user_data;
+}
+
+static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
+ u8 protocol, size_t len)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ /* Remove 'len' bytes from the packet (UDP header and
+ * FOU header if present), modify the protocol to the one
+ * we found, and then call rcv_encap.
+ */
+ iph->tot_len = htons(ntohs(iph->tot_len) - len);
+ __skb_pull(skb, len);
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
+ skb_reset_transport_header(skb);
+
+ return -protocol;
+}
+
+static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+
+ if (!fou)
+ return 1;
+
+ return fou_udp_encap_recv_deliver(skb, fou->protocol,
+ sizeof(struct udphdr));
+}
+
+static struct sk_buff **fou_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb,
+ const struct net_offload **offloads)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ u8 proto = NAPI_GRO_CB(skb)->proto;
+
+ rcu_read_lock();
+ ops = rcu_dereference(offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return pp;
+}
+
+static int fou_gro_complete(struct sk_buff *skb, int nhoff,
+ const struct net_offload **offloads)
+{
+ const struct net_offload *ops;
+ u8 proto = NAPI_GRO_CB(skb)->proto;
+ int err = -ENOSYS;
+
+ rcu_read_lock();
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ return fou_gro_receive(head, skb, inet_offloads);
+}
+
+static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ return fou_gro_complete(skb, nhoff, inet_offloads);
+}
+
+static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ return fou_gro_receive(head, skb, inet6_offloads);
+}
+
+static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ return fou_gro_complete(skb, nhoff, inet6_offloads);
+}
+
+static int fou_add_to_port_list(struct fou *fou)
+{
+ struct fou *fout;
+
+ spin_lock(&fou_lock);
+ list_for_each_entry(fout, &fou_list, list) {
+ if (fou->port == fout->port) {
+ spin_unlock(&fou_lock);
+ return -EALREADY;
+ }
+ }
+
+ list_add(&fou->list, &fou_list);
+ spin_unlock(&fou_lock);
+
+ return 0;
+}
+
+static void fou_release(struct fou *fou)
+{
+ struct socket *sock = fou->sock;
+ struct sock *sk = sock->sk;
+
+ udp_del_offload(&fou->udp_offloads);
+
+ list_del(&fou->list);
+
+ /* Remove hooks into tunnel socket */
+ sk->sk_user_data = NULL;
+
+ sock_release(sock);
+
+ kfree(fou);
+}
+
+static int fou_create(struct net *net, struct fou_cfg *cfg,
+ struct socket **sockp)
+{
+ struct fou *fou = NULL;
+ int err;
+ struct socket *sock = NULL;
+ struct sock *sk;
+
+ /* Open UDP socket */
+ err = udp_sock_create(net, &cfg->udp_config, &sock);
+ if (err < 0)
+ goto error;
+
+ /* Allocate FOU port structure */
+ fou = kzalloc(sizeof(*fou), GFP_KERNEL);
+ if (!fou) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ sk = sock->sk;
+
+ /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+ fou->protocol = cfg->protocol;
+ fou->port = cfg->udp_config.local_udp_port;
+ udp_sk(sk)->encap_rcv = fou_udp_recv;
+
+ udp_sk(sk)->encap_type = 1;
+ udp_encap_enable();
+
+ sk->sk_user_data = fou;
+ fou->sock = sock;
+
+ udp_set_convert_csum(sk, true);
+
+ sk->sk_allocation = GFP_ATOMIC;
+
+ switch (cfg->udp_config.family) {
+ case AF_INET:
+ fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
+ break;
+ case AF_INET6:
+ fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
+ break;
+ default:
+ err = -EPFNOSUPPORT;
+ goto error;
+ }
+
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+ fou->udp_offloads.ipproto = cfg->protocol;
+
+ if (cfg->udp_config.family == AF_INET) {
+ err = udp_add_offload(&fou->udp_offloads);
+ if (err)
+ goto error;
+ }
+
+ err = fou_add_to_port_list(fou);
+ if (err)
+ goto error;
+
+ if (sockp)
+ *sockp = sock;
+
+ return 0;
+
+error:
+ kfree(fou);
+ if (sock)
+ sock_release(sock);
+
+ return err;
+}
+
+static int fou_destroy(struct net *net, struct fou_cfg *cfg)
+{
+ struct fou *fou;
+ u16 port = cfg->udp_config.local_udp_port;
+ int err = -EINVAL;
+
+ spin_lock(&fou_lock);
+ list_for_each_entry(fou, &fou_list, list) {
+ if (fou->port == port) {
+ udp_del_offload(&fou->udp_offloads);
+ fou_release(fou);
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&fou_lock);
+
+ return err;
+}
+
+static struct genl_family fou_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = FOU_GENL_NAME,
+ .version = FOU_GENL_VERSION,
+ .maxattr = FOU_ATTR_MAX,
+ .netnsok = true,
+};
+
+static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
+ [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_AF] = { .type = NLA_U8, },
+ [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+};
+
+static int parse_nl_config(struct genl_info *info,
+ struct fou_cfg *cfg)
+{
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->udp_config.family = AF_INET;
+
+ if (info->attrs[FOU_ATTR_AF]) {
+ u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
+
+ if (family != AF_INET && family != AF_INET6)
+ return -EINVAL;
+
+ cfg->udp_config.family = family;
+ }
+
+ if (info->attrs[FOU_ATTR_PORT]) {
+ u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);
+
+ cfg->udp_config.local_udp_port = port;
+ }
+
+ if (info->attrs[FOU_ATTR_IPPROTO])
+ cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+
+ return 0;
+}
+
+static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct fou_cfg cfg;
+ int err;
+
+ err = parse_nl_config(info, &cfg);
+ if (err)
+ return err;
+
+ return fou_create(&init_net, &cfg, NULL);
+}
+
+static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct fou_cfg cfg;
+
+ parse_nl_config(info, &cfg);
+
+ return fou_destroy(&init_net, &cfg);
+}
+
+static const struct genl_ops fou_nl_ops[] = {
+ {
+ .cmd = FOU_CMD_ADD,
+ .doit = fou_nl_cmd_add_port,
+ .policy = fou_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_DEL,
+ .doit = fou_nl_cmd_rm_port,
+ .policy = fou_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static int __init fou_init(void)
+{
+ int ret;
+
+ ret = genl_register_family_with_ops(&fou_nl_family,
+ fou_nl_ops);
+
+ return ret;
+}
+
+static void __exit fou_fini(void)
+{
+ struct fou *fou, *next;
+
+ genl_unregister_family(&fou_nl_family);
+
+ /* Close all the FOU sockets */
+
+ spin_lock(&fou_lock);
+ list_for_each_entry_safe(fou, next, &fou_list, list)
+ fou_release(fou);
+ spin_unlock(&fou_lock);
+}
+
+module_init(fou_init);
+module_exit(fou_fini);
+MODULE_AUTHOR("Tom Herbert <[email protected]>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9b842544aea3..829aff8bf723 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -239,7 +239,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
tpi.seq = htonl(tunnel->o_seqno);
/* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->hlen);
+ gre_build_header(skb, &tpi, tunnel->tun_hlen);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -310,7 +310,7 @@ out:
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err = 0;
+ int err;
struct ip_tunnel_parm p;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
@@ -470,13 +470,18 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
+ int t_hlen;
tunnel = netdev_priv(dev);
- tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
tunnel->parms.iph.protocol = IPPROTO_GRE;
- dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+ t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+ dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -628,6 +633,40 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
parms->iph.frag_off = htons(IP_DF);
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipgre_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_GRE_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int gre_tap_init(struct net_device *dev)
{
__gre_tunnel_init(dev);
@@ -657,6 +696,15 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipgre_netlink_parms(data, tb, &p);
return ip_tunnel_newlink(dev, tb, &p);
@@ -666,6 +714,15 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipgre_netlink_parms(data, tb, &p);
return ip_tunnel_changelink(dev, tb, &p);
@@ -694,6 +751,14 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_GRE_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -714,6 +779,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
!!(p->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
+ t->encap.type) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT,
+ t->encap.sport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
+ t->encap.dport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
+ t->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -731,6 +807,10 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index afed1aac2638..e3a3dc91e49c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
+#include <net/udp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -487,6 +488,91 @@ drop:
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
+static int ip_encap_hlen(struct ip_tunnel_encap *e)
+{
+ switch (e->type) {
+ case TUNNEL_ENCAP_NONE:
+ return 0;
+ case TUNNEL_ENCAP_FOU:
+ return sizeof(struct udphdr);
+ default:
+ return -EINVAL;
+ }
+}
+
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+ struct ip_tunnel_encap *ipencap)
+{
+ int hlen;
+
+ memset(&t->encap, 0, sizeof(t->encap));
+
+ hlen = ip_encap_hlen(ipencap);
+ if (hlen < 0)
+ return hlen;
+
+ t->encap.type = ipencap->type;
+ t->encap.sport = ipencap->sport;
+ t->encap.dport = ipencap->dport;
+ t->encap.flags = ipencap->flags;
+
+ t->encap_hlen = hlen;
+ t->hlen = t->encap_hlen + t->tun_hlen;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
+{
+ struct udphdr *uh;
+ __be16 sport;
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get length and hash before making space in skb */
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ skb_push(skb, hdr_len);
+
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+
+ return 0;
+}
+
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ switch (t->encap.type) {
+ case TUNNEL_ENCAP_NONE:
+ return 0;
+ case TUNNEL_ENCAP_FOU:
+ return fou_build_header(skb, &t->encap, t->encap_hlen,
+ protocol, fl4);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(ip_tunnel_encap);
+
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rtable *rt, __be16 df)
{
@@ -536,7 +622,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
}
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params, const u8 protocol)
+ const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *inner_iph;
@@ -617,6 +703,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+ goto tx_error;
+
rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
if (!rt) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 62eaa005e146..bfec31df8b21 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -301,7 +301,8 @@ static int ipip_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
- tunnel->hlen = 0;
+ tunnel->tun_hlen = 0;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
@@ -340,10 +341,53 @@ static void ipip_netlink_parms(struct nlattr *data[],
parms->iph.frag_off = htons(IP_DF);
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipip_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipip_netlink_parms(data, &p);
return ip_tunnel_newlink(dev, tb, &p);
@@ -353,6 +397,15 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipip_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipip_netlink_parms(data, &p);
@@ -378,6 +431,14 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -394,6 +455,17 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -407,6 +479,10 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 46d6a1c923a8..4b7c0ec65251 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -30,6 +30,7 @@
const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet_offloads);
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea92f23ffaf1..02fb66d4a018 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4304,24 +4304,19 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
- struct sk_buff *skb = NULL;
- struct tcphdr *th;
+ struct sk_buff *skb;
bool fragstolen;
if (size == 0)
return 0;
- skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
+ skb = alloc_skb(size, sk->sk_allocation);
if (!skb)
goto err;
- if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
+ if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free;
- th = (struct tcphdr *)skb_put(skb, sizeof(*th));
- skb_reset_transport_header(skb);
- memset(th, 0, sizeof(*th));
-
if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
goto err_free;
@@ -4329,7 +4324,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
- if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
+ if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
WARN_ON_ONCE(fragstolen); /* should not happen */
__kfree_skb(skb);
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index adab393b2fe5..d7c43f764c71 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -276,6 +276,7 @@ unflush:
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+ NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
pp = uo_priv->offload->callbacks.gro_receive(head, skb);
out_unlock:
@@ -329,8 +330,10 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
break;
}
- if (uo_priv != NULL)
+ if (uo_priv != NULL) {
+ NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
+ }
rcu_read_unlock();
return err;
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 61ec1a65207e..1671263e5fa0 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -8,83 +8,40 @@
#include <net/udp_tunnel.h>
#include <net/net_namespace.h>
-int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp)
+int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp)
{
- int err = -EINVAL;
+ int err;
struct socket *sock = NULL;
+ struct sockaddr_in udp_addr;
-#if IS_ENABLED(CONFIG_IPV6)
- if (cfg->family == AF_INET6) {
- struct sockaddr_in6 udp6_addr;
+ err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
+ if (err < 0)
+ goto error;
- err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
- if (err < 0)
- goto error;
-
- sk_change_net(sock->sk, net);
-
- udp6_addr.sin6_family = AF_INET6;
- memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
- sizeof(udp6_addr.sin6_addr));
- udp6_addr.sin6_port = cfg->local_udp_port;
- err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
- sizeof(udp6_addr));
- if (err < 0)
- goto error;
-
- if (cfg->peer_udp_port) {
- udp6_addr.sin6_family = AF_INET6;
- memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
- sizeof(udp6_addr.sin6_addr));
- udp6_addr.sin6_port = cfg->peer_udp_port;
- err = kernel_connect(sock,
- (struct sockaddr *)&udp6_addr,
- sizeof(udp6_addr), 0);
- }
- if (err < 0)
- goto error;
+ sk_change_net(sock->sk, net);
- udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
- udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
- } else
-#endif
- if (cfg->family == AF_INET) {
- struct sockaddr_in udp_addr;
-
- err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
- if (err < 0)
- goto error;
-
- sk_change_net(sock->sk, net);
+ udp_addr.sin_family = AF_INET;
+ udp_addr.sin_addr = cfg->local_ip;
+ udp_addr.sin_port = cfg->local_udp_port;
+ err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
+ sizeof(udp_addr));
+ if (err < 0)
+ goto error;
+ if (cfg->peer_udp_port) {
udp_addr.sin_family = AF_INET;
- udp_addr.sin_addr = cfg->local_ip;
- udp_addr.sin_port = cfg->local_udp_port;
- err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
- sizeof(udp_addr));
+ udp_addr.sin_addr = cfg->peer_ip;
+ udp_addr.sin_port = cfg->peer_udp_port;
+ err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
+ sizeof(udp_addr), 0);
if (err < 0)
goto error;
-
- if (cfg->peer_udp_port) {
- udp_addr.sin_family = AF_INET;
- udp_addr.sin_addr = cfg->peer_ip;
- udp_addr.sin_port = cfg->peer_udp_port;
- err = kernel_connect(sock,
- (struct sockaddr *)&udp_addr,
- sizeof(udp_addr), 0);
- if (err < 0)
- goto error;
- }
-
- sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
- } else {
- return -EPFNOSUPPORT;
}
+ sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
*sockp = sock;
-
return 0;
error:
@@ -95,6 +52,57 @@ error:
*sockp = NULL;
return err;
}
-EXPORT_SYMBOL(udp_sock_create);
+EXPORT_SYMBOL(udp_sock_create4);
+
+void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+ struct udp_tunnel_sock_cfg *cfg)
+{
+ struct sock *sk = sock->sk;
+
+ /* Disable multicast loopback */
+ inet_sk(sk)->mc_loop = 0;
+
+ /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
+ udp_set_convert_csum(sk, true);
+
+ rcu_assign_sk_user_data(sk, cfg->sk_user_data);
+
+ udp_sk(sk)->encap_type = cfg->encap_type;
+ udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+ udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+
+ udp_tunnel_encap_enable(sock);
+}
+EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst,
+ __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+ __be16 dst_port, bool xnet)
+{
+ struct udphdr *uh;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+ uh->len = htons(skb->len);
+
+ udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+
+ return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+ tos, ttl, df, xnet);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+
+void udp_tunnel_sock_release(struct socket *sock)
+{
+ rcu_assign_sk_user_data(sock->sk, NULL);
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2fe68364bb20..45f830e5f820 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
obj-$(CONFIG_NETFILTER) += netfilter/
+obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
obj-$(CONFIG_IPV6_SIT) += sit.o
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
new file mode 100644
index 000000000000..cbc990712cc1
--- /dev/null
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -0,0 +1,105 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in6.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp)
+{
+ struct sockaddr_in6 udp6_addr;
+ int err;
+ struct socket *sock = NULL;
+
+ err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+ if (err < 0)
+ goto error;
+
+ sk_change_net(sock->sk, net);
+
+ udp6_addr.sin6_family = AF_INET6;
+ memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
+ sizeof(udp6_addr.sin6_addr));
+ udp6_addr.sin6_port = cfg->local_udp_port;
+ err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
+ sizeof(udp6_addr));
+ if (err < 0)
+ goto error;
+
+ if (cfg->peer_udp_port) {
+ udp6_addr.sin6_family = AF_INET6;
+ memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
+ sizeof(udp6_addr.sin6_addr));
+ udp6_addr.sin6_port = cfg->peer_udp_port;
+ err = kernel_connect(sock,
+ (struct sockaddr *)&udp6_addr,
+ sizeof(udp6_addr), 0);
+ }
+ if (err < 0)
+ goto error;
+
+ udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
+ udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
+
+ *sockp = sock;
+ return 0;
+
+error:
+ if (sock) {
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
+ }
+ *sockp = NULL;
+ return err;
+}
+EXPORT_SYMBOL_GPL(udp_sock_create6);
+
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+ struct udphdr *uh;
+ struct ipv6hdr *ip6h;
+ struct sock *sk = sock->sk;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+ | IPSKB_REROUTED);
+ skb_dst_set(skb, dst);
+
+ udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr,
+ &sk->sk_v6_daddr, skb->len);
+
+ __skb_push(skb, sizeof(*ip6h));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6_flow_hdr(ip6h, prio, htonl(0));
+ ip6h->payload_len = htons(skb->len);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ ip6tunnel_xmit(skb, dev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e048cf1bb6a2..e3770abe688a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -51,6 +51,7 @@ EXPORT_SYMBOL(inet6_del_protocol);
#endif
const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_offloads);
int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
{
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 86e3fa81f85e..db75809ab843 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -822,6 +822,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
int addr_type;
u8 ttl;
int err;
+ u8 protocol = IPPROTO_IPV6;
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
@@ -911,8 +913,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
+ }
+
if (df) {
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ mtu = dst_mtu(&rt->dst) - t_hlen;
if (mtu < 68) {
dev->stats.collisions++;
@@ -947,7 +955,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
- max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+ max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -969,14 +977,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
- if (IS_ERR(skb)) {
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
ip_rt_put(rt);
- goto out;
+ goto tx_error;
}
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
- IPPROTO_IPV6, tos, ttl, df,
+ protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1059,8 +1066,10 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
- dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
@@ -1307,7 +1316,10 @@ done:
static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -1338,12 +1350,15 @@ static void ipip6_dev_free(struct net_device *dev)
static void ipip6_tunnel_setup(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->netdev_ops = &ipip6_netdev_ops;
dev->destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
dev->flags = IFF_NOARP;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->iflink = 0;
@@ -1466,6 +1481,40 @@ static void ipip6_netlink_parms(struct nlattr *data[],
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip6_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
#ifdef CONFIG_IPV6_SIT_6RD
/* This function returns true when 6RD attributes are present in the nl msg */
static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
@@ -1509,12 +1558,20 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
{
struct net *net = dev_net(dev);
struct ip_tunnel *nt;
+ struct ip_tunnel_encap ipencap;
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
int err;
nt = netdev_priv(dev);
+
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(nt, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &nt->parms);
if (ipip6_tunnel_locate(net, &nt->parms, 0))
@@ -1537,15 +1594,23 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ int err;
if (dev == sitn->fb_tunnel_dev)
return -EINVAL;
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(t, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &p);
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
@@ -1599,6 +1664,14 @@ static size_t ipip6_get_size(const struct net_device *dev)
/* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
nla_total_size(2) +
#endif
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -1630,6 +1703,16 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
#endif
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1651,6 +1734,10 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
[IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
#endif
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static void ipip6_dellink(struct net_device *dev, struct list_head *head)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 2aa2b6c15f20..895348e44c7d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1392,8 +1392,6 @@ static int l2tp_tunnel_sock_create(struct net *net,
if (err < 0)
goto out;
- udp_set_convert_csum(sock->sk, true);
-
break;
case L2TP_ENCAPTYPE_IP:
@@ -1584,19 +1582,17 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
- udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
- udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
- udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
- }
+ struct udp_tunnel_sock_cfg udp_cfg;
+
+ udp_cfg.sk_user_data = tunnel;
+ udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
+ udp_cfg.encap_rcv = l2tp_udp_encap_recv;
+ udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
- sk->sk_user_data = tunnel;
+ setup_udp_tunnel_sock(net, sock, &udp_cfg);
+ } else {
+ sk->sk_user_data = tunnel;
+ }
/* Hook on the tunnel socket destructor so that we can cleanup
* if the tunnel socket goes away.
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index eceeb0456d26..730edb29d43b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -64,8 +64,11 @@ struct tc_u_knode {
u32 __percpu *pcpu_success;
#endif
struct tcf_proto *tp;
- struct tc_u32_sel sel;
struct rcu_head rcu;
+ /* The 'sel' field MUST be the last field in structure to allow for
+ * tc_u32_keys allocated at end of structure.
+ */
+ struct tc_u32_sel sel;
};
struct tc_u_hnode {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 346ef85617d3..11b28f651ad1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -523,7 +523,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- skb_queue_head_init(band2list(priv, prio));
+ __skb_queue_head_init(band2list(priv, prio));
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6d16b9b81c28..14408f262143 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1044,7 +1044,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
INIT_WORK(&q->work, htb_work_func);
- skb_queue_head_init(&q->direct_queue);
+ __skb_queue_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);