154 files changed, 2658 insertions, 1215 deletions
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt index 96cccebb839b..0ace6e776ac8 100644 --- a/Documentation/networking/ixgbe.txt +++ b/Documentation/networking/ixgbe.txt @@ -138,7 +138,7 @@ Other ethtool Commands: To enable Flow Director ethtool -K ethX ntuple on To add a filter - Use -U switch. e.g., ethtool -U ethX flow-type tcp4 src-ip 0x178000a + Use -U switch. e.g., ethtool -U ethX flow-type tcp4 src-ip 10.0.128.23 action 1 To see the list of filters currently present: ethtool -u ethX diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 1d6d02d6ba52..a5c784c89312 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE: 1.3.3 Timestamp Options -The interface supports one option +The interface supports the following options SOF_TIMESTAMPING_OPT_ID: @@ -130,19 +130,36 @@ SOF_TIMESTAMPING_OPT_ID: have multiple concurrent timestamping requests outstanding. Packets can be reordered in the transmit path, for instance in the packet scheduler. In that case timestamps will be queued onto the error - queue out of order from the original send() calls. This option - embeds a counter that is incremented at send() time, to order - timestamps within a flow. + queue out of order from the original send() calls. It is then not + always possible to uniquely match timestamps to the original send() + calls based on timestamp order or payload inspection alone. + + This option associates each packet at send() with a unique + identifier and returns that along with the timestamp. The identifier + is derived from a per-socket u32 counter (that wraps). For datagram + sockets, the counter increments with each sent packet. For stream + sockets, it increments with every byte. + + The counter starts at zero. It is initialized the first time that + the socket option is enabled. It is reset each time the option is + enabled after having been disabled. Resetting the counter does not + change the identifiers of existing packets in the system. This option is implemented only for transmit timestamps. There, the timestamp is always looped along with a struct sock_extended_err. The option modifies field ee_data to pass an id that is unique among all possibly concurrently outstanding timestamp requests for - that socket. In practice, it is a monotonically increasing u32 - (that wraps). + that socket. + + +SOF_TIMESTAMPING_OPT_CMSG: - In datagram sockets, the counter increments on each send call. In - stream sockets, it increments with every byte. + Support recv() cmsg for all timestamped packets. Control messages + are already supported unconditionally on all packets with receive + timestamps and on IPv6 packets with transmit timestamp. This option + extends them to IPv4 packets with transmit timestamp. One use case + is to correlate packets with their egress device, by enabling socket + option IP_PKTINFO simultaneously. 
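The two new options combine naturally. As a minimal sketch (not part of the patch itself), assuming fd is an already-connected datagram socket, a sender could enable software transmit timestamps together with SOF_TIMESTAMPING_OPT_ID and SOF_TIMESTAMPING_OPT_CMSG like this:

#include <errno.h>
#include <error.h>
#include <linux/net_tstamp.h>
#include <sys/socket.h>

/* Request software tx timestamps, tag each send() with an id
 * (OPT_ID), and ask for cmsgs on IPv4 transmit timestamps too
 * (OPT_CMSG). fd is assumed to be a connected datagram socket.
 */
static void enable_tx_timestamps(int fd)
{
	int val = SOF_TIMESTAMPING_TX_SOFTWARE |
		  SOF_TIMESTAMPING_SOFTWARE |
		  SOF_TIMESTAMPING_OPT_ID |
		  SOF_TIMESTAMPING_OPT_CMSG;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
		       &val, sizeof(val)))
		error(1, errno, "setsockopt SO_TIMESTAMPING");
}

After each send(), reading the error queue with recvmsg(fd, &msg, MSG_ERRQUEUE) returns the timestamp in an SCM_TIMESTAMPING control message; the identifier assigned at send() time arrives in the ee_data field of the struct sock_extended_err carried by the accompanying IP_RECVERR/IPV6_RECVERR control message, as the updated txtimestamp.c example below demonstrates.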
1.4 Bytestream Timestamps diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c index b32fc2a07734..876f71c5625a 100644 --- a/Documentation/networking/timestamping/txtimestamp.c +++ b/Documentation/networking/timestamping/txtimestamp.c @@ -46,6 +46,7 @@ #include <netpacket/packet.h> #include <poll.h> #include <stdarg.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -58,6 +59,14 @@ #include <time.h> #include <unistd.h> +/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */ +#ifndef in6_pktinfo +struct in6_pktinfo { + struct in6_addr ipi6_addr; + int ipi6_ifindex; +}; +#endif + /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -65,6 +74,8 @@ static int cfg_num_pkts = 4; static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; +static bool cfg_show_payload; +static bool cfg_do_pktinfo; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -131,6 +142,30 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); } +/* TODO: convert to check_and_print payload once API is stable */ +static void print_payload(char *data, int len) +{ + int i; + + if (len > 70) + len = 70; + + fprintf(stderr, "payload: "); + for (i = 0; i < len; i++) + fprintf(stderr, "%02hhx ", data[i]); + fprintf(stderr, "\n"); +} + +static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) +{ + char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN]; + + fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n", + ifindex, + saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown", + daddr ? 
inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); +} + static void __poll(int fd) { struct pollfd pollfd; @@ -156,10 +191,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) cm->cmsg_type == SCM_TIMESTAMPING) { tss = (void *) CMSG_DATA(cm); } else if ((cm->cmsg_level == SOL_IP && - cm->cmsg_type == IP_RECVERR) || - (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { - + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { @@ -168,6 +202,16 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) serr->ee_origin); serr = NULL; } + } else if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *info = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET, info->ipi_ifindex, + &info->ipi_spec_dst, &info->ipi_addr); + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET6, info6->ipi6_ifindex, + NULL, &info6->ipi6_addr); } else fprintf(stderr, "unknown cmsg %d,%d\n", cm->cmsg_level, cm->cmsg_type); @@ -206,7 +250,11 @@ static int recv_errmsg(int fd) if (ret == -1 && errno != EAGAIN) error(1, errno, "recvmsg"); - __recv_errmsg_cmsg(&msg, ret); + if (ret > 0) { + __recv_errmsg_cmsg(&msg, ret); + if (cfg_show_payload) + print_payload(data, cfg_payload_len); + } free(data); return ret == -1; @@ -215,9 +263,9 @@ static int recv_errmsg(int fd) static void do_test(int family, unsigned int opt) { char *buf; - int fd, i, val, total_len; + int fd, i, val = 1, total_len; - if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) { + if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { /* due to lack of checksum generation code */ fprintf(stderr, "test: skipping datagram over IPv6\n"); return; @@ -239,7 +287,6 @@ static void do_test(int family, unsigned int opt) error(1, errno, "socket"); if (cfg_proto == SOCK_STREAM) { - val = 1; if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char*) &val, sizeof(val))) error(1, 0, "setsockopt no nagle"); @@ -253,7 +300,20 @@ static void do_test(int family, unsigned int opt) } } + if (cfg_do_pktinfo) { + if (family == AF_INET6) { + if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv6"); + } else { + if (setsockopt(fd, SOL_IP, IP_PKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv4"); + } + } + opt |= SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (char *) &opt, sizeof(opt))) @@ -262,8 +322,6 @@ static void do_test(int family, unsigned int opt) for (i = 0; i < cfg_num_pkts; i++) { memset(&ts_prev, 0, sizeof(ts_prev)); memset(buf, 'a' + i, total_len); - buf[total_len - 2] = '\n'; - buf[total_len - 1] = '\0'; if (cfg_proto == SOCK_RAW) { struct udphdr *udph; @@ -324,11 +382,13 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -p N: connect to port N\n" - " -u: use udp\n", + " -u: use udp\n" + " -x: show payload (up to 70 bytes)\n", filepath); exit(1); } @@ -338,7 +398,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hl:p:rRu")) 
!= -1) { + while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -346,6 +406,9 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'I': + cfg_do_pktinfo = true; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; @@ -367,6 +430,9 @@ static void parse_opt(int argc, char **argv) case 'p': dest_port = strtoul(optarg, NULL, 10); break; + case 'x': + cfg_show_payload = true; + break; case 'h': default: usage(argv[0]); diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index d758c8d8f47d..fb124feb363b 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -247,6 +247,25 @@ struct vio_net_desc { struct ldc_trans_cookie cookies[0]; }; +struct vio_net_dext { + u8 flags; +#define VNET_PKT_HASH 0x01 +#define VNET_PKT_HCK_IPV4_HDRCKSUM 0x02 +#define VNET_PKT_HCK_FULLCKSUM 0x04 +#define VNET_PKT_IPV4_LSO 0x08 +#define VNET_PKT_HCK_IPV4_HDRCKSUM_OK 0x10 +#define VNET_PKT_HCK_FULLCKSUM_OK 0x20 + + u8 vnet_hashval; + u16 ipv4_lso_mss; + u32 resv3; +}; + +static inline struct vio_net_dext *vio_net_ext(struct vio_net_desc *desc) +{ + return (struct vio_net_dext *)&desc->cookies[2]; +} + #define VIO_MAX_RING_COOKIES 24 struct vio_dring_state { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3f627345d51c..626e01377a01 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -24,7 +24,7 @@ extern u8 sk_load_byte_positive_offset[]; extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; extern u8 sk_load_byte_negative_offset[]; -static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) +static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { if (len == 1) *ptr = bytes; @@ -52,12 +52,12 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT4_off32(b1, b2, b3, b4, off) \ do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) -static inline bool is_imm8(int value) +static bool is_imm8(int value) { return value <= 127 && value >= -128; } -static inline bool is_simm32(s64 value) +static bool is_simm32(s64 value) { return value == (s64) (s32) value; } @@ -94,7 +94,7 @@ static int bpf_size_to_x86_bytes(int bpf_size) #define X86_JGE 0x7D #define X86_JG 0x7F -static inline void bpf_flush_icache(void *start, void *end) +static void bpf_flush_icache(void *start, void *end) { mm_segment_t old_fs = get_fs(); @@ -133,24 +133,24 @@ static const int reg2hex[] = { * which need extra byte of encoding. 
* rax,rcx,...,rbp have simpler encoding */ -static inline bool is_ereg(u32 reg) +static bool is_ereg(u32 reg) { - if (reg == BPF_REG_5 || reg == AUX_REG || - (reg >= BPF_REG_7 && reg <= BPF_REG_9)) - return true; - else - return false; + return (1 << reg) & (BIT(BPF_REG_5) | + BIT(AUX_REG) | + BIT(BPF_REG_7) | + BIT(BPF_REG_8) | + BIT(BPF_REG_9)); } /* add modifiers if 'reg' maps to x64 registers r8..r15 */ -static inline u8 add_1mod(u8 byte, u32 reg) +static u8 add_1mod(u8 byte, u32 reg) { if (is_ereg(reg)) byte |= 1; return byte; } -static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) +static u8 add_2mod(u8 byte, u32 r1, u32 r2) { if (is_ereg(r1)) byte |= 1; @@ -160,13 +160,13 @@ static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) } /* encode 'dst_reg' register into x64 opcode 'byte' */ -static inline u8 add_1reg(u8 byte, u32 dst_reg) +static u8 add_1reg(u8 byte, u32 dst_reg) { return byte + reg2hex[dst_reg]; } /* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ -static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) +static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) { return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index a2d1a9612c86..191a6a3ae6ca 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -517,6 +517,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) /* Just return here, no channel found */ return; + channel->rescind = true; + /* work is initialized for vmbus_process_rescind_offer() from * vmbus_process_offer() where the channel got created */ queue_work(channel->controlwq, &channel->work); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4c89b64aa9cf..5a80dd993761 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -159,6 +159,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { + spin_lock_irq(&ent->lock); + ent->pending--; + spin_unlock_irq(&ent->lock); mlx5_ib_warn(dev, "create mkey failed %d\n", err); kfree(mr); break; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0e2ef9fe0e29..1cae1c7132b4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1011,9 +1011,14 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv } } else { spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); } } else if (recv_cq) { spin_lock_irq(&recv_cq->lock); + __acquire(&send_cq->lock); + } else { + __acquire(&send_cq->lock); + __acquire(&recv_cq->lock); } } @@ -1033,10 +1038,15 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re spin_unlock_irq(&recv_cq->lock); } } else { + __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } } else if (recv_cq) { + __release(&send_cq->lock); spin_unlock_irq(&recv_cq->lock); + } else { + __release(&recv_cq->lock); + __release(&send_cq->lock); } } @@ -2411,7 +2421,7 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, - struct ib_send_wr *wr, int *idx, + struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { int err = 0; @@ -2737,6 +2747,8 @@ out: if (bf->need_lock) spin_lock(&bf->lock); + else + __acquire(&bf->lock); /* TBD enable WC */ if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { @@ -2753,6 
+2765,8 @@ out: bf->offset ^= bf->buf_size; if (bf->need_lock) spin_unlock(&bf->lock); + else + __release(&bf->lock); } spin_unlock_irqrestore(&qp->sq.lock, flags); diff --git a/drivers/isdn/hisax/hfc_2bs0.c b/drivers/isdn/hisax/hfc_2bs0.c index 838531b6a60e..14dada42874e 100644 --- a/drivers/isdn/hisax/hfc_2bs0.c +++ b/drivers/isdn/hisax/hfc_2bs0.c @@ -31,7 +31,7 @@ WaitForBusy(struct IsdnCardState *cs) to--; } if (!to) { - printk(KERN_WARNING "HiSax: waitforBusy timeout\n"); + printk(KERN_WARNING "HiSax: %s timeout\n", __func__); return (0); } else return (to); diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index fa1fefd711cd..b1fad81f0722 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -1159,7 +1159,8 @@ hfcsx_l2l1(struct PStack *st, int pr, void *arg) case (PH_PULL | INDICATION): spin_lock_irqsave(&bcs->cs->lock, flags); if (bcs->tx_skb) { - printk(KERN_WARNING "hfc_l2l1: this shouldn't happen\n"); + printk(KERN_WARNING "%s: this shouldn't happen\n", + __func__); } else { // test_and_set_bit(BC_FLG_BUSY, &bcs->Flag); bcs->tx_skb = skb; diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 849a80752685..678bd5224bc3 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -927,9 +927,8 @@ start_int_fifo(usb_fifo *fifo) fifo->active = 1; /* must be marked active */ errcode = usb_submit_urb(fifo->urb, GFP_KERNEL); if (errcode) { - printk(KERN_ERR - "HFC-S USB: submit URB error(start_int_info): status:%i\n", - errcode); + printk(KERN_ERR "HFC-S USB: submit URB error(%s): status:%i\n", + __func__, errcode); fifo->active = 0; fifo->skbuff = NULL; } diff --git a/drivers/isdn/hisax/ipacx.c b/drivers/isdn/hisax/ipacx.c index 5faa5de24305..9cc26b40a437 100644 --- a/drivers/isdn/hisax/ipacx.c +++ b/drivers/isdn/hisax/ipacx.c @@ -580,7 +580,7 @@ bch_fill_fifo(struct BCState *bcs) if (cs->debug & L1_DEB_HSCX_FIFO) { char *t = bcs->blog; - t += sprintf(t, "chb_fill_fifo() B-%d cnt %d", hscx, count); + t += sprintf(t, "%s() B-%d cnt %d", __func__, hscx, count); QuickHex(t, ptr, count); debugl1(cs, "%s", bcs->blog); } diff --git a/drivers/isdn/hisax/isdnl1.c b/drivers/isdn/hisax/isdnl1.c index 800095781bfb..a560842c0e48 100644 --- a/drivers/isdn/hisax/isdnl1.c +++ b/drivers/isdn/hisax/isdnl1.c @@ -867,7 +867,7 @@ l1_msg(struct IsdnCardState *cs, int pr, void *arg) { break; default: if (cs->debug) - debugl1(cs, "l1msg %04X unhandled", pr); + debugl1(cs, "%s %04X unhandled", __func__, pr); break; } st = st->next; diff --git a/drivers/isdn/hisax/isdnl3.c b/drivers/isdn/hisax/isdnl3.c index 45b03840f716..c754706f83cd 100644 --- a/drivers/isdn/hisax/isdnl3.c +++ b/drivers/isdn/hisax/isdnl3.c @@ -153,7 +153,7 @@ void newl3state(struct l3_process *pc, int state) { if (pc->debug & L3_DEB_STATE) - l3_debug(pc->st, "newstate cr %d %d --> %d", + l3_debug(pc->st, "%s cr %d %d --> %d", __func__, pc->callref & 0x7F, pc->state, state); pc->state = state; diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index 00aad10507d8..93bae94314a6 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -501,7 +501,7 @@ static char *hycapi_procinfo(struct capi_ctr *ctrl) { hycapictrl_info *cinfo = (hycapictrl_info *)(ctrl->driverdata); #ifdef HYCAPI_PRINTFNAMES - printk(KERN_NOTICE "hycapi_proc_info\n"); + printk(KERN_NOTICE "%s\n", __func__); #endif if (!cinfo) return ""; diff --git a/drivers/isdn/pcbit/layer2.c b/drivers/isdn/pcbit/layer2.c index 42ecfef80132..46e1240ae074 100644 --- 
a/drivers/isdn/pcbit/layer2.c +++ b/drivers/isdn/pcbit/layer2.c @@ -85,7 +85,6 @@ pcbit_l2_write(struct pcbit_dev *dev, ulong msg, ushort refnum, } if ((frame = kmalloc(sizeof(struct frame_buf), GFP_ATOMIC)) == NULL) { - printk(KERN_WARNING "pcbit_2_write: kmalloc failed\n"); dev_kfree_skb(skb); return -1; } diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index ff435fbd1ad0..413ca4f73997 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -38,6 +38,9 @@ #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> +#define DRV_NAME "dummy" +#define DRV_VERSION "1.0" + static int numdummies = 1; /* fake multicast ability */ @@ -120,12 +123,24 @@ static const struct net_device_ops dummy_netdev_ops = { .ndo_change_carrier = dummy_change_carrier, }; +static void dummy_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_VERSION, sizeof(info->version)); +} + +static const struct ethtool_ops dummy_ethtool_ops = { + .get_drvinfo = dummy_get_drvinfo, +}; + static void dummy_setup(struct net_device *dev) { ether_setup(dev); /* Initialize the device structure. */ dev->netdev_ops = &dummy_netdev_ops; + dev->ethtool_ops = &dummy_ethtool_ops; dev->destructor = free_netdev; /* Fill in device structure with ethernet-generic values. */ @@ -150,7 +165,7 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[]) } static struct rtnl_link_ops dummy_link_ops __read_mostly = { - .kind = "dummy", + .kind = DRV_NAME, .setup = dummy_setup, .validate = dummy_validate, }; @@ -209,4 +224,4 @@ static void __exit dummy_cleanup_module(void) module_init(dummy_init_module); module_exit(dummy_cleanup_module); MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("dummy"); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 02c104dc2aa4..bedfdb1c430d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1285,7 +1285,6 @@ static int xgbe_open(struct net_device *netdev) struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_desc_if *desc_if = &pdata->desc_if; struct xgbe_channel *channel = NULL; - char dma_irq_name[IFNAMSIZ + 32]; unsigned int i = 0; int ret; @@ -1341,13 +1340,14 @@ static int xgbe_open(struct net_device *netdev) if (pdata->per_channel_irq) { channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { - snprintf(dma_irq_name, sizeof(dma_irq_name) - 1, + snprintf(channel->dma_irq_name, + sizeof(channel->dma_irq_name) - 1, "%s-TxRx-%u", netdev_name(netdev), channel->queue_index); ret = devm_request_irq(pdata->dev, channel->dma_irq, - xgbe_dma_isr, 0, dma_irq_name, - channel); + xgbe_dma_isr, 0, + channel->dma_irq_name, channel); if (ret) { netdev_alert(netdev, "error requesting irq %d\n", diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 95d44538357f..ebf489351555 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -511,7 +511,8 @@ static u32 xgbe_get_rxfh_indir_size(struct net_device *netdev) return ARRAY_SIZE(pdata->rss_table); } -static int xgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) +static int xgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int i; @@ -525,16 +526,22 @@ static int xgbe_get_rxfh(struct 
net_device *netdev, u32 *indir, u8 *key) if (key) memcpy(key, pdata->rss_key, sizeof(pdata->rss_key)); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + return 0; } static int xgbe_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; unsigned int ret; + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + if (indir) { ret = hw_if->set_rss_lookup_table(pdata, indir); if (ret) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index eb3387398c6f..f9ec762ac3f0 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -400,6 +400,7 @@ struct xgbe_channel { /* Per channel interrupt irq number */ int dma_irq; + char dma_irq_name[IFNAMSIZ + 32]; /* Netdev related settings */ struct napi_struct napi; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index c3e260c21734..888247ad9068 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -62,7 +62,6 @@ config BCM63XX_ENET config BCMGENET tristate "Broadcom GENET internal MAC support" - depends on OF select MII select PHYLIB select FIXED_PHY if BCMGENET=y diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 1edc931b1458..ffe4e003e636 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3358,12 +3358,18 @@ static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev) return T_ETH_INDIRECTION_TABLE_SIZE; } -static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) +static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, + u8 *hfunc) { struct bnx2x *bp = netdev_priv(dev); u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0}; size_t i; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + /* Get the current configuration of the RSS indirection table */ bnx2x_get_rss_ind_table(&bp->rss_conf_obj, ind_table); @@ -3383,11 +3389,21 @@ static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) } static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct bnx2x *bp = netdev_priv(dev); size_t i; + /* We require at least one supported parameter to be changed and no + * change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!indir) + return 0; + for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { /* * The same as in bnx2x_get_rxfh: we can't use a memcpy() diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 336ef3cf5773..07c636815127 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3163,6 +3163,8 @@ static void bnx2x_pf_q_prep_general(struct bnx2x *bp, gen_init->mtu = bp->dev->mtu; gen_init->cos = cos; + + gen_init->fp_hsi = ETH_FP_HSI_VERSION; } static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 7bc2924a7e24..07cdf9bbffef 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -4336,7 +4336,7 @@ static void bnx2x_q_fill_init_general_data(struct bnx2x *bp, test_bit(BNX2X_Q_FLG_FCOE, flags) ? LLFC_TRAFFIC_TYPE_FCOE : LLFC_TRAFFIC_TYPE_NW; - gen_data->fp_hsi_ver = ETH_FP_HSI_VERSION; + gen_data->fp_hsi_ver = params->fp_hsi; DP(BNX2X_MSG_SP, "flags: active %d, cos %d, stats en %d\n", gen_data->activate_flg, gen_data->cos, gen_data->statistics_en_flg); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index e97275f456c0..86baecb7c60c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -937,6 +937,8 @@ struct bnx2x_general_setup_params { u8 spcl_id; u16 mtu; u8 cos; + + u8 fp_hsi; }; struct bnx2x_rxq_setup_params { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index c88b20af87df..e5aca2de1871 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -193,6 +193,7 @@ void bnx2x_vfop_qctor_prep(struct bnx2x *bp, /* Setup-op general parameters */ setup_p->gen_params.spcl_id = vf->sp_cl_id; setup_p->gen_params.stat_id = vfq_stat_id(vf, q); + setup_p->gen_params.fp_hsi = vf->fp_hsi; /* Setup-op pause params: * Nothing to do, the pause thresholds are set by default to 0 which diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 01bafa4ac045..66ee62a0401a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -205,6 +205,8 @@ struct bnx2x_virtf { /* slow-path operations */ struct mutex op_mutex; /* one vfop at a time mutex */ enum channel_tlvs op_current; + + u8 fp_hsi; }; #define BNX2X_NR_VIRTFN(bp) ((bp)->vfdb->sriov.nr_virtfn) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index b1d9c44aa56c..be40eabc5304 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -224,6 +224,7 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) struct vfpf_acquire_tlv *req = &bp->vf2pf_mbox->req.acquire; struct pfvf_acquire_resp_tlv *resp = &bp->vf2pf_mbox->resp.acquire_resp; struct vfpf_port_phys_id_resp_tlv *phys_port_resp; + struct vfpf_fp_hsi_resp_tlv *fp_hsi_resp; u32 vf_id; bool resources_acquired = false; @@ -237,6 +238,7 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) req->vfdev_info.vf_id = vf_id; req->vfdev_info.vf_os = 0; + req->vfdev_info.fp_hsi_ver = ETH_FP_HSI_VERSION; req->resc_request.num_rxqs = rx_count; req->resc_request.num_txqs = tx_count; @@ -316,9 +318,14 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) memset(&bp->vf2pf_mbox->resp, 0, sizeof(union pfvf_tlvs)); } else { - /* PF reports error */ - BNX2X_ERR("Failed to get the requested amount of resources: %d. Breaking...\n", - bp->acquire_resp.hdr.status); + /* Determine reason of PF failure of acquire process */ + fp_hsi_resp = bnx2x_search_tlv_list(bp, resp, + CHANNEL_TLV_FP_HSI_SUPPORT); + if (fp_hsi_resp && !fp_hsi_resp->is_supported) + BNX2X_ERR("Old hypervisor - doesn't support current fastpath HSI version; Need to downgrade VF driver [or upgrade hypervisor]\n"); + else + BNX2X_ERR("Failed to get the requested amount of resources: %d. 
Breaking...\n", + bp->acquire_resp.hdr.status); rc = -EAGAIN; goto out; } @@ -333,6 +340,25 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) bp->flags |= HAS_PHYS_PORT_ID; } + /* Old Hypervisors might not even support the FP_HSI_SUPPORT TLV. + * If that's the case, we need to make certain the required FW was + * supported by such a hypervisor [i.e., v0-v2]. + */ + fp_hsi_resp = bnx2x_search_tlv_list(bp, resp, + CHANNEL_TLV_FP_HSI_SUPPORT); + if (!fp_hsi_resp && (ETH_FP_HSI_VERSION > ETH_FP_HSI_VER_2)) { + BNX2X_ERR("Old hypervisor - need to downgrade VF's driver\n"); + + /* Since acquire succeeded on the PF side, we need to send a + * release message in order to allow future probes. + */ + bnx2x_vfpf_finalize(bp, &req->first_tlv); + bnx2x_vfpf_release(bp); + + rc = -EINVAL; + goto out; + } + /* get HW info */ bp->common.chip_id |= (bp->acquire_resp.pfdev_info.chip_num & 0xffff); bp->link_params.chip_id = bp->common.chip_id; @@ -1125,6 +1151,26 @@ static void bnx2x_vf_mbx_resp_phys_port(struct bnx2x *bp, *offset += sizeof(struct vfpf_port_phys_id_resp_tlv); } +static void bnx2x_vf_mbx_resp_fp_hsi_ver(struct bnx2x *bp, + struct bnx2x_virtf *vf, + void *buffer, + u16 *offset) +{ + struct vfpf_fp_hsi_resp_tlv *fp_hsi; + + bnx2x_add_tlv(bp, buffer, *offset, CHANNEL_TLV_FP_HSI_SUPPORT, + sizeof(struct vfpf_fp_hsi_resp_tlv)); + + fp_hsi = (struct vfpf_fp_hsi_resp_tlv *) + (((u8 *)buffer) + *offset); + fp_hsi->is_supported = (vf->fp_hsi > ETH_FP_HSI_VERSION) ? 0 : 1; + + /* Offset should continue representing the offset to the tail + * of TLV data (outside this function scope) + */ + *offset += sizeof(struct vfpf_fp_hsi_resp_tlv); +} + static void bnx2x_vf_mbx_acquire_resp(struct bnx2x *bp, struct bnx2x_virtf *vf, struct bnx2x_vf_mbx *mbx, int vfop_status) { @@ -1219,6 +1265,12 @@ static void bnx2x_vf_mbx_acquire_resp(struct bnx2x *bp, struct bnx2x_virtf *vf, CHANNEL_TLV_PHYS_PORT_ID)) bnx2x_vf_mbx_resp_phys_port(bp, vf, &mbx->msg->resp, &length); + /* `New' vfs will want to know if fastpath HSI is supported, since + * if that's not the case they could print into system log the fact + * the driver version must be updated. + */ + bnx2x_vf_mbx_resp_fp_hsi_ver(bp, vf, &mbx->msg->resp, &length); + bnx2x_add_tlv(bp, &mbx->msg->resp, length, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); @@ -1288,6 +1340,23 @@ static void bnx2x_vf_mbx_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf, goto out; } + /* Verify the VF fastpath HSI can be supported by the loaded FW. + * Linux vfs should be oblivious to changes between v0 and v2. 
+ */ + if (bnx2x_vf_mbx_is_windows_vm(bp, &mbx->msg->req.acquire)) + vf->fp_hsi = acquire->vfdev_info.fp_hsi_ver; + else + vf->fp_hsi = max_t(u8, acquire->vfdev_info.fp_hsi_ver, + ETH_FP_HSI_VER_2); + if (vf->fp_hsi > ETH_FP_HSI_VERSION) { + DP(BNX2X_MSG_IOV, + "VF [%d] - Can't support acquire request since VF requests a FW version which is too new [%02x > %02x]\n", + vf->abs_vfid, acquire->vfdev_info.fp_hsi_ver, + ETH_FP_HSI_VERSION); + rc = -EINVAL; + goto out; + } + /* acquire the resources */ rc = bnx2x_vf_acquire(bp, vf, &acquire->resc_request); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h index 15670c499a20..b86479fc0d2f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h @@ -124,7 +124,7 @@ struct vfpf_acquire_tlv { #define VF_OS_UNDEFINED (0 << VF_OS_SHIFT) #define VF_OS_WINDOWS (1 << VF_OS_SHIFT) - u8 padding; + u8 fp_hsi_ver; u8 caps; #define VF_CAP_SUPPORT_EXT_BULLETIN (1 << 0) } vfdev_info; @@ -204,6 +204,12 @@ struct vfpf_port_phys_id_resp_tlv { u8 padding[2]; }; +struct vfpf_fp_hsi_resp_tlv { + struct channel_tlv tl; + u8 is_supported; + u8 padding[3]; +}; + #define VFPF_INIT_FLG_STATS_COALESCE (1 << 0) /* when set the VFs queues * stats will be coalesced on * the leading RSS queue @@ -448,6 +454,7 @@ enum channel_tlvs { CHANNEL_TLV_UPDATE_RSS, CHANNEL_TLV_PHYS_PORT_ID, CHANNEL_TLV_UPDATE_TPA, + CHANNEL_TLV_FP_HSI_SUPPORT, CHANNEL_TLV_MAX }; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f2fadb053d52..7078bd386fb7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -42,6 +42,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/phy.h> +#include <linux/platform_data/bcmgenet.h> #include <asm/unaligned.h> @@ -2503,6 +2504,7 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) struct bcmgenet_hw_params *params; u32 reg; u8 major; + u16 gphy_rev; if (GENET_IS_V4(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; @@ -2551,8 +2553,29 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) * to pass this information to the PHY driver. The PHY driver expects * to find the PHY major revision in bits 15:8 while the GENET register * stores that information in bits 7:0, account for that. + * + * On newer chips, starting with PHY revision G0, a new scheme is + * deployed similar to the Starfighter 2 switch with GPHY major + * revision in bits 15:8 and patch level in bits 7:0. Major revision 0 + * is reserved as well as special value 0x01ff, we have a small + * heuristic to check for the new GPHY revision and re-arrange things + * so the GPHY driver is happy. 
*/ - priv->gphy_rev = (reg & 0xffff) << 8; + gphy_rev = reg & 0xffff; + + /* This is the good old scheme, just GPHY major, no minor nor patch */ + if ((gphy_rev & 0xf0) != 0) + priv->gphy_rev = gphy_rev << 8; + + /* This is the new scheme, GPHY major rolls over with 0x10 = rev G0 */ + else if ((gphy_rev & 0xff00) != 0) + priv->gphy_rev = gphy_rev; + + /* This is reserved so should require special treatment */ + else if (gphy_rev == 0 || gphy_rev == 0x01ff) { + pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); + return; + } #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!(params->flags & GENET_HAS_40BITS)) @@ -2586,8 +2609,9 @@ static const struct of_device_id bcmgenet_match[] = { static int bcmgenet_probe(struct platform_device *pdev) { + struct bcmgenet_platform_data *pd = pdev->dev.platform_data; struct device_node *dn = pdev->dev.of_node; - const struct of_device_id *of_id; + const struct of_device_id *of_id = NULL; struct bcmgenet_priv *priv; struct net_device *dev; const void *macaddr; @@ -2601,9 +2625,11 @@ static int bcmgenet_probe(struct platform_device *pdev) return -ENOMEM; } - of_id = of_match_node(bcmgenet_match, dn); - if (!of_id) - return -EINVAL; + if (dn) { + of_id = of_match_node(bcmgenet_match, dn); + if (!of_id) + return -EINVAL; + } priv = netdev_priv(dev); priv->irq0 = platform_get_irq(pdev, 0); @@ -2615,11 +2641,15 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } - macaddr = of_get_mac_address(dn); - if (!macaddr) { - dev_err(&pdev->dev, "can't find MAC address\n"); - err = -EINVAL; - goto err; + if (dn) { + macaddr = of_get_mac_address(dn); + if (!macaddr) { + dev_err(&pdev->dev, "can't find MAC address\n"); + err = -EINVAL; + goto err; + } + } else { + macaddr = pd->mac_address; } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -2659,7 +2689,10 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->dev = dev; priv->pdev = pdev; - priv->version = (enum bcmgenet_version)of_id->data; + if (of_id) + priv->version = (enum bcmgenet_version)of_id->data; + else + priv->version = pd->genet_version; priv->clk = devm_clk_get(&priv->pdev->dev, "enet"); if (IS_ERR(priv->clk)) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 933cd7e7cd33..446889cc3c6a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -23,6 +23,7 @@ #include <linux/of.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <linux/platform_data/bcmgenet.h> #include "bcmgenet.h" @@ -312,22 +313,6 @@ static int bcmgenet_mii_probe(struct net_device *dev) u32 phy_flags; int ret; - if (priv->phydev) { - pr_info("PHY already attached\n"); - return 0; - } - - /* In the case of a fixed PHY, the DT node associated - * to the PHY is the Ethernet MAC DT node. 
- */ - if (!priv->phy_dn && of_phy_is_fixed_link(dn)) { - ret = of_phy_register_fixed_link(dn); - if (ret) - return ret; - - priv->phy_dn = of_node_get(dn); - } - /* Communicate the integrated PHY revision */ phy_flags = priv->gphy_rev; @@ -337,11 +322,39 @@ static int bcmgenet_mii_probe(struct net_device *dev) priv->old_duplex = -1; priv->old_pause = -1; - phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, - phy_flags, priv->phy_interface); - if (!phydev) { - pr_err("could not attach to PHY\n"); - return -ENODEV; + if (dn) { + if (priv->phydev) { + pr_info("PHY already attached\n"); + return 0; + } + + /* In the case of a fixed PHY, the DT node associated + * to the PHY is the Ethernet MAC DT node. + */ + if (!priv->phy_dn && of_phy_is_fixed_link(dn)) { + ret = of_phy_register_fixed_link(dn); + if (ret) + return ret; + + priv->phy_dn = of_node_get(dn); + } + + phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, + phy_flags, priv->phy_interface); + if (!phydev) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } else { + phydev = priv->phydev; + phydev->dev_flags = phy_flags; + + ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, + priv->phy_interface); + if (ret) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } } priv->phydev = phydev; @@ -438,6 +451,75 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv) return 0; } +static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) +{ + struct device *kdev = &priv->pdev->dev; + struct bcmgenet_platform_data *pd = kdev->platform_data; + struct mii_bus *mdio = priv->mii_bus; + struct phy_device *phydev; + int ret; + + if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) { + /* + * Internal or external PHY with MDIO access + */ + if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) + mdio->phy_mask = ~(1 << pd->phy_address); + else + mdio->phy_mask = 0; + + ret = mdiobus_register(mdio); + if (ret) { + dev_err(kdev, "failed to register MDIO bus\n"); + return ret; + } + + if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) + phydev = mdio->phy_map[pd->phy_address]; + else + phydev = phy_find_first(mdio); + + if (!phydev) { + dev_err(kdev, "failed to register PHY device\n"); + mdiobus_unregister(mdio); + return -ENODEV; + } + } else { + /* + * MoCA port or no MDIO access. + * Use fixed PHY to represent the link layer. 
+ */ + struct fixed_phy_status fphy_status = { + .link = 1, + .speed = pd->phy_speed, + .duplex = pd->phy_duplex, + .pause = 0, + .asym_pause = 0, + }; + + phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); + if (!phydev || IS_ERR(phydev)) { + dev_err(kdev, "failed to register fixed PHY device\n"); + return -ENODEV; + } + } + + priv->phydev = phydev; + priv->phy_interface = pd->phy_interface; + + return 0; +} + +static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv) +{ + struct device_node *dn = priv->pdev->dev.of_node; + + if (dn) + return bcmgenet_mii_of_init(priv); + else + return bcmgenet_mii_pd_init(priv); +} + int bcmgenet_mii_init(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -447,7 +529,7 @@ int bcmgenet_mii_init(struct net_device *dev) if (ret) return ret; - ret = bcmgenet_mii_of_init(priv); + ret = bcmgenet_mii_bus_init(priv); if (ret) goto out_free; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 43fd1b72c1ea..bb48a610b72a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12561,22 +12561,38 @@ static u32 tg3_get_rxfh_indir_size(struct net_device *dev) return size; } -static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) +static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) { struct tg3 *tp = netdev_priv(dev); int i; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) indir[i] = tp->rss_ind_tbl[i]; return 0; } -static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key) +static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, + const u8 hfunc) { struct tg3 *tp = netdev_priv(dev); size_t i; + /* We require at least one supported parameter to be changed and no + * change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!indir) + return 0; + for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) tp->rss_ind_tbl[i] = indir[i]; diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 9e089d24466e..6932be08c62c 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -35,8 +35,8 @@ config MACB ---help--- The Cadence MACB ethernet interface is found on many Atmel AT32 and AT91 parts. This driver also supports the Cadence GEM (Gigabit - Ethernet MAC found in some ARM SoC devices). Note: the Gigabit mode - is not yet supported. Say Y to include support for the MACB/GEM chip. + Ethernet MAC found in some ARM SoC devices). Say Y to include + support for the MACB/GEM chip. To compile this driver as a module, choose M here: the module will be called macb. 
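The xgbe, bnx2x, and tg3 hunks above, and the cxgb4 hunk that follows, all adapt to the same extended ethtool RSS interface: .get_rxfh() now reports the hash function through an hfunc pointer, .set_rxfh() receives the requested hash function, and drivers that only implement Toeplitz hashing refuse anything else. A condensed sketch of the shared pattern, with mydev_priv, rss_table, and MYDEV_RSS_TBL_SIZE as hypothetical driver state:

static int mydev_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
			  u8 *hfunc)
{
	struct mydev_priv *priv = netdev_priv(dev);	/* hypothetical */
	int i;

	if (hfunc)
		*hfunc = ETH_RSS_HASH_TOP;	/* Toeplitz only */
	if (!indir)
		return 0;
	for (i = 0; i < MYDEV_RSS_TBL_SIZE; i++)
		indir[i] = priv->rss_table[i];
	return 0;
}

static int mydev_set_rxfh(struct net_device *dev, const u32 *indir,
			  const u8 *key, const u8 hfunc)
{
	struct mydev_priv *priv = netdev_priv(dev);
	int i;

	/* Reject a new hash key and any hash function other than
	 * Toeplitz; ETH_RSS_HASH_NO_CHANGE means the caller left
	 * hfunc untouched.
	 */
	if (key ||
	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
		return -EOPNOTSUPP;
	if (!indir)
		return 0;
	for (i = 0; i < MYDEV_RSS_TBL_SIZE; i++)
		priv->rss_table[i] = indir[i];
	return 0;
}

Each driver hunk instantiates this pattern against its own table size and hardware programming call; treating a NULL indir (and, on the get side, NULL key/hfunc) as "no change requested" keeps partial ethtool requests working.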
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 4c5879389003..86222a1bdb12 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -301,7 +301,7 @@ unsigned int t1_sched_update_parms(struct sge *sge, unsigned int port, struct sched_port *p = &s->p[port]; unsigned int max_avail_segs; - pr_debug("t1_sched_update_params mtu=%d speed=%d\n", mtu, speed); + pr_debug("%s mtu=%d speed=%d\n", __func__, mtu, speed); if (speed) p->speed = speed; if (mtu) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index e8b09bbdc226..c38a93607ea2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -222,6 +222,12 @@ struct tp_err_stats { u32 ofldCongDefer; }; +struct sge_params { + u32 hps; /* host page size for our PF/VF */ + u32 eq_qpp; /* egress queues/page for our PF/VF */ + u32 iq_qpp; /* ingress queues/page for our PF/VF */ +}; + struct tp_params { unsigned int ntxchan; /* # of Tx channels */ unsigned int tre; /* log2 of core clocks per TP tick */ @@ -285,6 +291,7 @@ enum chip_type { }; struct adapter_params { + struct sge_params sge; struct tp_params tp; struct vpd_params vpd; struct pci_params pci; @@ -431,7 +438,8 @@ struct sge_fl { /* SGE free-buffer queue state */ struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ __be64 *desc; /* address of HW Rx descriptor ring */ dma_addr_t addr; /* bus address of HW ring start */ - u64 udb; /* BAR2 offset of User Doorbell area */ + void __iomem *bar2_addr; /* address of BAR2 Queue registers */ + unsigned int bar2_qid; /* Queue ID for BAR2 Queue registers */ }; /* A packet gather list */ @@ -461,7 +469,8 @@ struct sge_rspq { /* state for an SGE response queue */ u16 abs_id; /* absolute SGE id for the response q */ __be64 *desc; /* address of HW response ring */ dma_addr_t phys_addr; /* physical address of the ring */ - u64 udb; /* BAR2 offset of User Doorbell area */ + void __iomem *bar2_addr; /* address of BAR2 Queue registers */ + unsigned int bar2_qid; /* Queue ID for BAR2 Queue registers */ unsigned int iqe_len; /* entry size */ unsigned int size; /* capacity of response queue */ struct adapter *adap; @@ -519,7 +528,8 @@ struct sge_txq { int db_disabled; unsigned short db_pidx; unsigned short db_pidx_inc; - u64 udb; /* BAR2 offset of User Doorbell area */ + void __iomem *bar2_addr; /* address of BAR2 Queue registers */ + unsigned int bar2_qid; /* Queue ID for BAR2 Queue registers */ }; struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ @@ -995,6 +1005,15 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, const u8 *fw_data, unsigned int fw_size, struct fw_hdr *card_fw, enum dev_state state, int *reset); int t4_prep_adapter(struct adapter *adapter); + +enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; +int t4_bar2_sge_qregs(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid); + +int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3aea82bb9039..4c26be97fc9a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2923,21 +2923,35 @@ static u32 get_rss_table_size(struct net_device *dev) return pi->rss_size; } -static int get_rss_table(struct net_device *dev, u32 *p, u8 *key) +static int get_rss_table(struct net_device *dev, u32 *p, u8 *key, u8 *hfunc) { const struct port_info *pi = netdev_priv(dev); unsigned int n = pi->rss_size; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!p) + return 0; while (n--) p[n] = pi->rss[n]; return 0; } -static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key) +static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key, + const u8 hfunc) { unsigned int i; struct port_info *pi = netdev_priv(dev); + /* We require at least one supported parameter to be changed and no + * change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!p) + return 0; + for (i = 0; i < pi->rss_size; i++) pi->rss[i] = p[i]; if (pi->adapter->flags & FULL_INIT_DONE) @@ -3791,6 +3805,22 @@ u64 cxgb4_read_sge_timestamp(struct net_device *dev) } EXPORT_SYMBOL(cxgb4_read_sge_timestamp); +int cxgb4_bar2_sge_qregs(struct net_device *dev, + unsigned int qid, + enum cxgb4_bar2_qtype qtype, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid) +{ + return t4_bar2_sge_qregs(netdev2adap(dev), + qid, + (qtype == CXGB4_BAR2_QTYPE_EGRESS + ? T4_BAR2_QTYPE_EGRESS + : T4_BAR2_QTYPE_INGRESS), + pbar2_qoffset, + pbar2_qid); +} +EXPORT_SYMBOL(cxgb4_bar2_sge_qregs); + static struct pci_driver cxgb4_driver; static void check_neigh_update(struct neighbour *neigh) @@ -3973,31 +4003,18 @@ static void process_db_drop(struct work_struct *work) u32 dropped_db = t4_read_reg(adap, 0x010ac); u16 qid = (dropped_db >> 15) & 0x1ffff; u16 pidx_inc = dropped_db & 0x1fff; - unsigned int s_qpp; - unsigned short udb_density; - unsigned long qpshift; - int page; - u32 udb; - - dev_warn(adap->pdev_dev, - "Dropped DB 0x%x qid %d bar2 %d coalesce %d pidx %d\n", - dropped_db, qid, - (dropped_db >> 14) & 1, - (dropped_db >> 13) & 1, - pidx_inc); + u64 bar2_qoffset; + unsigned int bar2_qid; + int ret; - drain_db_fifo(adap, 1); - - s_qpp = QUEUESPERPAGEPF1 * adap->fn; - udb_density = 1 << QUEUESPERPAGEPF0_GET(t4_read_reg(adap, - SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp); - qpshift = PAGE_SHIFT - ilog2(udb_density); - udb = qid << qpshift; - udb &= PAGE_MASK; - page = udb / PAGE_SIZE; - udb += (qid - (page * udb_density)) * 128; - - writel(PIDX(pidx_inc), adap->bar2 + udb + 8); + ret = t4_bar2_sge_qregs(adap, qid, T4_BAR2_QTYPE_EGRESS, + &bar2_qoffset, &bar2_qid); + if (ret) + dev_err(adap->pdev_dev, "doorbell drop recovery: " + "qid=%d, pidx_inc=%d\n", qid, pidx_inc); + else + writel(PIDX_T5(pidx_inc) | QID(bar2_qid), + adap->bar2 + bar2_qoffset + SGE_UDB_KDOORBELL); /* Re-enable BAR2 WC */ t4_set_reg_field(adap, 0x10b0, 1<<15, 1<<15); @@ -4055,12 +4072,8 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.adapter_type = adap->params.chip; lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2)); lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << QUEUESPERPAGEPF0_GET( - t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >> - (adap->fn * 4)); - lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( - t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> - (adap->fn * 4)); + lli.udb_density = 1 << adap->params.sge.eq_qpp; + lli.ucq_density = 1 << adap->params.sge.iq_qpp; lli.filt_mode = adap->params.tp.vlan_pri_map; /* 
MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ for (i = 0; i < NCHAN; i++) @@ -5912,6 +5925,7 @@ static int adap_init0(struct adapter *adap) t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd, adap->params.b_wnd); } + t4_init_sge_params(adap); t4_init_tp_params(adap); adap->flags |= FW_OK; return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 4eba7cb1b89c..152b4c4c7809 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -305,4 +305,11 @@ void cxgb4_enable_db_coalescing(struct net_device *dev); int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte); u64 cxgb4_read_sge_timestamp(struct net_device *dev); +enum cxgb4_bar2_qtype { CXGB4_BAR2_QTYPE_EGRESS, CXGB4_BAR2_QTYPE_INGRESS }; +int cxgb4_bar2_sge_qregs(struct net_device *dev, + unsigned int qid, + enum cxgb4_bar2_qtype qtype, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid); + #endif /* !__CXGB4_OFLD_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 433560b8cb1b..f12debd98dac 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -527,14 +527,16 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) val |= DBPRIO(1); wmb(); - /* If we're on T4, use the old doorbell mechanism; otherwise - * use the new BAR2 mechanism. + /* If we don't have access to the new User Doorbell (T5+), use + * the old doorbell mechanism; otherwise use the new BAR2 + * mechanism. */ - if (is_t4(adap->params.chip)) { + if (unlikely(q->bar2_addr == NULL)) { t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), val | QID(q->cntxt_id)); } else { - writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL); + writel(val | QID(q->bar2_qid), + q->bar2_addr + SGE_UDB_KDOORBELL); /* This Write memory Barrier will force the write to * the User Doorbell area to be flushed. @@ -850,14 +852,13 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, *end = 0; } -/* This function copies a tx_desc struct to memory mapped BAR2 space(user space - * writes). For coalesced WR SGE, fetches data from the FIFO instead of from - * Host. +/* This function copies 64 byte coalesced work request to + * memory mapped BAR2 space. For coalesced WR SGE fetches + * data from the FIFO instead of from Host. */ -static void cxgb_pio_copy(u64 __iomem *dst, struct tx_desc *desc) +static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) { - int count = sizeof(*desc) / sizeof(u64); - u64 *src = (u64 *)desc; + int count = 8; while (count) { writeq(*src, dst); @@ -879,7 +880,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { wmb(); /* write descriptors before telling HW */ - if (is_t4(adap->params.chip)) { + /* If we don't have access to the new User Doorbell (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { u32 val = PIDX(n); unsigned long flags; @@ -905,21 +909,22 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) */ WARN_ON(val & DBPRIO(1)); - /* For T5 and later we use the Write-Combine mapped BAR2 User - * Doorbell mechanism. If we're only writing a single TX - * Descriptor and TX Write Combining hasn't been disabled, we - * can use the Write Combining Gather Buffer; otherwise we use - * the simple doorbell. 
+ /* If we're only writing a single TX Descriptor and we can use + * Inferred QID registers, we can use the Write Combining + * Gather Buffer; otherwise we use the simple doorbell. */ - if (n == 1) { + if (n == 1 && q->bar2_qid == 0) { int index = (q->pidx ? (q->pidx - 1) : (q->size - 1)); + u64 *wr = (u64 *)&q->desc[index]; - cxgb_pio_copy(adap->bar2 + q->udb + SGE_UDB_WCDOORBELL, - q->desc + index); + cxgb_pio_copy((u64 __iomem *) + (q->bar2_addr + SGE_UDB_WCDOORBELL), + wr); } else { - writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL); + writel(val | QID(q->bar2_qid), + q->bar2_addr + SGE_UDB_KDOORBELL); } /* This Write Memory Barrier will force the write to the User @@ -1997,11 +2002,16 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) params = QINTR_TIMER_IDX(7); val = CIDXINC(work_done) | SEINTARM(params); - if (is_t4(q->adap->params.chip)) { + + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), val | INGRESSQID((u32)q->cntxt_id)); } else { - writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS); + writel(val | INGRESSQID(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); wmb(); } return work_done; @@ -2047,11 +2057,16 @@ static unsigned int process_intrq(struct adapter *adap) } val = CIDXINC(credits) | SEINTARM(q->intr_params); - if (is_t4(adap->params.chip)) { + + /* If we don't have access to the new User GTS (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(q->bar2_addr == NULL)) { t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), val | INGRESSQID(q->cntxt_id)); } else { - writel(val, adap->bar2 + q->udb + SGE_UDB_GTS); + writel(val | INGRESSQID(q->bar2_qid), + q->bar2_addr + SGE_UDB_GTS); wmb(); } spin_unlock(&adap->sge.intrq_lock); @@ -2235,48 +2250,32 @@ static void sge_tx_timer_cb(unsigned long data) } /** - * udb_address - return the BAR2 User Doorbell address for a Queue - * @adap: the adapter - * @cntxt_id: the Queue Context ID - * @qpp: Queues Per Page (for all PFs) + * bar2_address - return the BAR2 address for an SGE Queue's Registers + * @adapter: the adapter + * @qid: the SGE Queue ID + * @qtype: the SGE Queue Type (Egress or Ingress) + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues * - * Returns the BAR2 address of the user Doorbell associated with the - * indicated Queue Context ID. Note that this is only applicable - * for T5 and later. - */ -static u64 udb_address(struct adapter *adap, unsigned int cntxt_id, - unsigned int qpp) -{ - u64 udb; - unsigned int s_qpp; - unsigned short udb_density; - unsigned long qpshift; - int page; - - BUG_ON(is_t4(adap->params.chip)); - - s_qpp = (QUEUESPERPAGEPF0 + - (QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn); - udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK); - qpshift = PAGE_SHIFT - ilog2(udb_density); - udb = (u64)cntxt_id << qpshift; - udb &= PAGE_MASK; - page = udb / PAGE_SIZE; - udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE; - - return udb; -} + * Returns the BAR2 address for the SGE Queue Registers associated with + * @qid. If BAR2 SGE Registers aren't available, returns NULL. Also + * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE + * Queue Registers. If the BAR2 Queue ID is 0, then "Inferred Queue ID" + * Registers are supported (e.g. the Write Combining Doorbell Buffer). 
+ */ +static void __iomem *bar2_address(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + unsigned int *pbar2_qid) +{ + u64 bar2_qoffset; + int ret; -static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id) -{ - return udb_address(adap, cntxt_id, - t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF)); -} + ret = t4_bar2_sge_qregs(adapter, qid, qtype, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; -static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id) -{ - return udb_address(adap, cntxt_id, - t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF)); + return adapter->bar2 + bar2_qoffset; } int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, @@ -2344,8 +2343,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, iq->next_intr_params = iq->intr_params; iq->cntxt_id = ntohs(c.iqid); iq->abs_id = ntohs(c.physiqid); - if (!is_t4(adap->params.chip)) - iq->udb = udb_address_iq(adap, iq->cntxt_id); + iq->bar2_addr = bar2_address(adap, + iq->cntxt_id, + T4_BAR2_QTYPE_INGRESS, + &iq->bar2_qid); iq->size--; /* subtract status entry */ iq->netdev = dev; iq->handler = hnd; @@ -2362,11 +2363,13 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; - /* Note, we must initialize the Free List User Doorbell - * address before refilling the Free List! + /* Note, we must initialize the BAR2 Free List User Doorbell + * information before refilling the Free List! */ - if (!is_t4(adap->params.chip)) - fl->udb = udb_address_eq(adap, fl->cntxt_id); + fl->bar2_addr = bar2_address(adap, + fl->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } return 0; @@ -2392,9 +2395,10 @@ err: static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) { q->cntxt_id = id; - if (!is_t4(adap->params.chip)) - q->udb = udb_address_eq(adap, q->cntxt_id); - + q->bar2_addr = bar2_address(adap, + q->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &q->bar2_qid); q->in_use = 0; q->cidx = q->pidx = 0; q->stops = q->restarts = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 7975d26f50df..a9323bdb3585 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -1213,6 +1213,8 @@ out: if (ret) dev_err(adap->pdev_dev, "firmware download failed, error %d\n", ret); + else + ret = t4_get_fw_version(adap, &adap->params.fw_vers); return ret; } @@ -4003,6 +4005,126 @@ int t4_prep_adapter(struct adapter *adapter) } /** + * t4_bar2_sge_qregs - return BAR2 SGE Queue register information + * @adapter: the adapter + * @qid: the Queue ID + * @qtype: the Ingress or Egress type for @qid + * @pbar2_qoffset: BAR2 Queue Offset + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 SGE Queue Registers information associated with the + * indicated Absolute Queue ID. These are passed back in return value + * pointers. @qtype should be T4_BAR2_QTYPE_EGRESS for Egress Queue + * and T4_BAR2_QTYPE_INGRESS for Ingress Queues. + * + * This may return an error which indicates that BAR2 SGE Queue + * registers aren't available. 
If an error is not returned, then the + * following values are returned: + * + * *@pbar2_qoffset: the BAR2 Offset of the @qid Registers + * *@pbar2_qid: the BAR2 SGE Queue ID or 0 of @qid + * + * If the returned BAR2 Queue ID is 0, then BAR2 SGE registers which + * require the "Inferred Queue ID" ability may be used. E.g. the + * Write Combining Doorbell Buffer. If the BAR2 Queue ID is not 0, + * then these "Inferred Queue ID" registers may not be used. + */ +int t4_bar2_sge_qregs(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid) +{ + unsigned int page_shift, page_size, qpp_shift, qpp_mask; + u64 bar2_page_offset, bar2_qoffset; + unsigned int bar2_qid, bar2_qid_offset, bar2_qinferred; + + /* T4 doesn't support BAR2 SGE Queue registers. + */ + if (is_t4(adapter->params.chip)) + return -EINVAL; + + /* Get our SGE Page Size parameters. + */ + page_shift = adapter->params.sge.hps + 10; + page_size = 1 << page_shift; + + /* Get the right Queues per Page parameters for our Queue. + */ + qpp_shift = (qtype == T4_BAR2_QTYPE_EGRESS + ? adapter->params.sge.eq_qpp + : adapter->params.sge.iq_qpp); + qpp_mask = (1 << qpp_shift) - 1; + + /* Calculate the basics of the BAR2 SGE Queue register area: + * o The BAR2 page the Queue registers will be in. + * o The BAR2 Queue ID. + * o The BAR2 Queue ID Offset into the BAR2 page. + */ + bar2_page_offset = ((qid >> qpp_shift) << page_shift); + bar2_qid = qid & qpp_mask; + bar2_qid_offset = bar2_qid * SGE_UDB_SIZE; + + /* If the BAR2 Queue ID Offset is less than the Page Size, then the + * hardware will infer the Absolute Queue ID simply from the writes to + * the BAR2 Queue ID Offset within the BAR2 Page (and we need to use a + * BAR2 Queue ID of 0 for those writes). Otherwise, we'll simply + * write to the first BAR2 SGE Queue Area within the BAR2 Page with + * the BAR2 Queue ID and the hardware will infer the Absolute Queue ID + * from the BAR2 Page and BAR2 Queue ID. + * + * One important consequence of this is that some BAR2 SGE registers + * have a "Queue ID" field and we can write the BAR2 SGE Queue ID + * there. But other registers synthesize the SGE Queue ID purely + * from the writes to the registers -- the Write Combined Doorbell + * Buffer is a good example. These BAR2 SGE Registers are only + * available for those BAR2 SGE Register areas where the SGE Absolute + * Queue ID can be inferred from simple writes. + */ + bar2_qoffset = bar2_page_offset; + bar2_qinferred = (bar2_qid_offset < page_size); + if (bar2_qinferred) { + bar2_qoffset += bar2_qid_offset; + bar2_qid = 0; + } + + *pbar2_qoffset = bar2_qoffset; + *pbar2_qid = bar2_qid; + return 0; +} + +/** + * t4_init_sge_params - initialize adap->params.sge + * @adapter: the adapter + * + * Initialize various fields of the adapter's SGE Parameters structure. + */ +int t4_init_sge_params(struct adapter *adapter) +{ + struct sge_params *sge_params = &adapter->params.sge; + u32 hps, qpp; + unsigned int s_hps, s_qpp; + + /* Extract the SGE Page Size for our PF. + */ + hps = t4_read_reg(adapter, SGE_HOST_PAGE_SIZE); + s_hps = (HOSTPAGESIZEPF0_S + + (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * adapter->fn); + sge_params->hps = ((hps >> s_hps) & HOSTPAGESIZEPF0_M); + + /* Extract the SGE Egress and Ingress Queues Per Page for our PF.
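The offset arithmetic in t4_bar2_sge_qregs() is easy to sanity-check in isolation. The userspace snippet below walks one example; the sample parameters (64KB SGE page, 4 queues per page, 128-byte doorbell stride) are illustrative values, not ones read from hardware.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const unsigned int page_shift = 16;   /* 64KB SGE page (hps + 10) */
            const unsigned int qpp_shift  = 2;    /* 4 queues per page */
            const unsigned int udb_size   = 128;  /* SGE_UDB_SIZE */
            unsigned int qid = 13;

            uint64_t qoffset = (uint64_t)(qid >> qpp_shift) << page_shift;
            unsigned int bar2_qid = qid & ((1u << qpp_shift) - 1);
            unsigned int qid_offset = bar2_qid * udb_size;

            if (qid_offset < (1u << page_shift)) {  /* inferred-QID case */
                    qoffset += qid_offset;
                    bar2_qid = 0;
            }

            /* qid 13 lands in BAR2 page 3 at offset 0x30080, inferred QID */
            printf("qoffset=0x%llx bar2_qid=%u\n",
                   (unsigned long long)qoffset, bar2_qid);
            return 0;
    }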
+ */ + s_qpp = (QUEUESPERPAGEPF0_S + + (QUEUESPERPAGEPF1_S - QUEUESPERPAGEPF0_S) * adapter->fn); + qpp = t4_read_reg(adapter, SGE_EGRESS_QUEUES_PER_PAGE_PF); + sge_params->eq_qpp = ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK); + qpp = t4_read_reg(adapter, SGE_INGRESS_QUEUES_PER_PAGE_PF); + sge_params->iq_qpp = ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK); + + return 0; +} + +/** * t4_init_tp_params - initialize adap->params.tp * @adap: the adapter * diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index ccdf8a7f4916..d7bd34ee65bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -155,17 +155,22 @@ #define HOSTPAGESIZEPF2_SHIFT 8 #define HOSTPAGESIZEPF2(x) ((x) << HOSTPAGESIZEPF2_SHIFT) -#define HOSTPAGESIZEPF1_MASK 0x0000000fU -#define HOSTPAGESIZEPF1_SHIFT 4 -#define HOSTPAGESIZEPF1(x) ((x) << HOSTPAGESIZEPF1_SHIFT) +#define HOSTPAGESIZEPF1_M 0x0000000fU +#define HOSTPAGESIZEPF1_S 4 +#define HOSTPAGESIZEPF1(x) ((x) << HOSTPAGESIZEPF1_S) -#define HOSTPAGESIZEPF0_MASK 0x0000000fU -#define HOSTPAGESIZEPF0_SHIFT 0 -#define HOSTPAGESIZEPF0(x) ((x) << HOSTPAGESIZEPF0_SHIFT) +#define HOSTPAGESIZEPF0_M 0x0000000fU +#define HOSTPAGESIZEPF0_S 0 +#define HOSTPAGESIZEPF0(x) ((x) << HOSTPAGESIZEPF0_S) #define SGE_EGRESS_QUEUES_PER_PAGE_PF 0x1010 -#define QUEUESPERPAGEPF0_MASK 0x0000000fU -#define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) +#define SGE_EGRESS_QUEUES_PER_PAGE_VF_A 0x1014 + +#define QUEUESPERPAGEPF1_S 4 + +#define QUEUESPERPAGEPF0_S 0 +#define QUEUESPERPAGEPF0_MASK 0x0000000fU +#define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) #define QUEUESPERPAGEPF0 0 #define QUEUESPERPAGEPF1 4 @@ -323,6 +328,7 @@ #define SGE_DEBUG_DATA_LOW_INDEX_3 0x12cc #define SGE_DEBUG_DATA_HIGH_INDEX_10 0x12a8 #define SGE_INGRESS_QUEUES_PER_PAGE_PF 0x10f4 +#define SGE_INGRESS_QUEUES_PER_PAGE_VF_A 0x10f8 #define S_HP_INT_THRESH 28 #define M_HP_INT_THRESH 0xfU diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 3d06e77d7121..d00a751f0588 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -138,6 +138,8 @@ struct sge_fl { struct rx_sw_desc *sdesc; /* address of SW RX descriptor ring */ __be64 *desc; /* address of HW RX descriptor ring */ dma_addr_t addr; /* PCI bus address of hardware ring */ + void __iomem *bar2_addr; /* address of BAR2 Queue registers */ + unsigned int bar2_qid; /* Queue ID for BAR2 Queue registers */ }; /* @@ -178,6 +180,8 @@ struct sge_rspq { u16 abs_id; /* SGE abs QID for the response Q */ __be64 *desc; /* address of hardware response ring */ dma_addr_t phys_addr; /* PCI bus address of ring */ + void __iomem *bar2_addr; /* address of BAR2 Queue registers */ + unsigned int bar2_qid; /* Queue ID for BAR2 Queue registers */ unsigned int iqe_len; /* entry size */ unsigned int size; /* capcity of response Q */ struct adapter *adapter; /* our adapter */ @@ -240,6 +244,8 @@ struct sge_txq { struct tx_sw_desc *sdesc; /* address of SW TX descriptor ring */ struct sge_qstat *stat; /* queue status entry */ dma_addr_t phys_addr; /* PCI bus address of hardware ring */ + void __iomem *bar2_addr; /* address of BAR2 Queue registers */ + unsigned int bar2_qid; /* Queue ID for BAR2 Queue registers */ }; /* @@ -345,6 +351,7 @@ struct sge { struct adapter { /* PCI resources */ void __iomem *regs; + void __iomem *bar2; struct pci_dev *pdev; struct device 
*pdev_dev; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index ad88246a428e..aa74ec34a467 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2095,7 +2095,6 @@ static int adap_init0(struct adapter *adapter) unsigned int ethqsets; int err; u32 param, val = 0; - unsigned int chipid; /* * Wait for the device to become ready before proceeding ... @@ -2123,17 +2122,6 @@ static int adap_init0(struct adapter *adapter) return err; } - adapter->params.chip = 0; - switch (adapter->pdev->device >> 12) { - case CHELSIO_T4: - adapter->params.chip = CHELSIO_CHIP_CODE(CHELSIO_T4, 0); - break; - case CHELSIO_T5: - chipid = G_REV(t4_read_reg(adapter, A_PL_VF_REV)); - adapter->params.chip |= CHELSIO_CHIP_CODE(CHELSIO_T5, chipid); - break; - } - /* * Grab basic operational parameters. These will predominantly have * been set up by the Physical Function Driver or will be hard coded @@ -2594,6 +2582,27 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, goto err_free_adapter; } + /* Wait for the device to become ready before proceeding ... + */ + err = t4vf_prep_adapter(adapter); + if (err) { + dev_err(adapter->pdev_dev, "device didn't become ready:" + " err=%d\n", err); + goto err_unmap_bar0; + } + + /* For T5 and later we want to use the new BAR-based User Doorbells, + * so we need to map BAR2 here ... + */ + if (!is_t4(adapter->params.chip)) { + adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + if (!adapter->bar2) { + dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n"); + err = -ENOMEM; + goto err_unmap_bar0; + } + } /* * Initialize adapter level features. */ @@ -2786,6 +2795,10 @@ err_free_dev: } err_unmap_bar: + if (!is_t4(adapter->params.chip)) + iounmap(adapter->bar2); + +err_unmap_bar0: iounmap(adapter->regs); err_free_adapter: @@ -2856,6 +2869,8 @@ static void cxgb4vf_pci_remove(struct pci_dev *pdev) free_netdev(netdev); } iounmap(adapter->regs); + if (!is_t4(adapter->params.chip)) + iounmap(adapter->bar2); kfree(adapter); } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 045301d336bb..f7fd1317d996 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -525,19 +525,40 @@ static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl) { u32 val; - /* - * The SGE keeps track of its Producer and Consumer Indices in terms + /* The SGE keeps track of its Producer and Consumer Indices in terms * of Egress Queue Units so we can only tell it about integral numbers * of multiples of Free List Entries per Egress Queue Units ... */ if (fl->pend_cred >= FL_PER_EQ_UNIT) { - val = PIDX(fl->pend_cred / FL_PER_EQ_UNIT); - if (!is_t4(adapter->params.chip)) - val |= DBTYPE(1); + if (is_t4(adapter->params.chip)) + val = PIDX(fl->pend_cred / FL_PER_EQ_UNIT); + else + val = PIDX_T5(fl->pend_cred / FL_PER_EQ_UNIT) | + DBTYPE(1); + val |= DBPRIO(1); + + /* Make sure all memory writes to the Free List queue are + * committed before we tell the hardware about them. + */ wmb(); - t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL, - DBPRIO(1) | - QID(fl->cntxt_id) | val); + + /* If we don't have access to the new User Doorbell (T5+), use + * the old doorbell mechanism; otherwise use the new BAR2 + * mechanism. 
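On the probe side, mapping BAR2 with ioremap_wc() is what later lets the Write Combining Gather Buffer writes actually combine, and the error unwind grows a second label to match. A stripped-down sketch of that two-stage unwind follows; the function, label, and some_more_init() names are placeholders for the real probe's resources, not the driver's identifiers.

    static int probe_sketch(struct pci_dev *pdev, bool is_t5)
    {
            void __iomem *bar0, *bar2 = NULL;
            int err;

            bar0 = ioremap(pci_resource_start(pdev, 0),
                           pci_resource_len(pdev, 0));
            if (!bar0)
                    return -ENOMEM;

            if (is_t5) {
                    /* WC mapping lets doorbell gather-buffer writes merge */
                    bar2 = ioremap_wc(pci_resource_start(pdev, 2),
                                      pci_resource_len(pdev, 2));
                    if (!bar2) {
                            err = -ENOMEM;
                            goto err_unmap_bar0;
                    }
            }

            err = some_more_init(pdev);     /* stand-in for the rest of probe */
            if (err)
                    goto err_unmap_bar2;
            return 0;

    err_unmap_bar2:
            if (bar2)
                    iounmap(bar2);
    err_unmap_bar0:
            iounmap(bar0);
            return err;
    }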
+ */ + if (unlikely(fl->bar2_addr == NULL)) { + t4_write_reg(adapter, + T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL, + QID(fl->cntxt_id) | val); + } else { + writel(val | QID(fl->bar2_qid), + fl->bar2_addr + SGE_UDB_KDOORBELL); + + /* This Write memory Barrier will force the write to + * the User Doorbell area to be flushed. + */ + wmb(); + } fl->pend_cred %= FL_PER_EQ_UNIT; } } @@ -949,14 +970,74 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq, static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq, int n) { - /* - * Warn if we write doorbells with the wrong priority and write - * descriptors before telling HW. + /* Make sure that all writes to the TX Descriptors are committed + * before we tell the hardware about them. */ - WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO(1)); wmb(); - t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL, - QID(tq->cntxt_id) | PIDX(n)); + + /* If we don't have access to the new User Doorbell (T5+), use the old + * doorbell mechanism; otherwise use the new BAR2 mechanism. + */ + if (unlikely(tq->bar2_addr == NULL)) { + u32 val = PIDX(n); + + t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL, + QID(tq->cntxt_id) | val); + } else { + u32 val = PIDX_T5(n); + + /* T4 and later chips share the same PIDX field offset within + * the doorbell, but T5 and later shrank the field in order to + * gain a bit for Doorbell Priority. The field was absurdly + * large in the first place (14 bits) so we just use the T5 + * and later limits and warn if a Queue ID is too large. + */ + WARN_ON(val & DBPRIO(1)); + + /* If we're only writing a single Egress Unit and the BAR2 + * Queue ID is 0, we can use the Write Combining Doorbell + * Gather Buffer; otherwise we use the simple doorbell. + */ + if (n == 1 && tq->bar2_qid == 0) { + unsigned int index = (tq->pidx + ? (tq->pidx - 1) + : (tq->size - 1)); + __be64 *src = (__be64 *)&tq->desc[index]; + __be64 __iomem *dst = (__be64 *)(tq->bar2_addr + + SGE_UDB_WCDOORBELL); + unsigned int count = EQ_UNIT / sizeof(__be64); + + /* Copy the TX Descriptor in a tight loop in order to + * try to get it to the adapter in a single Write + * Combined transfer on the PCI-E Bus. If the Write + * Combine fails (say because of an interrupt, etc.) + * the hardware will simply take the last write as a + * simple doorbell write with a PIDX Increment of 1 + * and will fetch the TX Descriptor from memory via + * DMA. + */ + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } + } else + writel(val | QID(tq->bar2_qid), + tq->bar2_addr + SGE_UDB_KDOORBELL); + + /* This Write Memory Barrier will force the write to the User + * Doorbell area to be flushed. This is needed to prevent + * writes on different CPUs for the same queue from hitting + * the adapter out of order. This is required when some Work + * Requests take the Write Combine Gather Buffer path (user + * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some + * take the traditional path where we simply increment the + * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the + * hardware DMA read the actual Work Request. 
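The ordering argument in the comment above is worth restating in isolation: once some doorbells go through the Write Combining buffer and others through the plain PIDX register, only a barrier after each user-doorbell write keeps rings of the same queue from different CPUs ordered at the adapter. Reduced to its essentials, and assuming dst points into a write-combining BAR2 mapping (e.g. the SGE_UDB_WCDOORBELL area) and src at one TX descriptor:

    /* Minimal WC-copy sketch; count is EQ_UNIT / sizeof(__be64) in the
     * driver, i.e. one full Egress Unit.
     */
    static void wc_copy_doorbell(__be64 __iomem *dst, const __be64 *src,
                                 unsigned int count)
    {
            while (count--)
                    writeq(*src++, dst++);
            wmb();  /* flush the WC buffer; later doorbells must not pass this one */
    }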
+ */ + wmb(); + } } /** @@ -1782,6 +1863,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) unsigned int intr_params; struct sge_rspq *rspq = container_of(napi, struct sge_rspq, napi); int work_done = process_responses(rspq, budget); + u32 val; if (likely(work_done < budget)) { napi_complete(napi); @@ -1793,11 +1875,16 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) if (unlikely(work_done == 0)) rspq->unhandled_irqs++; - t4_write_reg(rspq->adapter, - T4VF_SGE_BASE_ADDR + SGE_VF_GTS, - CIDXINC(work_done) | - INGRESSQID((u32)rspq->cntxt_id) | - SEINTARM(intr_params)); + val = CIDXINC(work_done) | SEINTARM(intr_params); + if (is_t4(rspq->adapter->params.chip)) { + t4_write_reg(rspq->adapter, + T4VF_SGE_BASE_ADDR + SGE_VF_GTS, + val | INGRESSQID((u32)rspq->cntxt_id)); + } else { + writel(val | INGRESSQID(rspq->bar2_qid), + rspq->bar2_addr + SGE_UDB_GTS); + wmb(); + } return work_done; } @@ -1822,6 +1909,7 @@ static unsigned int process_intrq(struct adapter *adapter) struct sge *s = &adapter->sge; struct sge_rspq *intrq = &s->intrq; unsigned int work_done; + u32 val; spin_lock(&adapter->sge.intrq_lock); for (work_done = 0; ; work_done++) { @@ -1887,10 +1975,15 @@ static unsigned int process_intrq(struct adapter *adapter) rspq_next(intrq); } - t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, - CIDXINC(work_done) | - INGRESSQID(intrq->cntxt_id) | - SEINTARM(intrq->intr_params)); + val = CIDXINC(work_done) | SEINTARM(intrq->intr_params); + if (is_t4(adapter->params.chip)) + t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, + val | INGRESSQID(intrq->cntxt_id)); + else { + writel(val | INGRESSQID(intrq->bar2_qid), + intrq->bar2_addr + SGE_UDB_GTS); + wmb(); + } spin_unlock(&adapter->sge.intrq_lock); @@ -2036,6 +2129,35 @@ static void sge_tx_timer_cb(unsigned long data) } /** + * bar2_address - return the BAR2 address for an SGE Queue's Registers + * @adapter: the adapter + * @qid: the SGE Queue ID + * @qtype: the SGE Queue Type (Egress or Ingress) + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 address for the SGE Queue Registers associated with + * @qid. If BAR2 SGE Registers aren't available, returns NULL. Also + * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE + * Queue Registers. If the BAR2 Queue ID is 0, then "Inferred Queue ID" + * Registers are supported (e.g. the Write Combining Doorbell Buffer). 
+ */ +static void __iomem *bar2_address(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + unsigned int *pbar2_qid) +{ + u64 bar2_qoffset; + int ret; + + ret = t4_bar2_sge_qregs(adapter, qid, qtype, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + return adapter->bar2 + bar2_qoffset; +} + +/** * t4vf_sge_alloc_rxq - allocate an SGE RX Queue * @adapter: the adapter * @rspq: pointer to to the new rxq's Response Queue to be filled in @@ -2166,6 +2288,10 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, rspq->gen = 1; rspq->next_intr_params = rspq->intr_params; rspq->cntxt_id = be16_to_cpu(rpl.iqid); + rspq->bar2_addr = bar2_address(adapter, + rspq->cntxt_id, + T4_BAR2_QTYPE_INGRESS, + &rspq->bar2_qid); rspq->abs_id = be16_to_cpu(rpl.physiqid); rspq->size--; /* subtract status entry */ rspq->adapter = adapter; @@ -2184,6 +2310,15 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, fl->alloc_failed = 0; fl->large_alloc_failed = 0; fl->starving = 0; + + /* Note, we must initialize the BAR2 Free List User Doorbell + * information before refilling the Free List! + */ + fl->bar2_addr = bar2_address(adapter, + fl->cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &fl->bar2_qid); + refill_fl(adapter, fl, fl_cap(fl), GFP_KERNEL); } @@ -2296,6 +2431,10 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, txq->q.pidx = 0; txq->q.stat = (void *)&txq->q.desc[txq->q.size]; txq->q.cntxt_id = FW_EQ_ETH_CMD_EQID_G(be32_to_cpu(rpl.eqid_pkd)); + txq->q.bar2_addr = bar2_address(adapter, + txq->q.cntxt_id, + T4_BAR2_QTYPE_EGRESS, + &txq->q.bar2_qid); txq->q.abs_id = FW_EQ_ETH_CMD_PHYSEQID_G(be32_to_cpu(rpl.physeqid_pkd)); txq->txq = devq; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index a608c6657d63..8d3237f5e364 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -135,9 +135,12 @@ struct dev_params { struct sge_params { u32 sge_control; /* padding, boundaries, lengths, etc. */ u32 sge_control2; /* T5: more of the same */ - u32 sge_host_page_size; /* RDMA page sizes */ - u32 sge_queues_per_page; /* RDMA queues/page */ - u32 sge_user_mode_limits; /* limits for BAR2 user mode accesses */ + u32 sge_host_page_size; /* PF0-7 page sizes */ + u32 sge_egress_queues_per_page; /* PF0-7 egress queues/page */ + u32 sge_ingress_queues_per_page;/* PF0-7 ingress queues/page */ + u32 sge_vf_hps; /* host page size for our vf */ + u32 sge_vf_eq_qpp; /* egress queues/page for our VF */ + u32 sge_vf_iq_qpp; /* ingress queues/page for our VF */ u32 sge_fl_buffer_size[16]; /* free list buffer sizes */ u32 sge_ingress_rx_threshold; /* RX counter interrupt threshold[4] */ u32 sge_congestion_control; /* congestion thresholds, etc. 
*/ @@ -267,6 +270,8 @@ static inline int t4vf_wr_mbox_ns(struct adapter *adapter, const void *cmd, return t4vf_wr_mbox_core(adapter, cmd, size, rpl, false); } +#define CHELSIO_PCI_ID_VER(dev_id) ((dev_id) >> 12) + static inline int is_t4(enum chip_type chip) { return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4; @@ -278,6 +283,13 @@ int t4vf_port_init(struct adapter *, int); int t4vf_fw_reset(struct adapter *); int t4vf_set_params(struct adapter *, unsigned int, const u32 *, const u32 *); +enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; +int t4_bar2_sge_qregs(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid); + int t4vf_get_sge_params(struct adapter *); int t4vf_get_vpd_params(struct adapter *); int t4vf_get_dev_params(struct adapter *); @@ -309,5 +321,6 @@ int t4vf_iq_free(struct adapter *, unsigned int, unsigned int, unsigned int, int t4vf_eth_eq_free(struct adapter *, unsigned int); int t4vf_handle_fw_rpl(struct adapter *, const __be64 *); +int t4vf_prep_adapter(struct adapter *); #endif /* __T4VF_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 624a213dea87..02e8833b7797 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -430,6 +430,95 @@ int t4vf_set_params(struct adapter *adapter, unsigned int nparams, } /** + * t4_bar2_sge_qregs - return BAR2 SGE Queue register information + * @adapter: the adapter + * @qid: the Queue ID + * @qtype: the Ingress or Egress type for @qid + * @pbar2_qoffset: BAR2 Queue Offset + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 SGE Queue Registers information associated with the + * indicated Absolute Queue ID. These are passed back in return value + * pointers. @qtype should be T4_BAR2_QTYPE_EGRESS for Egress Queue + * and T4_BAR2_QTYPE_INGRESS for Ingress Queues. + * + * This may return an error which indicates that BAR2 SGE Queue + * registers aren't available. If an error is not returned, then the + * following values are returned: + * + * *@pbar2_qoffset: the BAR2 Offset of the @qid Registers + * *@pbar2_qid: the BAR2 SGE Queue ID or 0 of @qid + * + * If the returned BAR2 Queue ID is 0, then BAR2 SGE registers which + * require the "Inferred Queue ID" ability may be used. E.g. the + * Write Combining Doorbell Buffer. If the BAR2 Queue ID is not 0, + * then these "Inferred Queue ID" registers may not be used. + */ +int t4_bar2_sge_qregs(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid) +{ + unsigned int page_shift, page_size, qpp_shift, qpp_mask; + u64 bar2_page_offset, bar2_qoffset; + unsigned int bar2_qid, bar2_qid_offset, bar2_qinferred; + + /* T4 doesn't support BAR2 SGE Queue registers. + */ + if (is_t4(adapter->params.chip)) + return -EINVAL; + + /* Get our SGE Page Size parameters. + */ + page_shift = adapter->params.sge.sge_vf_hps + 10; + page_size = 1 << page_shift; + + /* Get the right Queues per Page parameters for our Queue. + */ + qpp_shift = (qtype == T4_BAR2_QTYPE_EGRESS + ? adapter->params.sge.sge_vf_eq_qpp + : adapter->params.sge.sge_vf_iq_qpp); + qpp_mask = (1 << qpp_shift) - 1; + + /* Calculate the basics of the BAR2 SGE Queue register area: + * o The BAR2 page the Queue registers will be in. + * o The BAR2 Queue ID. + * o The BAR2 Queue ID Offset into the BAR2 page.
+ */ + bar2_page_offset = ((qid >> qpp_shift) << page_shift); + bar2_qid = qid & qpp_mask; + bar2_qid_offset = bar2_qid * SGE_UDB_SIZE; + + /* If the BAR2 Queue ID Offset is less than the Page Size, then the + * hardware will infer the Absolute Queue ID simply from the writes to + * the BAR2 Queue ID Offset within the BAR2 Page (and we need to use a + * BAR2 Queue ID of 0 for those writes). Otherwise, we'll simply + * write to the first BAR2 SGE Queue Area within the BAR2 Page with + * the BAR2 Queue ID and the hardware will infer the Absolute Queue ID + * from the BAR2 Page and BAR2 Queue ID. + * + * One important consequence of this is that some BAR2 SGE registers + * have a "Queue ID" field and we can write the BAR2 SGE Queue ID + * there. But other registers synthesize the SGE Queue ID purely + * from the writes to the registers -- the Write Combined Doorbell + * Buffer is a good example. These BAR2 SGE Registers are only + * available for those BAR2 SGE Register areas where the SGE Absolute + * Queue ID can be inferred from simple writes. + */ + bar2_qoffset = bar2_page_offset; + bar2_qinferred = (bar2_qid_offset < page_size); + if (bar2_qinferred) { + bar2_qoffset += bar2_qid_offset; + bar2_qid = 0; + } + + *pbar2_qoffset = bar2_qoffset; + *pbar2_qid = bar2_qid; + return 0; +} + +/** * t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters * @adapter: the adapter * @@ -501,6 +590,55 @@ int t4vf_get_sge_params(struct adapter *adapter) sge_params->sge_ingress_rx_threshold = vals[0]; sge_params->sge_congestion_control = vals[1]; + /* For T5 and later we want to use the new BAR2 Doorbells. + * Unfortunately, older firmware didn't allow this register to be + * read. + */ + if (!is_t4(adapter->params.chip)) { + u32 whoami; + unsigned int pf, s_hps, s_qpp; + + params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) | + FW_PARAMS_PARAM_XYZ_V( + SGE_EGRESS_QUEUES_PER_PAGE_VF_A)); + params[1] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) | + FW_PARAMS_PARAM_XYZ_V( + SGE_INGRESS_QUEUES_PER_PAGE_VF_A)); + v = t4vf_query_params(adapter, 2, params, vals); + if (v != FW_SUCCESS) { + dev_warn(adapter->pdev_dev, + "Unable to get VF SGE Queues/Page; " + "probably old firmware.\n"); + return v; + } + sge_params->sge_egress_queues_per_page = vals[0]; + sge_params->sge_ingress_queues_per_page = vals[1]; + + /* We need the Queues/Page for our VF. This is based on the + * PF from which we're instantiated and is indexed in the + * register we just read. Do it once here so other code in + * the driver can just use it. + */ + whoami = t4_read_reg(adapter, + T4VF_PL_BASE_ADDR + A_PL_VF_WHOAMI); + pf = SOURCEPF_GET(whoami); + + s_hps = (HOSTPAGESIZEPF0_S + + (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf); + sge_params->sge_vf_hps = + ((sge_params->sge_host_page_size >> s_hps) + & HOSTPAGESIZEPF0_M); + + s_qpp = (QUEUESPERPAGEPF0_S + + (QUEUESPERPAGEPF1_S - QUEUESPERPAGEPF0_S) * pf); + sge_params->sge_vf_eq_qpp = + ((sge_params->sge_egress_queues_per_page >> s_qpp) + & QUEUESPERPAGEPF0_MASK); + sge_params->sge_vf_iq_qpp = + ((sge_params->sge_ingress_queues_per_page >> s_qpp) + & QUEUESPERPAGEPF0_MASK); + } + return 0; } @@ -1420,3 +1558,38 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) } return 0; } + +/** + * t4vf_prep_adapter - prepare SW and HW for operation + * @adapter: the adapter + */ +int t4vf_prep_adapter(struct adapter *adapter) +{ + int err; + unsigned int chipid; + + /* Wait for the device to become ready before proceeding ...
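Both the host-page-size and queues-per-page lookups in t4vf_get_sge_params() follow one register layout: a 32-bit register packs a 4-bit field per PF, so the PF index picks a shift and a shared mask extracts the field. A small userspace rendering of that pattern follows; the register value is made up for illustration.

    #include <stdio.h>

    int main(void)
    {
            const unsigned int field_width = 4;    /* 4 bits per PF */
            const unsigned int mask = 0xf;         /* HOSTPAGESIZEPF0_M etc. */
            unsigned int reg = 0x32103210;         /* sample packed register */

            for (unsigned int pf = 0; pf < 8; pf++) {
                    /* PF0_S + (PF1_S - PF0_S) * pf in the driver's terms */
                    unsigned int shift = field_width * pf;
                    printf("pf%u -> %u\n", pf, (reg >> shift) & mask);
            }
            return 0;
    }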
+ */ + err = t4vf_wait_dev_ready(adapter); + if (err) + return err; + + /* Default port and clock for debugging in case we can't reach + * firmware. + */ + adapter->params.nports = 1; + adapter->params.vfres.pmask = 1; + adapter->params.vpd.cclk = 50000; + + adapter->params.chip = 0; + switch (CHELSIO_PCI_ID_VER(adapter->pdev->device)) { + case CHELSIO_T4: + adapter->params.chip |= CHELSIO_CHIP_CODE(CHELSIO_T4, 0); + break; + + case CHELSIO_T5: + chipid = G_REV(t4_read_reg(adapter, A_PL_VF_REV)); + adapter->params.chip |= CHELSIO_CHIP_CODE(CHELSIO_T5, chipid); + break; + } + + return 0; +} diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index cf8b6ff21613..badff181e719 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -995,7 +995,6 @@ static void de4x5_dbg_mii(struct net_device *dev, int k); static void de4x5_dbg_media(struct net_device *dev); static void de4x5_dbg_srom(struct de4x5_srom *p); static void de4x5_dbg_rx(struct sk_buff *skb, int len); -static int de4x5_strncmp(char *a, char *b, int n); static int dc21041_infoleaf(struct net_device *dev); static int dc21140_infoleaf(struct net_device *dev); static int dc21142_infoleaf(struct net_device *dev); @@ -4102,8 +4101,7 @@ get_hw_addr(struct net_device *dev) } /* -** Test for enet addresses in the first 32 bytes. The built-in strncmp -** didn't seem to work here...? +** Test for enet addresses in the first 32 bytes. */ static int de4x5_bad_srom(struct de4x5_private *lp) @@ -4111,8 +4109,8 @@ de4x5_bad_srom(struct de4x5_private *lp) int i, status = 0; for (i = 0; i < ARRAY_SIZE(enet_det); i++) { - if (!de4x5_strncmp((char *)&lp->srom, (char *)&enet_det[i], 3) && - !de4x5_strncmp((char *)&lp->srom+0x10, (char *)&enet_det[i], 3)) { + if (!memcmp(&lp->srom, &enet_det[i], 3) && + !memcmp((char *)&lp->srom+0x10, &enet_det[i], 3)) { if (i == 0) { status = SMC; } else if (i == 1) { @@ -4125,18 +4123,6 @@ de4x5_bad_srom(struct de4x5_private *lp) return status; } -static int -de4x5_strncmp(char *a, char *b, int n) -{ - int ret=0; - - for (;n && !ret; n--) { - ret = *a++ - *b++; - } - - return ret; -} - static void srom_repair(struct net_device *dev, int card) { diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index c8205606c775..50a00777228e 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2265,7 +2265,7 @@ static int __init dmfe_init_module(void) static void __exit dmfe_cleanup_module(void) { - DMFE_DBUG(0, "dmfe_clean_module() ", debug); + DMFE_DBUG(0, "dmfe_cleanup_module() ", debug); pci_unregister_driver(&dmfe_driver); } diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 4061f9b22812..1c5916b13778 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1837,7 +1837,7 @@ static int __init uli526x_init_module(void) static void __exit uli526x_cleanup_module(void) { - ULI526X_DBUG(0, "uli526x_clean_module() ", debug); + ULI526X_DBUG(0, "uli526x_cleanup_module() ", debug); pci_unregister_driver(&uli526x_driver); } diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index e42a791c1835..73a500ccbf69 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1171,7 +1171,8 @@ static u32 be_get_rxfh_key_size(struct net_device *netdev) return 
RSS_HASH_KEY_LEN; } -static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) +static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey, + u8 *hfunc) { struct be_adapter *adapter = netdev_priv(netdev); int i; @@ -1185,16 +1186,23 @@ static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) if (hkey) memcpy(hkey, rss->rss_hkey, RSS_HASH_KEY_LEN); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + return 0; } static int be_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *hkey) + const u8 *hkey, const u8 hfunc) { int rc = 0, i, j; struct be_adapter *adapter = netdev_priv(netdev); u8 rsstable[RSS_INDIR_TABLE_LEN]; + /* We do not allow change in unsupported parameters */ + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + if (indir) { struct be_rx_obj *rxo; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e0ab7673afe7..9461ad8d837b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1017,9 +1017,8 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, * to pad short packets (<= 32 bytes) to a 36-byte length. */ if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { - if (skb_padto(skb, 36)) + if (skb_put_padto(skb, 36)) return NULL; - skb->len = 36; } if (BEx_chip(adapter) || lancer_chip(adapter)) { diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c index 76a6e0c77d69..ae6e30d39f0f 100644 --- a/drivers/net/ethernet/hp/hp100.c +++ b/drivers/net/ethernet/hp/hp100.c @@ -490,7 +490,8 @@ static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus, eid = hp100_read_id(ioaddr); if (eid == NULL) { /* bad checksum? */ - printk(KERN_WARNING "hp100_probe: bad ID checksum at base port 0x%x\n", ioaddr); + printk(KERN_WARNING "%s: bad ID checksum at base port 0x%x\n", + __func__, ioaddr); goto out2; } @@ -498,7 +499,9 @@ static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus, for (i = uc = 0; i < 7; i++) uc += hp100_inb(LAN_ADDR + i); if (uc != 0xff) { - printk(KERN_WARNING "hp100_probe: bad lan address checksum at port 0x%x)\n", ioaddr); + printk(KERN_WARNING + "%s: bad lan address checksum at port 0x%x)\n", + __func__, ioaddr); err = -EIO; goto out2; } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 24f3986cfae2..862d1989ae1c 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3136,12 +3136,8 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * packets may get corrupted during padding by HW. * To WA this issue, pad all small packets manually. 
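This padding workaround is representative of a conversion repeated across the series: the open-coded pad-and-extend sequence becomes a single skb_put_padto() call (or eth_skb_pad() for the 60-byte Ethernet minimum). Both forms below free the skb on allocation failure, so the transmit handler just returns NETDEV_TX_OK; MIN_LEN is a placeholder for the driver-specific minimum (ETH_ZLEN here, 17 or 36 bytes elsewhere in this series).

    /* Before: open-coded, as removed in the hunks above and below */
    if (skb->len < MIN_LEN) {
            if (skb_pad(skb, MIN_LEN - skb->len))
                    return NETDEV_TX_OK;    /* skb was freed by skb_pad() */
            skb->len = MIN_LEN;
            skb_set_tail_pointer(skb, MIN_LEN);
    }

    /* After: one helper zero-fills the tail and updates len/tail together */
    if (skb_put_padto(skb, MIN_LEN))
            return NETDEV_TX_OK;            /* skb was freed on failure */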
*/ - if (skb->len < ETH_ZLEN) { - if (skb_pad(skb, ETH_ZLEN - skb->len)) - return NETDEV_TX_OK; - skb->len = ETH_ZLEN; - skb_set_tail_pointer(skb, ETH_ZLEN); - } + if (eth_skb_pad(skb)) + return NETDEV_TX_OK; mss = skb_shinfo(skb)->gso_size; /* The controller does a simple calculation to diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 370cfa275ddb..88936aa0029d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5554,12 +5554,8 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, /* The minimum packet size with TCTL.PSP set is 17 bytes so * pad skb in order to meet this minimum size requirement */ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; mss = skb_shinfo(skb)->gso_size; if (mss) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 2d04464e6aa3..651f53bc7376 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -916,11 +916,15 @@ static u32 fm10k_get_rssrk_size(struct net_device *netdev) return FM10K_RSSRK_SIZE * FM10K_RSSRK_ENTRIES_PER_REG; } -static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key) +static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct fm10k_intfc *interface = netdev_priv(netdev); int i, err; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + err = fm10k_get_reta(netdev, indir); if (err || !key) return err; @@ -932,12 +936,16 @@ static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key) } static int fm10k_set_rssh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_hw *hw = &interface->hw; int i, err; + /* We do not allow change in unsupported parameters */ + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + err = fm10k_set_reta(netdev, indir); if (err || !key) return err; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 73457ede53ec..91516aed373e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -578,14 +578,9 @@ static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring, if (skb_is_nonlinear(skb)) fm10k_pull_tail(rx_ring, rx_desc, skb); - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 464342a35214..fc50f6461b13 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -87,7 +87,7 @@ #define I40E_MINIMUM_FCOE 1 /* minimum number of QPs for FCoE */ #endif /* I40E_FCOE */ #define I40E_MAX_AQ_BUF_SIZE 4096 -#define I40E_AQ_LEN 32 +#define I40E_AQ_LEN 128 #define I40E_AQ_WORK_LIMIT 16 #define I40E_MAX_USER_PRIORITY 8 #define I40E_DEFAULT_MSG_ENABLE 4 diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c 
b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 5bb4914bda56..35fa09a2c162 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -51,7 +51,7 @@ static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc) static void i40e_adminq_init_regs(struct i40e_hw *hw) { /* set head and tail registers in our local struct */ - if (hw->mac.type == I40E_MAC_VF) { + if (i40e_is_vf(hw)) { hw->aq.asq.tail = I40E_VF_ATQT1; hw->aq.asq.head = I40E_VF_ATQH1; hw->aq.asq.len = I40E_VF_ATQLEN1; @@ -956,9 +956,6 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK); if (ntu == ntc) { /* nothing to do - shouldn't need to update ring's values */ - i40e_debug(hw, - I40E_DEBUG_AQ_MESSAGE, - "AQRX: Queue is empty.\n"); ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK; goto clean_arq_element_out; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index b601b3cfd92b..3d741ee99a2c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -550,7 +550,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = { i40e_status i40e_init_shared_code(struct i40e_hw *hw) { i40e_status status = 0; - u32 reg; + u32 port, ari, func_rid; i40e_set_mac_type(hw); @@ -563,18 +563,17 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw) hw->phy.get_link_info = true; - /* Determine port number */ - reg = rd32(hw, I40E_PFGEN_PORTNUM); - reg = ((reg & I40E_PFGEN_PORTNUM_PORT_NUM_MASK) >> - I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT); - hw->port = (u8)reg; - - /* Determine the PF number based on the PCI fn */ - reg = rd32(hw, I40E_GLPCI_CAPSUP); - if (reg & I40E_GLPCI_CAPSUP_ARI_EN_MASK) - hw->pf_id = (u8)((hw->bus.device << 3) | hw->bus.func); + /* Determine port number and PF number*/ + port = (rd32(hw, I40E_PFGEN_PORTNUM) & I40E_PFGEN_PORTNUM_PORT_NUM_MASK) + >> I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT; + hw->port = (u8)port; + ari = (rd32(hw, I40E_GLPCI_CAPSUP) & I40E_GLPCI_CAPSUP_ARI_EN_MASK) >> + I40E_GLPCI_CAPSUP_ARI_EN_SHIFT; + func_rid = rd32(hw, I40E_PF_FUNC_RID); + if (ari) + hw->pf_id = (u8)(func_rid & 0xff); else - hw->pf_id = (u8)hw->bus.func; + hw->pf_id = (u8)(func_rid & 0x7); status = i40e_init_nvm(hw); return status; @@ -791,7 +790,7 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) } #define I40E_PF_RESET_WAIT_COUNT_A0 200 -#define I40E_PF_RESET_WAIT_COUNT 100 +#define I40E_PF_RESET_WAIT_COUNT 110 /** * i40e_pf_reset - Reset the PF * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 3a3c237b76d4..433a55886ad2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -773,7 +773,7 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, { struct i40e_tx_desc *txd; union i40e_rx_desc *rxd; - struct i40e_ring ring; + struct i40e_ring *ring; struct i40e_vsi *vsi; int i; @@ -792,29 +792,32 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, vsi_seid); return; } - if (is_rx_ring) - ring = *vsi->rx_rings[ring_id]; - else - ring = *vsi->tx_rings[ring_id]; + + ring = kmemdup(is_rx_ring + ? 
vsi->rx_rings[ring_id] : vsi->tx_rings[ring_id], + sizeof(*ring), GFP_KERNEL); + if (!ring) + return; + if (cnt == 2) { dev_info(&pf->pdev->dev, "vsi = %02i %s ring = %02i\n", vsi_seid, is_rx_ring ? "rx" : "tx", ring_id); - for (i = 0; i < ring.count; i++) { + for (i = 0; i < ring->count; i++) { if (!is_rx_ring) { - txd = I40E_TX_DESC(&ring, i); + txd = I40E_TX_DESC(ring, i); dev_info(&pf->pdev->dev, " d[%03i] = 0x%016llx 0x%016llx\n", i, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { - rxd = I40E_RX_DESC(&ring, i); + rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, " d[%03i] = 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { - rxd = I40E_RX_DESC(&ring, i); + rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, " d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, @@ -823,26 +826,26 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } } } else if (cnt == 3) { - if (desc_n >= ring.count || desc_n < 0) { + if (desc_n >= ring->count || desc_n < 0) { dev_info(&pf->pdev->dev, "descriptor %d not found\n", desc_n); + kfree(ring); return; } if (!is_rx_ring) { - txd = I40E_TX_DESC(&ring, desc_n); + txd = I40E_TX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, "vsi = %02i tx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { - rxd = I40E_RX_DESC(&ring, desc_n); + rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { - rxd = I40E_RX_DESC(&ring, desc_n); + rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, @@ -852,6 +855,7 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } else { dev_info(&pf->pdev->dev, "dump desc rx/tx <vsi_seid> <ring_id> [<desc_n>]\n"); } + kfree(ring); } /** @@ -1493,7 +1497,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!desc) goto command_write_done; cnt = sscanf(&cmd_buf[11], - "%hx %hx %hx %hx %x %x %x %x %x %x", + "%hi %hi %hi %hi %i %i %i %i %i %i", &desc->flags, &desc->opcode, &desc->datalen, &desc->retval, &desc->cookie_high, &desc->cookie_low, @@ -1541,7 +1545,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!desc) goto command_write_done; cnt = sscanf(&cmd_buf[20], - "%hx %hx %hx %hx %x %x %x %x %x %x %hd", + "%hi %hi %hi %hi %i %i %i %i %i %i %hi", &desc->flags, &desc->opcode, &desc->datalen, &desc->retval, &desc->cookie_high, &desc->cookie_low, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index b2402851a9bd..fcd815dc7d3c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -40,8 +40,9 @@ struct i40e_stats { .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ .stat_offset = offsetof(_type, _stat) \ } + #define I40E_NETDEV_STAT(_net_stat) \ - I40E_STAT(struct net_device_stats, #_net_stat, _net_stat) + I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat) #define I40E_PF_STAT(_name, _stat) \ I40E_STAT(struct i40e_pf, _name, _stat) #define I40E_VSI_STAT(_name, _stat) \ @@ -1325,6 +1326,10 @@ static int i40e_get_ts_info(struct net_device
*dev, { struct i40e_pf *pf = i40e_netdev_to_pf(dev); + /* only report HW timestamping if PTP is enabled */ + if (!(pf->flags & I40E_FLAG_PTP)) + return ethtool_op_get_ts_info(dev, info); + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE | diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9ae4270db0b3..0a7ea4c5f9d3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -38,8 +38,8 @@ static const char i40e_driver_string[] = #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 -#define DRV_VERSION_MINOR 1 -#define DRV_VERSION_BUILD 23 +#define DRV_VERSION_MINOR 2 +#define DRV_VERSION_BUILD 2 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -4870,9 +4870,11 @@ int i40e_vsi_open(struct i40e_vsi *vsi) goto err_set_queues; } else if (vsi->type == I40E_VSI_FDIR) { - snprintf(int_name, sizeof(int_name) - 1, "%s-fdir", - dev_driver_string(&pf->pdev->dev)); + snprintf(int_name, sizeof(int_name) - 1, "%s-%s-fdir", + dev_driver_string(&pf->pdev->dev), + dev_name(&pf->pdev->dev)); err = i40e_vsi_request_irq(vsi, int_name); + } else { err = -EINVAL; goto err_setup_rx; @@ -5450,8 +5452,6 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up) break; case I40E_VSI_SRIOV: - break; - case I40E_VSI_VMDQ2: case I40E_VSI_CTRL: case I40E_VSI_MIRROR: @@ -5888,6 +5888,9 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb) if (ret) goto end_reconstitute; + /* Enable LB mode for the main VSI now that it is on a VEB */ + i40e_enable_pf_switch_lb(pf); + /* create the remaining VSIs attached to this VEB */ for (v = 0; v < pf->num_alloc_vsi; v++) { if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi) @@ -7797,6 +7800,10 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = 0x1; /* regular data port */ ctxt.flags = I40E_AQ_VSI_TYPE_PF; + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break; @@ -8182,7 +8189,15 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); - + if (veb) { + if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) { + dev_info(&vsi->back->pdev->dev, + "%s: New VSI creation error, uplink seid of LAN VSI expected.\n", + __func__); + return NULL; + } + i40e_enable_pf_switch_lb(pf); + } for (i = 0; i < I40E_MAX_VEB && !veb; i++) { if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid) veb = pf->veb[i]; @@ -9143,9 +9158,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE; hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE; pf->adminq_work_limit = I40E_AQ_WORK_LIMIT; + snprintf(pf->misc_int_name, sizeof(pf->misc_int_name) - 1, - "%s-pf%d:misc", - dev_driver_string(&pf->pdev->dev), pf->hw.pf_id); + "%s-%s:misc", + dev_driver_string(&pf->pdev->dev), dev_name(&pdev->dev)); err = i40e_init_shared_code(hw); if (err) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index f91510370c35..6d1ec926aa37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ 
b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -424,6 +424,9 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr) { struct hwtstamp_config *config = &pf->tstamp_config; + if (!(pf->flags & I40E_FLAG_PTP)) + return -EOPNOTSUPP; + return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? -EFAULT : 0; } @@ -444,22 +447,12 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, struct hwtstamp_config *config) { struct i40e_hw *hw = &pf->hw; - u32 pf_id, tsyntype, regval; + u32 tsyntype, regval; /* Reserved for future extensions. */ if (config->flags) return -EINVAL; - /* Confirm that 1588 is supported on this PF. */ - pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >> - I40E_PRTTSYN_CTL0_PF_ID_SHIFT; - if (hw->pf_id != pf_id) { - dev_err(&pf->pdev->dev, - "PF %d attempted to control timestamp mode on port %d, which is owned by PF %d\n", - hw->pf_id, hw->port, pf_id); - return -EPERM; - } - switch (config->tx_type) { case HWTSTAMP_TX_OFF: pf->ptp_tx = false; @@ -562,6 +555,9 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr) struct hwtstamp_config config; int err; + if (!(pf->flags & I40E_FLAG_PTP)) + return -EOPNOTSUPP; + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -631,8 +627,22 @@ void i40e_ptp_init(struct i40e_pf *pf) { struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev; struct i40e_hw *hw = &pf->hw; + u32 pf_id; long err; + /* Only one PF is assigned to control 1588 logic per port. Do not + * enable any support for PFs not assigned via PRTTSYN_CTL0.PF_ID + */ + pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >> + I40E_PRTTSYN_CTL0_PF_ID_SHIFT; + if (hw->pf_id != pf_id) { + pf->flags &= ~I40E_FLAG_PTP; + dev_info(&pf->pdev->dev, "%s: PTP not supported on %s\n", + __func__, + netdev->name); + return; + } + /* we have to initialize the lock first, since we can't control * when the user will enter the PHC device entry points */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3195d82e4942..04b441460bbd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2399,12 +2399,8 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* hardware can't handle really short frames, hardware padding works * beyond this point */ - if (unlikely(skb->len < I40E_MIN_TX_LEN)) { - if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len)) - return NETDEV_TX_OK; - skb->len = I40E_MIN_TX_LEN; - skb_set_tail_pointer(skb, I40E_MIN_TX_LEN); - } + if (skb_put_padto(skb, I40E_MIN_TX_LEN)) + return NETDEV_TX_OK; return i40e_xmit_frame_ring(skb, tx_ring); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index c85214373a51..3904dd8ea1f1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -482,6 +482,8 @@ struct i40e_hw { u32 debug_mask; }; +#define i40e_is_vf(_hw) ((_hw)->mac.type == I40E_MAC_VF) + struct i40e_driver_version { u8 major_version; u8 minor_version; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index 70951d2edcad..61dd1b187624 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -79,6 +79,7 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_VIRTCHNL_OP_GET_STATS, I40E_VIRTCHNL_OP_FCOE, + 
I40E_VIRTCHNL_OP_CONFIG_RSS, /* PF sends status change events to vfs using * the following op. */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 668d860275d6..5bae89550657 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -707,7 +707,6 @@ complete_reset: wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE); i40e_flush(hw); } -#ifdef CONFIG_PCI_IOV /** * i40e_enable_pf_switch_lb @@ -715,7 +714,7 @@ complete_reset: * * enable switch loop back or die - no point in a return value **/ -static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) +void i40e_enable_pf_switch_lb(struct i40e_pf *pf) { struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_vsi_context ctxt; @@ -742,7 +741,6 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) __func__, vsi->back->hw.aq.asq_last_status); } } -#endif /** * i40e_disable_pf_switch_lb diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 0adc61e1052d..9452f5247cff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -126,5 +126,6 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable); void i40e_vc_notify_link_state(struct i40e_pf *pf); void i40e_vc_notify_reset(struct i40e_pf *pf); +void i40e_enable_pf_switch_lb(struct i40e_pf *pf); #endif /* _I40E_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index d7e446f3e7a4..16989946c52a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -49,7 +49,7 @@ static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc) static void i40e_adminq_init_regs(struct i40e_hw *hw) { /* set head and tail registers in our local struct */ - if (hw->mac.type == I40E_MAC_VF) { + if (i40e_is_vf(hw)) { hw->aq.asq.tail = I40E_VF_ATQT1; hw->aq.asq.head = I40E_VF_ATQH1; hw->aq.asq.len = I40E_VF_ATQLEN1; @@ -905,9 +905,6 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK); if (ntu == ntc) { /* nothing to do - shouldn't need to update ring's values */ - i40e_debug(hw, - I40E_DEBUG_AQ_MESSAGE, - "AQRX: Queue is empty.\n"); ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK; goto clean_arq_element_out; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 8fe34fc5c469..77abe17217f9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -476,6 +476,8 @@ struct i40e_hw { u32 debug_mask; }; +#define i40e_is_vf(_hw) ((_hw)->mac.type == I40E_MAC_VF) + struct i40e_driver_version { u8 major_version; u8 minor_version; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index cd18d5689006..e0c8208138f4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -79,6 +79,7 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_VIRTCHNL_OP_GET_STATS, I40E_VIRTCHNL_OP_FCOE, + I40E_VIRTCHNL_OP_CONFIG_RSS, /* PF sends status change events to vfs using * the following op. 
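The i40evf and igb ethtool hunks that follow, like the earlier be2net and fm10k ones, all adapt to the same ethtool_ops change: get_rxfh()/set_rxfh() gained an hfunc argument describing the RSS hash function. For a device fixed to Toeplitz, the expected handling reduces to a couple of lines on each side; the foo_* names below are placeholders, not any driver's actual functions.

    /* Sketch for a Toeplitz-only device. */
    static int foo_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
                            u8 *hfunc)
    {
            if (hfunc)
                    *hfunc = ETH_RSS_HASH_TOP;      /* report Toeplitz */
            /* ... fill indir/key if requested ... */
            return 0;
    }

    static int foo_set_rxfh(struct net_device *dev, const u32 *indir,
                            const u8 *key, const u8 hfunc)
    {
            /* refuse anything but "no change" or Toeplitz */
            if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
                    return -EOPNOTSUPP;
            /* ... program indir/key ... */
            return 0;
    }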
*/ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 69a269b23be6..69b97bac182c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -627,13 +627,19 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev) * * Reads the indirection table directly from the hardware. Always returns 0. **/ -static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) +static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; u32 hlut_val; int i, j; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { hlut_val = rd32(hw, I40E_VFQF_HLUT(i)); indir[j++] = hlut_val & 0xff; @@ -654,13 +660,20 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) * returns 0 after programming the table. **/ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; u32 hlut_val; int i, j; + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { hlut_val = indir[j++]; hlut_val |= indir[j++] << 8; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 8e01009695da..cabaf599f562 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -36,7 +36,7 @@ char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = "Intel(R) XL710/X710 Virtual Function Network Driver"; -#define DRV_VERSION "1.0.5" +#define DRV_VERSION "1.0.6" const char i40evf_driver_version[] = DRV_VERSION; static const char i40evf_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation."; @@ -2045,6 +2045,8 @@ static void i40evf_init_task(struct work_struct *work) case __I40EVF_INIT_VERSION_CHECK: if (!i40evf_asq_done(hw)) { dev_err(&pdev->dev, "Admin queue command never completed\n"); + i40evf_shutdown_adminq(hw); + adapter->state = __I40EVF_STARTUP; goto err; } @@ -2078,8 +2080,11 @@ static void i40evf_init_task(struct work_struct *work) goto err; } err = i40evf_get_vf_config(adapter); - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) - goto restart; + if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { + dev_info(&pdev->dev, "Resending VF config request\n"); + err = i40evf_send_vf_config_msg(adapter); + goto err; + } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 07c13b039181..5fde5a7f4591 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -715,15 +715,9 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, } return; } - if (v_opcode != adapter->current_op) { - dev_err(&adapter->pdev->dev, "%s: Pending op is %d, received %d\n", - __func__, adapter->current_op, v_opcode); - /* We're probably completely screwed at this point, but clear - * the current op and try to carry on.... 
- */ - adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; - return; - } + if (v_opcode != adapter->current_op) + dev_info(&adapter->pdev->dev, "Pending op is %d, received %d\n", + adapter->current_op, v_opcode); if (v_retval) { dev_err(&adapter->pdev->dev, "%s: PF returned error %d to our request %d\n", __func__, v_retval, v_opcode); @@ -775,8 +769,8 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, adapter->aq_pending &= ~(I40EVF_FLAG_AQ_MAP_VECTORS); break; default: - dev_warn(&adapter->pdev->dev, "%s: Received unexpected message %d from PF\n", - __func__, v_opcode); + dev_info(&adapter->pdev->dev, "Received unexpected message %d from PF\n", + v_opcode); break; } /* switch v_opcode */ adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 02cfd3b14762..d5673eb90c54 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2842,11 +2842,16 @@ static u32 igb_get_rxfh_indir_size(struct net_device *netdev) return IGB_RETA_SIZE; } -static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) +static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct igb_adapter *adapter = netdev_priv(netdev); int i; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; for (i = 0; i < IGB_RETA_SIZE; i++) indir[i] = adapter->rss_indir_tbl[i]; @@ -2889,13 +2894,20 @@ void igb_write_rss_indir_tbl(struct igb_adapter *adapter) } static int igb_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int i; u32 num_queues; + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + num_queues = adapter->rss_queues; switch (hw->mac.type) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3c0221620c9d..f04ad13f7159 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5091,12 +5091,8 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. 
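On the receive side the padding helper shows up with inverted error handling: in the *_cleanup_headers() routines converted in this series (fm10k above, igb/ixgbe/ixgbevf below), eth_skb_pad() freeing the skb means the buffer is gone, so the function must report it as consumed rather than hand it up the stack. A minimal rendering of that contract:

    /* A true return tells the caller the skb was consumed (freed) and
     * must not be passed to the stack; eth_skb_pad() zero-pads to
     * ETH_ZLEN and frees the skb on failure.
     */
    static bool cleanup_headers_sketch(struct sk_buff *skb)
    {
            if (eth_skb_pad(skb))
                    return true;
            return false;
    }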
*/ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } @@ -6850,14 +6846,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring, if (skb_is_nonlinear(skb)) igb_pull_tail(rx_ring, rx_desc, skb); - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 82d418729dd4..fbd52924ee34 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1774,14 +1774,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return false; #endif - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } @@ -7334,12 +7329,8 @@ static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb, * The minimum packet size for olinfo paylen is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; tx_ring = ring ? 
ring : adapter->tx_ring[skb->queue_mapping]; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3b0ddf757fb6..62a0d8e0f17d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -616,14 +616,9 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, if (skb_is_nonlinear(skb)) ixgbevf_pull_tail(rx_ring, skb); - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c45e06abc073..90e0f045a6bc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -978,7 +978,29 @@ static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) return MLX4_EN_RSS_KEY_SIZE; } -static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) +static int mlx4_en_check_rxfh_func(struct net_device *dev, u8 hfunc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + /* check if requested function is supported by the device */ + if ((hfunc == ETH_RSS_HASH_TOP && + !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) || + (hfunc == ETH_RSS_HASH_XOR && + !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR))) + return -EINVAL; + + priv->rss_hash_fn = hfunc; + if (hfunc == ETH_RSS_HASH_TOP && !(dev->features & NETIF_F_RXHASH)) + en_warn(priv, + "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); + if (hfunc == ETH_RSS_HASH_XOR && (dev->features & NETIF_F_RXHASH)) + en_warn(priv, + "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); + return 0; +} + +static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, + u8 *hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rss_map *rss_map = &priv->rss_map; @@ -990,16 +1012,20 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) rss_rings = 1 << ilog2(rss_rings); while (n--) { + if (!ring_index) + break; ring_index[n] = rss_map->qps[n % rss_rings].qpn - rss_map->base_qpn; } if (key) memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); + if (hfunc) + *hfunc = priv->rss_hash_fn; return err; } static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, - const u8 *key) + const u8 *key, const u8 hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -1028,6 +1054,12 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, if (!is_power_of_2(rss_rings)) return -EINVAL; + if (hfunc != ETH_RSS_HASH_NO_CHANGE) { + err = mlx4_en_check_rxfh_func(dev, hfunc); + if (err) + return err; + } + mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; @@ -1038,6 +1070,7 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, priv->prof->rss_rings = rss_rings; if (key) memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE); + if (port_up) { err = mlx4_en_start_port(dev); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 1597fb07576c..dccf0e1f86be 100644 --- 
a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2608,6 +2608,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) dev->priv_flags |= IFF_UNICAST_FLT; + /* Setting a default hash function value */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) { + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } else if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR) { + priv->rss_hash_fn = ETH_RSS_HASH_XOR; + } else { + en_warn(priv, + "No RSS hash capabilities exposed, using Toeplitz\n"); + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } + mdev->pndev[port] = dev; netif_carrier_off(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 946d35280abc..4ca396e3168f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1223,7 +1223,19 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->flags = rss_mask; rss_context->hash_fn = MLX4_RSS_HASH_TOP; - memcpy(rss_context->rss_key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); + if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) { + rss_context->hash_fn = MLX4_RSS_HASH_XOR; + } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) { + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + memcpy(rss_context->rss_key, priv->rss_key, + MLX4_EN_RSS_KEY_SIZE); + netdev_rss_key_fill(rss_context->rss_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + en_err(priv, "Unknown RSS hash function requested\n"); + err = -EINVAL; + goto indir_err; + } err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index aaa7efbb9664..ac48a8d91501 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -376,7 +376,6 @@ struct mlx4_en_port_profile { }; struct mlx4_en_profile { - int rss_xor; int udp_rss; u8 rss_mask; u32 active_ports; @@ -619,6 +618,7 @@ struct mlx4_en_priv { u32 pflags; u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; + u8 rss_hash_fn; }; enum mlx4_en_wol { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 368c6c5ea014..a2853057c779 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1363,7 +1363,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_map; } - if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) { + if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) { dev_err(&dev->pdev->dev, "command queue size overflow\n"); err = -EINVAL; goto err_map; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index dfd3ad0a39c1..ab684463780b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -225,8 +225,8 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "event %s(%d) arrived\n", - eqe_type_str(eqe->type), eqe->type); + mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", + eqe_type_str(eqe->type), eqe->type, rsn); mlx5_rsc_event(dev, rsn, eqe->type); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 71b10b210792..3f4525619a07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,6 +43,7 @@ #include <linux/mlx5/qp.h> #include <linux/mlx5/srq.h> #include <linux/debugfs.h> +#include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" @@ -225,7 +226,7 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) table->msix_arr[i].entry = i; nvec = pci_enable_msix_range(dev->pdev, table->msix_arr, - MLX5_EQ_VEC_COMP_BASE, nvec); + MLX5_EQ_VEC_COMP_BASE + 1, nvec); if (nvec < 0) return nvec; @@ -840,6 +841,8 @@ struct mlx5_core_event_handler { void *data); }; +#define MLX5_IB_MOD "mlx5_ib" + static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -878,6 +881,10 @@ static int init_one(struct pci_dev *pdev, goto out_init; } + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); + return 0; out_init: @@ -896,8 +903,12 @@ static void remove_one(struct pci_dev *pdev) } static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ + { PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */ { PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 0a6348cefc01..06801d6f595e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -96,6 +96,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) int err; memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR); in.uarn = cpu_to_be32(uarn); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 9e7e3f1dce3e..af099057f0e9 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2913,16 +2913,11 @@ again: flags |= MXGEFW_FLAGS_SMALL; /* pad frames to at least ETH_ZLEN bytes */ - if (unlikely(skb->len < ETH_ZLEN)) { - if (skb_padto(skb, ETH_ZLEN)) { - /* The packet is gone, so we must - * return 0 */ - ss->stats.tx_dropped += 1; - return NETDEV_TX_OK; - } - /* adjust the len to account for the zero pad - * so that the nic can know how long it is */ - skb->len = ETH_ZLEN; + if (eth_skb_pad(skb)) { + /* The packet is gone, so we must + * return 0 */ + ss->stats.tx_dropped += 1; + return NETDEV_TX_OK; } } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index cf154f74cba1..b9c2f33b463d 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1377,6 +1377,16 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) return RTL_R8(IBISR0) & 0x02; } +static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + + RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); + rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); + RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); + RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { 
rtl8168_oob_notify(tp, OOB_CMD_DRIVER_START); @@ -1417,12 +1427,7 @@ static void rtl8168dp_driver_stop(struct rtl8169_private *tp) static void rtl8168ep_driver_stop(struct rtl8169_private *tp) { - void __iomem *ioaddr = tp->mmio_addr; - - RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); - RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); - RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); + rtl8168ep_stop_cmac(tp); ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); ocp_write(tp, 0x01, 0x30, ocp_read(tp, 0x01, 0x30) | 0x01); rtl_msleep_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10, 10); @@ -5934,7 +5939,6 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); @@ -6027,7 +6031,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); @@ -6091,6 +6094,8 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; + rtl8168ep_stop_cmac(tp); + RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); @@ -6109,7 +6114,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); @@ -6832,14 +6836,6 @@ err_out: return -EIO; } -static bool rtl_skb_pad(struct sk_buff *skb) -{ - if (skb_padto(skb, ETH_ZLEN)) - return false; - skb_put(skb, ETH_ZLEN - skb->len); - return true; -} - static bool rtl_test_hw_pad_bug(struct rtl8169_private *tp, struct sk_buff *skb) { return skb->len < ETH_ZLEN && tp->mac_version == RTL_GIGA_MAC_VER_34; @@ -6980,7 +6976,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, u8 ip_protocol; if (unlikely(rtl_test_hw_pad_bug(tp, skb))) - return skb_checksum_help(skb) == 0 && rtl_skb_pad(skb); + return !(skb_checksum_help(skb) || eth_skb_pad(skb)); if (transport_offset > TCPHO_MAX) { netif_warn(tp, tx_err, tp->dev, @@ -7015,7 +7011,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { if (unlikely(rtl_test_hw_pad_bug(tp, skb))) - return rtl_skb_pad(skb); + return !eth_skb_pad(skb); } return true; @@ -8005,6 +8001,12 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp) return; } +static void rtl_hw_init_8168ep(struct rtl8169_private *tp) +{ + rtl8168ep_stop_cmac(tp); + rtl_hw_init_8168g(tp); +} + static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -8017,12 +8019,13 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_46: case RTL_GIGA_MAC_VER_47: case RTL_GIGA_MAC_VER_48: + rtl_hw_init_8168g(tp); + break; case RTL_GIGA_MAC_VER_49: case RTL_GIGA_MAC_VER_50: case RTL_GIGA_MAC_VER_51: - rtl_hw_init_8168g(tp); + rtl_hw_init_8168ep(tp); break; - default: break; } diff --git a/drivers/net/ethernet/rocker/Kconfig b/drivers/net/ethernet/rocker/Kconfig index 11a850eab628..b9952ef040e4 100644 --- a/drivers/net/ethernet/rocker/Kconfig +++ 
b/drivers/net/ethernet/rocker/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_ROCKER config ROCKER tristate "Rocker switch driver (EXPERIMENTAL)" - depends on PCI && NET_SWITCHDEV + depends on PCI && NET_SWITCHDEV && BRIDGE ---help--- This driver supports Rocker switch device. diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index fded12784f22..55364359b868 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -648,6 +648,11 @@ static u16 rocker_tlv_get_u16(const struct rocker_tlv *tlv) return *(u16 *) rocker_tlv_data(tlv); } +static __be16 rocker_tlv_get_be16(const struct rocker_tlv *tlv) +{ + return *(__be16 *) rocker_tlv_data(tlv); +} + static u32 rocker_tlv_get_u32(const struct rocker_tlv *tlv) { return *(u32 *) rocker_tlv_data(tlv); @@ -726,12 +731,24 @@ static int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); } +static int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, + int attrtype, __be16 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); +} + static int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, int attrtype, u32 value) { return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); } +static int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, + int attrtype, __be32 value) +{ + return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); +} + static int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, int attrtype, u64 value) { @@ -1343,7 +1360,7 @@ static int rocker_event_mac_vlan_seen(struct rocker *rocker, port_number = rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_MAC_VLAN_LPORT]) - 1; addr = rocker_tlv_data(attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAC]); - vlan_id = rocker_tlv_get_u16(attrs[ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID]); + vlan_id = rocker_tlv_get_be16(attrs[ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID]); if (port_number >= rocker->port_count) return -EINVAL; @@ -1717,18 +1734,18 @@ static int rocker_cmd_flow_tbl_add_vlan(struct rocker_desc_info *desc_info, if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT, entry->key.vlan.in_lport)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.vlan.vlan_id)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.vlan.vlan_id)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, - entry->key.vlan.vlan_id_mask)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, + entry->key.vlan.vlan_id_mask)) return -EMSGSIZE; if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, entry->key.vlan.goto_tbl)) return -EMSGSIZE; if (entry->key.vlan.untagged && - rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_NEW_VLAN_ID, - entry->key.vlan.new_vlan_id)) + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_NEW_VLAN_ID, + entry->key.vlan.new_vlan_id)) return -EMSGSIZE; return 0; @@ -1743,8 +1760,8 @@ static int rocker_cmd_flow_tbl_add_term_mac(struct rocker_desc_info *desc_info, if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT_MASK, entry->key.term_mac.in_lport_mask)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, - entry->key.term_mac.eth_type)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, + entry->key.term_mac.eth_type)) return -EMSGSIZE; if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC, ETH_ALEN, entry->key.term_mac.eth_dst)) @@ -1752,11 
+1769,11 @@ static int rocker_cmd_flow_tbl_add_term_mac(struct rocker_desc_info *desc_info, if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, ETH_ALEN, entry->key.term_mac.eth_dst_mask)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.term_mac.vlan_id)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.term_mac.vlan_id)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, - entry->key.term_mac.vlan_id_mask)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, + entry->key.term_mac.vlan_id_mask)) return -EMSGSIZE; if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, entry->key.term_mac.goto_tbl)) @@ -1773,14 +1790,14 @@ static int rocker_cmd_flow_tbl_add_ucast_routing(struct rocker_desc_info *desc_info, struct rocker_flow_tbl_entry *entry) { - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, - entry->key.ucast_routing.eth_type)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, + entry->key.ucast_routing.eth_type)) return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_DST_IP, - entry->key.ucast_routing.dst4)) + if (rocker_tlv_put_be32(desc_info, ROCKER_TLV_OF_DPA_DST_IP, + entry->key.ucast_routing.dst4)) return -EMSGSIZE; - if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_DST_IP_MASK, - entry->key.ucast_routing.dst4_mask)) + if (rocker_tlv_put_be32(desc_info, ROCKER_TLV_OF_DPA_DST_IP_MASK, + entry->key.ucast_routing.dst4_mask)) return -EMSGSIZE; if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, entry->key.ucast_routing.goto_tbl)) @@ -1804,8 +1821,8 @@ static int rocker_cmd_flow_tbl_add_bridge(struct rocker_desc_info *desc_info, ETH_ALEN, entry->key.bridge.eth_dst_mask)) return -EMSGSIZE; if (entry->key.bridge.vlan_id && - rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.bridge.vlan_id)) + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.bridge.vlan_id)) return -EMSGSIZE; if (entry->key.bridge.tunnel_id && rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_TUNNEL_ID, @@ -1846,14 +1863,14 @@ static int rocker_cmd_flow_tbl_add_acl(struct rocker_desc_info *desc_info, if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK, ETH_ALEN, entry->key.acl.eth_dst_mask)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, - entry->key.acl.eth_type)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE, + entry->key.acl.eth_type)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->key.acl.vlan_id)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->key.acl.vlan_id)) return -EMSGSIZE; - if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, - entry->key.acl.vlan_id_mask)) + if (rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK, + entry->key.acl.vlan_id_mask)) return -EMSGSIZE; switch (ntohs(entry->key.acl.eth_type)) { @@ -2002,8 +2019,8 @@ rocker_cmd_group_tbl_add_l2_rewrite(struct rocker_desc_info *desc_info, ETH_ALEN, entry->l2_rewrite.eth_dst)) return -EMSGSIZE; if (entry->l2_rewrite.vlan_id && - rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->l2_rewrite.vlan_id)) + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->l2_rewrite.vlan_id)) return -EMSGSIZE; return 0; @@ -2048,8 +2065,8 @@ rocker_cmd_group_tbl_add_l3_unicast(struct rocker_desc_info *desc_info, ETH_ALEN, 
entry->l3_unicast.eth_dst)) return -EMSGSIZE; if (entry->l3_unicast.vlan_id && - rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, - entry->l3_unicast.vlan_id)) + rocker_tlv_put_be16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID, + entry->l3_unicast.vlan_id)) return -EMSGSIZE; if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_TTL_CHECK, entry->l3_unicast.ttl_check)) @@ -2736,7 +2753,7 @@ static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, static struct rocker_ctrl { const u8 *eth_dst; const u8 *eth_dst_mask; - u16 eth_type; + __be16 eth_type; bool acl; bool bridge; bool term; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index cad258a78708..4835bc0d0de8 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1086,19 +1086,29 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev) 0 : ARRAY_SIZE(efx->rx_indir_table)); } -static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key) +static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key, + u8 *hfunc) { struct efx_nic *efx = netdev_priv(net_dev); - memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table)); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (indir) + memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table)); return 0; } -static int efx_ethtool_set_rxfh(struct net_device *net_dev, - const u32 *indir, const u8 *key) +static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir, + const u8 *key, const u8 hfunc) { struct efx_nic *efx = netdev_priv(net_dev); + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; memcpy(efx->rx_indir_table, indir, sizeof(efx->rx_indir_table)); efx->type->rx_push_rss_config(efx); return 0; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 904fd1ab5f6e..4aaa3240453a 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6651,13 +6651,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (skb->len < ETH_ZLEN) { - unsigned int pad_bytes = ETH_ZLEN - skb->len; - - if (skb_pad(skb, pad_bytes)) - goto out; - skb_put(skb, pad_bytes); - } + if (eth_skb_pad(skb)) + goto out; len = sizeof(struct tx_pkt_hdr) + 15; if (skb_headroom(skb) < len) { diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index a556eba8eeed..90c86cd3be14 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -15,12 +15,14 @@ #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/mutex.h> +#include <linux/highmem.h> #include <linux/if_vlan.h> #if IS_ENABLED(CONFIG_IPV6) #include <linux/icmpv6.h> #endif +#include <net/ip.h> #include <net/icmp.h> #include <net/route.h> @@ -51,6 +53,8 @@ static int __vnet_tx_trigger(struct vnet_port *port, u32 start); /* Ordered from largest major to lowest */ static struct vio_version vnet_versions[] = { + { .major = 1, .minor = 8 }, + { .major = 1, .minor = 7 }, { .major = 1, .minor = 6 }, { .major = 1, .minor = 0 }, }; @@ -73,13 +77,19 @@ static int vnet_handle_unknown(struct vnet_port *port, void *arg) return -ECONNRESET; } +static int vnet_port_alloc_tx_ring(struct vnet_port *port); + static int vnet_send_attr(struct vio_driver_state *vio) { struct vnet_port *port = to_vnet_port(vio); struct net_device *dev = 
port->vp->dev; struct vio_net_attr_info pkt; int framelen = ETH_FRAME_LEN; - int i; + int i, err; + + err = vnet_port_alloc_tx_ring(to_vnet_port(vio)); + if (err) + return err; memset(&pkt, 0, sizeof(pkt)); pkt.tag.type = VIO_TYPE_CTRL; @@ -110,8 +120,15 @@ static int vnet_send_attr(struct vio_driver_state *vio) pkt.mtu = framelen + VLAN_HLEN; } - pkt.plnk_updt = PHYSLINK_UPDATE_NONE; pkt.cflags = 0; + if (vio_version_after_eq(vio, 1, 7) && port->tso) { + pkt.cflags |= VNET_LSO_IPV4_CAPAB; + if (!port->tsolen) + port->tsolen = VNET_MAXTSO; + pkt.ipv4_lso_maxlen = port->tsolen; + } + + pkt.plnk_updt = PHYSLINK_UPDATE_NONE; viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " @@ -165,6 +182,26 @@ static int handle_attr_info(struct vio_driver_state *vio, } port->rmtu = localmtu; + /* LSO negotiation */ + if (vio_version_after_eq(vio, 1, 7)) + port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB); + else + port->tso = false; + if (port->tso) { + if (!port->tsolen) + port->tsolen = VNET_MAXTSO; + port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen); + if (port->tsolen < VNET_MINTSO) { + port->tso = false; + port->tsolen = 0; + pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; + } + pkt->ipv4_lso_maxlen = port->tsolen; + } else { + pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; + pkt->ipv4_lso_maxlen = 0; + } + /* for version >= 1.6, ACK packet mode we support */ if (vio_version_after_eq(vio, 1, 6)) { pkt->xfer_mode = VIO_NEW_DRING_MODE; @@ -276,10 +313,42 @@ static struct sk_buff *alloc_and_align_skb(struct net_device *dev, return skb; } -static int vnet_rx_one(struct vnet_port *port, unsigned int len, - struct ldc_trans_cookie *cookies, int ncookies) +static inline void vnet_fullcsum(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + int offset = skb_transport_offset(skb); + + if (skb->protocol != htons(ETH_P_IP)) + return; + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return; + skb->ip_summed = CHECKSUM_NONE; + skb->csum_level = 1; + skb->csum = 0; + if (iph->protocol == IPPROTO_TCP) { + struct tcphdr *ptcp = tcp_hdr(skb); + + ptcp->check = 0; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - offset, IPPROTO_TCP, + skb->csum); + } else if (iph->protocol == IPPROTO_UDP) { + struct udphdr *pudp = udp_hdr(skb); + + pudp->check = 0; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - offset, IPPROTO_UDP, + skb->csum); + } +} + +static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) { struct net_device *dev = port->vp->dev; + unsigned int len = desc->size; unsigned int copy_len; struct sk_buff *skb; int err; @@ -301,7 +370,7 @@ static int vnet_rx_one(struct vnet_port *port, unsigned int len, skb_put(skb, copy_len); err = ldc_copy(port->vio.lp, LDC_COPY_IN, skb->data, copy_len, 0, - cookies, ncookies); + desc->cookies, desc->ncookies); if (unlikely(err < 0)) { dev->stats.rx_frame_errors++; goto out_free_skb; @@ -311,6 +380,30 @@ static int vnet_rx_one(struct vnet_port *port, unsigned int len, skb_trim(skb, len); skb->protocol = eth_type_trans(skb, dev); + if (vio_version_after_eq(&port->vio, 1, 8)) { + struct vio_net_dext *dext = vio_net_ext(desc); + + if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) { + if (skb->protocol == ETH_P_IP) { + struct iphdr *iph = (struct iphdr *)skb->data; + + iph->check = 0; + ip_send_check(iph); + } + } + if 
((dext->flags & VNET_PKT_HCK_FULLCKSUM) && + skb->ip_summed == CHECKSUM_NONE) + vnet_fullcsum(skb); + if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_level = 0; + if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK) + skb->csum_level = 1; + } + } + + skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL; + dev->stats.rx_packets++; dev->stats.rx_bytes += len; napi_gro_receive(&port->napi, skb); @@ -445,7 +538,7 @@ static int vnet_walk_rx_one(struct vnet_port *port, desc->cookies[0].cookie_addr, desc->cookies[0].cookie_size); - err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies); + err = vnet_rx_one(port, desc); if (err == -ECONNRESET) return err; desc->hdr.state = VIO_DESC_DONE; @@ -655,6 +748,8 @@ ldc_ctrl: if (event == LDC_EVENT_RESET) { port->rmtu = 0; + port->tso = true; + port->tsolen = 0; vio_port_up(vio); } port->rx_event = 0; @@ -915,11 +1010,54 @@ static void vnet_clean_timer_expire(unsigned long port0) del_timer(&port->clean_timer); } -static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, void **pstart, - int *plen) +static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm) +{ + int i, nc, err, blen; + + /* header */ + blen = skb_headlen(skb); + if (blen < ETH_ZLEN) + blen = ETH_ZLEN; + blen += VNET_PACKET_SKIP; + blen += 8 - (blen & 7); + + err = ldc_map_single(lp, skb->data-VNET_PACKET_SKIP, blen, cookies, + ncookies, map_perm); + if (err < 0) + return err; + nc = err; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + u8 *vaddr; + + if (nc < ncookies) { + vaddr = kmap_atomic(skb_frag_page(f)); + blen = skb_frag_size(f); + blen += 8 - (blen & 7); + err = ldc_map_single(lp, vaddr + f->page_offset, + blen, cookies + nc, ncookies - nc, + map_perm); + kunmap_atomic(vaddr); + } else { + err = -EMSGSIZE; + } + + if (err < 0) { + ldc_unmap(lp, cookies, nc); + return err; + } + nc += err; + } + return nc; +} + +static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) { struct sk_buff *nskb; - int len, pad; + int i, len, pad, docopy; len = skb->len; pad = 0; @@ -929,25 +1067,77 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, void **pstart, } len += VNET_PACKET_SKIP; pad += 8 - (len & 7); - len += 8 - (len & 7); + /* make sure we have enough cookies and alignment in every frag */ + docopy = skb_shinfo(skb)->nr_frags >= ncookies; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + + docopy |= f->page_offset & 7; + } if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP || skb_tailroom(skb) < pad || - skb_headroom(skb) < VNET_PACKET_SKIP) { - nskb = alloc_and_align_skb(skb->dev, skb->len); + skb_headroom(skb) < VNET_PACKET_SKIP || docopy) { + int start = 0, offset; + __wsum csum; + + len = skb->len > ETH_ZLEN ? 
skb->len : ETH_ZLEN; + nskb = alloc_and_align_skb(skb->dev, len); + if (nskb == NULL) { + dev_kfree_skb(skb); + return NULL; + } skb_reserve(nskb, VNET_PACKET_SKIP); - if (skb_copy_bits(skb, 0, nskb->data, skb->len)) { + + nskb->protocol = skb->protocol; + offset = skb_mac_header(skb) - skb->data; + skb_set_mac_header(nskb, offset); + offset = skb_network_header(skb) - skb->data; + skb_set_network_header(nskb, offset); + offset = skb_transport_header(skb) - skb->data; + skb_set_transport_header(nskb, offset); + + offset = 0; + nskb->csum_offset = skb->csum_offset; + nskb->ip_summed = skb->ip_summed; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + start = skb_checksum_start_offset(skb); + if (start) { + struct iphdr *iph = ip_hdr(nskb); + int offset = start + nskb->csum_offset; + + if (skb_copy_bits(skb, 0, nskb->data, start)) { + dev_kfree_skb(nskb); + dev_kfree_skb(skb); + return NULL; + } + *(__sum16 *)(skb->data + offset) = 0; + csum = skb_copy_and_csum_bits(skb, start, + nskb->data + start, + skb->len - start, 0); + if (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP) { + csum = csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - start, + iph->protocol, csum); + } + *(__sum16 *)(nskb->data + offset) = csum; + + nskb->ip_summed = CHECKSUM_NONE; + } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) { dev_kfree_skb(nskb); dev_kfree_skb(skb); return NULL; } (void)skb_put(nskb, skb->len); + if (skb_is_gso(skb)) { + skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size; + skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + } dev_kfree_skb(skb); skb = nskb; } - - *pstart = skb->data - VNET_PACKET_SKIP; - *plen = len; return skb; } @@ -963,6 +1153,111 @@ vnet_select_queue(struct net_device *dev, struct sk_buff *skb, return port->q_index; } +static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev); + +static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb) +{ + struct net_device *dev = port->vp->dev; + struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + struct sk_buff *segs; + int maclen, datalen; + int status; + int gso_size, gso_type, gso_segs; + int hlen = skb_transport_header(skb) - skb_mac_header(skb); + int proto = IPPROTO_IP; + + if (skb->protocol == htons(ETH_P_IP)) + proto = ip_hdr(skb)->protocol; + else if (skb->protocol == htons(ETH_P_IPV6)) + proto = ipv6_hdr(skb)->nexthdr; + + if (proto == IPPROTO_TCP) + hlen += tcp_hdr(skb)->doff * 4; + else if (proto == IPPROTO_UDP) + hlen += sizeof(struct udphdr); + else { + pr_err("vnet_handle_offloads GSO with unknown transport " + "protocol %d tproto %d\n", skb->protocol, proto); + hlen = 128; /* XXX */ + } + datalen = port->tsolen - hlen; + + gso_size = skb_shinfo(skb)->gso_size; + gso_type = skb_shinfo(skb)->gso_type; + gso_segs = skb_shinfo(skb)->gso_segs; + + if (port->tso && gso_size < datalen) + gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen); + + if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, port->q_index); + netif_tx_stop_queue(txq); + if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs) + return NETDEV_TX_BUSY; + netif_tx_wake_queue(txq); + } + + maclen = skb_network_header(skb) - skb_mac_header(skb); + skb_pull(skb, maclen); + + if (port->tso && gso_size < datalen) { + /* segment to TSO size */ + skb_shinfo(skb)->gso_size = datalen; + skb_shinfo(skb)->gso_segs = gso_segs; + + segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); + + /* restore gso_size & gso_segs */ + 
skb_shinfo(skb)->gso_size = gso_size; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len - hlen, + gso_size); + } else + segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); + if (IS_ERR(segs)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + skb_push(skb, maclen); + skb_reset_mac_header(skb); + + status = 0; + while (segs) { + struct sk_buff *curr = segs; + + segs = segs->next; + curr->next = NULL; + if (port->tso && curr->len > dev->mtu) { + skb_shinfo(curr)->gso_size = gso_size; + skb_shinfo(curr)->gso_type = gso_type; + skb_shinfo(curr)->gso_segs = + DIV_ROUND_UP(curr->len - hlen, gso_size); + } else + skb_shinfo(curr)->gso_size = 0; + + skb_push(curr, maclen); + skb_reset_mac_header(curr); + memcpy(skb_mac_header(curr), skb_mac_header(skb), + maclen); + curr->csum_start = skb_transport_header(curr) - curr->head; + if (ip_hdr(curr)->protocol == IPPROTO_TCP) + curr->csum_offset = offsetof(struct tcphdr, check); + else if (ip_hdr(curr)->protocol == IPPROTO_UDP) + curr->csum_offset = offsetof(struct udphdr, check); + + if (!(status & NETDEV_TX_MASK)) + status = vnet_start_xmit(curr, dev); + if (status & NETDEV_TX_MASK) + dev_kfree_skb_any(curr); + } + + if (!(status & NETDEV_TX_MASK)) + dev_kfree_skb_any(skb); + return status; +} + static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct vnet *vp = netdev_priv(dev); @@ -972,15 +1267,9 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len; struct sk_buff *freeskbs = NULL; int i, err, txi; - void *start = NULL; - int nlen = 0; unsigned pending = 0; struct netdev_queue *txq; - skb = vnet_skb_shape(skb, &start, &nlen); - if (unlikely(!skb)) - goto out_dropped; - rcu_read_lock(); port = __tx_port_find(vp, skb); if (unlikely(!port)) { @@ -988,7 +1277,13 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) goto out_dropped; } - if (skb->len > port->rmtu) { + if (skb_is_gso(skb) && skb->len > port->tsolen) { + err = vnet_handle_offloads(port, skb); + rcu_read_unlock(); + return err; + } + + if (!skb_is_gso(skb) && skb->len > port->rmtu) { unsigned long localmtu = port->rmtu - ETH_HLEN; if (vio_version_after_eq(&port->vio, 1, 3)) @@ -1020,6 +1315,14 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) goto out_dropped; } + skb = vnet_skb_shape(skb, 2); + + if (unlikely(!skb)) + goto out_dropped; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + vnet_fullcsum(skb); + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; i = skb_get_queue_mapping(skb); txq = netdev_get_tx_queue(dev, i); @@ -1047,16 +1350,15 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (len < ETH_ZLEN) len = ETH_ZLEN; - port->tx_bufs[txi].skb = skb; - skb = NULL; - - err = ldc_map_single(port->vio.lp, start, nlen, - port->tx_bufs[txi].cookies, VNET_MAXCOOKIES, - (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW)); + err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2, + (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW)); if (err < 0) { netdev_info(dev, "tx buffer map error %d\n", err); goto out_dropped; } + + port->tx_bufs[txi].skb = skb; + skb = NULL; port->tx_bufs[txi].ncookies = err; /* We don't rely on the ACKs to free the skb in vnet_start_xmit(), @@ -1072,6 +1374,21 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) d->ncookies = port->tx_bufs[txi].ncookies; for (i = 0; i < d->ncookies; i++) d->cookies[i] = port->tx_bufs[txi].cookies[i]; + if (vio_version_after_eq(&port->vio, 1, 7)) { + struct 
vio_net_dext *dext = vio_net_ext(d); + + memset(dext, 0, sizeof(*dext)); + if (skb_is_gso(port->tx_bufs[txi].skb)) { + dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb) + ->gso_size; + dext->flags |= VNET_PKT_IPV4_LSO; + } + if (vio_version_after_eq(&port->vio, 1, 8) && + !port->switch_port) { + dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK; + dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK; + } + } /* This has to be a non-SMP write barrier because we are writing * to memory which is shared with the peer LDOM. @@ -1361,18 +1678,20 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port) } } -static int vnet_port_alloc_tx_bufs(struct vnet_port *port) +static int vnet_port_alloc_tx_ring(struct vnet_port *port) { struct vio_dring_state *dr; - unsigned long len; + unsigned long len, elen; int i, err, ncookies; void *dring; dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - len = (VNET_TX_RING_SIZE * - (sizeof(struct vio_net_desc) + - (sizeof(struct ldc_trans_cookie) * 2))); + elen = sizeof(struct vio_net_desc) + + sizeof(struct ldc_trans_cookie) * 2; + if (vio_version_after_eq(&port->vio, 1, 7)) + elen += sizeof(struct vio_net_dext); + len = VNET_TX_RING_SIZE * elen; ncookies = VIO_MAX_RING_COOKIES; dring = ldc_alloc_exp_dring(port->vio.lp, len, @@ -1386,8 +1705,7 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port) } dr->base = dring; - dr->entry_size = (sizeof(struct vio_net_desc) + - (sizeof(struct ldc_trans_cookie) * 2)); + dr->entry_size = elen; dr->num_entries = VNET_TX_RING_SIZE; dr->prod = dr->cons = 0; port->start_cons = true; /* need an initial trigger */ @@ -1471,6 +1789,10 @@ static struct vnet *vnet_new(const u64 *local_mac) dev->ethtool_ops = &vnet_ethtool_ops; dev->watchdog_timeo = VNET_TX_TIMEOUT; + dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | + NETIF_F_HW_CSUM | NETIF_F_SG; + dev->features = dev->hw_features; + err = register_netdev(dev); if (err) { pr_err("Cannot register net device, aborting\n"); @@ -1640,10 +1962,6 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) netif_napi_add(port->vp->dev, &port->napi, vnet_poll, NAPI_POLL_WEIGHT); - err = vnet_port_alloc_tx_bufs(port); - if (err) - goto err_out_free_ldc; - INIT_HLIST_NODE(&port->hash); INIT_LIST_HEAD(&port->list); @@ -1651,6 +1969,8 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL) switch_port = 1; port->switch_port = switch_port; + port->tso = true; + port->tsolen = 0; spin_lock_irqsave(&vp->lock, flags); if (switch_port) @@ -1677,10 +1997,6 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) return 0; -err_out_free_ldc: - netif_napi_del(&port->napi); - vio_ldc_free(&port->vio); - err_out_free_port: kfree(port); diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h index cd5d343ea232..01ca78191683 100644 --- a/drivers/net/ethernet/sun/sunvnet.h +++ b/drivers/net/ethernet/sun/sunvnet.h @@ -20,6 +20,9 @@ #define VNET_TX_RING_SIZE 512 #define VNET_TX_WAKEUP_THRESH(dr) ((dr)->pending / 4) +#define VNET_MINTSO 2048 /* VIO protocol's minimum TSO len */ +#define VNET_MAXTSO 65535 /* VIO protocol's maximum TSO len */ + /* VNET packets are sent in buffers with the first 6 bytes skipped * so that after the ethernet header the IPv4/IPv6 headers are aligned * properly. 
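The VNET_MINTSO/VNET_MAXTSO bounds defined above are what the attribute handshake in handle_attr_info() clamps against: tsolen is capped by the peer's advertised ipv4_lso_maxlen, and LSO is abandoned entirely below the minimum. A minimal sketch of that clamping, assuming the session has already negotiated VIO version 1.7 or later; the helper name vnet_clamp_tso is hypothetical, while the fields and flags are the driver's own:

	/* Condensed from the handle_attr_info() hunk above; assumes
	 * vio_version_after_eq(vio, 1, 7) already holds. vnet_clamp_tso
	 * is an illustrative name, not a function in the driver.
	 */
	static void vnet_clamp_tso(struct vnet_port *port,
				   struct vio_net_attr_info *pkt)
	{
		/* LSO stays on only if the peer advertises it too */
		port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
		if (!port->tso)
			return;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		/* never promise more than the peer's stated maximum */
		port->tsolen = min_t(u16, port->tsolen, pkt->ipv4_lso_maxlen);
		if (port->tsolen < VNET_MINTSO) {
			/* below the protocol minimum: disable LSO outright */
			port->tso = false;
			port->tsolen = 0;
			pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		}
	}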
@@ -40,8 +43,9 @@ struct vnet_port { struct hlist_node hash; u8 raddr[ETH_ALEN]; - u8 switch_port; - u8 __pad; + unsigned switch_port:1; + unsigned tso:1; + unsigned __pad:14; struct vnet *vp; @@ -56,6 +60,7 @@ struct vnet_port { struct timer_list clean_timer; u64 rmtu; + u16 tsolen; struct napi_struct napi; u32 napi_stop_idx; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 5d8cb7956113..605dd909bcc3 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_TI bool "Texas Instruments (TI) devices" default y - depends on PCI || EISA || AR7 || (ARM && (ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX || ARCH_KEYSTONE)) + depends on PCI || EISA || AR7 || ARCH_DAVINCI || ARCH_OMAP2PLUS || ARCH_KEYSTONE ---help--- If you have a network (Ethernet) card belonging to this class, say Y and read the Ethernet-HOWTO, available from @@ -32,7 +32,7 @@ config TI_DAVINCI_EMAC config TI_DAVINCI_MDIO tristate "TI DaVinci MDIO Support" - depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX || ARCH_KEYSTONE ) + depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || ARCH_KEYSTONE select PHYLIB ---help--- This driver supports TI's DaVinci MDIO module. @@ -42,7 +42,7 @@ config TI_DAVINCI_MDIO config TI_DAVINCI_CPDMA tristate "TI DaVinci CPDMA Support" - depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX ) + depends on ARCH_DAVINCI || ARCH_OMAP2PLUS ---help--- This driver supports TI's DaVinci CPDMA dma engine. @@ -58,7 +58,7 @@ config TI_CPSW_PHY_SEL config TI_CPSW tristate "TI CPSW Switch Support" - depends on ARM && (ARCH_DAVINCI || SOC_AM33XX) + depends on ARCH_DAVINCI || ARCH_OMAP2PLUS select TI_DAVINCI_CPDMA select TI_DAVINCI_MDIO select TI_CPSW_PHY_SEL diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 6fc834e4306d..dd867e6cabd6 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -764,6 +764,9 @@ int netvsc_send(struct hv_device *device, out_channel = device->channel; packet->channel = out_channel; + if (out_channel->rescind) + return -ENODEV; + if (packet->page_buf_cnt) { ret = vmbus_sendpacket_pagebuffer(out_channel, packet->page_buf, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 7b2c5d1e9bad..ec0c40a8f653 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -958,6 +958,9 @@ static int rndis_filter_close_device(struct rndis_device *dev) return 0; ret = rndis_filter_set_packet_filter(dev, 0); + if (ret == -ENODEV) + ret = 0; + if (ret == 0) dev->state = RNDIS_DEV_INITIALIZED; diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index c44d29eca6c0..2729f64b3e7e 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -107,16 +107,6 @@ static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) return rtnl_dereference(d->rx_handler_data); } -static inline bool ipvlan_dev_master(struct net_device *d) -{ - return d->priv_flags & IFF_IPVLAN_MASTER; -} - -static inline bool ipvlan_dev_slave(struct net_device *d) -{ - return d->priv_flags & IFF_IPVLAN_SLAVE; -} - void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev); void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); void ipvlan_init_secret(void); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index feb185389a87..4f4099d5603d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -38,6 
+38,12 @@ static int ipvlan_port_create(struct net_device *dev) netdev_err(dev, "Master is either lo or non-ether device\n"); return -EINVAL; } + + if (netif_is_macvlan_port(dev)) { + netdev_err(dev, "Master is a macvlan port.\n"); + return -EBUSY; + } + port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); if (!port) return -ENOMEM; @@ -440,11 +446,11 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, if (!phy_dev) return -ENODEV; - if (ipvlan_dev_slave(phy_dev)) { + if (netif_is_ipvlan(phy_dev)) { struct ipvl_dev *tmp = netdev_priv(phy_dev); phy_dev = tmp->phy_dev; - } else if (!ipvlan_dev_master(phy_dev)) { + } else if (!netif_is_ipvlan_port(phy_dev)) { err = ipvlan_port_create(phy_dev); if (err < 0) return err; @@ -554,7 +560,7 @@ static int ipvlan_device_event(struct notifier_block *unused, struct ipvl_port *port; LIST_HEAD(lst_kill); - if (!ipvlan_dev_master(dev)) + if (!netif_is_ipvlan_port(dev)) return NOTIFY_DONE; port = ipvlan_port_get_rtnl(dev); @@ -645,7 +651,7 @@ static int ipvlan_addr6_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)if6->idev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); - if (!ipvlan_dev_slave(dev)) + if (!netif_is_ipvlan(dev)) return NOTIFY_DONE; if (!ipvlan || !ipvlan->port) @@ -717,7 +723,7 @@ static int ipvlan_addr4_event(struct notifier_block *unused, struct ipvl_dev *ipvlan = netdev_priv(dev); struct in_addr ip4_addr; - if (!ipvlan_dev_slave(dev)) + if (!netif_is_ipvlan(dev)) return NOTIFY_DONE; if (!ipvlan || !ipvlan->port) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9538674587aa..612e0731142d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -747,7 +747,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) @@ -784,6 +784,7 @@ static int macvlan_init(struct net_device *dev) (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; dev->features |= ALWAYS_ON_FEATURES; + dev->hw_features |= NETIF_F_LRO; dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; dev->gso_max_size = lowerdev->gso_max_size; dev->iflink = lowerdev->ifindex; @@ -936,15 +937,15 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, netdev_features_t features) { struct macvlan_dev *vlan = netdev_priv(dev); + netdev_features_t lowerdev_features = vlan->lowerdev->features; netdev_features_t mask; features |= NETIF_F_ALL_FOR_ALL; features &= (vlan->set_features | ~MACVLAN_FEATURES); mask = features; - features = netdev_increment_features(vlan->lowerdev->features, - features, - mask); + lowerdev_features &= (features | ~NETIF_F_LRO); + features = netdev_increment_features(lowerdev_features, features, mask); features |= ALWAYS_ON_FEATURES; features &= ~NETIF_F_NETNS_LOCAL; @@ -1056,6 +1057,9 @@ static int macvlan_port_create(struct net_device *dev) if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) return -EINVAL; + if (netif_is_ipvlan_port(dev)) + return -EBUSY; + port = kzalloc(sizeof(*port), GFP_KERNEL); if (port == NULL) return -ENOMEM; diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 7a53af4346e4..974ec4515269 100644 --- a/drivers/net/phy/bcm7xxx.c +++ 
b/drivers/net/phy/bcm7xxx.c @@ -252,6 +252,8 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) break; case 0xe0: case 0xf0: + /* Rev G0 introduces a roll over */ + case 0x10: ret = bcm7xxx_28nm_e0_plus_afe_config_init(phydev); break; default: diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4a9ece01def6..2d1c77e81836 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -24,6 +24,7 @@ #include <net/ip6_checksum.h> #include <uapi/linux/mdio.h> #include <linux/mdio.h> +#include <linux/usb/cdc.h> /* Version Information */ #define DRIVER_VERSION "v1.07.0 (2014/10/09)" @@ -466,9 +467,6 @@ enum rtl8152_flags { #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 -#define REALTEK_USB_DEVICE(vend, prod) \ - USB_DEVICE_INTERFACE_CLASS(vend, prod, USB_CLASS_VENDOR_SPEC) - struct tally_counter { __le64 tx_packets; __le64 rx_packets; @@ -3915,11 +3913,27 @@ static void rtl8152_disconnect(struct usb_interface *intf) } } +#define REALTEK_USB_DEVICE(vend, prod) \ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_INT_CLASS, \ + .idVendor = (vend), \ + .idProduct = (prod), \ + .bInterfaceClass = USB_CLASS_VENDOR_SPEC \ +}, \ +{ \ + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | \ + USB_DEVICE_ID_MATCH_DEVICE, \ + .idVendor = (vend), \ + .idProduct = (prod), \ + .bInterfaceClass = USB_CLASS_COMM, \ + .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ + .bInterfaceProtocol = USB_CDC_PROTO_NONE + /* table of devices that work with this driver */ static struct usb_device_id rtl8152_table[] = { - {USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, - {USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, - {USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, + {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, + {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, + {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {} }; diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index b725fd9e7803..b7b53329d575 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -583,12 +583,16 @@ vmxnet3_get_rss_indir_size(struct net_device *netdev) } static int -vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key) +vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key, u8 *hfunc) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); struct UPT1_RSSConf *rssConf = adapter->rss_conf; unsigned int n = rssConf->indTableSize; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!p) + return 0; while (n--) p[n] = rssConf->indTable[n]; return 0; @@ -596,13 +600,20 @@ vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key) } static int -vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key) +vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key, + const u8 hfunc) { unsigned int i; unsigned long flags; struct vmxnet3_adapter *adapter = netdev_priv(netdev); struct UPT1_RSSConf *rssConf = adapter->rss_conf; + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!p) + return 0; for (i = 0; i < rssConf->indTableSize; i++) rssConf->indTable[i] = p[i]; diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 733980fce8e3..41c891d05f04 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -392,4 +392,16 @@ static inline unsigned long compare_ether_header(const void *a, const void *b) #endif } +/** + * eth_skb_pad - Pad buffer to minimum
number of octets for Ethernet frame + * @skb: Buffer to pad + * + * An Ethernet frame should have a minimum size of 60 bytes. This function + * takes short frames and pads them with zeros up to the 60 byte limit. + */ +static inline int eth_skb_pad(struct sk_buff *skb) +{ + return skb_put_padto(skb, ETH_ZLEN); +} + #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c1a2d60dfb82..653dc9c4ebac 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -59,6 +59,26 @@ enum ethtool_phys_id_state { ETHTOOL_ID_OFF }; +enum { + ETH_RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */ + ETH_RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */ + + /* + * Add your fresh new hash function bits above and remember to update + * rss_hash_func_strings[] in ethtool.c + */ + ETH_RSS_HASH_FUNCS_COUNT +}; + +#define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) +#define __ETH_RSS_HASH(name) __ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT) + +#define ETH_RSS_HASH_TOP __ETH_RSS_HASH(TOP) +#define ETH_RSS_HASH_XOR __ETH_RSS_HASH(XOR) + +#define ETH_RSS_HASH_UNKNOWN 0 +#define ETH_RSS_HASH_NO_CHANGE 0 + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -158,17 +178,14 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. - * @get_rxfh: Get the contents of the RX flow hash indirection table and hash - * key. - * Will only be called if one or both of @get_rxfh_indir_size and - * @get_rxfh_key_size are implemented and return non-zero. - * Returns a negative error code or zero. - * @set_rxfh: Set the contents of the RX flow hash indirection table and/or - * hash key. In case only the indirection table or hash key is to be - * changed, the other argument will be %NULL. - * Will only be called if one or both of @get_rxfh_indir_size and - * @get_rxfh_key_size are implemented and return non-zero. + * @get_rxfh: Get the contents of the RX flow hash indirection table, hash key + * and/or hash function. * Returns a negative error code or zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table, hash + * key, and/or hash function. Arguments which are set to %NULL or zero + * will remain unchanged. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. 
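Every converted driver in this series (igb, sfc, vmxnet3 and the others above) lands on the same shape for the widened hooks. A condensed sketch for a hypothetical Toeplitz-only device with no driver-managed hash key; foo_* names and the table size are illustrative, not any one driver:

	#include <linux/ethtool.h>
	#include <linux/netdevice.h>

	#define FOO_RETA_SIZE 128		/* hypothetical table size */
	static u32 foo_reta[FOO_RETA_SIZE];	/* hypothetical driver state */

	static int foo_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
				u8 *hfunc)
	{
		int i;

		if (hfunc)
			*hfunc = ETH_RSS_HASH_TOP; /* only function we use */
		if (!indir)
			return 0; /* caller wanted hfunc and/or key only */
		for (i = 0; i < FOO_RETA_SIZE; i++)
			indir[i] = foo_reta[i];
		return 0;
	}

	static int foo_set_rxfh(struct net_device *dev, const u32 *indir,
				const u8 *key, const u8 hfunc)
	{
		int i;

		/* refuse any change the hardware cannot honor */
		if (key ||
		    (hfunc != ETH_RSS_HASH_NO_CHANGE &&
		     hfunc != ETH_RSS_HASH_TOP))
			return -EOPNOTSUPP;
		if (!indir)
			return 0; /* nothing left to change */
		for (i = 0; i < FOO_RETA_SIZE; i++)
			foo_reta[i] = indir[i];
		return 0;
	}

Making hfunc a nullable out-pointer on the get side lets the core query the hash function without allocating indirection storage, and defining ETH_RSS_HASH_NO_CHANGE as zero keeps older userspace, which never requests a function change, working unmodified.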
@@ -241,9 +258,10 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key, + u8 *hfunc); int (*set_rxfh)(struct net_device *, const u32 *indir, - const u8 *key); + const u8 *key, const u8 hfunc); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 08cfaff8a072..476c685ca6f9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -650,6 +650,8 @@ struct vmbus_channel { u8 monitor_grp; u8 monitor_bit; + bool rescind; /* got rescind msg */ + u32 ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1d67fd32e71c..ea4f1c46f761 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -219,23 +219,15 @@ enum { }; enum { - MLX5_DEV_CAP_FLAG_RC = 1LL << 0, - MLX5_DEV_CAP_FLAG_UC = 1LL << 1, - MLX5_DEV_CAP_FLAG_UD = 1LL << 2, MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, - MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6, MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, - MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, - MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, - MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, - MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29c92ee9ed56..c31f74d76ebd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3641,6 +3641,21 @@ static inline bool netif_is_macvlan(struct net_device *dev) return dev->priv_flags & IFF_MACVLAN; } +static inline bool netif_is_macvlan_port(struct net_device *dev) +{ + return dev->priv_flags & IFF_MACVLAN_PORT; +} + +static inline bool netif_is_ipvlan(struct net_device *dev) +{ + return dev->priv_flags & IFF_IPVLAN_SLAVE; +} + +static inline bool netif_is_ipvlan_port(struct net_device *dev) +{ + return dev->priv_flags & IFF_IPVLAN_MASTER; +} + static inline bool netif_is_bond_master(struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; diff --git a/include/linux/platform_data/bcmgenet.h b/include/linux/platform_data/bcmgenet.h new file mode 100644 index 000000000000..26af54321958 --- /dev/null +++ b/include/linux/platform_data/bcmgenet.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_PLATFORM_DATA_BCMGENET_H__ +#define __LINUX_PLATFORM_DATA_BCMGENET_H__ + +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/phy.h> + +struct bcmgenet_platform_data { + bool mdio_enabled; + phy_interface_t phy_interface; + int phy_address; + int phy_speed; + int phy_duplex; + u8 mac_address[ETH_ALEN]; + int genet_version; +}; + +#endif diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 372ad5e0dcb8..aa79b3c24f66 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -542,6 +542,15 @@ static inline void hlist_add_behind_rcu(struct 
hlist_node *n, pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ typeof(*(pos)), member)) +/** + * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from_rcu(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3b0419072f88..5db76a32fcab 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -17,6 +17,11 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, + unsigned change, gfp_t flags); +void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, + gfp_t flags); + /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7691ad5b4771..ef64cec42804 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -344,7 +344,6 @@ enum { SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ - SKB_FCLONE_FREE, /* this companion fclone skb is available */ }; enum { @@ -718,9 +717,6 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, - bool force); - /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer @@ -733,24 +729,8 @@ void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - __skb_dst_set_noref(skb, dst, false); -} - -/** - * skb_dst_set_noref_force - sets skb dst, without taking reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst. - * No reference is taken and no dst_release will be called. While for - * cached dsts deferred reclaim is a basic feature, for entries that are - * not cached it is caller's job to guarantee that last dst_release for - * provided dst happens when nobody uses it, eg. after a RCU grace period. - */ -static inline void skb_dst_set_noref_force(struct sk_buff *skb, - struct dst_entry *dst) -{ - __skb_dst_set_noref(skb, dst, true); + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** @@ -818,7 +798,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - fclones->skb2.fclone == SKB_FCLONE_CLONE && + atomic_read(&fclones->fclone_ref) > 1 && fclones->skb2.sk == sk; } @@ -2461,7 +2441,6 @@ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. 
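+ *	Unlike skb_put_padto() below, the padding bytes are not added
+ *	to skb->len.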
 */
-
 static inline int skb_padto(struct sk_buff *skb, unsigned int len)
 {
 	unsigned int size = skb->len;
@@ -2470,6 +2449,29 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len)
 	return skb_pad(skb, len - size);
 }

+/**
+ * skb_put_padto - increase size and pad an skbuff up to a minimal size
+ * @skb: buffer to pad
+ * @len: minimal length
+ *
+ * Pads up a buffer to ensure the trailing bytes exist and are
+ * blanked. If the buffer already contains sufficient data it
+ * is untouched. Otherwise it is extended. Returns zero on
+ * success. The skb is freed on error.
+ */
+static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int size = skb->len;
+
+	if (unlikely(size < len)) {
+		len -= size;
+		if (skb_pad(skb, len))
+			return -ENOMEM;
+		__skb_put(skb, len);
+	}
+	return 0;
+}
+
 static inline int skb_add_data(struct sk_buff *skb,
 			       char __user *from, int copy)
 {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f566b8567892..3fa0a9669a3a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -130,7 +130,7 @@ struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
 	u16	tcp_header_len;	/* Bytes of tcp header to send */
-	u16	xmit_size_goal_segs; /* Goal for segmenting output packets */
+	u16	gso_segs;	/* Max number of segs per GSO packet */

 	/*
 	 *	Header prediction flags
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 9da798256f0e..730d82ad6ee5 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -50,8 +50,8 @@ struct netns_xfrm {
 	struct list_head	policy_all;
 	struct hlist_head	*policy_byidx;
 	unsigned int		policy_idx_hmask;
-	struct hlist_head	policy_inexact[XFRM_POLICY_MAX * 2];
-	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
+	struct hlist_head	policy_inexact[XFRM_POLICY_MAX];
+	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX];
 	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
 	struct work_struct	policy_hash_work;
 	struct xfrm_policy_hthresh policy_hthresh;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d17ed6fb2f70..3d282cbb66bf 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -219,7 +219,6 @@ struct tcf_proto_ops {
 	void			(*destroy)(struct tcf_proto*);

 	unsigned long		(*get)(struct tcf_proto*, u32 handle);
-	void			(*put)(struct tcf_proto*, unsigned long);
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index eb2095b42fbb..5f66d9c2889d 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -534,6 +534,7 @@ struct ethtool_pauseparam {
 * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE;
 *	now deprecated
 * @ETH_SS_FEATURES: Device feature names
+ * @ETH_SS_RSS_HASH_FUNCS: RSS hash function names
 */
enum ethtool_stringset {
	ETH_SS_TEST		= 0,
@@ -541,6 +542,7 @@ enum ethtool_stringset {
	ETH_SS_PRIV_FLAGS,
	ETH_SS_NTUPLE_FILTERS,
	ETH_SS_FEATURES,
+	ETH_SS_RSS_HASH_FUNCS,
};

/**
@@ -884,6 +886,8 @@ struct ethtool_rxfh_indir {
 * @key_size:	On entry, the array size of the user buffer for the hash key,
 *	which may be zero.  On return from %ETHTOOL_GRSSH, the size of the
 *	hardware hash key.
+ * @hfunc:	Defines the current RSS hash function used by HW (or to be set to).
+ *	Valid values are one of the %ETH_RSS_HASH_*.
 * @rsvd:	Reserved for future extensions.
* @rss_config: RX ring/queue index for each hash value i.e., indirection table * of @indir_size __u32 elements, followed by hash key of @key_size @@ -893,14 +897,16 @@ struct ethtool_rxfh_indir { * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested * and a @indir_size of zero means the indir table should be reset to default - * values. + * values. An hfunc of zero means that hash function setting is not requested. */ struct ethtool_rxfh { __u32 cmd; __u32 rss_context; __u32 indir_size; __u32 key_size; - __u32 rsvd[2]; + __u8 hfunc; + __u8 rsvd8[3]; + __u32 rsvd32; __u32 rss_config[0]; }; #define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index ff354021bb69..edbc888ceb51 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -23,8 +23,9 @@ enum { SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), + SOF_TIMESTAMPING_OPT_CMSG = (1<<10), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 30f541b32895..b22224100011 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -266,6 +266,10 @@ enum LINUX_MIB_TCPWANTZEROWINDOWADV, /* TCPWantZeroWindowAdv */ LINUX_MIB_TCPSYNRETRANS, /* TCPSynRetrans */ LINUX_MIB_TCPORIGDATASENT, /* TCPOrigDataSent */ + LINUX_MIB_TCPHYSTARTTRAINDETECT, /* TCPHystartTrainDetect */ + LINUX_MIB_TCPHYSTARTTRAINCWND, /* TCPHystartTrainCwnd */ + LINUX_MIB_TCPHYSTARTDELAYDETECT, /* TCPHystartDelayDetect */ + LINUX_MIB_TCPHYSTARTDELAYCWND, /* TCPHystartDelayCwnd */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 56f121605c99..b057da2b87a4 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -7,3 +7,4 @@ header-y += tc_mirred.h header-y += tc_nat.h header-y += tc_pedit.h header-y += tc_skbedit.h +header-y += tc_vlan.h diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3f167d2eeb94..80d78c51f65f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -124,7 +124,7 @@ static struct bpf_test tests[] = { { { 0, 0xfffffffd } } }, { - "DIV_KX", + "DIV_MOD_KX", .u.insns = { BPF_STMT(BPF_LD | BPF_IMM, 8), BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2), @@ -134,12 +134,18 @@ static struct bpf_test tests[] = { BPF_STMT(BPF_MISC | BPF_TAX, 0), BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MOD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MOD | BPF_K, 0x70000000), BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), BPF_STMT(BPF_RET | BPF_A, 0) }, CLASSIC | FLAG_NO_DATA, { }, - { { 0, 0x40000001 } } + { { 0, 0x20000000 } } }, { "AND_OR_LSH_K", diff --git a/net/core/dev.c b/net/core/dev.c index 0814a560e5f3..dd3bf582e6f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5925,6 +5925,8 @@ static void rollback_registered_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { + struct sk_buff *skb = NULL; + /* Shutdown queueing discipline. 
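+		 * The RTM_DELLINK skb below is built via
+		 * rtmsg_ifinfo_build_skb() before ndo_uninit() runs, and
+		 * is only handed to rtmsg_ifinfo_send() afterwards.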
*/ dev_shutdown(dev); @@ -5934,6 +5936,11 @@ static void rollback_registered_many(struct list_head *head) */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, + GFP_KERNEL); + /* * Flush the unicast and multicast chains */ @@ -5943,9 +5950,8 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + if (skb) + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); diff --git a/net/core/dst.c b/net/core/dst.c index a028409ee438..e956ce6d1378 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -327,30 +327,6 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) } EXPORT_SYMBOL(__dst_destroy_metrics_generic); -/** - * __skb_dst_set_noref - sets skb dst, without a reference - * @skb: buffer - * @dst: dst entry - * @force: if force is set, use noref version even for DST_NOCACHE entries - * - * Sets skb dst, assuming a reference was not taken on dst - * skb_dst_drop() should not dst_release() this dst - */ -void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) -{ - WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); - /* If dst not in cache, we must take a reference, because - * dst_release() will destroy dst as soon as its refcount becomes zero - */ - if (unlikely((dst->flags & DST_NOCACHE) && !force)) { - dst_hold(dst); - skb_dst_set(skb, dst); - } else { - skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; - } -} -EXPORT_SYMBOL(__skb_dst_set_noref); - /* Dirty hack. 
We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat * this mistake in 2.3, but we have no choice diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 715f51f321e9..550892cd6b3f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -100,6 +100,12 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_BUSY_POLL_BIT] = "busy-poll", }; +static const char +rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { + [ETH_RSS_HASH_TOP_BIT] = "toeplitz", + [ETH_RSS_HASH_XOR_BIT] = "xor", +}; + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -185,6 +191,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_FEATURES) return ARRAY_SIZE(netdev_features_strings); + if (sset == ETH_SS_RSS_HASH_FUNCS) + return ARRAY_SIZE(rss_hash_func_strings); + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -199,6 +208,9 @@ static void __ethtool_get_strings(struct net_device *dev, if (stringset == ETH_SS_FEATURES) memcpy(data, netdev_features_strings, sizeof(netdev_features_strings)); + else if (stringset == ETH_SS_RSS_HASH_FUNCS) + memcpy(data, rss_hash_func_strings, + sizeof(rss_hash_func_strings)); else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); @@ -618,7 +630,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL); + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); if (ret) goto out; @@ -679,7 +691,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, goto out; } - ret = ops->set_rxfh(dev, indir, NULL); + ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); out: kfree(indir); @@ -697,12 +709,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, u32 total_size; u32 indir_bytes; u32 *indir = NULL; + u8 dev_hfunc = 0; u8 *hkey = NULL; u8 *rss_config; - if (!(dev->ethtool_ops->get_rxfh_indir_size || - dev->ethtool_ops->get_rxfh_key_size) || - !dev->ethtool_ops->get_rxfh) + if (!ops->get_rxfh) return -EOPNOTSUPP; if (ops->get_rxfh_indir_size) @@ -710,16 +721,14 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (ops->get_rxfh_key_size) dev_key_size = ops->get_rxfh_key_size(dev); - if ((dev_key_size + dev_indir_size) == 0) - return -EOPNOTSUPP; - if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; user_indir_size = rxfh.indir_size; user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || + rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; rxfh.indir_size = dev_indir_size; @@ -727,13 +736,6 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) return -EFAULT; - /* If the user buffer size is 0, this is just a query for the - * device table size and key size. Otherwise, if the User size is - * not equal to device table size or key size it's an error. 
- */ - if (!user_indir_size && !user_key_size) - return 0; - if ((user_indir_size && (user_indir_size != dev_indir_size)) || (user_key_size && (user_key_size != dev_key_size))) return -EINVAL; @@ -750,14 +752,19 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) hkey = rss_config + indir_bytes; - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); - if (!ret) { - if (copy_to_user(useraddr + - offsetof(struct ethtool_rxfh, rss_config[0]), - rss_config, total_size)) - ret = -EFAULT; - } + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); + if (ret) + goto out; + if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), + &dev_hfunc, sizeof(rxfh.hfunc))) { + ret = -EFAULT; + } else if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) { + ret = -EFAULT; + } +out: kfree(rss_config); return ret; @@ -776,33 +783,31 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u8 *rss_config; u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); - if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || - !ops->get_rxnfc || !ops->set_rxfh) + if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; if (ops->get_rxfh_indir_size) dev_indir_size = ops->get_rxfh_indir_size(dev); if (ops->get_rxfh_key_size) dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); - if ((dev_key_size + dev_indir_size) == 0) - return -EOPNOTSUPP; if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || + rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; - /* If either indir or hash key is valid, proceed further. - * It is not valid to request that both be unchanged. + /* If either indir, hash key or function is valid, proceed further. + * Must request at least one change: indir size, hash key or function. 
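+	 * A zero hfunc (ETH_RSS_HASH_NO_CHANGE) means the hash function
+	 * is to be left as it is.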
*/ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || (rxfh.key_size && (rxfh.key_size != dev_key_size)) || (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && - rxfh.key_size == 0)) + rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE)) return -EINVAL; if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) @@ -845,7 +850,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } - ret = ops->set_rxfh(dev, indir, hkey); + ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); out: kfree(rss_config); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 61cb7e7cc3c7..a9be2c161702 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2245,8 +2245,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, - gfp_t flags) +struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, + unsigned int change, gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2264,11 +2264,28 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); - return; + return skb; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return NULL; +} + +void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) +{ + struct net *net = dev_net(dev); + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); +} + +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) +{ + struct sk_buff *skb; + + skb = rtmsg_ifinfo_build_skb(type, dev, change, flags); + if (skb) + rtmsg_ifinfo_send(skb, dev, flags); } EXPORT_SYMBOL(rtmsg_ifinfo); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 92116dfe827c..7a338fb55cc4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -265,7 +265,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->fclone = SKB_FCLONE_ORIG; atomic_set(&fclones->fclone_ref, 1); - fclones->skb2.fclone = SKB_FCLONE_FREE; + fclones->skb2.fclone = SKB_FCLONE_CLONE; fclones->skb2.pfmemalloc = pfmemalloc; } out: @@ -541,26 +541,27 @@ static void kfree_skbmem(struct sk_buff *skb) switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: kmem_cache_free(skbuff_head_cache, skb); - break; + return; case SKB_FCLONE_ORIG: fclones = container_of(skb, struct sk_buff_fclones, skb1); - if (atomic_dec_and_test(&fclones->fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, fclones); - break; - - case SKB_FCLONE_CLONE: - fclones = container_of(skb, struct sk_buff_fclones, skb2); - /* The clone portion is available for - * fast-cloning again. + /* We usually free the clone (TX completion) before original skb + * This test would have no chance to be true for the clone, + * while here, branch prediction will be good. 
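+	 * A fclone_ref of one means the companion clone is not in use,
+	 * so the whole pair can be freed without touching the refcount.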
*/ - skb->fclone = SKB_FCLONE_FREE; + if (atomic_read(&fclones->fclone_ref) == 1) + goto fastpath; + break; - if (atomic_dec_and_test(&fclones->fclone_ref)) - kmem_cache_free(skbuff_fclone_cache, fclones); + default: /* SKB_FCLONE_CLONE */ + fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } + if (!atomic_dec_and_test(&fclones->fclone_ref)) + return; +fastpath: + kmem_cache_free(skbuff_fclone_cache, fclones); } static void skb_release_head_state(struct sk_buff *skb) @@ -872,15 +873,15 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct sk_buff_fclones *fclones = container_of(skb, struct sk_buff_fclones, skb1); - struct sk_buff *n = &fclones->skb2; + struct sk_buff *n; if (skb_orphan_frags(skb, gfp_mask)) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && - n->fclone == SKB_FCLONE_FREE) { - n->fclone = SKB_FCLONE_CLONE; - atomic_inc(&fclones->fclone_ref); + atomic_read(&fclones->fclone_ref) == 1) { + n = &fclones->skb2; + atomic_set(&fclones->fclone_ref, 2); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b7826575d215..640f26c6a9fe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, + const struct sk_buff *skb, + int ee_origin) +{ + struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + + if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || + (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + (!skb->dev)) + return false; + + info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; + info->ipi_ifindex = skb->dev->ifindex; + return true; +} + /* * Handle MSG_ERRQUEUE */ @@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int err; int copied; + WARN_ON_ONCE(sk->sk_family == AF_INET6); + err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (skb == NULL) @@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; sin->sin_family = AF_UNSPEC; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { + + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; @@ -1049,7 +1069,7 @@ e_inval: } /** - * ipv4_pktinfo_prepare - transfert some info from rtable to skb + * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer * diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6513ade8d6dc..8f9cd200ce20 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -288,6 +288,10 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV), SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS), SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT), + SNMP_MIB_ITEM("TCPHystartTrainDetect", LINUX_MIB_TCPHYSTARTTRAINDETECT), + SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), + SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), + SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dc13a3657e8e..427aee33ffc0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -835,47 +835,29 @@ static unsigned int 
tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); - u32 xmit_size_goal, old_size_goal; - - xmit_size_goal = mss_now; - - if (large_allowed && sk_can_gso(sk)) { - u32 gso_size, hlen; - - /* Maybe we should/could use sk->sk_prot->max_header here ? */ - hlen = inet_csk(sk)->icsk_af_ops->net_header_len + - inet_csk(sk)->icsk_ext_hdr_len + - tp->tcp_header_len; - - /* Goal is to send at least one packet per ms, - * not one big TSO packet every 100 ms. - * This preserves ACK clocking and is consistent - * with tcp_tso_should_defer() heuristic. - */ - gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); - gso_size = max_t(u32, gso_size, - sysctl_tcp_min_tso_segs * mss_now); - - xmit_size_goal = min_t(u32, gso_size, - sk->sk_gso_max_size - 1 - hlen); - - xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); - - /* We try hard to avoid divides here */ - old_size_goal = tp->xmit_size_goal_segs * mss_now; - - if (likely(old_size_goal <= xmit_size_goal && - old_size_goal + mss_now > xmit_size_goal)) { - xmit_size_goal = old_size_goal; - } else { - tp->xmit_size_goal_segs = - min_t(u16, xmit_size_goal / mss_now, - sk->sk_gso_max_segs); - xmit_size_goal = tp->xmit_size_goal_segs * mss_now; - } + u32 new_size_goal, size_goal, hlen; + + if (!large_allowed || !sk_can_gso(sk)) + return mss_now; + + /* Maybe we should/could use sk->sk_prot->max_header here ? */ + hlen = inet_csk(sk)->icsk_af_ops->net_header_len + + inet_csk(sk)->icsk_ext_hdr_len + + tp->tcp_header_len; + + new_size_goal = sk->sk_gso_max_size - 1 - hlen; + new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal); + + /* We try hard to avoid divides here */ + size_goal = tp->gso_segs * mss_now; + if (unlikely(new_size_goal < size_goal || + new_size_goal >= size_goal + mss_now)) { + tp->gso_segs = min_t(u16, new_size_goal / mss_now, + sk->sk_gso_max_segs); + size_goal = tp->gso_segs * mss_now; } - return max(xmit_size_goal, mss_now); + return max(size_goal, mss_now); } static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 20de0118c98e..6b6002416a73 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -363,16 +363,28 @@ static void hystart_update(struct sock *sk, u32 delay) struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!(ca->found & hystart_detect)) { + if (ca->found & hystart_detect) + return; + + if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock(); /* first detection parameter - ack-train detection */ if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { ca->last_ack = now; - if ((s32)(now - ca->round_start) > ca->delay_min >> 4) + if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { ca->found |= HYSTART_ACK_TRAIN; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); + tp->snd_ssthresh = tp->snd_cwnd; + } } + } + if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { if (ca->curr_rtt == 0 || ca->curr_rtt > delay) @@ -381,15 +393,16 @@ static void hystart_update(struct sock *sk, u32 delay) ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + - HYSTART_DELAY_THRESH(ca->delay_min>>4)) + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { ca->found |= HYSTART_DELAY; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS_BH(sock_net(sk), + 
LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); + tp->snd_ssthresh = tp->snd_cwnd; + } } - /* - * Either one of two conditions are met, - * we exit from slow start immediately. - */ - if (ca->found & hystart_detect) - tp->snd_ssthresh = tp->snd_cwnd; } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f5bd4bd3f7e6..f37ecf53ee8a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1524,6 +1524,27 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, ((nonagle & TCP_NAGLE_CORK) || (!nonagle && tp->packets_out && tcp_minshall_check(tp))); } + +/* Return how many segs we'd like on a TSO packet, + * to send one TSO packet per ms + */ +static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) +{ + u32 bytes, segs; + + bytes = min(sk->sk_pacing_rate >> 10, + sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); + + /* Goal is to send at least one packet per ms, + * not one big TSO packet every 100 ms. + * This preserves ACK clocking and is consistent + * with tcp_tso_should_defer() heuristic. + */ + segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); + + return min_t(u32, segs, sk->sk_gso_max_segs); +} + /* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, @@ -1731,7 +1752,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * This algorithm is from John Heffner. */ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, - bool *is_cwnd_limited) + bool *is_cwnd_limited, u32 max_segs) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1761,8 +1782,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= min_t(unsigned int, sk->sk_gso_max_size, - tp->xmit_size_goal_segs * tp->mss_cache)) + if (limit >= max_segs * tp->mss_cache) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1959,6 +1979,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int cwnd_quota; int result; bool is_cwnd_limited = false; + u32 max_segs; sent_pkts = 0; @@ -1972,6 +1993,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } } + max_segs = tcp_tso_autosize(sk, mss_now); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2004,10 +2026,23 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } else { if (!push_one && - tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) + tcp_tso_should_defer(sk, skb, &is_cwnd_limited, + max_segs)) break; } + limit = mss_now; + if (tso_segs > 1 && !tcp_urg_mode(tp)) + limit = tcp_mss_split_point(sk, skb, mss_now, + min_t(unsigned int, + cwnd_quota, + max_segs), + nonagle); + + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) + break; + /* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. * This allows for : @@ -2018,8 +2053,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * of queued bytes to ensure line rate. 
* One example is wifi aggregation (802.11 AMPDU) */ - limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, - sk->sk_pacing_rate >> 10); + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); @@ -2032,18 +2067,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) - limit = tcp_mss_split_point(sk, skb, mss_now, - min_t(unsigned int, - cwnd_quota, - sk->sk_gso_max_segs), - nonagle); - - if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) - break; - if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b2d606833ce4..dd8e00634563 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -336,38 +336,45 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, - unsigned short hnum, - __be16 sport, __be32 daddr, __be16 dport, int dif) +static inline int compute_score(struct sock *sk, struct net *net, + __be32 saddr, unsigned short hnum, __be16 sport, + __be32 daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - !ipv6_only_sock(sk)) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + ipv6_only_sock(sk)) + return -1; - score = (sk->sk_family == PF_INET ? 2 : 1); - if (inet->inet_rcv_saddr) { - if (inet->inet_rcv_saddr != daddr) - return -1; - score += 4; - } - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } + score = (sk->sk_family == PF_INET) ? 2 : 1; + inet = inet_sk(sk); + + if (inet->inet_rcv_saddr) { + if (inet->inet_rcv_saddr != daddr) + return -1; + score += 4; + } + + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) + return -1; + score += 4; } + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score += 4; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 4; + } + return score; } @@ -378,33 +385,39 @@ static inline int compute_score2(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; + + if (!net_eq(sock_net(sk), net) || + ipv6_only_sock(sk)) + return -1; - if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) { - struct inet_sock *inet = inet_sk(sk); + inet = inet_sk(sk); - if (inet->inet_rcv_saddr != daddr) + if (inet->inet_rcv_saddr != daddr || + inet->inet_num != hnum) + return -1; + + score = (sk->sk_family == PF_INET) ? 2 : 1; + + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) return -1; - if (inet->inet_num != hnum) + score += 4; + } + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; + score += 4; + } - score = (sk->sk_family == PF_INET ? 
2 : 1); - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 4; } + return score; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc1139687fd7..2464a00e36ab 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 16a7e81e3f99..ace10d0b3aac 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -95,6 +95,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct in6_addr any; for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -102,6 +103,22 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e1a9583bb419..66980d8d98d1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,12 +110,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); - } else { - spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); - spin_unlock(&sk->sk_dst_lock); } + opt = xchg(&inet6_sk(sk)->opt, opt); sk_dst_reset(sk); return opt; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7cfb5d745a2d..7f96432292ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,72 +148,85 
@@ static inline int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } #define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } - /* called with read_rcu_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 1f933136155a..3aedbda7658a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -343,7 +343,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, skb_dst_drop(skb); if (noref) { if (!local) - skb_dst_set_noref_force(skb, &rt->dst); + skb_dst_set_noref(skb, &rt->dst); else skb_dst_set(skb, dst_clone(&rt->dst)); } else @@ -487,7 +487,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, skb_dst_drop(skb); if (noref) { if (!local) - skb_dst_set_noref_force(skb, 
&rt->dst); + skb_dst_set_noref(skb, &rt->dst); else skb_dst_set(skb, dst_clone(&rt->dst)); } else diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index e771a46933e5..9584526c0778 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -600,7 +600,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, fl.saddr = tun_key->ipv4_src; fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); fl.flowi4_mark = skb_mark; - fl.flowi4_proto = IPPROTO_GRE; + fl.flowi4_proto = ipproto; rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index cd61280941e5..5aed341406c2 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -72,10 +72,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) return l; } -static void basic_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int basic_init(struct tcf_proto *tp) { struct basic_head *head; @@ -113,18 +109,12 @@ static void basic_destroy(struct tcf_proto *tp) static int basic_delete(struct tcf_proto *tp, unsigned long arg) { - struct basic_head *head = rtnl_dereference(tp->root); - struct basic_filter *t, *f = (struct basic_filter *) arg; - - list_for_each_entry(t, &head->flist, link) - if (t == f) { - list_del_rcu(&t->link); - tcf_unbind_filter(tp, &t->res); - call_rcu(&t->rcu, basic_delete_filter); - return 0; - } + struct basic_filter *f = (struct basic_filter *) arg; - return -ENOENT; + list_del_rcu(&f->link); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, basic_delete_filter); + return 0; } static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { @@ -188,10 +178,9 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - err = -ENOBUFS; fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); - if (fnew == NULL) - goto errout; + if (!fnew) + return -ENOBUFS; tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); err = -EINVAL; @@ -293,7 +282,6 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = { .init = basic_init, .destroy = basic_destroy, .get = basic_get, - .put = basic_put, .change = basic_change, .delete = basic_delete, .walk = basic_walk, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index eed49d1d0878..84c8219c3e1c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -109,19 +109,12 @@ static void __cls_bpf_delete_prog(struct rcu_head *rcu) static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { - struct cls_bpf_head *head = rtnl_dereference(tp->root); - struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg; + struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; - list_for_each_entry(prog, &head->plist, link) { - if (prog == todel) { - list_del_rcu(&prog->link); - tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, __cls_bpf_delete_prog); - return 0; - } - } - - return -ENOENT; + list_del_rcu(&prog->link); + tcf_unbind_filter(tp, &prog->res); + call_rcu(&prog->rcu, __cls_bpf_delete_prog); + return 0; } static void cls_bpf_destroy(struct tcf_proto *tp) @@ -148,7 +141,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) if (head == NULL) return 0UL; - list_for_each_entry_rcu(prog, &head->plist, link) { + list_for_each_entry(prog, &head->plist, link) { if (prog->handle == handle) { ret = (unsigned long) prog; break; @@ -158,10 +151,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static void cls_bpf_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int 
cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, struct nlattr **tb, @@ -344,7 +333,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; - list_for_each_entry_rcu(prog, &head->plist, link) { + list_for_each_entry(prog, &head->plist, link) { if (arg->count < arg->skip) goto skip; if (arg->fn(tp, (unsigned long) prog, arg) < 0) { @@ -363,7 +352,6 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .init = cls_bpf_init, .destroy = cls_bpf_destroy, .get = cls_bpf_get, - .put = cls_bpf_put, .change = cls_bpf_change, .delete = cls_bpf_delete, .walk = cls_bpf_walk, diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index d61a801222c1..741bfa7debb2 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -67,10 +67,6 @@ static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) return 0UL; } -static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int cls_cgroup_init(struct tcf_proto *tp) { return 0; @@ -117,11 +113,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - if (head) - new->handle = head->handle; - else - new->handle = handle; - + new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], cgroup_policy); @@ -217,7 +209,6 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { .classify = cls_cgroup_classify, .destroy = cls_cgroup_destroy, .get = cls_cgroup_get, - .put = cls_cgroup_put, .delete = cls_cgroup_delete, .walk = cls_cgroup_walk, .dump = cls_cgroup_dump, diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 4ac515f2a6ce..8e227180cabb 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -426,10 +426,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, goto err2; /* Copy fold into fnew */ - fnew->handle = fold->handle; - fnew->keymask = fold->keymask; fnew->tp = fold->tp; - fnew->handle = fold->handle; fnew->nkeys = fold->nkeys; fnew->keymask = fold->keymask; @@ -578,16 +575,12 @@ static unsigned long flow_get(struct tcf_proto *tp, u32 handle) struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; - list_for_each_entry_rcu(f, &head->filters, list) + list_for_each_entry(f, &head->filters, list) if (f->handle == handle) return (unsigned long)f; return 0; } -static void flow_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { @@ -654,7 +647,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; - list_for_each_entry_rcu(f, &head->filters, list) { + list_for_each_entry(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; if (arg->fn(tp, (unsigned long)f, arg) < 0) { @@ -674,7 +667,6 @@ static struct tcf_proto_ops cls_flow_ops __read_mostly = { .change = flow_change, .delete = flow_delete, .get = flow_get, - .put = flow_put, .dump = flow_dump, .walk = flow_walk, .owner = THIS_MODULE, diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index dbfdfd1f1a9f..23fda2ac0d19 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -111,10 +111,6 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle) return 0; } -static void 
fw_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int fw_init(struct tcf_proto *tp) { return 0; @@ -411,7 +407,6 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = { .init = fw_init, .destroy = fw_destroy, .get = fw_get, - .put = fw_put, .change = fw_change, .delete = fw_delete, .walk = fw_walk, diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 109a329b7198..098a27360b91 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -256,10 +256,6 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) return 0; } -static void route4_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int route4_init(struct tcf_proto *tp) { return 0; @@ -649,7 +645,6 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = { .init = route4_init, .destroy = route4_destroy, .get = route4_get, - .put = route4_put, .change = route4_change, .delete = route4_delete, .walk = route4_walk, diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 6bb55f277a5a..b7af3623a26a 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -271,10 +271,6 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) return 0; } -static void rsvp_put(struct tcf_proto *tp, unsigned long f) -{ -} - static int rsvp_init(struct tcf_proto *tp) { struct rsvp_head *data; @@ -708,7 +704,6 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = { .init = rsvp_init, .destroy = rsvp_destroy, .get = rsvp_get, - .put = rsvp_put, .change = rsvp_change, .delete = rsvp_delete, .walk = rsvp_walk, diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 30f10fb07f4a..0d9d8911a621 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -116,13 +116,6 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) return r && tcindex_filter_is_set(r) ? 
(unsigned long) r : 0UL; } - -static void tcindex_put(struct tcf_proto *tp, unsigned long f) -{ - pr_debug("tcindex_put(tp %p,f 0x%lx)\n", tp, f); -} - - static int tcindex_init(struct tcf_proto *tp) { struct tcindex_data *p; @@ -560,7 +553,6 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .init = tcindex_init, .destroy = tcindex_destroy, .get = tcindex_get, - .put = tcindex_put, .change = tcindex_change, .delete = tcindex_delete, .walk = tcindex_walk, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 0472909bb014..09487afbfd51 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -299,10 +299,6 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle) return (unsigned long)u32_lookup_key(ht, handle); } -static void u32_put(struct tcf_proto *tp, unsigned long f) -{ -} - static u32 gen_new_htid(struct tc_u_common *tp_c) { int i = 0x800; @@ -1021,7 +1017,6 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .init = u32_init, .destroy = u32_destroy, .get = u32_get, - .put = u32_put, .change = u32_change, .delete = u32_delete, .walk = u32_walk, diff --git a/net/socket.c b/net/socket.c index ee3ee39eefa5..f676ac4a3701 100644 --- a/net/socket.c +++ b/net/socket.c @@ -651,7 +651,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); } -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t size, bool nosec) { struct kiocb iocb; struct sock_iocb siocb; @@ -659,25 +660,22 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; - ret = __sock_sendmsg(&iocb, sock, msg, size); + ret = nosec ? 
__sock_sendmsg_nosec(&iocb, sock, msg, size) : + __sock_sendmsg(&iocb, sock, msg, size); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); return ret; } + +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +{ + return do_sock_sendmsg(sock, msg, size, false); +} EXPORT_SYMBOL(sock_sendmsg); static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) { - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; + return do_sock_sendmsg(sock, msg, size, true); } int kernel_sendmsg(struct socket *sock, struct msghdr *msg, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f0761c771734..96ceefeb9daf 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -233,8 +233,11 @@ static void bclink_retransmit_pkt(u32 after, u32 to) */ void tipc_bclink_wakeup_users(void) { - while (skb_queue_len(&bclink->link.waiting_sks)) - tipc_sk_rcv(skb_dequeue(&bclink->link.waiting_sks)); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&bclink->link.waiting_sks))) + tipc_sk_rcv(skb); + } /** @@ -950,7 +953,7 @@ int tipc_bclink_init(void) spin_lock_init(&bclink->lock); __skb_queue_head_init(&bcl->outqueue); __skb_queue_head_init(&bcl->deferred_queue); - __skb_queue_head_init(&bcl->waiting_sks); + skb_queue_head_init(&bcl->waiting_sks); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); __skb_queue_head_init(&bclink->node.waiting_sks); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 56248db75274..ba6083dca95b 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -38,39 +38,6 @@ #include "link.h" #include "name_distr.h" -/** - * struct publ_list - list of publications made by this node - * @list: circular list of publications - * @list_size: number of entries in list - */ -struct publ_list { - struct list_head list; - u32 size; -}; - -static struct publ_list publ_zone = { - .list = LIST_HEAD_INIT(publ_zone.list), - .size = 0, -}; - -static struct publ_list publ_cluster = { - .list = LIST_HEAD_INIT(publ_cluster.list), - .size = 0, -}; - -static struct publ_list publ_node = { - .list = LIST_HEAD_INIT(publ_node.list), - .size = 0, -}; - -static struct publ_list *publ_lists[] = { - NULL, - &publ_zone, /* publ_lists[TIPC_ZONE_SCOPE] */ - &publ_cluster, /* publ_lists[TIPC_CLUSTER_SCOPE] */ - &publ_node /* publ_lists[TIPC_NODE_SCOPE] */ -}; - - int sysctl_tipc_named_timeout __read_mostly = 2000; /** @@ -146,8 +113,8 @@ struct sk_buff *tipc_named_publish(struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list); - publ_lists[publ->scope]->size++; + list_add_tail_rcu(&publ->local_list, + &tipc_nametbl->publ_list[publ->scope]); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -172,7 +139,6 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) struct distr_item *item; list_del(&publ->local_list); - publ_lists[publ->scope]->size--; if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -195,21 +161,17 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) * @pls: linked list of publication items to be packed into buffer chain */ static void named_distribute(struct sk_buff_head *list, u32 dnode, - struct publ_list *pls) + struct list_head *pls) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint dsz = 
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 56248db75274..ba6083dca95b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -38,39 +38,6 @@
 #include "link.h"
 #include "name_distr.h"
 
-/**
- * struct publ_list - list of publications made by this node
- * @list: circular list of publications
- * @list_size: number of entries in list
- */
-struct publ_list {
-	struct list_head list;
-	u32 size;
-};
-
-static struct publ_list publ_zone = {
-	.list = LIST_HEAD_INIT(publ_zone.list),
-	.size = 0,
-};
-
-static struct publ_list publ_cluster = {
-	.list = LIST_HEAD_INIT(publ_cluster.list),
-	.size = 0,
-};
-
-static struct publ_list publ_node = {
-	.list = LIST_HEAD_INIT(publ_node.list),
-	.size = 0,
-};
-
-static struct publ_list *publ_lists[] = {
-	NULL,
-	&publ_zone,	/* publ_lists[TIPC_ZONE_SCOPE] */
-	&publ_cluster,	/* publ_lists[TIPC_CLUSTER_SCOPE] */
-	&publ_node	/* publ_lists[TIPC_NODE_SCOPE] */
-};
-
-
 int sysctl_tipc_named_timeout __read_mostly = 2000;
 
 /**
@@ -146,8 +113,8 @@ struct sk_buff *tipc_named_publish(struct publication *publ)
 	struct sk_buff *buf;
 	struct distr_item *item;
 
-	list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list);
-	publ_lists[publ->scope]->size++;
+	list_add_tail_rcu(&publ->local_list,
+			  &tipc_nametbl->publ_list[publ->scope]);
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
@@ -172,7 +139,6 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
 	struct distr_item *item;
 
 	list_del(&publ->local_list);
-	publ_lists[publ->scope]->size--;
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
@@ -195,21 +161,17 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
  * @pls: linked list of publication items to be packed into buffer chain
  */
 static void named_distribute(struct sk_buff_head *list, u32 dnode,
-			     struct publ_list *pls)
+			     struct list_head *pls)
 {
 	struct publication *publ;
 	struct sk_buff *skb = NULL;
 	struct distr_item *item = NULL;
-	uint dsz = pls->size * ITEM_SIZE;
 	uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE;
-	uint rem = dsz;
-	uint msg_rem = 0;
+	uint msg_rem = msg_dsz;
 
-	list_for_each_entry(publ, &pls->list, local_list) {
+	list_for_each_entry(publ, pls, local_list) {
 		/* Prepare next buffer: */
 		if (!skb) {
-			msg_rem = min_t(uint, rem, msg_dsz);
-			rem -= msg_rem;
 			skb = named_prepare_buf(PUBLICATION, msg_rem, dnode);
 			if (!skb) {
 				pr_warn("Bulk publication failure\n");
@@ -227,8 +189,14 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode,
 		if (!msg_rem) {
 			__skb_queue_tail(list, skb);
 			skb = NULL;
+			msg_rem = msg_dsz;
 		}
 	}
+	if (skb) {
+		msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+		skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
+		__skb_queue_tail(list, skb);
+	}
 }
 
 /**
@@ -240,10 +208,12 @@ void tipc_named_node_up(u32 dnode)
 
 	__skb_queue_head_init(&head);
 
-	read_lock_bh(&tipc_nametbl_lock);
-	named_distribute(&head, dnode, &publ_cluster);
-	named_distribute(&head, dnode, &publ_zone);
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
+	named_distribute(&head, dnode,
+			 &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
+	named_distribute(&head, dnode,
+			 &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
+	rcu_read_unlock();
 
 	tipc_link_xmit(&head, dnode, dnode);
 }
@@ -290,12 +260,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr)
 {
 	struct publication *p;
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	p = tipc_nametbl_remove_publ(publ->type, publ->lower,
 				     publ->node, publ->ref, publ->key);
 	if (p)
 		tipc_publ_unsubscribe(p, addr);
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 
 	if (p != publ) {
 		pr_err("Unable to remove publication from failed node\n"
@@ -304,7 +274,7 @@ static void tipc_publ_purge(struct publication *publ, u32 addr)
 		       publ->key);
 	}
 
-	kfree(p);
+	kfree_rcu(p, rcu);
 }
 
 void tipc_publ_notify(struct list_head *nsub_list, u32 addr)
@@ -341,7 +311,7 @@ static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype)
 						ntohl(i->key));
 		if (publ) {
 			tipc_publ_unsubscribe(publ, node);
-			kfree(publ);
+			kfree_rcu(publ, rcu);
 			return true;
 		}
 	} else {
@@ -406,14 +376,14 @@ void tipc_named_rcv(struct sk_buff *buf)
 	u32 count = msg_data_sz(msg) / ITEM_SIZE;
 	u32 node = msg_orignode(msg);
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	while (count--) {
 		if (!tipc_update_nametbl(item, node, msg_type(msg)))
 			tipc_named_add_backlog(item, msg_type(msg), node);
 		item++;
 	}
 	tipc_named_process_backlog();
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 	kfree_skb(buf);
 }
 
@@ -429,11 +399,12 @@ void tipc_named_reinit(void)
 	struct publication *publ;
 	int scope;
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
-		list_for_each_entry(publ, &publ_lists[scope]->list, local_list)
+		list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope],
+					local_list)
 			publ->node = tipc_own_addr;
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 }
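Note: with the per-scope size counters gone, named_distribute() can no longer precompute the total payload up front; instead it packs ITEM_SIZE records into MTU-sized messages as it walks the list and flushes a trimmed, partially filled final message. A runnable toy of that chunking flow; the constants and printf output are illustrative only:

	#include <stdio.h>

	#define ITEM_SIZE 20
	#define MSG_CAP   60	/* stand-in for the MTU-derived msg_dsz */

	static void distribute(int nitems)
	{
		int rem = MSG_CAP;	/* mirrors msg_rem */
		int open = 0;		/* a buffer is being filled */

		for (int i = 0; i < nitems; i++) {
			if (!open) {		/* "Prepare next buffer" */
				open = 1;
				rem = MSG_CAP;
			}
			rem -= ITEM_SIZE;	/* pack one distr_item */
			if (!rem) {		/* full: queue, start over */
				printf("queue full msg (%d bytes)\n", MSG_CAP);
				open = 0;
			}
		}
		if (open)	/* trim and queue the partial tail, as the
				 * patch's new if (skb) { ... } block does */
			printf("queue partial msg (%d bytes)\n", MSG_CAP - rem);
	}

	int main(void)
	{
		distribute(7);	/* two full messages, one 20-byte partial */
		return 0;
	}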
 *
 * Redistribution and use in source and binary forms, with or without
@@ -92,6 +92,7 @@ struct sub_seq {
  * @ns_list: links to adjacent name sequences in hash chain
  * @subscriptions: list of subscriptions for this 'type'
  * @lock: spinlock controlling access to publication lists of all sub-sequences
+ * @rcu: RCU callback head used for deferred freeing
  */
 struct name_seq {
 	u32 type;
@@ -101,21 +102,11 @@ struct name_seq {
 	struct hlist_node ns_list;
 	struct list_head subscriptions;
 	spinlock_t lock;
+	struct rcu_head rcu;
 };
 
-/**
- * struct name_table - table containing all existing port name publications
- * @types: pointer to fixed-sized array of name sequence lists,
- *         accessed via hashing on 'type'; name sequence lists are *not* sorted
- * @local_publ_count: number of publications issued by this node
- */
-struct name_table {
-	struct hlist_head *types;
-	u32 local_publ_count;
-};
-
-static struct name_table table;
-DEFINE_RWLOCK(tipc_nametbl_lock);
+struct name_table *tipc_nametbl;
+DEFINE_SPINLOCK(tipc_nametbl_lock);
 
 static int hash(int x)
 {
@@ -142,9 +133,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
 	publ->node = node;
 	publ->ref = port_ref;
 	publ->key = key;
-	INIT_LIST_HEAD(&publ->local_list);
 	INIT_LIST_HEAD(&publ->pport_list);
-	INIT_LIST_HEAD(&publ->nodesub_list);
 	return publ;
 }
 
@@ -179,22 +168,10 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
 	nseq->alloc = 1;
 	INIT_HLIST_NODE(&nseq->ns_list);
 	INIT_LIST_HEAD(&nseq->subscriptions);
-	hlist_add_head(&nseq->ns_list, seq_head);
+	hlist_add_head_rcu(&nseq->ns_list, seq_head);
 	return nseq;
 }
 
-/*
- * nameseq_delete_empty - deletes a name sequence structure if now unused
- */
-static void nameseq_delete_empty(struct name_seq *seq)
-{
-	if (!seq->first_free && list_empty(&seq->subscriptions)) {
-		hlist_del_init(&seq->ns_list);
-		kfree(seq->sseqs);
-		kfree(seq);
-	}
-}
-
 /**
  * nameseq_find_subseq - find sub-sequence (if any) matching a name instance
  *
@@ -475,8 +452,8 @@ static struct name_seq *nametbl_find_seq(u32 type)
 	struct hlist_head *seq_head;
 	struct name_seq *ns;
 
-	seq_head = &table.types[hash(type)];
-	hlist_for_each_entry(ns, seq_head, ns_list) {
+	seq_head = &tipc_nametbl->seq_hlist[hash(type)];
+	hlist_for_each_entry_rcu(ns, seq_head, ns_list) {
 		if (ns->type == type)
 			return ns;
 	}
@@ -487,7 +464,9 @@ static struct name_seq *nametbl_find_seq(u32 type)
 struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
 					     u32 scope, u32 node, u32 port, u32 key)
 {
+	struct publication *publ;
 	struct name_seq *seq = nametbl_find_seq(type);
+	int index = hash(type);
 
 	if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
 	    (lower > upper)) {
@@ -497,12 +476,16 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
 	}
 
 	if (!seq)
-		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
+		seq = tipc_nameseq_create(type,
+					  &tipc_nametbl->seq_hlist[index]);
 	if (!seq)
 		return NULL;
 
-	return tipc_nameseq_insert_publ(seq, type, lower, upper,
+	spin_lock_bh(&seq->lock);
+	publ = tipc_nameseq_insert_publ(seq, type, lower, upper,
 					scope, node, port, key);
+	spin_unlock_bh(&seq->lock);
+	return publ;
 }
 
 struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
@@ -514,8 +497,16 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
 	if (!seq)
 		return NULL;
 
+	spin_lock_bh(&seq->lock);
 	publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
-	nameseq_delete_empty(seq);
+	if (!seq->first_free && list_empty(&seq->subscriptions)) {
+		hlist_del_init_rcu(&seq->ns_list);
+		kfree(seq->sseqs);
+		spin_unlock_bh(&seq->lock);
+		kfree_rcu(seq, rcu);
+		return publ;
+	}
+	spin_unlock_bh(&seq->lock);
 	return publ;
 }
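Note: the rewritten tipc_nametbl_remove_publ() above shows the writer half of the RCU conversion that replaces nameseq_delete_empty(): unlink with an _rcu list primitive while holding the lock, then defer the actual kfree() past a grace period. The generic idiom, sketched here with a made-up struct rather than the TIPC types:

	struct item {
		struct hlist_node node;
		struct rcu_head rcu;
		u32 key;
	};

	/* Writer side, called under the table's spinlock. */
	static void item_remove(struct item *it)
	{
		hlist_del_init_rcu(&it->node);	/* new lookups miss it now */
		kfree_rcu(it, rcu);	/* readers that already found it stay
					 * safe until they drop
					 * rcu_read_lock() */
	}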
@@ -544,14 +535,14 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
 	if (!tipc_in_scope(*destnode, tipc_own_addr))
 		return 0;
 
-	read_lock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
 	seq = nametbl_find_seq(type);
 	if (unlikely(!seq))
 		goto not_found;
+	spin_lock_bh(&seq->lock);
 	sseq = nameseq_find_subseq(seq, instance);
 	if (unlikely(!sseq))
-		goto not_found;
-	spin_lock_bh(&seq->lock);
+		goto no_match;
 	info = sseq->info;
 
 	/* Closest-First Algorithm */
@@ -601,7 +592,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
 no_match:
 	spin_unlock_bh(&seq->lock);
 not_found:
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 	*destnode = node;
 	return ref;
 }
@@ -627,13 +618,12 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
 	struct name_info *info;
 	int res = 0;
 
-	read_lock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
 	seq = nametbl_find_seq(type);
 	if (!seq)
 		goto exit;
 
 	spin_lock_bh(&seq->lock);
-
 	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
 	sseq_stop = seq->sseqs + seq->first_free;
 	for (; sseq != sseq_stop; sseq++) {
@@ -651,10 +641,9 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
 		if (info->cluster_list_size != info->node_list_size)
 			res = 1;
 	}
-
 	spin_unlock_bh(&seq->lock);
 exit:
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 	return res;
 }
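Note: tipc_nametbl_translate() and tipc_nametbl_mc_translate() above are the reader half: rcu_read_lock() replaces the global read_lock_bh(), the hash chain is walked with the _rcu iterator, and the per-sequence spinlock is taken only once a match is found. The generic shape, again with made-up names:

	struct item {
		struct hlist_node node;
		spinlock_t lock;	/* protects val */
		u32 key;
		u32 val;
	};

	static u32 item_lookup(struct hlist_head *head, u32 key)
	{
		struct item *it;
		u32 val = 0;

		rcu_read_lock();	/* no global lock on the fast path */
		hlist_for_each_entry_rcu(it, head, node) {
			if (it->key != key)
				continue;
			spin_lock_bh(&it->lock);	/* mutable payload */
			val = it->val;
			spin_unlock_bh(&it->lock);
			break;
		}
		rcu_read_unlock();
		return val;
	}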
@@ -667,22 +656,23 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
 	struct publication *publ;
 	struct sk_buff *buf = NULL;
 
-	if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
+	spin_lock_bh(&tipc_nametbl_lock);
+	if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) {
 		pr_warn("Publication failed, local publication limit reached (%u)\n",
 			TIPC_MAX_PUBLICATIONS);
+		spin_unlock_bh(&tipc_nametbl_lock);
 		return NULL;
 	}
 
-	write_lock_bh(&tipc_nametbl_lock);
 	publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
 					tipc_own_addr, port_ref, key);
 	if (likely(publ)) {
-		table.local_publ_count++;
+		tipc_nametbl->local_publ_count++;
 		buf = tipc_named_publish(publ);
 		/* Any pending external events? */
 		tipc_named_process_backlog();
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 
 	if (buf)
 		named_cluster_distribute(buf);
@@ -695,27 +685,28 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
 int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 {
 	struct publication *publ;
-	struct sk_buff *buf;
+	struct sk_buff *skb = NULL;
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
 	if (likely(publ)) {
-		table.local_publ_count--;
-		buf = tipc_named_withdraw(publ);
+		tipc_nametbl->local_publ_count--;
+		skb = tipc_named_withdraw(publ);
 		/* Any pending external events? */
 		tipc_named_process_backlog();
-		write_unlock_bh(&tipc_nametbl_lock);
 		list_del_init(&publ->pport_list);
-		kfree(publ);
+		kfree_rcu(publ, rcu);
+	} else {
+		pr_err("Unable to remove local publication\n"
+		       "(type=%u, lower=%u, ref=%u, key=%u)\n",
+		       type, lower, ref, key);
+	}
+	spin_unlock_bh(&tipc_nametbl_lock);
 
-		if (buf)
-			named_cluster_distribute(buf);
+	if (skb) {
+		named_cluster_distribute(skb);
 		return 1;
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
-	pr_err("Unable to remove local publication\n"
-	       "(type=%u, lower=%u, ref=%u, key=%u)\n",
-	       type, lower, ref, key);
 	return 0;
 }
 
@@ -725,12 +716,14 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 void tipc_nametbl_subscribe(struct tipc_subscription *s)
 {
 	u32 type = s->seq.type;
+	int index = hash(type);
 	struct name_seq *seq;
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(type);
 	if (!seq)
-		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
+		seq = tipc_nameseq_create(type,
+					  &tipc_nametbl->seq_hlist[index]);
 	if (seq) {
 		spin_lock_bh(&seq->lock);
 		tipc_nameseq_subscribe(seq, s);
@@ -739,7 +732,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
 		pr_warn("Failed to create subscription for {%u,%u,%u}\n",
 			s->seq.type, s->seq.lower, s->seq.upper);
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 }
 
 /**
@@ -749,18 +742,23 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
 {
 	struct name_seq *seq;
 
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(s->seq.type);
 	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		list_del_init(&s->nameseq_list);
-		spin_unlock_bh(&seq->lock);
-		nameseq_delete_empty(seq);
+		if (!seq->first_free && list_empty(&seq->subscriptions)) {
+			hlist_del_init_rcu(&seq->ns_list);
+			kfree(seq->sseqs);
+			spin_unlock_bh(&seq->lock);
+			kfree_rcu(seq, rcu);
+		} else {
+			spin_unlock_bh(&seq->lock);
+		}
 	}
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
 }
 
-
 /**
  * subseq_list - print specified sub-sequence contents into the given buffer
  */
@@ -882,8 +880,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		lowbound = 0;
 		upbound = ~0;
 		for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
-			seq_head = &table.types[i];
-			hlist_for_each_entry(seq, seq_head, ns_list) {
+			seq_head = &tipc_nametbl->seq_hlist[i];
+			hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						    depth, seq->type,
 						    lowbound, upbound, i);
@@ -898,8 +896,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		}
 		ret += nametbl_header(buf + ret, len - ret, depth);
 		i = hash(type);
-		seq_head = &table.types[i];
-		hlist_for_each_entry(seq, seq_head, ns_list) {
+		seq_head = &tipc_nametbl->seq_hlist[i];
+		hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
 			if (seq->type == type) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						    depth, type,
@@ -931,11 +929,11 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
 	pb = TLV_DATA(rep_tlv);
 	pb_len = ULTRA_STRING_MAX_LEN;
 	argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area);
-	read_lock_bh(&tipc_nametbl_lock);
+	rcu_read_lock();
 	str_len = nametbl_list(pb, pb_len, ntohl(argv->depth),
 			       ntohl(argv->type),
 			       ntohl(argv->lowbound), ntohl(argv->upbound));
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 	str_len += 1;	/* for "\0" */
 	skb_put(buf, TLV_SPACE(str_len));
 	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
@@ -945,12 +943,18 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
 
 int tipc_nametbl_init(void)
 {
-	table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head),
-			      GFP_ATOMIC);
-	if (!table.types)
+	int i;
+
+	tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC);
+	if (!tipc_nametbl)
 		return -ENOMEM;
 
-	table.local_publ_count = 0;
+	for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
+		INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
+
+	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
+	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
+	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
 	return 0;
 }
 
@@ -965,17 +969,19 @@ static void tipc_purge_publications(struct name_seq *seq)
 	struct sub_seq *sseq;
 	struct name_info *info;
 
-	if (!seq->sseqs) {
-		nameseq_delete_empty(seq);
-		return;
-	}
+	spin_lock_bh(&seq->lock);
 	sseq = seq->sseqs;
 	info = sseq->info;
 	list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
 		tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
 					 publ->ref, publ->key);
-		kfree(publ);
+		kfree_rcu(publ, rcu);
 	}
+	hlist_del_init_rcu(&seq->ns_list);
+	kfree(seq->sseqs);
+	spin_unlock_bh(&seq->lock);
+
+	kfree_rcu(seq, rcu);
 }
 
 void tipc_nametbl_stop(void)
@@ -983,23 +989,24 @@ void tipc_nametbl_stop(void)
 	u32 i;
 	struct name_seq *seq;
 	struct hlist_head *seq_head;
-	struct hlist_node *safe;
 
 	/* Verify name table is empty and purge any lingering
 	 * publications, then release the name table
 	 */
-	write_lock_bh(&tipc_nametbl_lock);
+	spin_lock_bh(&tipc_nametbl_lock);
 	for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
-		if (hlist_empty(&table.types[i]))
+		if (hlist_empty(&tipc_nametbl->seq_hlist[i]))
 			continue;
-		seq_head = &table.types[i];
-		hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
+		seq_head = &tipc_nametbl->seq_hlist[i];
+		hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
 			tipc_purge_publications(seq);
 		}
 	}
-	kfree(table.types);
-	table.types = NULL;
-	write_unlock_bh(&tipc_nametbl_lock);
+	spin_unlock_bh(&tipc_nametbl_lock);
+
+	synchronize_net();
+	kfree(tipc_nametbl);
+
 }
 
 static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
@@ -1103,7 +1110,7 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type,
 			      u32 *last_lower, u32 *last_publ)
 {
 	struct hlist_head *seq_head;
-	struct name_seq *seq;
+	struct name_seq *seq = NULL;
 	int err;
 	int i;
 
@@ -1113,22 +1120,21 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type,
 		i = 0;
 
 	for (; i < TIPC_NAMETBL_SIZE; i++) {
-		seq_head = &table.types[i];
+		seq_head = &tipc_nametbl->seq_hlist[i];
 
 		if (*last_type) {
 			seq = nametbl_find_seq(*last_type);
 			if (!seq)
 				return -EPIPE;
 		} else {
-			seq = hlist_entry_safe((seq_head)->first,
-					       struct name_seq, ns_list);
+			hlist_for_each_entry_rcu(seq, seq_head, ns_list)
+				break;
			if (!seq)
 				continue;
 		}
 
-		hlist_for_each_entry_from(seq, ns_list) {
+		hlist_for_each_entry_from_rcu(seq, ns_list) {
 			spin_lock_bh(&seq->lock);
-
 			err = __tipc_nl_subseq_list(msg, seq, last_lower,
 						    last_publ);
@@ -1160,8 +1166,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	msg.portid = NETLINK_CB(cb->skb).portid;
 	msg.seq = cb->nlh->nlmsg_seq;
 
-	read_lock_bh(&tipc_nametbl_lock);
-
+	rcu_read_lock();
 	err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ);
 	if (!err) {
 		done = 1;
@@ -1174,8 +1179,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	 */
 		cb->prev_seq = 1;
 	}
-
-	read_unlock_bh(&tipc_nametbl_lock);
+	rcu_read_unlock();
 
 	cb->args[0] = last_type;
 	cb->args[1] = last_lower;
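Note: tipc_purge_publications() as flattened above showed a second spin_lock_bh(&seq->lock) where the balancing spin_unlock_bh() belongs; the fixed version is reproduced above. tipc_nametbl_stop() then completes the RCU pattern: everything is unlinked under the spinlock, and synchronize_net() (an RCU grace-period wait used on the networking side) runs before the final kfree(), so no reader can still hold a pointer into the table. Sketched generically; my_table and its fields are made up for illustration:

	static void table_destroy(struct my_table *tbl)
	{
		spin_lock_bh(&tbl->lock);
		/* unlink all entries with hlist_del_rcu()/kfree_rcu() */
		spin_unlock_bh(&tbl->lock);

		synchronize_net();	/* all in-flight RCU readers finish */
		kfree(tbl);		/* now nobody can still see tbl */
	}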
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index c62877826655..5f0dee92010d 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -43,7 +43,9 @@ struct tipc_port_list;
 /*
  * TIPC name types reserved for internal TIPC use (both current and planned)
  */
-#define TIPC_ZM_SRV 3		/* zone master service name type */
+#define TIPC_ZM_SRV		3	/* zone master service name type */
+#define TIPC_PUBL_SCOPE_NUM	(TIPC_NODE_SCOPE + 1)
+#define TIPC_NAMETBL_SIZE	1024	/* must be a power of 2 */
 
 /**
  * struct publication - info about a published (name or) name sequence
@@ -60,6 +62,7 @@ struct tipc_port_list;
  * @node_list: adjacent matching name seq publications with >= node scope
  * @cluster_list: adjacent matching name seq publications with >= cluster scope
  * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @rcu: RCU callback head used for deferred freeing
 *
 * Note that the node list, cluster list, and zone list are circular lists.
 */
@@ -77,10 +80,23 @@ struct publication {
 	struct list_head node_list;
 	struct list_head cluster_list;
 	struct list_head zone_list;
+	struct rcu_head rcu;
 };
 
+/**
+ * struct name_table - table containing all existing port name publications
+ * @seq_hlist: name sequence hash lists
+ * @publ_list: publication lists
+ * @local_publ_count: number of publications issued by this node
+ */
+struct name_table {
+	struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
+	struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+	u32 local_publ_count;
+};
 
-extern rwlock_t tipc_nametbl_lock;
+extern spinlock_t tipc_nametbl_lock;
+extern struct name_table *tipc_nametbl;
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9658d9b63876..4731cad99d1c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -827,39 +827,6 @@ exit:
 	return TIPC_OK;
 }
 
-/**
- * dest_name_check - verify user is permitted to send to specified port name
- * @dest: destination address
- * @m: descriptor for message to be sent
- *
- * Prevents restricted configuration commands from being issued by
- * unauthorized users.
- *
- * Returns 0 if permission is granted, otherwise errno
- */
-static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
-{
-	struct tipc_cfg_msg_hdr hdr;
-
-	if (unlikely(dest->addrtype == TIPC_ADDR_ID))
-		return 0;
-	if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
-		return 0;
-	if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
-		return 0;
-	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
-		return -EACCES;
-
-	if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
-		return -EMSGSIZE;
-	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
-		return -EFAULT;
-	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
-		return -EACCES;
-
-	return 0;
-}
-
 static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
 {
 	struct sock *sk = sock->sk;
@@ -912,7 +879,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct tipc_name_seq *seq = &dest->addr.nameseq;
 	u32 mtu;
 	long timeo;
-	int rc = -EINVAL;
+	int rc;
 
 	if (unlikely(!dest))
 		return -EDESTADDRREQ;
@@ -945,9 +912,6 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
 			tsk->conn_instance = dest->addr.name.name.instance;
 		}
 	}
-	rc = dest_name_check(dest, m);
-	if (rc)
-		goto exit;
 
 	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 31b5cb232a43..0344206b984f 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -305,7 +305,6 @@ static int subscr_subscribe(struct tipc_subscr *s,
 		kfree(sub);
 		return -EINVAL;
 	}
-	INIT_LIST_HEAD(&sub->nameseq_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
 	sub->subscriber = subscriber;
 	sub->swap = swap;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 88bf289abdc9..cee479bc655c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -55,6 +55,7 @@ static int stale_bundle(struct dst_entry *dst);
 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
 static void xfrm_policy_queue_process(unsigned long arg);
 
+static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
 
@@ -561,7 +562,7 @@ static void xfrm_hash_resize(struct work_struct *work)
 	mutex_lock(&hash_resize_mutex);
 
 	total = 0;
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		if (xfrm_bydst_should_resize(net, dir, &total))
 			xfrm_bydst_resize(net, dir);
 	}
@@ -601,7 +602,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	/* reset the bydst and inexact table in all directions */
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
 		hmask = net->xfrm.policy_bydst[dir].hmask;
 		odst = net->xfrm.policy_bydst[dir].table;
@@ -779,8 +780,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 		hlist_add_behind(&policy->bydst, newpos);
 	else
 		hlist_add_head(&policy->bydst, chain);
-	xfrm_pol_hold(policy);
-	net->xfrm.policy_count[dir]++;
+	__xfrm_policy_link(policy, dir);
 	atomic_inc(&net->xfrm.flow_cache_genid);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
@@ -799,7 +799,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
-	list_add(&policy->walk.all, &net->xfrm.policy_all);
 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (delpol)
@@ -1247,17 +1246,10 @@ out:
 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 {
 	struct net *net = xp_net(pol);
-	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
-						     pol->family, dir);
 
 	list_add(&pol->walk.all, &net->xfrm.policy_all);
-	hlist_add_head(&pol->bydst, chain);
-	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
 	net->xfrm.policy_count[dir]++;
 	xfrm_pol_hold(pol);
-
-	if (xfrm_bydst_should_resize(net, dir, NULL))
-		schedule_work(&net->xfrm.policy_hash_work);
 }
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -1265,17 +1257,31 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 {
 	struct net *net = xp_net(pol);
 
-	if (hlist_unhashed(&pol->bydst))
+	if (list_empty(&pol->walk.all))
 		return NULL;
 
-	hlist_del_init(&pol->bydst);
-	hlist_del(&pol->byidx);
-	list_del(&pol->walk.all);
+	/* Socket policies are not hashed. */
+	if (!hlist_unhashed(&pol->bydst)) {
+		hlist_del(&pol->bydst);
+		hlist_del(&pol->byidx);
+	}
+
+	list_del_init(&pol->walk.all);
 	net->xfrm.policy_count[dir]--;
 
 	return pol;
 }
 
+static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
+{
+	__xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
+}
+
+static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
+{
+	__xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
+}
+
 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 {
 	struct net *net = xp_net(pol);
@@ -1307,7 +1313,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 	if (pol) {
 		pol->curlft.add_time = get_seconds();
 		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
-		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+		xfrm_sk_policy_link(pol, dir);
 	}
 	if (old_pol) {
 		if (pol)
@@ -1316,7 +1322,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 		/* Unlinking succeeds always. This is the only function
 		 * allowed to delete or replace socket policy.
 		 */
-		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+		xfrm_sk_policy_unlink(old_pol, dir);
 	}
 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
@@ -1349,7 +1355,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 		write_lock_bh(&net->xfrm.xfrm_policy_lock);
-		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
+		xfrm_sk_policy_link(newp, dir);
 		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 		xfrm_pol_put(newp);
 	}
@@ -1878,7 +1884,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 
 static void xfrm_policy_queue_process(unsigned long arg)
 {
-	int err = 0;
 	struct sk_buff *skb;
 	struct sock *sk;
 	struct dst_entry *dst;
@@ -1941,7 +1946,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
 
 		skb_dst_drop(skb);
 		skb_dst_set(skb, dst);
-		err = dst_output(skb);
+		dst_output(skb);
 	}
 
 out:
@@ -2966,10 +2971,11 @@ static int __net_init xfrm_policy_init(struct net *net)
 		goto out_byidx;
 	net->xfrm.policy_idx_hmask = hmask;
 
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		struct xfrm_policy_hash *htab;
 
 		net->xfrm.policy_count[dir] = 0;
+		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
 		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
 
 		htab = &net->xfrm.policy_bydst[dir];
@@ -3021,7 +3027,7 @@ static void xfrm_policy_fini(struct net *net)
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		struct xfrm_policy_hash *htab;
 
 		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
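Note: after this change only directions 0..XFRM_POLICY_MAX-1 are hashed; per-socket policies are tracked at index XFRM_POLICY_MAX+dir via the walk list and counters alone, which is why the resize/init/fini loops shrink to XFRM_POLICY_MAX. The detail that keeps the new __xfrm_policy_unlink() safe is the switch from list_del() to list_del_init(): a re-initialized walk.all entry keeps the list_empty() "am I linked?" test valid on a repeat unlink. Restated as a sketch, with our own function name:

	static struct xfrm_policy *policy_unlink(struct xfrm_policy *pol, int dir)
	{
		if (list_empty(&pol->walk.all))	/* already unlinked */
			return NULL;

		/* Socket policies were never hashed, so only global
		 * policies have bydst/byidx entries to remove. */
		if (!hlist_unhashed(&pol->bydst)) {
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
		}

		list_del_init(&pol->walk.all);	/* _init keeps the
						 * list_empty() test above
						 * working next time */
		return pol;
	}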
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e812e988c111..8128594ab379 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -824,13 +824,15 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	ret = xfrm_mark_put(skb, &x->mark);
 	if (ret)
 		goto out;
-	if (x->replay_esn) {
+	if (x->replay_esn)
 		ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
 			      xfrm_replay_state_esn_len(x->replay_esn),
 			      x->replay_esn);
-		if (ret)
-			goto out;
-	}
+	else
+		ret = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay),
+			      &x->replay);
+	if (ret)
+		goto out;
 	if (x->security)
 		ret = copy_sec_ctx(x->security, skb);
 out:
@@ -2569,6 +2571,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(x->tfcpad));
 	if (x->replay_esn)
 		l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn));
+	else
+		l += nla_total_size(sizeof(struct xfrm_replay_state));
 	if (x->security)
 		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
 				    x->security->ctx_len);
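Note: with this change an SA dump always carries replay state: XFRMA_REPLAY_ESN_VAL when ESN is in use, XFRMA_REPLAY_VAL otherwise, and xfrm_sa_len() grows by the matching amount so the allocated netlink message stays in sync with what copy_to_user_state_extra() emits. A userspace consumer would switch on the two attributes; a sketch assuming libnl-style attribute parsing (show_replay() and the attrs[] array are ours, the attribute and struct names come from the uapi headers):

	#include <stdio.h>
	#include <linux/xfrm.h>
	#include <netlink/attr.h>	/* libnl: nla_data() */

	static void show_replay(struct nlattr *attrs[])
	{
		if (attrs[XFRMA_REPLAY_ESN_VAL]) {
			struct xfrm_replay_state_esn *esn =
				nla_data(attrs[XFRMA_REPLAY_ESN_VAL]);
			printf("ESN replay: seq %u oseq %u\n",
			       esn->seq, esn->oseq);
		} else if (attrs[XFRMA_REPLAY_VAL]) {
			struct xfrm_replay_state *rs =
				nla_data(attrs[XFRMA_REPLAY_VAL]);
			printf("replay: seq %u oseq %u bitmap %#x\n",
			       rs->seq, rs->oseq, rs->bitmap);
		}
	}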