author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-15 15:04:25 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-15 15:04:25 -0700
commit | 9a76aba02a37718242d7cdc294f0a3901928aa57
tree | 2040d038f85d2120f21af83b0793efd5af1864e3 /net/packet/af_packet.c
parent | 0a957467c5fd46142bc9c52758ffc552d4c5e2f7
parent | 26a1ccc6c117be8e33e0410fce8c5298b0015b99
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
- Gustavo A. R. Silva keeps working on the implicit switch fall-through
changes.
- Support 802.11ax High-Efficiency wireless in cfg80211 et al, from
Luca Coelho.
- Re-enable ASPM in r8169, from Kai-Heng Feng.
- Add virtual XFRM interfaces, which avoids all of the limitations of
existing IPSEC tunnels. From Steffen Klassert.
- Convert GRO over to use a hash table, so that when we have many
flows active we don't traverse a long list during accumulation.
- Many new self tests for routing, TC, tunnels, etc. Too many
contributors to mention them all, but I'm really happy to keep
seeing this stuff.
- Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu.
- Lots of cleanups and fixes in L2TP code from Guillaume Nault.
- Add IPSEC offload support to netdevsim, from Shannon Nelson.
- Add support for slotting with non-uniform distribution to netem
packet scheduler, from Yousuk Seung.
- Add UDP GSO support to mlx5e, from Boris Pismenny.
- Support offloading of Team LAG in NFP, from John Hurley.
- Allow configuring TX queue selection based upon the RX queue, from
Amritha Nambiar.
- Support ethtool ring size configuration in aquantia, from Anton
Mikaev.
- Support DSCP and flowlabel per-transport in SCTP, from Xin Long.
- Support list based batching and stack traversal of SKBs, this is
very exciting work. From Edward Cree.
- Busyloop optimizations in vhost_net, from Toshiaki Makita.
- Introduce the ETF qdisc, which allows time-based transmissions (see the sketch after this list). IGB
can offload this in hardware. From Vinicius Costa Gomes.
- Add parameter support to devlink, from Moshe Shemesh.
- Several multiplication and division optimizations for BPF JIT in
nfp driver, from Jiong Wang.
- Lots of preparatory work to make more of the packet scheduler layer
lockless, when possible, from Vlad Buslov.
- Add ACK filter and NAT awareness to sch_cake packet scheduler, from
Toke Høiland-Jørgensen.
- Support regions and region snapshots in devlink, from Alex Vesker.
- Allow attaching XDP programs to both HW and SW at the same time on
a given device, with initial support in nfp. From Jakub Kicinski.
- Add TLS RX offload and support in mlx5, from Ilya Lesokhin.
- Use PHYLIB in r8169 driver, from Heiner Kallweit.
- All sorts of changes to support Spectrum 2 in mlxsw driver, from
Ido Schimmel.
- PTP support in mv88e6xxx DSA driver, from Andrew Lunn.
- Make TCP_USER_TIMEOUT socket option more accurate, from Jon
Maxwell.
- Support for templates in packet scheduler classifier, from Jiri
Pirko.
- IPV6 support in RDS, from Ka-Cheong Poon.
- Native tproxy support in nf_tables, from Máté Eckl.
- Maintain IP fragment queue in an rbtree, but optimize properly for
in-order frags. From Peter Oskolkov.
- Improve handling of ACKs on hole repairs, from Yuchung Cheng"
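To make the ETF highlight above concrete, the following is a small illustrative userspace sketch of the SO_TXTIME / SCM_TXTIME interface that time-based transmission builds on; it is not part of this merge. It assumes uapi headers new enough to provide struct sock_txtime (the fallback #defines cover older socket headers that lack SO_TXTIME), CAP_NET_RAW, an ETF or other time-aware qdisc already installed on the target device's TX queue, a hypothetical interface name "eth0", and an arbitrary 1 ms offset. The af_packet.c hunks in the diff further down are what copy the requested time (sockc.transmit_time) into skb->tstamp for packet sockets.

/*
 * Illustrative sketch (not from this merge): send one frame on an
 * AF_PACKET socket with a per-packet transmit time via SCM_TXTIME.
 */
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/net_tstamp.h>   /* struct sock_txtime (4.19+ uapi headers) */
#include <net/if.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>
#include <unistd.h>

#ifndef SO_TXTIME
#define SO_TXTIME   61          /* older userspace headers may lack these */
#define SCM_TXTIME  SO_TXTIME
#endif

int main(void)
{
        /* raw L2 socket; needs CAP_NET_RAW */
        int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (fd < 0) { perror("socket"); return 1; }

        /* opt in to per-packet transmit times, interpreted against CLOCK_TAI */
        struct sock_txtime st = { .clockid = CLOCK_TAI, .flags = 0 };
        if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, &st, sizeof(st)) < 0)
                perror("SO_TXTIME");

        /* address the frame at a specific interface ("eth0" is an example) */
        struct sockaddr_ll ll = {
                .sll_family   = AF_PACKET,
                .sll_protocol = htons(ETH_P_ALL),
                .sll_ifindex  = if_nametoindex("eth0"),
        };

        unsigned char frame[ETH_ZLEN] = { 0 };  /* dummy frame for illustration */
        struct iovec iov = { .iov_base = frame, .iov_len = sizeof(frame) };

        /* one SCM_TXTIME cmsg carrying a u64 nanosecond timestamp */
        char cbuf[CMSG_SPACE(sizeof(uint64_t))] = { 0 };
        struct msghdr msg = {
                .msg_name = &ll, .msg_namelen = sizeof(ll),
                .msg_iov = &iov, .msg_iovlen = 1,
                .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };

        struct timespec now;
        clock_gettime(CLOCK_TAI, &now);
        uint64_t txtime = (uint64_t)now.tv_sec * 1000000000ull +
                          now.tv_nsec + 1000000;        /* ~1 ms in the future */

        struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type  = SCM_TXTIME;
        cm->cmsg_len   = CMSG_LEN(sizeof(txtime));
        memcpy(CMSG_DATA(cm), &txtime, sizeof(txtime));

        if (sendmsg(fd, &msg, 0) < 0)
                perror("sendmsg");
        close(fd);
        return 0;
}

Without a time-aware qdisc on the chosen queue the timestamp has no pacing effect and the frame is simply sent right away; installing the etf qdisc with tc is out of scope here.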
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits)
bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT"
hv/netvsc: Fix NULL dereference at single queue mode fallback
net: filter: mark expected switch fall-through
xen-netfront: fix warn message as irq device name has '/'
cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0
net: dsa: mv88e6xxx: missing unlock on error path
rds: fix building with IPV6=m
inet/connection_sock: prefer _THIS_IP_ to current_text_addr
net: dsa: mv88e6xxx: bitwise vs logical bug
net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd()
ieee802154: hwsim: using right kind of iteration
net: hns3: Add vlan filter setting by ethtool command -K
net: hns3: Set tx ring' tc info when netdev is up
net: hns3: Remove tx ring BD len register in hns3_enet
net: hns3: Fix desc num set to default when setting channel
net: hns3: Fix for phy link issue when using marvell phy driver
net: hns3: Fix for information of phydev lost problem when down/up
net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero
net: hns3: Add support for serdes loopback selftest
bnxt_en: take coredump_record structure off stack
...
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r-- | net/packet/af_packet.c | 64
1 file changed, 25 insertions(+), 39 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e6445d8f3f57..5610061e7f2e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -275,9 +275,10 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
         return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
+                                  struct net_device *sb_dev)
 {
-        return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+        return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -291,7 +292,7 @@ static u16 packet_pick_tx_queue(struct sk_buff *skb)
                                                     __packet_pick_tx_queue);
                 queue_index = netdev_cap_txqueue(dev, queue_index);
         } else {
-                queue_index = __packet_pick_tx_queue(dev, skb);
+                queue_index = __packet_pick_tx_queue(dev, skb, NULL);
         }
 
         return queue_index;
@@ -1581,7 +1582,7 @@ static int fanout_set_data(struct packet_sock *po, char __user *data,
                 return fanout_set_data_ebpf(po, data, len);
         default:
                 return -EINVAL;
-        };
+        }
 }
 
 static void fanout_release_data(struct packet_fanout *f)
@@ -1590,7 +1591,7 @@ static void fanout_release_data(struct packet_fanout *f)
         case PACKET_FANOUT_CBPF:
         case PACKET_FANOUT_EBPF:
                 __fanout_set_data_bpf(f, NULL);
-        };
+        }
 }
 
 static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
@@ -1951,7 +1952,7 @@ retry:
                 goto out_unlock;
         }
 
-        sockc.tsflags = sk->sk_tsflags;
+        sockcm_init(&sockc, sk);
         if (msg->msg_controllen) {
                 err = sock_cmsg_send(sk, msg, &sockc);
                 if (unlikely(err))
@@ -1962,6 +1963,7 @@ retry:
         skb->dev = dev;
         skb->priority = sk->sk_priority;
         skb->mark = sk->sk_mark;
+        skb->tstamp = sockc.transmit_time;
 
         sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
 
@@ -2457,6 +2459,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
         skb->dev = dev;
         skb->priority = po->sk.sk_priority;
         skb->mark = po->sk.sk_mark;
+        skb->tstamp = sockc->transmit_time;
         sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
         skb_shinfo(skb)->destructor_arg = ph.raw;
 
@@ -2633,7 +2636,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
         if (unlikely(!(dev->flags & IFF_UP)))
                 goto out_put;
 
-        sockc.tsflags = po->sk.sk_tsflags;
+        sockcm_init(&sockc, &po->sk);
         if (msg->msg_controllen) {
                 err = sock_cmsg_send(&po->sk, msg, &sockc);
                 if (unlikely(err))
@@ -2829,7 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
         if (unlikely(!(dev->flags & IFF_UP)))
                 goto out_unlock;
 
-        sockc.tsflags = sk->sk_tsflags;
+        sockcm_init(&sockc, sk);
         sockc.mark = sk->sk_mark;
         if (msg->msg_controllen) {
                 err = sock_cmsg_send(sk, msg, &sockc);
@@ -2905,6 +2908,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
         skb->dev = dev;
         skb->priority = sk->sk_priority;
         skb->mark = sockc.mark;
+        skb->tstamp = sockc.transmit_time;
 
         if (has_vnet_hdr) {
                 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
@@ -4133,52 +4137,36 @@ static const struct vm_operations_struct packet_mmap_ops = {
         .close  =       packet_mm_close,
 };
 
-static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
-                        unsigned int len)
+static void free_pg_vec(struct pgv *pg_vec, unsigned int len)
 {
         int i;
 
         for (i = 0; i < len; i++) {
                 if (likely(pg_vec[i].buffer)) {
-                        if (is_vmalloc_addr(pg_vec[i].buffer))
-                                vfree(pg_vec[i].buffer);
-                        else
-                                free_pages((unsigned long)pg_vec[i].buffer,
-                                           order);
+                        kvfree(pg_vec[i].buffer);
                         pg_vec[i].buffer = NULL;
                 }
         }
         kfree(pg_vec);
 }
 
-static char *alloc_one_pg_vec_page(unsigned long order)
+static char *alloc_one_pg_vec_page(unsigned long size)
 {
         char *buffer;
-        gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
-                          __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
 
-        buffer = (char *) __get_free_pages(gfp_flags, order);
+        buffer = kvzalloc(size, GFP_KERNEL);
         if (buffer)
                 return buffer;
 
-        /* __get_free_pages failed, fall back to vmalloc */
-        buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
-        if (buffer)
-                return buffer;
+        buffer = kvzalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 
-        /* vmalloc failed, lets dig into swap here */
-        gfp_flags &= ~__GFP_NORETRY;
-        buffer = (char *) __get_free_pages(gfp_flags, order);
-        if (buffer)
-                return buffer;
-
-        /* complete and utter failure */
-        return NULL;
+        return buffer;
 }
 
-static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
+static struct pgv *alloc_pg_vec(struct tpacket_req *req)
 {
         unsigned int block_nr = req->tp_block_nr;
+        unsigned long size = req->tp_block_size;
         struct pgv *pg_vec;
         int i;
 
@@ -4187,7 +4175,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
                 goto out;
 
         for (i = 0; i < block_nr; i++) {
-                pg_vec[i].buffer = alloc_one_pg_vec_page(order);
+                pg_vec[i].buffer = alloc_one_pg_vec_page(size);
                 if (unlikely(!pg_vec[i].buffer))
                         goto out_free_pgvec;
         }
@@ -4196,7 +4184,7 @@ out:
         return pg_vec;
 
 out_free_pgvec:
-        free_pg_vec(pg_vec, order, block_nr);
+        free_pg_vec(pg_vec, block_nr);
         pg_vec = NULL;
         goto out;
 }
@@ -4206,9 +4194,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 {
         struct pgv *pg_vec = NULL;
         struct packet_sock *po = pkt_sk(sk);
-        int was_running, order = 0;
         struct packet_ring_buffer *rb;
         struct sk_buff_head *rb_queue;
+        int was_running;
         __be16 num;
         int err = -EINVAL;
         /* Added to avoid minimal code churn */
@@ -4270,8 +4258,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                         goto out;
 
                 err = -ENOMEM;
-                order = get_order(req->tp_block_size);
-                pg_vec = alloc_pg_vec(req, order);
+                pg_vec = alloc_pg_vec(req);
                 if (unlikely(!pg_vec))
                         goto out;
                 switch (po->tp_version) {
@@ -4325,7 +4312,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                 rb->frame_size = req->tp_frame_size;
                 spin_unlock_bh(&rb_queue->lock);
 
-                swap(rb->pg_vec_order, order);
                 swap(rb->pg_vec_len, req->tp_block_nr);
 
                 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
@@ -4351,7 +4337,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
         }
 
         if (pg_vec)
-                free_pg_vec(pg_vec, order, req->tp_block_nr);
+                free_pg_vec(pg_vec, req->tp_block_nr);
 out:
         return err;
 }
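For context on the alloc_pg_vec()/alloc_one_pg_vec_page() conversion above, the sketch below shows the userspace path that ends up in packet_set_ring(): requesting and mapping a PACKET_RX_RING. It is an illustrative example, not code from this commit; the block and frame sizes are arbitrary example values, it assumes CAP_NET_RAW and the default TPACKET_V1 layout, and the receive loop is omitted.

/*
 * Illustrative sketch: map a PACKET_RX_RING. The PACKET_RX_RING setsockopt
 * is what reaches packet_set_ring() -> alloc_pg_vec() in the diff above.
 */
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));  /* CAP_NET_RAW */
        if (fd < 0) { perror("socket"); return 1; }

        /* tp_block_size must be a multiple of PAGE_SIZE and of tp_frame_size */
        struct tpacket_req req = {
                .tp_block_size = 1 << 16,       /* 64 KiB per block */
                .tp_block_nr   = 64,            /* 4 MiB ring total */
                .tp_frame_size = 1 << 11,       /* 2 KiB per frame */
        };
        req.tp_frame_nr = (req.tp_block_size / req.tp_frame_size) * req.tp_block_nr;

        /* allocates one kvzalloc'd buffer per block after this change */
        if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0) {
                perror("PACKET_RX_RING");
                return 1;
        }

        size_t len = (size_t)req.tp_block_size * req.tp_block_nr;
        void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (ring == MAP_FAILED) { perror("mmap"); return 1; }

        /* ... poll(fd) and walk the struct tpacket_hdr slots here ... */

        munmap(ring, len);
        close(fd);
        return 0;
}

The practical effect of the kernel-side change is that large tp_block_size values no longer depend on high-order page availability: kvzalloc() falls back to vmalloc-backed memory on its own, which the packet mmap path already handled for the old vzalloc fallback.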