aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/tls.h19
-rw-r--r--net/ipv4/tcp.c42
-rw-r--r--net/tls/tls.h29
-rw-r--r--net/tls/tls_device.c19
-rw-r--r--net/tls/tls_main.c20
-rw-r--r--net/tls/tls_strp.c488
-rw-r--r--net/tls/tls_sw.c228
8 files changed, 723 insertions, 124 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f9e7c85ea829..b8620be32f8f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -673,6 +673,8 @@ void tcp_get_info(struct sock *, struct tcp_info *);
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
+struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
+void tcp_read_done(struct sock *sk, size_t len);
void tcp_initialize_rcv_mss(struct sock *sk);
diff --git a/include/net/tls.h b/include/net/tls.h
index 181c496b01b8..abb050b0df83 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -108,18 +108,33 @@ struct tls_sw_context_tx {
unsigned long tx_bitmask;
};
+struct tls_strparser {
+ struct sock *sk;
+
+ u32 mark : 8;
+ u32 stopped : 1;
+ u32 copy_mode : 1;
+ u32 msg_ready : 1;
+
+ struct strp_msg stm;
+
+ struct sk_buff *anchor;
+ struct work_struct work;
+};
+
struct tls_sw_context_rx {
struct crypto_aead *aead_recv;
struct crypto_wait async_wait;
- struct strparser strp;
struct sk_buff_head rx_list; /* list of decrypted 'data' records */
void (*saved_data_ready)(struct sock *sk);
- struct sk_buff *recv_pkt;
u8 reader_present;
u8 async_capable:1;
u8 zc_capable:1;
u8 reader_contended:1;
+
+ struct tls_strparser strp;
+
atomic_t decrypt_pending;
/* protect crypto_wait with decrypt_pending*/
spinlock_t decrypt_compl_lock;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ba2bdc811374..dc7cc3ce6a53 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1635,7 +1635,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
+struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
u32 offset;
@@ -1658,6 +1658,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
}
return NULL;
}
+EXPORT_SYMBOL(tcp_recv_skb);
/*
* This routine provides an alternative to tcp_recvmsg() for routines
@@ -1788,6 +1789,45 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
}
EXPORT_SYMBOL(tcp_read_skb);
+void tcp_read_done(struct sock *sk, size_t len)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 seq = tp->copied_seq;
+ struct sk_buff *skb;
+ size_t left;
+ u32 offset;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return;
+
+ left = len;
+ while (left && (skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ int used;
+
+ used = min_t(size_t, skb->len - offset, left);
+ seq += used;
+ left -= used;
+
+ if (skb->len > offset + used)
+ break;
+
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ tcp_eat_recv_skb(sk, skb);
+ ++seq;
+ break;
+ }
+ tcp_eat_recv_skb(sk, skb);
+ }
+ WRITE_ONCE(tp->copied_seq, seq);
+
+ tcp_rcv_space_adjust(sk);
+
+ /* Clean up data we have read: This will do ACK frames. */
+ if (left != len)
+ tcp_cleanup_rbuf(sk, len - left);
+}
+EXPORT_SYMBOL(tcp_read_done);
+
int tcp_peek_len(struct socket *sock)
{
return tcp_inq(sock->sk);
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 3740740504e3..0e840a0c3437 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2016 Tom Herbert <[email protected]>
* Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
* Copyright (c) 2016-2017, Dave Watson <[email protected]>. All rights reserved.
*
@@ -127,8 +128,24 @@ int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info);
-int tls_strp_msg_hold(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *dst);
+int tls_strp_dev_init(void);
+void tls_strp_dev_exit(void);
+
+void tls_strp_done(struct tls_strparser *strp);
+void tls_strp_stop(struct tls_strparser *strp);
+int tls_strp_init(struct tls_strparser *strp, struct sock *sk);
+void tls_strp_data_ready(struct tls_strparser *strp);
+
+void tls_strp_check_rcv(struct tls_strparser *strp);
+void tls_strp_msg_done(struct tls_strparser *strp);
+
+int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb);
+void tls_rx_msg_ready(struct tls_strparser *strp);
+
+void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh);
+int tls_strp_msg_cow(struct tls_sw_context_rx *ctx);
+struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx);
+int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst);
static inline struct tls_msg *tls_msg(struct sk_buff *skb)
{
@@ -139,7 +156,13 @@ static inline struct tls_msg *tls_msg(struct sk_buff *skb)
static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx)
{
- return ctx->recv_pkt;
+ DEBUG_NET_WARN_ON_ONCE(!ctx->strp.msg_ready || !ctx->strp.anchor->len);
+ return ctx->strp.anchor;
+}
+
+static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
+{
+ return ctx->strp.msg_ready;
}
#ifdef CONFIG_TLS_DEVICE
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index b1fcd61836d1..fc513c1806a0 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -894,27 +894,26 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
static int
tls_device_reencrypt(struct sock *sk, struct tls_sw_context_rx *sw_ctx)
{
- int err = 0, offset, copy, nsg, data_len, pos;
- struct sk_buff *skb, *skb_iter, *unused;
+ int err, offset, copy, data_len, pos;
+ struct sk_buff *skb, *skb_iter;
struct scatterlist sg[1];
struct strp_msg *rxm;
char *orig_buf, *buf;
- skb = tls_strp_msg(sw_ctx);
- rxm = strp_msg(skb);
- offset = rxm->offset;
-
+ rxm = strp_msg(tls_strp_msg(sw_ctx));
orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE +
TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation);
if (!orig_buf)
return -ENOMEM;
buf = orig_buf;
- nsg = skb_cow_data(skb, 0, &unused);
- if (unlikely(nsg < 0)) {
- err = nsg;
+ err = tls_strp_msg_cow(sw_ctx);
+ if (unlikely(err))
goto free_buf;
- }
+
+ skb = tls_strp_msg(sw_ctx);
+ rxm = strp_msg(skb);
+ offset = rxm->offset;
sg_init_table(sg, 1);
sg_set_buf(&sg[0], buf,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9703636cfc60..08ddf9d837ae 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -725,6 +725,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
if (tx) {
ctx->sk_write_space = sk->sk_write_space;
sk->sk_write_space = tls_write_space;
+ } else {
+ struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(ctx);
+
+ tls_strp_check_rcv(&rx_ctx->strp);
}
return 0;
@@ -1141,20 +1145,28 @@ static int __init tls_register(void)
if (err)
return err;
+ err = tls_strp_dev_init();
+ if (err)
+ goto err_pernet;
+
err = tls_device_init();
- if (err) {
- unregister_pernet_subsys(&tls_proc_ops);
- return err;
- }
+ if (err)
+ goto err_strp;
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
+err_strp:
+ tls_strp_dev_exit();
+err_pernet:
+ unregister_pernet_subsys(&tls_proc_ops);
+ return err;
}
static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
+ tls_strp_dev_exit();
tls_device_cleanup();
unregister_pernet_subsys(&tls_proc_ops);
}
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 9ccab79a6e1e..b945288c312e 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -1,17 +1,493 @@
// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2016 Tom Herbert <[email protected]> */
#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <net/strparser.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/tls.h>
#include "tls.h"
-int tls_strp_msg_hold(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *dst)
+static struct workqueue_struct *tls_strp_wq;
+
+static void tls_strp_abort_strp(struct tls_strparser *strp, int err)
+{
+ if (strp->stopped)
+ return;
+
+ strp->stopped = 1;
+
+ /* Report an error on the lower socket */
+ strp->sk->sk_err = -err;
+ sk_error_report(strp->sk);
+}
+
+static void tls_strp_anchor_free(struct tls_strparser *strp)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
+ shinfo->frag_list = NULL;
+ consume_skb(strp->anchor);
+ strp->anchor = NULL;
+}
+
+/* Create a new skb with the contents of input copied to its page frags */
+static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
{
- struct sk_buff *clone;
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+ int i, err, offset;
+
+ skb = alloc_skb_with_frags(0, strp->anchor->len, TLS_PAGE_ORDER,
+ &err, strp->sk->sk_allocation);
+ if (!skb)
+ return NULL;
+
+ offset = strp->stm.offset;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
+ skb_frag_address(frag),
+ skb_frag_size(frag)));
+ offset += skb_frag_size(frag);
+ }
+
+ skb_copy_header(skb, strp->anchor);
+ rxm = strp_msg(skb);
+ rxm->offset = 0;
+ return skb;
+}
+
+/* Steal the input skb, input msg is invalid after calling this function */
+struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx)
+{
+ struct tls_strparser *strp = &ctx->strp;
+
+#ifdef CONFIG_TLS_DEVICE
+ DEBUG_NET_WARN_ON_ONCE(!strp->anchor->decrypted);
+#else
+ /* This function turns an input into an output,
+ * that can only happen if we have offload.
+ */
+ WARN_ON(1);
+#endif
+
+ if (strp->copy_mode) {
+ struct sk_buff *skb;
+
+ /* Replace anchor with an empty skb, this is a little
+ * dangerous but __tls_cur_msg() warns on empty skbs
+ * so hopefully we'll catch abuses.
+ */
+ skb = alloc_skb(0, strp->sk->sk_allocation);
+ if (!skb)
+ return NULL;
- clone = skb_clone(skb, sk->sk_allocation);
- if (!clone)
+ swap(strp->anchor, skb);
+ return skb;
+ }
+
+ return tls_strp_msg_make_copy(strp);
+}
+
+/* Force the input skb to be in copy mode. The data ownership remains
+ * with the input skb itself (meaning unpause will wipe it) but it can
+ * be modified.
+ */
+int tls_strp_msg_cow(struct tls_sw_context_rx *ctx)
+{
+ struct tls_strparser *strp = &ctx->strp;
+ struct sk_buff *skb;
+
+ if (strp->copy_mode)
+ return 0;
+
+ skb = tls_strp_msg_make_copy(strp);
+ if (!skb)
return -ENOMEM;
- __skb_queue_tail(dst, clone);
+
+ tls_strp_anchor_free(strp);
+ strp->anchor = skb;
+
+ tcp_read_done(strp->sk, strp->stm.full_len);
+ strp->copy_mode = 1;
+
return 0;
}
+
+/* Make a clone (in the skb sense) of the input msg to keep a reference
+ * to the underlying data. The reference-holding skbs get placed on
+ * @dst.
+ */
+int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+
+ if (strp->copy_mode) {
+ struct sk_buff *skb;
+
+ WARN_ON_ONCE(!shinfo->nr_frags);
+
+ /* We can't skb_clone() the anchor, it gets wiped by unpause */
+ skb = alloc_skb(0, strp->sk->sk_allocation);
+ if (!skb)
+ return -ENOMEM;
+
+ __skb_queue_tail(dst, strp->anchor);
+ strp->anchor = skb;
+ } else {
+ struct sk_buff *iter, *clone;
+ int chunk, len, offset;
+
+ offset = strp->stm.offset;
+ len = strp->stm.full_len;
+ iter = shinfo->frag_list;
+
+ while (len > 0) {
+ if (iter->len <= offset) {
+ offset -= iter->len;
+ goto next;
+ }
+
+ chunk = iter->len - offset;
+ offset = 0;
+
+ clone = skb_clone(iter, strp->sk->sk_allocation);
+ if (!clone)
+ return -ENOMEM;
+ __skb_queue_tail(dst, clone);
+
+ len -= chunk;
+next:
+ iter = iter->next;
+ }
+ }
+
+ return 0;
+}
+
+static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
+
+ for (i = 0; i < shinfo->nr_frags; i++)
+ __skb_frag_unref(&shinfo->frags[i], false);
+ shinfo->nr_frags = 0;
+ strp->copy_mode = 0;
+}
+
+static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+ unsigned int offset, size_t in_len)
+{
+ struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+ size_t sz, len, chunk;
+ struct sk_buff *skb;
+ skb_frag_t *frag;
+
+ if (strp->msg_ready)
+ return 0;
+
+ skb = strp->anchor;
+ frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
+
+ len = in_len;
+ /* First make sure we got the header */
+ if (!strp->stm.full_len) {
+ /* Assume one page is more than enough for headers */
+ chunk = min_t(size_t, len, PAGE_SIZE - skb_frag_size(frag));
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
+ skb_frag_address(frag) +
+ skb_frag_size(frag),
+ chunk));
+
+ sz = tls_rx_msg_size(strp, strp->anchor);
+ if (sz < 0) {
+ desc->error = sz;
+ return 0;
+ }
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (sz > 0)
+ chunk = min_t(size_t, chunk, sz - skb->len);
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+ skb_frag_size_add(frag, chunk);
+ frag++;
+ len -= chunk;
+ offset += chunk;
+
+ strp->stm.full_len = sz;
+ if (!strp->stm.full_len)
+ goto read_done;
+ }
+
+ /* Load up more data */
+ while (len && strp->stm.full_len > skb->len) {
+ chunk = min_t(size_t, len, strp->stm.full_len - skb->len);
+ chunk = min_t(size_t, chunk, PAGE_SIZE - skb_frag_size(frag));
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
+ skb_frag_address(frag) +
+ skb_frag_size(frag),
+ chunk));
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+ skb_frag_size_add(frag, chunk);
+ frag++;
+ len -= chunk;
+ offset += chunk;
+ }
+
+ if (strp->stm.full_len == skb->len) {
+ desc->count = 0;
+
+ strp->msg_ready = 1;
+ tls_rx_msg_ready(strp);
+ }
+
+read_done:
+ return in_len - len;
+}
+
+static int tls_strp_read_copyin(struct tls_strparser *strp)
+{
+ struct socket *sock = strp->sk->sk_socket;
+ read_descriptor_t desc;
+
+ desc.arg.data = strp;
+ desc.error = 0;
+ desc.count = 1; /* give more than one skb per call */
+
+ /* sk should be locked here, so okay to do read_sock */
+ sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin);
+
+ return desc.error;
+}
+
+static int tls_strp_read_short(struct tls_strparser *strp)
+{
+ struct skb_shared_info *shinfo;
+ struct page *page;
+ int need_spc, len;
+
+ /* If the rbuf is small or rcv window has collapsed to 0 we need
+ * to read the data out. Otherwise the connection will stall.
+ * Without pressure threshold of INT_MAX will never be ready.
+ */
+ if (likely(!tcp_epollin_ready(strp->sk, INT_MAX)))
+ return 0;
+
+ shinfo = skb_shinfo(strp->anchor);
+ shinfo->frag_list = NULL;
+
+ /* If we don't know the length go max plus page for cipher overhead */
+ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
+
+ for (len = need_spc; len > 0; len -= PAGE_SIZE) {
+ page = alloc_page(strp->sk->sk_allocation);
+ if (!page) {
+ tls_strp_flush_anchor_copy(strp);
+ return -ENOMEM;
+ }
+
+ skb_fill_page_desc(strp->anchor, shinfo->nr_frags++,
+ page, 0, 0);
+ }
+
+ strp->copy_mode = 1;
+ strp->stm.offset = 0;
+
+ strp->anchor->len = 0;
+ strp->anchor->data_len = 0;
+ strp->anchor->truesize = round_up(need_spc, PAGE_SIZE);
+
+ tls_strp_read_copyin(strp);
+
+ return 0;
+}
+
+static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len)
+{
+ struct tcp_sock *tp = tcp_sk(strp->sk);
+ struct sk_buff *first;
+ u32 offset;
+
+ first = tcp_recv_skb(strp->sk, tp->copied_seq, &offset);
+ if (WARN_ON_ONCE(!first))
+ return;
+
+ /* Bestow the state onto the anchor */
+ strp->anchor->len = offset + len;
+ strp->anchor->data_len = offset + len;
+ strp->anchor->truesize = offset + len;
+
+ skb_shinfo(strp->anchor)->frag_list = first;
+
+ skb_copy_header(strp->anchor, first);
+ strp->anchor->destructor = NULL;
+
+ strp->stm.offset = offset;
+}
+
+void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
+{
+ struct strp_msg *rxm;
+ struct tls_msg *tlm;
+
+ DEBUG_NET_WARN_ON_ONCE(!strp->msg_ready);
+ DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len);
+
+ if (!strp->copy_mode && force_refresh) {
+ if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len))
+ return;
+
+ tls_strp_load_anchor_with_queue(strp, strp->stm.full_len);
+ }
+
+ rxm = strp_msg(strp->anchor);
+ rxm->full_len = strp->stm.full_len;
+ rxm->offset = strp->stm.offset;
+ tlm = tls_msg(strp->anchor);
+ tlm->control = strp->mark;
+}
+
+/* Called with lock held on lower socket */
+static int tls_strp_read_sock(struct tls_strparser *strp)
+{
+ int sz, inq;
+
+ inq = tcp_inq(strp->sk);
+ if (inq < 1)
+ return 0;
+
+ if (unlikely(strp->copy_mode))
+ return tls_strp_read_copyin(strp);
+
+ if (inq < strp->stm.full_len)
+ return tls_strp_read_short(strp);
+
+ if (!strp->stm.full_len) {
+ tls_strp_load_anchor_with_queue(strp, inq);
+
+ sz = tls_rx_msg_size(strp, strp->anchor);
+ if (sz < 0) {
+ tls_strp_abort_strp(strp, sz);
+ return sz;
+ }
+
+ strp->stm.full_len = sz;
+
+ if (!strp->stm.full_len || inq < strp->stm.full_len)
+ return tls_strp_read_short(strp);
+ }
+
+ strp->msg_ready = 1;
+ tls_rx_msg_ready(strp);
+
+ return 0;
+}
+
+void tls_strp_check_rcv(struct tls_strparser *strp)
+{
+ if (unlikely(strp->stopped) || strp->msg_ready)
+ return;
+
+ if (tls_strp_read_sock(strp) == -ENOMEM)
+ queue_work(tls_strp_wq, &strp->work);
+}
+
+/* Lower sock lock held */
+void tls_strp_data_ready(struct tls_strparser *strp)
+{
+ /* This check is needed to synchronize with do_tls_strp_work.
+ * do_tls_strp_work acquires a process lock (lock_sock) whereas
+ * the lock held here is bh_lock_sock. The two locks can be
+ * held by different threads at the same time, but bh_lock_sock
+ * allows a thread in BH context to safely check if the process
+ * lock is held. In this case, if the lock is held, queue work.
+ */
+ if (sock_owned_by_user_nocheck(strp->sk)) {
+ queue_work(tls_strp_wq, &strp->work);
+ return;
+ }
+
+ tls_strp_check_rcv(strp);
+}
+
+static void tls_strp_work(struct work_struct *w)
+{
+ struct tls_strparser *strp =
+ container_of(w, struct tls_strparser, work);
+
+ lock_sock(strp->sk);
+ tls_strp_check_rcv(strp);
+ release_sock(strp->sk);
+}
+
+void tls_strp_msg_done(struct tls_strparser *strp)
+{
+ WARN_ON(!strp->stm.full_len);
+
+ if (likely(!strp->copy_mode))
+ tcp_read_done(strp->sk, strp->stm.full_len);
+ else
+ tls_strp_flush_anchor_copy(strp);
+
+ strp->msg_ready = 0;
+ memset(&strp->stm, 0, sizeof(strp->stm));
+
+ tls_strp_check_rcv(strp);
+}
+
+void tls_strp_stop(struct tls_strparser *strp)
+{
+ strp->stopped = 1;
+}
+
+int tls_strp_init(struct tls_strparser *strp, struct sock *sk)
+{
+ memset(strp, 0, sizeof(*strp));
+
+ strp->sk = sk;
+
+ strp->anchor = alloc_skb(0, GFP_KERNEL);
+ if (!strp->anchor)
+ return -ENOMEM;
+
+ INIT_WORK(&strp->work, tls_strp_work);
+
+ return 0;
+}
+
+/* strp must already be stopped so that tls_strp_recv will no longer be called.
+ * Note that tls_strp_done is not called with the lower socket held.
+ */
+void tls_strp_done(struct tls_strparser *strp)
+{
+ WARN_ON(!strp->stopped);
+
+ cancel_work_sync(&strp->work);
+ tls_strp_anchor_free(strp);
+}
+
+int __init tls_strp_dev_init(void)
+{
+ tls_strp_wq = create_singlethread_workqueue("kstrp");
+ if (unlikely(!tls_strp_wq))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void tls_strp_dev_exit(void)
+{
+ destroy_workqueue(tls_strp_wq);
+}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index ed5e6f1df9c7..0fc24a5ce208 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1283,13 +1283,13 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
static int
tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
- long timeo)
+ bool released, long timeo)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
- while (!ctx->recv_pkt) {
+ while (!tls_strp_msg_ready(ctx)) {
if (!sk_psock_queue_empty(psock))
return 0;
@@ -1297,8 +1297,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
return sock_error(sk);
if (!skb_queue_empty(&sk->sk_receive_queue)) {
- __strp_unpause(&ctx->strp);
- if (ctx->recv_pkt)
+ tls_strp_check_rcv(&ctx->strp);
+ if (tls_strp_msg_ready(ctx))
break;
}
@@ -1311,10 +1311,12 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
if (nonblock || !timeo)
return -EAGAIN;
+ released = true;
add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo,
- ctx->recv_pkt || !sk_psock_queue_empty(psock),
+ tls_strp_msg_ready(ctx) ||
+ !sk_psock_queue_empty(psock),
&wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -1324,6 +1326,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
return sock_intr_errno(timeo);
}
+ tls_strp_msg_load(&ctx->strp, released);
+
return 1;
}
@@ -1408,13 +1412,15 @@ tls_alloc_clrtxt_skb(struct sock *sk, struct sk_buff *skb,
/* Decrypt handlers
*
- * tls_decrypt_sg() and tls_decrypt_device() are decrypt handlers.
+ * tls_decrypt_sw() and tls_decrypt_device() are decrypt handlers.
* They must transform the darg in/out argument are as follows:
* | Input | Output
* -------------------------------------------------------------------
* zc | Zero-copy decrypt allowed | Zero-copy performed
* async | Async decrypt allowed | Async crypto used / in progress
* skb | * | Output skb
+ *
+ * If ZC decryption was performed darg.skb will point to the input skb.
*/
/* This function decrypts the input skb into either out_iov or in out_sg
@@ -1567,7 +1573,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
clear_skb = NULL;
if (unlikely(darg->async)) {
- err = tls_strp_msg_hold(sk, skb, &ctx->async_hold);
+ err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold);
if (err)
__skb_queue_tail(&ctx->async_hold, darg->skb);
return err;
@@ -1588,49 +1594,22 @@ exit_free_skb:
}
static int
-tls_decrypt_device(struct sock *sk, struct tls_context *tls_ctx,
- struct tls_decrypt_arg *darg)
-{
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- int err;
-
- if (tls_ctx->rx_conf != TLS_HW)
- return 0;
-
- err = tls_device_decrypted(sk, tls_ctx);
- if (err <= 0)
- return err;
-
- darg->zc = false;
- darg->async = false;
- darg->skb = tls_strp_msg(ctx);
- ctx->recv_pkt = NULL;
- return 1;
-}
-
-static int tls_rx_one_record(struct sock *sk, struct iov_iter *dest,
- struct tls_decrypt_arg *darg)
+tls_decrypt_sw(struct sock *sk, struct tls_context *tls_ctx,
+ struct msghdr *msg, struct tls_decrypt_arg *darg)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct strp_msg *rxm;
int pad, err;
- err = tls_decrypt_device(sk, tls_ctx, darg);
- if (err < 0)
- return err;
- if (err)
- goto decrypt_done;
-
- err = tls_decrypt_sg(sk, dest, NULL, darg);
+ err = tls_decrypt_sg(sk, &msg->msg_iter, NULL, darg);
if (err < 0) {
if (err == -EBADMSG)
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
return err;
}
- if (darg->async)
- goto decrypt_done;
+ /* keep going even for ->async, the code below is TLS 1.3 */
+
/* If opportunistic TLS 1.3 ZC failed retry without ZC */
if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION &&
darg->tail != TLS_RECORD_TYPE_DATA)) {
@@ -1638,21 +1617,87 @@ static int tls_rx_one_record(struct sock *sk, struct iov_iter *dest,
if (!darg->tail)
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXNOPADVIOL);
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTRETRY);
- return tls_rx_one_record(sk, dest, darg);
+ return tls_decrypt_sw(sk, tls_ctx, msg, darg);
}
-decrypt_done:
- if (darg->skb == ctx->recv_pkt)
- ctx->recv_pkt = NULL;
-
pad = tls_padding_length(prot, darg->skb, darg);
if (pad < 0) {
- consume_skb(darg->skb);
+ if (darg->skb != tls_strp_msg(ctx))
+ consume_skb(darg->skb);
return pad;
}
rxm = strp_msg(darg->skb);
rxm->full_len -= pad;
+
+ return 0;
+}
+
+static int
+tls_decrypt_device(struct sock *sk, struct msghdr *msg,
+ struct tls_context *tls_ctx, struct tls_decrypt_arg *darg)
+{
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm;
+ int pad, err;
+
+ if (tls_ctx->rx_conf != TLS_HW)
+ return 0;
+
+ err = tls_device_decrypted(sk, tls_ctx);
+ if (err <= 0)
+ return err;
+
+ pad = tls_padding_length(prot, tls_strp_msg(ctx), darg);
+ if (pad < 0)
+ return pad;
+
+ darg->async = false;
+ darg->skb = tls_strp_msg(ctx);
+ /* ->zc downgrade check, in case TLS 1.3 gets here */
+ darg->zc &= !(prot->version == TLS_1_3_VERSION &&
+ tls_msg(darg->skb)->control != TLS_RECORD_TYPE_DATA);
+
+ rxm = strp_msg(darg->skb);
+ rxm->full_len -= pad;
+
+ if (!darg->zc) {
+ /* Non-ZC case needs a real skb */
+ darg->skb = tls_strp_msg_detach(ctx);
+ if (!darg->skb)
+ return -ENOMEM;
+ } else {
+ unsigned int off, len;
+
+ /* In ZC case nobody cares about the output skb.
+ * Just copy the data here. Note the skb is not fully trimmed.
+ */
+ off = rxm->offset + prot->prepend_size;
+ len = rxm->full_len - prot->overhead_size;
+
+ err = skb_copy_datagram_msg(darg->skb, off, msg, len);
+ if (err)
+ return err;
+ }
+ return 1;
+}
+
+static int tls_rx_one_record(struct sock *sk, struct msghdr *msg,
+ struct tls_decrypt_arg *darg)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm;
+ int err;
+
+ err = tls_decrypt_device(sk, msg, tls_ctx, darg);
+ if (!err)
+ err = tls_decrypt_sw(sk, tls_ctx, msg, darg);
+ if (err < 0)
+ return err;
+
+ rxm = strp_msg(darg->skb);
rxm->offset += prot->prepend_size;
rxm->full_len -= prot->overhead_size;
tls_advance_record_sn(sk, prot, &tls_ctx->rx);
@@ -1692,9 +1737,7 @@ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm,
static void tls_rx_rec_done(struct tls_sw_context_rx *ctx)
{
- consume_skb(ctx->recv_pkt);
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
+ tls_strp_msg_done(&ctx->strp);
}
/* This function traverses the rx_list in tls receive context to copies the
@@ -1781,7 +1824,7 @@ out:
return copied ? : err;
}
-static void
+static bool
tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
size_t len_left, size_t decrypted, ssize_t done,
size_t *flushed_at)
@@ -1789,14 +1832,14 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
size_t max_rec;
if (len_left <= decrypted)
- return;
+ return false;
max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE;
if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec)
- return;
+ return false;
*flushed_at = done;
- sk_flush_backlog(sk);
+ return sk_flush_backlog(sk);
}
static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
@@ -1868,13 +1911,13 @@ int tls_sw_recvmsg(struct sock *sk,
size_t flushed_at = 0;
struct strp_msg *rxm;
struct tls_msg *tlm;
- struct sk_buff *skb;
ssize_t copied = 0;
bool async = false;
int target, err = 0;
long timeo;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool released = true;
bool bpf_strp_enabled;
bool zc_capable;
@@ -1907,11 +1950,12 @@ int tls_sw_recvmsg(struct sock *sk,
zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
ctx->zc_capable;
decrypted = 0;
- while (len && (decrypted + copied < target || ctx->recv_pkt)) {
+ while (len && (decrypted + copied < target || tls_strp_msg_ready(ctx))) {
struct tls_decrypt_arg darg;
int to_decrypt, chunk;
- err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, timeo);
+ err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, released,
+ timeo);
if (err <= 0) {
if (psock) {
chunk = sk_msg_recvmsg(sk, psock, msg, len,
@@ -1927,8 +1971,8 @@ int tls_sw_recvmsg(struct sock *sk,
memset(&darg.inargs, 0, sizeof(darg.inargs));
- rxm = strp_msg(ctx->recv_pkt);
- tlm = tls_msg(ctx->recv_pkt);
+ rxm = strp_msg(tls_strp_msg(ctx));
+ tlm = tls_msg(tls_strp_msg(ctx));
to_decrypt = rxm->full_len - prot->overhead_size;
@@ -1942,16 +1986,12 @@ int tls_sw_recvmsg(struct sock *sk,
else
darg.async = false;
- err = tls_rx_one_record(sk, &msg->msg_iter, &darg);
+ err = tls_rx_one_record(sk, msg, &darg);
if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto recv_end;
}
- skb = darg.skb;
- rxm = strp_msg(skb);
- tlm = tls_msg(skb);
-
async |= darg.async;
/* If the type of records being processed is not known yet,
@@ -1961,24 +2001,30 @@ int tls_sw_recvmsg(struct sock *sk,
* is known just after record is dequeued from stream parser.
* For tls1.3, we disable async.
*/
- err = tls_record_content_type(msg, tlm, &control);
+ err = tls_record_content_type(msg, tls_msg(darg.skb), &control);
if (err <= 0) {
+ DEBUG_NET_WARN_ON_ONCE(darg.zc);
tls_rx_rec_done(ctx);
put_on_rx_list_err:
- __skb_queue_tail(&ctx->rx_list, skb);
+ __skb_queue_tail(&ctx->rx_list, darg.skb);
goto recv_end;
}
/* periodically flush backlog, and feed strparser */
- tls_read_flush_backlog(sk, prot, len, to_decrypt,
- decrypted + copied, &flushed_at);
+ released = tls_read_flush_backlog(sk, prot, len, to_decrypt,
+ decrypted + copied,
+ &flushed_at);
/* TLS 1.3 may have updated the length by more than overhead */
+ rxm = strp_msg(darg.skb);
chunk = rxm->full_len;
tls_rx_rec_done(ctx);
if (!darg.zc) {
bool partially_consumed = chunk > len;
+ struct sk_buff *skb = darg.skb;
+
+ DEBUG_NET_WARN_ON_ONCE(darg.skb == tls_strp_msg(ctx));
if (async) {
/* TLS 1.2-only, to_decrypt must be text len */
@@ -1992,6 +2038,7 @@ put_on_rx_list:
}
if (bpf_strp_enabled) {
+ released = true;
err = sk_psock_tls_strp_read(psock, skb);
if (err != __SK_PASS) {
rxm->offset = rxm->offset + rxm->full_len;
@@ -2018,13 +2065,13 @@ put_on_rx_list:
rxm->full_len -= chunk;
goto put_on_rx_list;
}
+
+ consume_skb(skb);
}
decrypted += chunk;
len -= chunk;
- consume_skb(skb);
-
/* Return full control message to userspace before trying
* to parse another message type
*/
@@ -2098,7 +2145,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct tls_decrypt_arg darg;
err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
- timeo);
+ true, timeo);
if (err <= 0)
goto splice_read_end;
@@ -2158,23 +2205,21 @@ bool tls_sw_sock_is_readable(struct sock *sk)
ingress_empty = list_empty(&psock->ingress_msg);
rcu_read_unlock();
- return !ingress_empty || ctx->recv_pkt ||
+ return !ingress_empty || tls_strp_msg_ready(ctx) ||
!skb_queue_empty(&ctx->rx_list);
}
-static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
- struct strp_msg *rxm = strp_msg(skb);
- struct tls_msg *tlm = tls_msg(skb);
size_t cipher_overhead;
size_t data_len = 0;
int ret;
/* Verify that we have a full TLS header, or wait for more data */
- if (rxm->offset + prot->prepend_size > skb->len)
+ if (strp->stm.offset + prot->prepend_size > skb->len)
return 0;
/* Sanity-check size of on-stack buffer. */
@@ -2184,11 +2229,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
}
/* Linearize header to local buffer */
- ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
+ ret = skb_copy_bits(skb, strp->stm.offset, header, prot->prepend_size);
if (ret < 0)
goto read_failure;
- tlm->control = header[0];
+ strp->mark = header[0];
data_len = ((header[4] & 0xFF) | (header[3] << 8));
@@ -2215,7 +2260,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
}
tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
- TCP_SKB_CB(skb)->seq + rxm->offset);
+ TCP_SKB_CB(skb)->seq + strp->stm.offset);
return data_len + TLS_HEADER_SIZE;
read_failure:
@@ -2224,14 +2269,11 @@ read_failure:
return ret;
}
-static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+void tls_rx_msg_ready(struct tls_strparser *strp)
{
- struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-
- ctx->recv_pkt = skb;
- strp_pause(strp);
+ struct tls_sw_context_rx *ctx;
+ ctx = container_of(strp, struct tls_sw_context_rx, strp);
ctx->saved_data_ready(strp->sk);
}
@@ -2241,7 +2283,7 @@ static void tls_data_ready(struct sock *sk)
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
- strp_data_ready(&ctx->strp);
+ tls_strp_data_ready(&ctx->strp);
psock = sk_psock_get(sk);
if (psock) {
@@ -2317,13 +2359,11 @@ void tls_sw_release_resources_rx(struct sock *sk)
kfree(tls_ctx->rx.iv);
if (ctx->aead_recv) {
- kfree_skb(ctx->recv_pkt);
- ctx->recv_pkt = NULL;
__skb_queue_purge(&ctx->rx_list);
crypto_free_aead(ctx->aead_recv);
- strp_stop(&ctx->strp);
+ tls_strp_stop(&ctx->strp);
/* If tls_sw_strparser_arm() was not called (cleanup paths)
- * we still want to strp_stop(), but sk->sk_data_ready was
+ * we still want to tls_strp_stop(), but sk->sk_data_ready was
* never swapped.
*/
if (ctx->saved_data_ready) {
@@ -2338,7 +2378,7 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx)
{
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- strp_done(&ctx->strp);
+ tls_strp_done(&ctx->strp);
}
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx)
@@ -2411,8 +2451,6 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
rx_ctx->saved_data_ready = sk->sk_data_ready;
sk->sk_data_ready = tls_data_ready;
write_unlock_bh(&sk->sk_callback_lock);
-
- strp_check_rcv(&rx_ctx->strp);
}
void tls_update_rx_zc_capable(struct tls_context *tls_ctx)
@@ -2432,7 +2470,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
struct tls_sw_context_rx *sw_ctx_rx = NULL;
struct cipher_context *cctx;
struct crypto_aead **aead;
- struct strp_callbacks cb;
u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
struct crypto_tfm *tfm;
char *iv, *rec_seq, *key, *salt, *cipher_name;
@@ -2666,12 +2703,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
crypto_info->version != TLS_1_3_VERSION &&
!!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC);
- /* Set up strparser */
- memset(&cb, 0, sizeof(cb));
- cb.rcv_msg = tls_queue;
- cb.parse_msg = tls_read_size;
-
- strp_init(&sw_ctx_rx->strp, sk, &cb);
+ tls_strp_init(&sw_ctx_rx->strp, sk);
}
goto out;