From a8ae890b9c5f0d812337d97f2552c0d772957dc2 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:44 +0100 Subject: smc: make smc_close_active_abort() static smc_close_active_abort() is used in smc_close.c only. Make it static. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 2 +- net/smc/smc_close.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 48615d2ac4aa..e194c6cc308a 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -113,7 +113,7 @@ static int smc_close_abort(struct smc_connection *conn) /* terminate smc socket abnormally - active abort * RDMA communication no longer possible */ -void smc_close_active_abort(struct smc_sock *smc) +static void smc_close_active_abort(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index ed82506b1b0a..8c498885d758 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -20,7 +20,6 @@ #define SMC_CLOSE_SOCK_PUT_DELAY HZ void smc_close_wake_tx_prepared(struct smc_sock *smc); -void smc_close_active_abort(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); -- cgit v1.2.3-73-gaa49b From 0c9f1515aa80f12734123e5fcc50ffe525e1d533 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:45 +0100 Subject: smc: improve smc_clc_send_decline() error handling Let smc_clc_send_decline() return with an error, if the amount sent is smaller than the length of an smc decline message. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 9 +++------ net/smc/smc_clc.c | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6451c5013e06..d3ae0d5b1677 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -520,7 +520,7 @@ decline_rdma: smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (rc < 0) goto out_err; } goto out_connected; @@ -879,11 +879,9 @@ static void smc_listen_work(struct work_struct *work) } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) { + if (reason_code < 0) /* peer is not aware of a problem */ - rc = reason_code; goto out_err_unlock; - } if (reason_code > 0) goto decline_rdma_unlock; } @@ -916,8 +914,7 @@ decline_rdma: smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (smc_clc_send_decline(new_smc, reason_code) < 0) goto out_err; } goto out_connected; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1800e16b2a02..f5e17d29112b 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -133,7 +133,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) smc->sk.sk_err = EPROTO; if (len < 0) smc->sk.sk_err = -len; - return len; + return sock_error(&smc->sk); } /* send CLC PROPOSAL message across internal TCP socket */ -- cgit v1.2.3-73-gaa49b From 4bd3e7fbfadcd284b1582a284d076afebbe3479d Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:46 +0100 Subject: smc: no update for unused sk_write_pending The smc code never checks the sk_write_pending sock field. Thus there is no need to update it. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index c48dc2d5fd3a..77555c6ed199 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) if (atomic_read(&conn->sndbuf_space)) break; /* at least 1 byte of free space available */ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - sk->sk_write_pending++; sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || smc_cdc_rxed_any_close_or_senddone(conn) || atomic_read(&conn->sndbuf_space), &wait); - sk->sk_write_pending--; } remove_wait_queue(sk_sleep(sk), &wait); return rc; -- cgit v1.2.3-73-gaa49b From 71c125c3f23d714c1d0725ca11c9f27416f697c8 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:47 +0100 Subject: smc: cleanup close checking during data receival When waiting for data to be received it must be checked if the peer signals shutdown. The SMC code uses two different checks for this purpose, even though just one check is sufficient. This patch removes the superfluous test for SOCK_DONE. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_rx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index cbf58637ee14..9dc392ca06bf 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo) rc = sk_wait_event(sk, timeo, sk->sk_err || sk->sk_shutdown & RCV_SHUTDOWN || - sock_flag(sk, SOCK_DONE) || atomic_read(&conn->bytes_to_rcv) || smc_cdc_rxed_any_close_or_senddone(conn), &wait); @@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, if (read_done) { if (sk->sk_err || sk->sk_state == SMC_CLOSED || - (sk->sk_shutdown & RCV_SHUTDOWN) || + sk->sk_shutdown & RCV_SHUTDOWN || !timeo || signal_pending(current) || smc_cdc_rxed_any_close_or_senddone(conn) || @@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, peer_conn_abort) break; } else { - if (sock_flag(sk, SOCK_DONE)) - break; if (sk->sk_err) { read_done = sock_error(sk); break; -- cgit v1.2.3-73-gaa49b From 6b5771aa3c351b118b1eee7bc98e1483eb0c8ca8 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:48 +0100 Subject: smc: no consumer update in tasklet context The SMC protocol requires to send a separate consumer cursor update, if it cannot be piggybacked to updates of the producer cursor. When receiving a blocked signal from the sender, this update is sent already in tasklet context. In addition consumer cursor updates are sent after data receival. Sending of cursor updates is controlled by sequence numbers. Assuming receiving stray messages the receiver drops updates with older sequence numbers than an already received cursor update with a higher sequence number. Sending consumer cursor updates in tasklet context may result in wrong order sends and its corresponding drops at the receiver. Since it is sufficient to send consumer cursor updates once the data is received, this patch gets rid of the consumer cursor update in tasklet context to guarantee in-sequence arrival of cursor updates. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 12 +++--------- net/smc/smc_tx.c | 9 ++------- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 87f7bede6eab..d4155ff6acde 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -213,6 +213,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); + } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) || + (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) { + smc->sk.sk_data_ready(&smc->sk); } if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { @@ -234,15 +237,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* trigger socket release if connection closed */ smc_close_wake_tx_prepared(smc); } - - /* socket connected but not accepted */ - if (!smc->sk.sk_socket) - return; - - /* data available */ - if ((conn->local_rx_ctrl.prod_flags.write_blocked) || - (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) - smc_tx_consumer_update(conn); } /* called under tasklet context */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 77555c6ed199..2e50fddf8ce9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -448,9 +448,7 @@ static void smc_tx_work(struct work_struct *work) void smc_tx_consumer_update(struct smc_connection *conn) { union smc_host_cursor cfed, cons; - struct smc_cdc_tx_pend *pend; - struct smc_wr_buf *wr_buf; - int to_confirm, rc; + int to_confirm; smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), @@ -464,10 +462,7 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); - if (!rc) - rc = smc_cdc_msg_send(conn, wr_buf, pend); - if (rc < 0) { + if (smc_cdc_get_slot_and_msg_send(conn) < 0) { schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; -- cgit v1.2.3-73-gaa49b From e7b7a64a8493d47433fd003efbe6543e3f676294 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:49 +0100 Subject: smc: support variable CLC proposal messages According to RFC7609 [1] the CLC proposal message contains an area of unknown length for future growth. Additionally it may contain up to 8 IPv6 prefixes. The current version of the SMC-code does not understand CLC proposal messages using these variable length fields and, thus, is incompatible with SMC implementations in other operating systems. This patch makes sure, SMC understands incoming CLC proposals * with arbitrary length values for future growth * with up to 8 IPv6 prefixes [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 Signed-off-by: Ursula Braun Reviewed-by: Hans Wippel Signed-off-by: David S. Miller --- net/smc/af_smc.c | 15 ++++++---- net/smc/smc_clc.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--------- net/smc/smc_clc.h | 34 +++++++++++++++++++---- 3 files changed, 107 insertions(+), 24 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d3ae0d5b1677..daf8075f5a4c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -751,14 +751,16 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; struct smc_clc_msg_accept_confirm cclc; int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; struct sockaddr_in peeraddr; + u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; int rc = 0, len; @@ -775,7 +777,7 @@ static void smc_listen_work(struct work_struct *work) /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), + reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), SMC_CLC_PROPOSAL); if (reason_code < 0) goto out_err; @@ -804,8 +806,11 @@ static void smc_listen_work(struct work_struct *work) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - if ((pclc.outgoing_subnet != subnet) || - (pclc.prefix_len != prefix_len)) { + + pclc = (struct smc_clc_msg_proposal *)&buf; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (pclc_prfx->outgoing_subnet != subnet || + pclc_prfx->prefix_len != prefix_len) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@ -816,7 +821,7 @@ static void smc_listen_work(struct work_struct *work) /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc.lcl, 0); + smcibdev, ibport, &pclc->lcl, 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index f5e17d29112b..abf7ceb6690b 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -22,6 +22,54 @@ #include "smc_clc.h" #include "smc_ib.h" +/* check if received message has a correct header length and contains valid + * heading and trailing eyecatchers + */ +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_proposal *pclc; + struct smc_clc_msg_decline *dclc; + struct smc_clc_msg_trail *trl; + + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + switch (clcm->type) { + case SMC_CLC_PROPOSAL: + pclc = (struct smc_clc_msg_proposal *)clcm; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (ntohs(pclc->hdr.length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(*trl)) + return false; + trl = (struct smc_clc_msg_trail *) + ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); + break; + case SMC_CLC_ACCEPT: + case SMC_CLC_CONFIRM: + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (ntohs(clc->hdr.length) != sizeof(*clc)) + return false; + trl = &clc->trl; + break; + case SMC_CLC_DECLINE: + dclc = (struct smc_clc_msg_decline *)clcm; + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + return false; + trl = &dclc->trl; + break; + default: + return false; + } + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + return true; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@ -72,9 +120,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen < sizeof(struct smc_clc_msg_decline)) || - (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || - memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || + (datlen > buflen) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -89,7 +135,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, krflags = MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); - if (len < datlen) { + if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; @@ -141,33 +187,43 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_trail trl; int reason_code = 0; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len, rc; + int len, plen, rc; /* send SMC Proposal CLC message */ + plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(sizeof(pclc)); + pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0); + memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, - &pclc.prefix_len); + rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ - memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + pclc_prfx.ipv6_prefixes_cnt = 0; + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec.iov_base = &pclc; - vec.iov_len = sizeof(pclc); + vec[0].iov_base = &pclc; + vec[0].iov_len = sizeof(pclc); + vec[1].iov_base = &pclc_prfx; + vec[1].iov_len = sizeof(pclc_prfx); + vec[2].iov_base = &trl; + vec[2].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); + len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 12a9af1539a2..c145a0f36a68 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */ #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, flag : 1, - rsvd : 3; + rsvd : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 rsvd : 3, flag : 1, @@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; -struct smc_clc_msg_proposal { /* clc proposal message */ - struct smc_clc_msg_hdr hdr; - struct smc_clc_msg_local lcl; - __be16 iparea_offset; /* offset to IP address information area */ +struct smc_clc_ipv6_prefix { + u8 prefix[4]; + u8 prefix_len; +} __packed; + +struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ u8 prefix_len; /* number of significant bits in mask */ u8 reserved[2]; u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ + struct smc_clc_msg_hdr hdr; + struct smc_clc_msg_local lcl; + __be16 iparea_offset; /* offset to IP address information area */ +} __aligned(4); + +#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 +#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ + SMC_CLC_PROPOSAL_MAX_OFFSET + \ + SMC_CLC_PROPOSAL_MAX_PREFIX + \ + sizeof(struct smc_clc_msg_trail)) + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; struct smc_clc_msg_local lcl; @@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +/* determine start of the prefix area within the proposal message */ +static inline struct smc_clc_msg_proposal_prefix * +smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) +{ + return (struct smc_clc_msg_proposal_prefix *) + ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); +} + struct smc_sock; struct smc_ib_device; -- cgit v1.2.3-73-gaa49b