diff options
author | David S. Miller <davem@davemloft.net> | 2017-12-07 15:03:13 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-12-07 15:03:13 -0500 |
commit | 66c5c5b5668281a898c7882d64ee22694fdd1d88 (patch) | |
tree | af5a7ade751925773bf6394584969014662dc8ac | |
parent | 2a93c1a3651fb41b580676c849887b68af6da02b (diff) | |
parent | e7b7a64a8493d47433fd003efbe6543e3f676294 (diff) |
Merge branch 'smc-fixes-next'
Ursula Braun says:
====================
smc: fixes 2017-12-07
here are some smc-patches. The initial 4 patches are cleanups.
Patch 5 gets rid of ib_post_sends in tasklet context to avoid peer drops due
to out-of-order receivals.
Patch 6 makes sure, the Linux SMC code understands variable sized CLC proposal
messages built according to RFC7609.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/smc/af_smc.c | 24 | ||||
-rw-r--r-- | net/smc/smc_cdc.c | 12 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 84 | ||||
-rw-r--r-- | net/smc/smc_clc.h | 34 | ||||
-rw-r--r-- | net/smc/smc_close.c | 2 | ||||
-rw-r--r-- | net/smc/smc_close.h | 1 | ||||
-rw-r--r-- | net/smc/smc_rx.c | 5 | ||||
-rw-r--r-- | net/smc/smc_tx.c | 11 |
8 files changed, 118 insertions, 55 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6451c5013e06..daf8075f5a4c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -520,7 +520,7 @@ decline_rdma: smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (rc < 0) goto out_err; } goto out_connected; @@ -751,14 +751,16 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; struct smc_clc_msg_accept_confirm cclc; int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; struct sockaddr_in peeraddr; + u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; int rc = 0, len; @@ -775,7 +777,7 @@ static void smc_listen_work(struct work_struct *work) /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), + reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), SMC_CLC_PROPOSAL); if (reason_code < 0) goto out_err; @@ -804,8 +806,11 @@ static void smc_listen_work(struct work_struct *work) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - if ((pclc.outgoing_subnet != subnet) || - (pclc.prefix_len != prefix_len)) { + + pclc = (struct smc_clc_msg_proposal *)&buf; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (pclc_prfx->outgoing_subnet != subnet || + pclc_prfx->prefix_len != prefix_len) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@ -816,7 +821,7 @@ static void smc_listen_work(struct work_struct *work) /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc.lcl, 0); + smcibdev, ibport, &pclc->lcl, 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -879,11 +884,9 @@ static void smc_listen_work(struct work_struct *work) } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) { + if (reason_code < 0) /* peer is not aware of a problem */ - rc = reason_code; goto out_err_unlock; - } if (reason_code > 0) goto decline_rdma_unlock; } @@ -916,8 +919,7 @@ decline_rdma: smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (smc_clc_send_decline(new_smc, reason_code) < 0) goto out_err; } goto out_connected; diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 87f7bede6eab..d4155ff6acde 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -213,6 +213,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); + } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) || + (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) { + smc->sk.sk_data_ready(&smc->sk); } if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { @@ -234,15 +237,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* trigger socket release if connection closed */ smc_close_wake_tx_prepared(smc); } - - /* socket connected but not accepted */ - if (!smc->sk.sk_socket) - return; - - /* data available */ - if ((conn->local_rx_ctrl.prod_flags.write_blocked) || - (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) - smc_tx_consumer_update(conn); } /* called under tasklet context */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1800e16b2a02..abf7ceb6690b 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -22,6 +22,54 @@ #include "smc_clc.h" #include "smc_ib.h" +/* check if received message has a correct header length and contains valid + * heading and trailing eyecatchers + */ +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_proposal *pclc; + struct smc_clc_msg_decline *dclc; + struct smc_clc_msg_trail *trl; + + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + switch (clcm->type) { + case SMC_CLC_PROPOSAL: + pclc = (struct smc_clc_msg_proposal *)clcm; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (ntohs(pclc->hdr.length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(*trl)) + return false; + trl = (struct smc_clc_msg_trail *) + ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); + break; + case SMC_CLC_ACCEPT: + case SMC_CLC_CONFIRM: + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (ntohs(clc->hdr.length) != sizeof(*clc)) + return false; + trl = &clc->trl; + break; + case SMC_CLC_DECLINE: + dclc = (struct smc_clc_msg_decline *)clcm; + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + return false; + trl = &dclc->trl; + break; + default: + return false; + } + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + return true; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@ -72,9 +120,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen < sizeof(struct smc_clc_msg_decline)) || - (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || - memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || + (datlen > buflen) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -89,7 +135,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, krflags = MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); - if (len < datlen) { + if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; @@ -133,7 +179,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) smc->sk.sk_err = EPROTO; if (len < 0) smc->sk.sk_err = -len; - return len; + return sock_error(&smc->sk); } /* send CLC PROPOSAL message across internal TCP socket */ @@ -141,33 +187,43 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_trail trl; int reason_code = 0; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len, rc; + int len, plen, rc; /* send SMC Proposal CLC message */ + plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(sizeof(pclc)); + pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0); + memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, - &pclc.prefix_len); + rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ - memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + pclc_prfx.ipv6_prefixes_cnt = 0; + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec.iov_base = &pclc; - vec.iov_len = sizeof(pclc); + vec[0].iov_base = &pclc; + vec[0].iov_len = sizeof(pclc); + vec[1].iov_base = &pclc_prfx; + vec[1].iov_len = sizeof(pclc_prfx); + vec[2].iov_base = &trl; + vec[2].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); + len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 12a9af1539a2..c145a0f36a68 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */ #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, flag : 1, - rsvd : 3; + rsvd : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 rsvd : 3, flag : 1, @@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; -struct smc_clc_msg_proposal { /* clc proposal message */ - struct smc_clc_msg_hdr hdr; - struct smc_clc_msg_local lcl; - __be16 iparea_offset; /* offset to IP address information area */ +struct smc_clc_ipv6_prefix { + u8 prefix[4]; + u8 prefix_len; +} __packed; + +struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ u8 prefix_len; /* number of significant bits in mask */ u8 reserved[2]; u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ + struct smc_clc_msg_hdr hdr; + struct smc_clc_msg_local lcl; + __be16 iparea_offset; /* offset to IP address information area */ +} __aligned(4); + +#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 +#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ + SMC_CLC_PROPOSAL_MAX_OFFSET + \ + SMC_CLC_PROPOSAL_MAX_PREFIX + \ + sizeof(struct smc_clc_msg_trail)) + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; struct smc_clc_msg_local lcl; @@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +/* determine start of the prefix area within the proposal message */ +static inline struct smc_clc_msg_proposal_prefix * +smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) +{ + return (struct smc_clc_msg_proposal_prefix *) + ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); +} + struct smc_sock; struct smc_ib_device; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 48615d2ac4aa..e194c6cc308a 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -113,7 +113,7 @@ static int smc_close_abort(struct smc_connection *conn) /* terminate smc socket abnormally - active abort * RDMA communication no longer possible */ -void smc_close_active_abort(struct smc_sock *smc) +static void smc_close_active_abort(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index ed82506b1b0a..8c498885d758 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -20,7 +20,6 @@ #define SMC_CLOSE_SOCK_PUT_DELAY HZ void smc_close_wake_tx_prepared(struct smc_sock *smc); -void smc_close_active_abort(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index cbf58637ee14..9dc392ca06bf 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo) rc = sk_wait_event(sk, timeo, sk->sk_err || sk->sk_shutdown & RCV_SHUTDOWN || - sock_flag(sk, SOCK_DONE) || atomic_read(&conn->bytes_to_rcv) || smc_cdc_rxed_any_close_or_senddone(conn), &wait); @@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, if (read_done) { if (sk->sk_err || sk->sk_state == SMC_CLOSED || - (sk->sk_shutdown & RCV_SHUTDOWN) || + sk->sk_shutdown & RCV_SHUTDOWN || !timeo || signal_pending(current) || smc_cdc_rxed_any_close_or_senddone(conn) || @@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, peer_conn_abort) break; } else { - if (sock_flag(sk, SOCK_DONE)) - break; if (sk->sk_err) { read_done = sock_error(sk); break; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index c48dc2d5fd3a..2e50fddf8ce9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) if (atomic_read(&conn->sndbuf_space)) break; /* at least 1 byte of free space available */ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - sk->sk_write_pending++; sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || smc_cdc_rxed_any_close_or_senddone(conn) || atomic_read(&conn->sndbuf_space), &wait); - sk->sk_write_pending--; } remove_wait_queue(sk_sleep(sk), &wait); return rc; @@ -450,9 +448,7 @@ static void smc_tx_work(struct work_struct *work) void smc_tx_consumer_update(struct smc_connection *conn) { union smc_host_cursor cfed, cons; - struct smc_cdc_tx_pend *pend; - struct smc_wr_buf *wr_buf; - int to_confirm, rc; + int to_confirm; smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), @@ -466,10 +462,7 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); - if (!rc) - rc = smc_cdc_msg_send(conn, wr_buf, pend); - if (rc < 0) { + if (smc_cdc_get_slot_and_msg_send(conn) < 0) { schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; |