diff options
author | David S. Miller <[email protected]> | 2018-03-01 13:21:32 -0500 |
---|---|---|
committer | David S. Miller <[email protected]> | 2018-03-01 13:21:32 -0500 |
commit | ba6078081caae5bebbd34844ee2cf6c52aa7daec (patch) | |
tree | 255bfefb143f57b4c1bc73a580904d98d6f88919 | |
parent | 3c5aa0bc9c0063b4c462cce4d07f88aa3fbeaea3 (diff) | |
parent | 9651b9346f5bc85a4fef96789c756748483d9ee2 (diff) |
Merge branch 'smc-link-layer-control-enhancements'
Ursula Braun says:
====================
net/smc: Link Layer Control enhancements
Here is a series of smc patches enabling SMC communication with peers
supporting more than one link per link group.
The first three patches are preparatory code cleanups.
====================
Signed-off-by: David S. Miller <[email protected]>
-rw-r--r-- | net/smc/af_smc.c | 127 | ||||
-rw-r--r-- | net/smc/smc.h | 5 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 47 | ||||
-rw-r--r-- | net/smc/smc_clc.h | 9 | ||||
-rw-r--r-- | net/smc/smc_core.c | 77 | ||||
-rw-r--r-- | net/smc/smc_core.h | 16 | ||||
-rw-r--r-- | net/smc/smc_llc.c | 408 | ||||
-rw-r--r-- | net/smc/smc_llc.h | 41 |
8 files changed, 592 insertions, 138 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 38ae22b65e77..26684e086750 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -7,7 +7,6 @@ * applicable with RoCE-cards only * * Initial restrictions: - * - non-blocking connect postponed * - IPv6 support postponed * - support for alternate links postponed * - partial support for non-blocking sockets only @@ -24,7 +23,6 @@ #include <linux/module.h> #include <linux/socket.h> -#include <linux/inetdevice.h> #include <linux/workqueue.h> #include <linux/in.h> #include <linux/sched/signal.h> @@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } -/* determine subnet and mask of internal TCP socket */ -int smc_netinfo_by_tcpsk(struct socket *clcsock, - __be32 *subnet, u8 *prefix_len) -{ - struct dst_entry *dst = sk_dst_get(clcsock->sk); - struct in_device *in_dev; - struct sockaddr_in addr; - int rc = -ENOENT; - - if (!dst) { - rc = -ENOTCONN; - goto out; - } - if (!dst->dev) { - rc = -ENODEV; - goto out_rel; - } - - /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addr); - /* analyze IPv4 specific data of net_device belonging to TCP socket */ - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dst->dev); - for_ifa(in_dev) { - if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) - continue; - *prefix_len = inet_mask_len(ifa->ifa_mask); - *subnet = ifa->ifa_address & ifa->ifa_mask; - rc = 0; - break; - } endfor_ifa(in_dev); - rcu_read_unlock(); - -out_rel: - dst_release(dst); -out: - return rc; -} - -static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) +static int smc_clnt_conf_first_link(struct smc_sock *smc) { struct smc_link_group *lgr = smc->conn.lgr; struct smc_link *link; @@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) return rc; } + if (link->llc_confirm_rc) + return 
SMC_CLC_DECL_RMBE_EC; + rc = smc_ib_modify_qp_rts(link); if (rc) return SMC_CLC_DECL_INTERR; @@ -346,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], - gid, SMC_LLC_RESP); + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TCL; - return rc; + /* receive ADD LINK request from server over RoCE fabric */ + rest = wait_for_completion_interruptible_timeout(&link->llc_add, + SMC_LLC_WAIT_TIME); + if (rest <= 0) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE); + return rc; + } + + /* send add link reject message, only one link supported for now */ + rc = smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); + if (rc < 0) + return SMC_CLC_DECL_TCL; + + link->state = SMC_LNK_ACTIVE; + + return 0; } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } -static void smc_lgr_forget(struct smc_link_group *lgr) -{ - spin_lock_bh(&smc_lgr_list.lock); - /* do not use this link group for new connections */ - if (!list_empty(&lgr->list)) - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); -} - /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { - struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr; struct smc_clc_msg_accept_confirm aclc; int local_contact = SMC_FIRST_CONTACT; struct smc_ib_device *smcibdev; @@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc) srv_first_contact = aclc.hdr.flag; mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev, - ibport, &aclc.lcl, srv_first_contact); + 
local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl, + srv_first_contact); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc) if (local_contact == SMC_FIRST_CONTACT) { /* QP confirmation over RoCE fabric */ - reason_code = smc_clnt_conf_first_link( - smc, &smcibdev->gid[ibport - 1]); + reason_code = smc_clnt_conf_first_link(smc); if (reason_code < 0) { rc = reason_code; goto out_err_unlock; @@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, goto out_err; if (addr->sa_family != AF_INET) goto out_err; - smc->addr = addr; /* needed for nonblocking connect */ lock_sock(sk); switch (sk->sk_state) { @@ -748,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); + return rc; } - return rc; + if (link->llc_confirm_resp_rc) + return SMC_CLC_DECL_RMBE_EC; + + /* send ADD LINK request to client over the RoCE fabric */ + rc = smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_REQ); + if (rc < 0) + return SMC_CLC_DECL_TCL; + + /* receive ADD LINK response from client over the RoCE fabric */ + rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, + SMC_LLC_WAIT_TIME); + if (rest <= 0) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE); + return rc; + } + + link->state = SMC_LNK_ACTIVE; + + return 0; } /* setup for RDMA connection of server */ @@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work) struct sock *newsmcsk = &new_smc->sk; struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; - struct sockaddr_in peeraddr; u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; @@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work) } /* determine subnet and mask from 
internal TCP socket */ - rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); + rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); if (rc) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; @@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } - /* get address of the peer connected to the internal TCP socket */ - kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr); - /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc->lcl, 0); + local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl, + 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) diff --git a/net/smc/smc.h b/net/smc/smc.h index 9518986c97b1..268cdf11533c 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -172,7 +172,6 @@ struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ struct smc_connection conn; /* smc connection */ - struct sockaddr *addr; /* inet connect address */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ @@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc) struct smc_clc_msg_local; -int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, - u8 *prefix_len); void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, +int smc_conn_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, struct smc_clc_msg_local *lcl, int srv_first_contact); struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8ac51583a063..874c5a75d6dd 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ 
-11,6 +11,7 @@ */ #include <linux/in.h> +#include <linux/inetdevice.h> #include <linux/if_ether.h> #include <linux/sched/signal.h> @@ -22,6 +23,9 @@ #include "smc_clc.h" #include "smc_ib.h" +/* eye catcher "SMCR" EBCDIC for CLC messages */ +static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; + /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ @@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return true; } +/* determine subnet and mask of internal TCP socket */ +int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, + __be32 *subnet, u8 *prefix_len) +{ + struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct in_device *in_dev; + struct sockaddr_in addr; + int rc = -ENOENT; + + if (!dst) { + rc = -ENOTCONN; + goto out; + } + if (!dst->dev) { + rc = -ENODEV; + goto out_rel; + } + + /* get address to which the internal TCP socket is bound */ + kernel_getsockname(clcsock, (struct sockaddr *)&addr); + /* analyze IPv4 specific data of net_device belonging to TCP socket */ + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dst->dev); + for_ifa(in_dev) { + if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) + continue; + *prefix_len = inet_mask_len(ifa->ifa_mask); + *subnet = ifa->ifa_address & ifa->ifa_mask; + rc = 0; + break; + } endfor_ifa(in_dev); + rcu_read_unlock(); + +out_rel: + dst_release(dst); +out: + return rc; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. 
@@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, - &pclc_prfx.prefix_len); + rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ pclc_prfx.ipv6_prefixes_cnt = 0; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c145a0f36a68..20e048beac30 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -22,9 +22,6 @@ #define SMC_CLC_CONFIRM 0x03 #define SMC_CLC_DECLINE 0x04 -/* eye catcher "SMCR" EBCDIC for CLC messages */ -static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; - #define SMC_CLC_V1 0x1 /* SMC version */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ @@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; #define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */ #define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */ #define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */ +#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */ struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ @@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } -struct smc_sock; -struct smc_ib_device; - +int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, + u8 *prefix_len); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2424c7100aaf..702ce5f85e97 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -144,7 
+144,7 @@ free: } /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, +static int smc_lgr_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, char *peer_systemid, unsigned short vlan_id) { @@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, } lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; lgr->sync_err = false; - lgr->daddr = peer_in_addr; memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); lgr->vlan_id = vlan_id; rwlock_init(&lgr->sndbufs_lock); @@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, lnk = &lgr->lnk[SMC_SINGLE_LINK]; /* initialize link */ + lnk->state = SMC_LNK_ACTIVATING; lnk->smcibdev = smcibdev; lnk->ibport = ibport; lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; @@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, goto destroy_qp; init_completion(&lnk->llc_confirm); init_completion(&lnk->llc_confirm_resp); + init_completion(&lnk->llc_add); + init_completion(&lnk->llc_add_resp); smc->conn.lgr = lgr; rwlock_init(&lgr->conns_lock); @@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr) kfree(lgr); } +void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* terminate linkgroup abnormally */ void smc_lgr_terminate(struct smc_link_group *lgr) { @@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) struct smc_sock *smc; struct rb_node *node; - spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) { - /* termination already triggered */ - spin_unlock_bh(&smc_lgr_list.lock); - return; - } - /* do not use this link group for new connections */ - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); + 
smc_lgr_forget(lgr); write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); @@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) } /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, +int smc_conn_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, struct smc_clc_msg_local *lcl, int srv_first_contact) { @@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, create: if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport, + rc = smc_lgr_create(smc, smcibdev, ibport, lcl->id_for_peer, vlan_id); if (rc) goto out; @@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) return -ENOSPC; } -/* save rkey and dma_addr received from peer during clc handshake */ -int smc_rmb_rtoken_handling(struct smc_connection *conn, - struct smc_clc_msg_accept_confirm *clc) +/* add a new rtoken from peer */ +int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) { - u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr); - struct smc_link_group *lgr = conn->lgr; - u32 rkey = ntohl(clc->rmb_rkey); + u64 dma_addr = be64_to_cpu(nw_vaddr); + u32 rkey = ntohl(nw_rkey); int i; for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && test_bit(i, lgr->rtokens_used_mask)) { - conn->rtoken_idx = i; + /* already in list */ + return i; + } + } + i = smc_rmb_reserve_rtoken_idx(lgr); + if (i < 0) + return i; + lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; + lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; + return i; +} + +/* delete an rtoken */ +int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) +{ + u32 rkey = ntohl(nw_rkey); + int i; + + for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { + if 
(lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && + test_bit(i, lgr->rtokens_used_mask)) { + lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; + lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; + + clear_bit(i, lgr->rtokens_used_mask); return 0; } } - conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr); + return -ENOENT; +} + +/* save rkey and dma_addr received from peer during clc handshake */ +int smc_rmb_rtoken_handling(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc) +{ + conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, + clc->rmb_rkey); if (conn->rtoken_idx < 0) return conn->rtoken_idx; - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey; - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr; return 0; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fe691bf9af91..07e2a393e6d9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */ SMC_SERV /* server */ }; +enum smc_link_state { /* possible states of a link */ + SMC_LNK_INACTIVE, /* link is inactive */ + SMC_LNK_ACTIVATING, /* link is being activated */ + SMC_LNK_ACTIVE /* link is active */ +}; + #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ struct smc_wr_buf { @@ -87,8 +93,14 @@ struct smc_link { u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/ u8 link_id; /* unique # within link group */ + + enum smc_link_state state; /* state of link */ struct completion llc_confirm; /* wait for rx of conf link */ struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */ + int llc_confirm_rc; /* rc from confirm link msg */ + int llc_confirm_resp_rc; /* rc from conf_resp msg */ + struct completion llc_add; /* wait for rx of add link */ + struct completion llc_add_resp; /* wait for rx of add link rsp*/ }; /* For now we just allow one parallel link per link group. 
The SMC protocol @@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */ struct smc_link_group { struct list_head list; enum smc_lgr_role role; /* client or server */ - __be32 daddr; /* destination ip address */ struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ char peer_systemid[SMC_SYSTEMID_LEN]; /* unique system_id of peer */ @@ -186,10 +197,13 @@ struct smc_sock; struct smc_clc_msg_accept_confirm; void smc_lgr_free(struct smc_link_group *lgr); +void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); int smc_buf_create(struct smc_sock *smc); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); +int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey); +int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey); void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 92fe4cc8c82c..54e8d6dc9201 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -4,9 +4,6 @@ * * Link Layer Control (LLC) * - * For now, we only support the necessary "confirm link" functionality - * which happens for the first RoCE link after successful CLC handshake. - * * Copyright IBM Corp. 
2016 * * Author(s): Klaus Wacker <[email protected]> @@ -21,6 +18,122 @@ #include "smc_clc.h" #include "smc_llc.h" +#define SMC_LLC_DATA_LEN 40 + +struct smc_llc_hdr { + struct smc_wr_rx_hdr common; + u8 length; /* 44 */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + add_link_rej_rsn:4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 add_link_rej_rsn:4, + reserved:4; +#endif + u8 flags; +}; + +#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 + +struct smc_llc_msg_confirm_link { /* type 0x01 */ + struct smc_llc_hdr hd; + u8 sender_mac[ETH_ALEN]; + u8 sender_gid[SMC_GID_SIZE]; + u8 sender_qp_num[3]; + u8 link_num; + u8 link_uid[SMC_LGR_ID_SIZE]; + u8 max_links; + u8 reserved[9]; +}; + +#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40 +#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1 + +#define SMC_LLC_ADD_LNK_MAX_LINKS 2 + +struct smc_llc_msg_add_link { /* type 0x02 */ + struct smc_llc_hdr hd; + u8 sender_mac[ETH_ALEN]; + u8 reserved2[2]; + u8 sender_gid[SMC_GID_SIZE]; + u8 sender_qp_num[3]; + u8 link_num; + u8 flags2; /* QP mtu */ + u8 initial_psn[3]; + u8 reserved[8]; +}; + +#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40 +#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20 + +struct smc_llc_msg_del_link { /* type 0x04 */ + struct smc_llc_hdr hd; + u8 link_num; + __be32 reason; + u8 reserved[35]; +} __packed; /* format defined in RFC7609 */ + +struct smc_llc_msg_test_link { /* type 0x07 */ + struct smc_llc_hdr hd; + u8 user_data[16]; + u8 reserved[24]; +}; + +struct smc_rmb_rtoken { + union { + u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */ + /* is actually the num of rtokens, first */ + /* rtoken is always for the current link */ + u8 link_id; /* link id of the rtoken */ + }; + __be32 rmb_key; + __be64 rmb_vaddr; +} __packed; /* format defined in RFC7609 */ + +#define SMC_LLC_RKEYS_PER_MSG 3 + +struct smc_llc_msg_confirm_rkey { /* type 0x06 */ + struct smc_llc_hdr hd; + struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; + u8 reserved; +}; + +struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 
*/ + struct smc_llc_hdr hd; + u8 num_rkeys; + struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; +}; + +#define SMC_LLC_DEL_RKEY_MAX 8 +#define SMC_LLC_FLAG_RKEY_NEG 0x20 + +struct smc_llc_msg_delete_rkey { /* type 0x09 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + u8 err_mask; + u8 reserved[2]; + __be32 rkey[8]; + u8 reserved2[4]; +}; + +union smc_llc_msg { + struct smc_llc_msg_confirm_link confirm_link; + struct smc_llc_msg_add_link add_link; + struct smc_llc_msg_del_link delete_link; + + struct smc_llc_msg_confirm_rkey confirm_rkey; + struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont; + struct smc_llc_msg_delete_rkey delete_rkey; + + struct smc_llc_msg_test_link test_link; + struct { + struct smc_llc_hdr hdr; + u8 data[SMC_LLC_DATA_LEN]; + } raw; +}; + +#define SMC_LLC_FLAG_RESP 0x80 + /********************************** send *************************************/ struct smc_llc_tx_pend { @@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], memset(confllc, 0, sizeof(*confllc)); confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); + confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; if (reqresp == SMC_LLC_RESP) confllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(confllc->sender_mac, mac, ETH_ALEN); @@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], hton24(confllc->sender_qp_num, link->roce_qp->qp_num); /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */ memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); - confllc->max_links = SMC_LINKS_PER_LGR_MAX; + confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. 
*/ + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send ADD LINK request or response */ +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], + union ib_gid *gid, + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_add_link *addllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + memset(addllc, 0, sizeof(*addllc)); + addllc->hd.common.type = SMC_LLC_ADD_LINK; + addllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) { + addllc->hd.flags |= SMC_LLC_FLAG_RESP; + /* always reject more links for now */ + addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; + addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; + } + memcpy(addllc->sender_mac, mac, ETH_ALEN); + memcpy(addllc->sender_gid, gid, SMC_GID_SIZE); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send DELETE LINK request or response */ +int smc_llc_send_delete_link(struct smc_link *link, + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_del_link *delllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + delllc = (struct smc_llc_msg_del_link *)wr_buf; + memset(delllc, 0, sizeof(*delllc)); + delllc->hd.common.type = SMC_LLC_DELETE_LINK; + delllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) + delllc->hd.flags |= SMC_LLC_FLAG_RESP; + /* DEL_LINK_ALL because only 1 link supported */ + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + delllc->link_num = link->link_id; + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send LLC test link request or response */ +int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16], 
+ enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_test_link *testllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + testllc = (struct smc_llc_msg_test_link *)wr_buf; + memset(testllc, 0, sizeof(*testllc)); + testllc->hd.common.type = SMC_LLC_TEST_LINK; + testllc->hd.length = sizeof(struct smc_llc_msg_test_link); + if (reqresp == SMC_LLC_RESP) + testllc->hd.flags |= SMC_LLC_FLAG_RESP; + memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send a prepared message */ +static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen) +{ + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + memcpy(wr_buf, llcbuf, llclen); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link, struct smc_llc_msg_confirm_link *llc) { struct smc_link_group *lgr; + int conf_rc; lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + /* RMBE eyecatchers are not supported */ + if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) + conf_rc = 0; + else + conf_rc = ENOTSUPP; + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (lgr->role == SMC_SERV) + if (lgr->role == SMC_SERV && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_resp_rc = conf_rc; complete(&link->llc_confirm_resp); + } } else { - if (lgr->role == SMC_CLNT) { + if (lgr->role == SMC_CLNT && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_rc = conf_rc; link->link_id = llc->link_num; complete(&link->llc_confirm); } } } +static void smc_llc_rx_add_link(struct smc_link *link, + struct smc_llc_msg_add_link *llc) +{ + struct smc_link_group *lgr = container_of(link, struct smc_link_group, + 
lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + if (link->state == SMC_LNK_ACTIVATING) + complete(&link->llc_add_resp); + } else { + if (link->state == SMC_LNK_ACTIVATING) { + complete(&link->llc_add); + return; + } + + if (lgr->role == SMC_SERV) { + smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_REQ); + + } else { + smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); + } + } +} + +static void smc_llc_rx_delete_link(struct smc_link *link, + struct smc_llc_msg_del_link *llc) +{ + struct smc_link_group *lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + if (lgr->role == SMC_SERV) + smc_lgr_terminate(lgr); + } else { + if (lgr->role == SMC_SERV) { + smc_lgr_forget(lgr); + smc_llc_send_delete_link(link, SMC_LLC_REQ); + } else { + smc_llc_send_delete_link(link, SMC_LLC_RESP); + smc_lgr_terminate(lgr); + } + } +} + +static void smc_llc_rx_test_link(struct smc_link *link, + struct smc_llc_msg_test_link *llc) +{ + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP); + } +} + +static void smc_llc_rx_confirm_rkey(struct smc_link *link, + struct smc_llc_msg_confirm_rkey *llc) +{ + struct smc_link_group *lgr; + int rc; + + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + rc = smc_rtoken_add(lgr, + llc->rtoken[0].rmb_vaddr, + llc->rtoken[0].rmb_key); + + /* ignore rtokens for other links, we have only one link */ + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + if (rc < 0) + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + +static void 
smc_llc_rx_confirm_rkey_cont(struct smc_link *link, + struct smc_llc_msg_confirm_rkey_cont *llc) +{ + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + /* ignore rtokens for other links, we have only one link */ + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + +static void smc_llc_rx_delete_rkey(struct smc_link *link, + struct smc_llc_msg_delete_rkey *llc) +{ + struct smc_link_group *lgr; + u8 err_mask = 0; + int i, max; + + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); + for (i = 0; i < max; i++) { + if (smc_rtoken_delete(lgr, llc->rkey[i])) + err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); + } + + if (err_mask) { + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + llc->err_mask = err_mask; + } + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) { struct smc_link *link = (struct smc_link *)wc->qp->qp_context; @@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) return; /* short message */ if (llc->raw.hdr.length != sizeof(*llc)) return; /* invalid message */ - if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK) + + switch (llc->raw.hdr.common.type) { + case SMC_LLC_TEST_LINK: + smc_llc_rx_test_link(link, &llc->test_link); + break; + case SMC_LLC_CONFIRM_LINK: smc_llc_rx_confirm_link(link, &llc->confirm_link); + break; + case SMC_LLC_ADD_LINK: + smc_llc_rx_add_link(link, &llc->add_link); + break; + case SMC_LLC_DELETE_LINK: + smc_llc_rx_delete_link(link, &llc->delete_link); + break; + case SMC_LLC_CONFIRM_RKEY: + smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey); + break; + case SMC_LLC_CONFIRM_RKEY_CONT: + 
smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont); + break; + case SMC_LLC_DELETE_RKEY: + smc_llc_rx_delete_rkey(link, &llc->delete_rkey); + break; + } } /***************************** init, exit, misc ******************************/ @@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .type = SMC_LLC_CONFIRM_LINK }, { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_TEST_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_ADD_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY_CONT + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_RKEY + }, + { .handler = NULL, } }; diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 51b27ce90dbd..e4a7d5e234d5 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -18,6 +18,7 @@ #define SMC_LLC_FLAG_RESP 0x80 #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) +#define SMC_LLC_WAIT_TIME (2 * HZ) enum smc_llc_reqresp { SMC_LLC_REQ, @@ -26,39 +27,23 @@ enum smc_llc_reqresp { enum smc_llc_msg_type { SMC_LLC_CONFIRM_LINK = 0x01, -}; - -#define SMC_LLC_DATA_LEN 40 - -struct smc_llc_hdr { - struct smc_wr_rx_hdr common; - u8 length; /* 44 */ - u8 reserved; - u8 flags; -}; - -struct smc_llc_msg_confirm_link { /* type 0x01 */ - struct smc_llc_hdr hd; - u8 sender_mac[ETH_ALEN]; - u8 sender_gid[SMC_GID_SIZE]; - u8 sender_qp_num[3]; - u8 link_num; - u8 link_uid[SMC_LGR_ID_SIZE]; - u8 max_links; - u8 reserved[9]; -}; - -union smc_llc_msg { - struct smc_llc_msg_confirm_link confirm_link; - struct { - struct smc_llc_hdr hdr; - u8 data[SMC_LLC_DATA_LEN]; - } raw; + SMC_LLC_ADD_LINK = 0x02, + SMC_LLC_DELETE_LINK = 0x04, + SMC_LLC_CONFIRM_RKEY = 0x06, + SMC_LLC_TEST_LINK = 0x07, + SMC_LLC_CONFIRM_RKEY_CONT = 0x08, + SMC_LLC_DELETE_RKEY = 0x09, }; /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, u8 
mac[], union ib_gid *gid, enum smc_llc_reqresp reqresp); +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, + enum smc_llc_reqresp reqresp); +int smc_llc_send_delete_link(struct smc_link *link, + enum smc_llc_reqresp reqresp); +int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16], + enum smc_llc_reqresp reqresp); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ |