Diffstat (limited to 'net/sunrpc/xprtrdma/svc_rdma_recvfrom.c')
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma_recvfrom.c   266
1 file changed, 176 insertions, 90 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 96bccd398469..efa5fcb5793f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -193,6 +193,7 @@ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 
 out:
 	ctxt->rc_page_count = 0;
+	ctxt->rc_read_payload_length = 0;
 	return ctxt;
 
 out_empty:
@@ -222,6 +223,26 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
+/**
+ * svc_rdma_release_rqst - Release transport-specific per-rqst resources
+ * @rqstp: svc_rqst being released
+ *
+ * Ensure that the recv_ctxt is released whether or not a Reply
+ * was sent. For example, the client could close the connection,
+ * or svc_process could drop an RPC, before the Reply is sent.
+ */
+void svc_rdma_release_rqst(struct svc_rqst *rqstp)
+{
+	struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
+	struct svc_xprt *xprt = rqstp->rq_xprt;
+	struct svcxprt_rdma *rdma =
+		container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
+	rqstp->rq_xprt_ctxt = NULL;
+	if (ctxt)
+		svc_rdma_recv_ctxt_put(rdma, ctxt);
+}
+
 static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
 				struct svc_rdma_recv_ctxt *ctxt)
 {
@@ -357,15 +378,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	arg->len = ctxt->rc_byte_len;
 }
 
-/* This accommodates the largest possible Write chunk,
- * in one segment.
+/* This accommodates the largest possible Write chunk.
  */
-#define MAX_BYTES_WRITE_SEG	((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
+#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
 
 /* This accommodates the largest possible Position-Zero
- * Read chunk or Reply chunk, in one segment.
+ * Read chunk or Reply chunk.
  */
-#define MAX_BYTES_SPECIAL_SEG	((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
+#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
 
 /* Sanity check the Read list.
  *
@@ -373,7 +393,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
  * - This implementation supports only one Read chunk.
  *
  * Sanity checks:
- * - Read list does not overflow buffer.
+ * - Read list does not overflow Receive buffer.
  * - Segment size limited by largest NFS data payload.
  *
  * The segment count is limited to how many segments can
@@ -381,30 +401,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
  * buffer. That's about 40 Read segments for a 1KB inline
  * threshold.
  *
- * Returns pointer to the following Write list.
+ * Return values:
+ *       %true: Read list is valid. @rctxt's xdr_stream is updated
+ *		to point to the first byte past the Read list.
+ *      %false: Read list is corrupt. @rctxt's xdr_stream is left
+ *		in an unknown state.
  */
-static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
+static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
 {
-	u32 position;
+	u32 position, len;
 	bool first;
+	__be32 *p;
 
+	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+	if (!p)
+		return false;
+
+	len = 0;
 	first = true;
-	while (*p++ != xdr_zero) {
+	while (*p != xdr_zero) {
+		p = xdr_inline_decode(&rctxt->rc_stream,
+				      rpcrdma_readseg_maxsz * sizeof(*p));
+		if (!p)
+			return false;
+
 		if (first) {
-			position = be32_to_cpup(p++);
+			position = be32_to_cpup(p);
 			first = false;
-		} else if (be32_to_cpup(p++) != position) {
-			return NULL;
+		} else if (be32_to_cpup(p) != position) {
+			return false;
 		}
-		p++;	/* handle */
-		if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
-			return NULL;
-		p += 2;	/* offset */
+		p += 2;
+		len += be32_to_cpup(p);
 
-		if (p > end)
-			return NULL;
+		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+		if (!p)
+			return false;
 	}
-	return p;
+	return len <= MAX_BYTES_SPECIAL_CHUNK;
 }
 
 /* The segment count is limited to how many segments can
@@ -412,67 +446,100 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
  * buffer. That's about 60 Write segments for a 1KB inline
  * threshold.
  */
-static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
-				     u32 maxlen)
+static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
 {
-	u32 i, segcount;
+	u32 i, segcount, total;
+	__be32 *p;
 
-	segcount = be32_to_cpup(p++);
+	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+	if (!p)
+		return false;
+	segcount = be32_to_cpup(p);
+
+	total = 0;
 	for (i = 0; i < segcount; i++) {
-		p++;	/* handle */
-		if (be32_to_cpup(p++) > maxlen)
-			return NULL;
-		p += 2;	/* offset */
+		u32 handle, length;
+		u64 offset;
 
-		if (p > end)
-			return NULL;
-	}
+		p = xdr_inline_decode(&rctxt->rc_stream,
+				      rpcrdma_segment_maxsz * sizeof(*p));
+		if (!p)
+			return false;
+
+		handle = be32_to_cpup(p++);
+		length = be32_to_cpup(p++);
+		xdr_decode_hyper(p, &offset);
+		trace_svcrdma_decode_wseg(handle, length, offset);
 
-	return p;
+		total += length;
+	}
+	return total <= maxlen;
 }
 
 /* Sanity check the Write list.
  *
  * Implementation limits:
- * - This implementation supports only one Write chunk.
+ * - This implementation currently supports only one Write chunk.
  *
  * Sanity checks:
- * - Write list does not overflow buffer.
- * - Segment size limited by largest NFS data payload.
- *
- * Returns pointer to the following Reply chunk.
+ * - Write list does not overflow Receive buffer.
+ * - Chunk size limited by largest NFS data payload.
+ *
+ * Return values:
+ *       %true: Write list is valid. @rctxt's xdr_stream is updated
+ *		to point to the first byte past the Write list.
+ *      %false: Write list is corrupt. @rctxt's xdr_stream is left
+ *		in an unknown state.
  */
-static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
+static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
 {
-	u32 chcount;
+	u32 chcount = 0;
+	__be32 *p;
 
-	chcount = 0;
-	while (*p++ != xdr_zero) {
-		p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
+	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+	if (!p)
+		return false;
+	rctxt->rc_write_list = p;
+	while (*p != xdr_zero) {
+		if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
+			return false;
+		++chcount;
+		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
 		if (!p)
-			return NULL;
-		if (chcount++ > 1)
-			return NULL;
+			return false;
 	}
-	return p;
+	if (!chcount)
+		rctxt->rc_write_list = NULL;
+	return chcount < 2;
 }
 
 /* Sanity check the Reply chunk.
  *
  * Sanity checks:
- * - Reply chunk does not overflow buffer.
- * - Segment size limited by largest NFS data payload.
- *
- * Returns pointer to the following RPC header.
+ * - Reply chunk does not overflow Receive buffer.
+ * - Chunk size limited by largest NFS data payload.
+ *
+ * Return values:
+ *       %true: Reply chunk is valid. @rctxt's xdr_stream is updated
+ *		to point to the first byte past the Reply chunk.
+ *      %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
+ *		in an unknown state.
  */
-static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
+static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
 {
-	if (*p++ != xdr_zero) {
-		p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
-		if (!p)
-			return NULL;
+	__be32 *p;
+
+	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+	if (!p)
+		return false;
+	rctxt->rc_reply_chunk = p;
+	if (*p != xdr_zero) {
+		if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
+			return false;
+	} else {
+		rctxt->rc_reply_chunk = NULL;
 	}
-	return p;
+	return true;
 }
 
 /* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -537,60 +604,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
 	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
 }
 
-/* On entry, xdr->head[0].iov_base points to first byte in the
- * RPC-over-RDMA header.
+/**
+ * svc_rdma_xdr_decode_req - Decode the transport header
+ * @rq_arg: xdr_buf containing ingress RPC/RDMA message
+ * @rctxt: state of decoding
+ *
+ * On entry, xdr->head[0].iov_base points to first byte of the
+ * RPC-over-RDMA transport header.
  *
  * On successful exit, head[0] points to first byte past the
  * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
+ *
  * The length of the RPC-over-RDMA header is returned.
  *
  * Assumptions:
 * - The transport header is entirely contained in the head iovec.
  */
-static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
+static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
+				   struct svc_rdma_recv_ctxt *rctxt)
 {
-	__be32 *p, *end, *rdma_argp;
+	__be32 *p, *rdma_argp;
 	unsigned int hdr_len;
 
-	/* Verify that there's enough bytes for header + something */
-	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
-		goto out_short;
-
 	rdma_argp = rq_arg->head[0].iov_base;
-	if (*(rdma_argp + 1) != rpcrdma_version)
-		goto out_version;
+	xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);
 
-	switch (*(rdma_argp + 3)) {
+	p = xdr_inline_decode(&rctxt->rc_stream,
+			      rpcrdma_fixed_maxsz * sizeof(*p));
+	if (unlikely(!p))
+		goto out_short;
+	p++;
+	if (*p != rpcrdma_version)
+		goto out_version;
+	p += 2;
+	switch (*p) {
 	case rdma_msg:
 		break;
 	case rdma_nomsg:
 		break;
-
 	case rdma_done:
 		goto out_drop;
-
 	case rdma_error:
 		goto out_drop;
-
 	default:
 		goto out_proc;
 	}
 
-	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
-	p = xdr_check_read_list(rdma_argp + 4, end);
-	if (!p)
+	if (!xdr_check_read_list(rctxt))
 		goto out_inval;
-	p = xdr_check_write_list(p, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_reply_chunk(p, end);
-	if (!p)
+	if (!xdr_check_write_list(rctxt))
 		goto out_inval;
-	if (p > end)
+	if (!xdr_check_reply_chunk(rctxt))
 		goto out_inval;
 
-	rq_arg->head[0].iov_base = p;
-	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
+	rq_arg->head[0].iov_base = rctxt->rc_stream.p;
+	hdr_len = xdr_stream_pos(&rctxt->rc_stream);
 	rq_arg->head[0].iov_len -= hdr_len;
 	rq_arg->len -= hdr_len;
 	trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
@@ -650,7 +718,6 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
 				__be32 *rdma_argp, int status)
 {
 	struct svc_rdma_send_ctxt *ctxt;
-	unsigned int length;
 	__be32 *p;
 	int ret;
 
@@ -658,29 +725,46 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
 	if (!ctxt)
 		return;
 
-	p = ctxt->sc_xprt_buf;
+	p = xdr_reserve_space(&ctxt->sc_stream,
+			      rpcrdma_fixed_maxsz * sizeof(*p));
+	if (!p)
+		goto put_ctxt;
+
 	*p++ = *rdma_argp;
 	*p++ = *(rdma_argp + 1);
 	*p++ = xprt->sc_fc_credits;
-	*p++ = rdma_error;
+	*p = rdma_error;
+
 	switch (status) {
 	case -EPROTONOSUPPORT:
+		p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p));
+		if (!p)
+			goto put_ctxt;
+
 		*p++ = err_vers;
 		*p++ = rpcrdma_version;
-		*p++ = rpcrdma_version;
+		*p = rpcrdma_version;
 		trace_svcrdma_err_vers(*rdma_argp);
 		break;
 	default:
-		*p++ = err_chunk;
+		p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p));
+		if (!p)
+			goto put_ctxt;
+
+		*p = err_chunk;
 		trace_svcrdma_err_chunk(*rdma_argp);
 	}
-	length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf;
-	svc_rdma_sync_reply_hdr(xprt, ctxt, length);
 
+	ctxt->sc_send_wr.num_sge = 1;
 	ctxt->sc_send_wr.opcode = IB_WR_SEND;
+	ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
 	ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
 	if (ret)
-		svc_rdma_send_ctxt_put(xprt, ctxt);
+		goto put_ctxt;
+	return;
+
+put_ctxt:
+	svc_rdma_send_ctxt_put(xprt, ctxt);
 }
 
 /* By convention, backchannel calls arrive via rdma_msg type
@@ -756,6 +840,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	__be32 *p;
 	int ret;
 
+	rqstp->rq_xprt_ctxt = NULL;
+
 	spin_lock(&rdma_xprt->sc_rq_dto_lock);
 	ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
 	if (ctxt) {
@@ -785,7 +871,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	rqstp->rq_next_page = rqstp->rq_respages;
 
 	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
-	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
+	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
 	if (ret < 0)
 		goto out_err;
 	if (ret == 0)
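
The conversion above replaces open-coded pointer arithmetic (advancing p and checking it against end after the fact) with the kernel's xdr_stream helpers: xdr_init_decode(), xdr_inline_decode(), xdr_reserve_space() and xdr_stream_pos(). The stand-alone userspace sketch below illustrates the same pattern; it is not kernel code, and the names xdr_cursor and cursor_inline_decode are invented for this example. Every read of the receive buffer goes through a bounds-checked cursor that returns NULL instead of walking past the end, which is what lets the reworked xdr_check_* helpers drop their trailing "if (p > end)" tests.

/*
 * Illustrative userspace analogue of the bounds-checked decode pattern.
 * Not kernel code; types and helper names here are invented for the example.
 */
#include <arpa/inet.h>		/* ntohl(), htonl() */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct xdr_cursor {
	const uint32_t *p;	/* next unread XDR word */
	const uint32_t *end;	/* first word past the buffer */
};

/* Return a pointer to 'nwords' XDR words, or NULL if the buffer is short. */
static const uint32_t *cursor_inline_decode(struct xdr_cursor *c, size_t nwords)
{
	const uint32_t *p = c->p;

	if ((size_t)(c->end - c->p) < nwords)
		return NULL;
	c->p += nwords;
	return p;
}

/* Walk a Read-list-like sequence: a present flag followed by a
 * five-word segment, repeated, and terminated by a zero flag word. */
static bool check_list(struct xdr_cursor *c)
{
	const uint32_t *p;

	p = cursor_inline_decode(c, 1);
	if (!p)
		return false;
	while (ntohl(*p) != 0) {
		if (!cursor_inline_decode(c, 5))	/* one segment */
			return false;
		p = cursor_inline_decode(c, 1);		/* next flag word */
		if (!p)
			return false;
	}
	return true;
}

int main(void)
{
	/* One segment followed by a terminating zero word. */
	uint32_t buf[] = { htonl(1), htonl(0), htonl(7), htonl(64),
			   htonl(0), htonl(0), htonl(0) };
	struct xdr_cursor c = { buf, buf + sizeof(buf) / sizeof(buf[0]) };

	printf("list %s\n", check_list(&c) ? "ok" : "corrupt");
	return 0;
}

Built with any C compiler, the example prints "list ok" for the well-formed buffer; truncating the buffer makes cursor_inline_decode() return NULL, so the walk reports corruption rather than reading past the end.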