Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r-- | fs/nfs/write.c | 335
1 file changed, 282 insertions, 53 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f05f321f9d3d..962c9ee758be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
 static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 }
 
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
 static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
 {
 	struct nfs_page *req = NULL;
 
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
 		/* Linearly search the commit list for the correct req */
 		list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
 			if (freq->wb_page == page) {
-				req = freq;
+				req = freq->wb_head;
 				break;
 			}
 		}
 	}
 
-	if (req)
+	if (req) {
+		WARN_ON_ONCE(req->wb_head != req);
+
 		kref_get(&req->wb_kref);
+	}
 
 	return req;
 }
 
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
 {
 	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *req = NULL;
 
 	spin_lock(&inode->i_lock);
-	req = nfs_page_find_request_locked(NFS_I(inode), page);
+	req = nfs_page_find_head_request_locked(NFS_I(inode), page);
 	spin_unlock(&inode->i_lock);
 	return req;
 }
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
 		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
 
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ *   @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+	clear_bit(PG_TEARDOWN, &req->wb_flags);
+	clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+	clear_bit(PG_UPTODATE, &req->wb_flags);
+	clear_bit(PG_WB_END, &req->wb_flags);
+	clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait -  unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head  - head request of page group, must be holding head lock
+ * @req   - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ *       and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+			  struct nfs_page *req, bool nonblock)
+	__releases(&inode->i_lock)
+{
+	struct nfs_page *tmp;
+	int ret;
+
+	/* relinquish all the locks successfully grabbed this run */
+	for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+		nfs_unlock_request(tmp);
+
+	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+	/* grab a ref on the request that will be waited on */
+	kref_get(&req->wb_kref);
+
+	nfs_page_group_unlock(head);
+	spin_unlock(&inode->i_lock);
+
+	/* release ref from nfs_page_find_head_request_locked */
+	nfs_release_request(head);
+
+	if (!nonblock)
+		ret = nfs_wait_on_request(req);
+	else
+		ret = -EAGAIN;
+	nfs_release_request(req);
+
+	return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+				 struct nfs_page *old_head)
+{
+	while (destroy_list) {
+		struct nfs_page *subreq = destroy_list;
+
+		destroy_list = (subreq->wb_this_page == old_head) ?
+				   NULL : subreq->wb_this_page;
+
+		WARN_ON_ONCE(old_head != subreq->wb_head);
+
+		/* make sure old group is not used */
+		subreq->wb_head = subreq;
+		subreq->wb_this_page = subreq;
+
+		nfs_clear_request_commit(subreq);
+
+		/* subreq is now totally disconnected from page group or any
+		 * write / commit lists. last chance to wake any waiters */
+		nfs_unlock_request(subreq);
+
+		if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+			/* release ref on old head request */
+			nfs_release_request(old_head);
+
+			nfs_page_group_clear_bits(subreq);
+
+			/* release the PG_INODE_REF reference */
+			if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+				nfs_release_request(subreq);
+			else
+				WARN_ON_ONCE(1);
+		} else {
+			WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+			/* zombie requests have already released the last
+			 * reference and were waiting on the rest of the
+			 * group to complete. Since it's no longer part of a
+			 * group, simply free the request */
+			nfs_page_group_clear_bits(subreq);
+			nfs_free_request(subreq);
+		}
+	}
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ *                              a locked reference, cancelling any pending
+ *                              operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group.  All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
 {
 	struct inode *inode = page_file_mapping(page)->host;
-	struct nfs_page *req;
+	struct nfs_page *head, *subreq;
+	struct nfs_page *destroy_list = NULL;
+	unsigned int total_bytes;
 	int ret;
 
+try_again:
+	total_bytes = 0;
+
+	WARN_ON_ONCE(destroy_list);
+
 	spin_lock(&inode->i_lock);
-	for (;;) {
-		req = nfs_page_find_request_locked(NFS_I(inode), page);
-		if (req == NULL)
-			break;
-		if (nfs_lock_request(req))
-			break;
-		/* Note: If we hold the page lock, as is the case in nfs_writepage,
-		 *	 then the call to nfs_lock_request() will always
-		 *	 succeed provided that someone hasn't already marked the
-		 *	 request as dirty (in which case we don't care).
-		 */
+
+	/*
+	 * A reference is taken only on the head request which acts as a
+	 * reference to the whole page group - the group will not be destroyed
+	 * until the head reference is released.
+	 */
+	head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+	if (!head) {
 		spin_unlock(&inode->i_lock);
-		if (!nonblock)
-			ret = nfs_wait_on_request(req);
-		else
-			ret = -EAGAIN;
-		nfs_release_request(req);
-		if (ret != 0)
+		return NULL;
+	}
+
+	/* lock each request in the page group */
+	nfs_page_group_lock(head);
+	subreq = head;
+	do {
+		/*
+		 * Subrequests are always contiguous, non overlapping
+		 * and in order. If not, it's a programming error.
+		 */
+		WARN_ON_ONCE(subreq->wb_offset !=
+		     (head->wb_offset + total_bytes));
+
+		/* keep track of how many bytes this group covers */
+		total_bytes += subreq->wb_bytes;
+
+		if (!nfs_lock_request(subreq)) {
+			/* releases page group bit lock and
+			 * inode spin lock and all references */
+			ret = nfs_unroll_locks_and_wait(inode, head,
+				subreq, nonblock);
+
+			if (ret == 0)
+				goto try_again;
+
 			return ERR_PTR(ret);
-		spin_lock(&inode->i_lock);
+		}
+
+		subreq = subreq->wb_this_page;
+	} while (subreq != head);
+
+	/* Now that all requests are locked, make sure they aren't on any list.
+	 * Commit list removal accounting is done after locks are dropped */
+	subreq = head;
+	do {
+		nfs_list_remove_request(subreq);
+		subreq = subreq->wb_this_page;
+	} while (subreq != head);
+
+	/* unlink subrequests from head, destroy them later */
+	if (head->wb_this_page != head) {
+		/* destroy list will be terminated by head */
+		destroy_list = head->wb_this_page;
+		head->wb_this_page = head;
+
+		/* change head request to cover whole range that
+		 * the former page group covered */
+		head->wb_bytes = total_bytes;
 	}
+
+	/*
+	 * prepare head request to be added to new pgio descriptor
+	 */
+	nfs_page_group_clear_bits(head);
+
+	/*
+	 * some part of the group was still on the inode list - otherwise
+	 * the group wouldn't be involved in async write.
+	 * grab a reference for the head request, iff it needs one.
+	 */
+	if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+		kref_get(&head->wb_kref);
+
+	nfs_page_group_unlock(head);
+
+	/* drop lock to clear_request_commit the head req and clean up
+	 * requests on destroy list */
 	spin_unlock(&inode->i_lock);
-	return req;
+
+	nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+	/* clean up commit list state */
+	nfs_clear_request_commit(head);
+
+	/* still holds ref on head from nfs_page_find_head_request_locked
+	 * and still has lock on head from lock loop */
+	return head;
 }
 
 /*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	struct nfs_page *req;
 	int ret = 0;
 
-	req = nfs_find_and_lock_request(page, nonblock);
+	req = nfs_lock_and_join_requests(page, nonblock);
 	if (!req)
 		goto out;
 	ret = PTR_ERR(req);
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		set_page_private(req->wb_page, (unsigned long)req);
 	}
 	nfsi->npages++;
-	set_bit(PG_INODE_REF, &req->wb_flags);
+	/* this a head request for a page group - mark it as having an
+	 * extra reference so sub groups can follow suit */
+	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
 	kref_get(&req->wb_kref);
 	spin_unlock(&inode->i_lock);
 }
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 		nfsi->npages--;
 		spin_unlock(&inode->i_lock);
 	}
-	nfs_release_request(req);
+
+	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+		nfs_release_request(req);
 }
 
 static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
 	unsigned long bytes = 0;
-	bool do_destroy;
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 		goto out;
@@ -668,7 +897,6 @@ remove_req:
 next:
 		nfs_unlock_request(req);
 		nfs_end_page_writeback(req);
-		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
 		nfs_release_request(req);
 	}
 out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 
 	spin_lock(&inode->i_lock);
 	for (;;) {
-		req = nfs_page_find_request_locked(NFS_I(inode), page);
+		req = nfs_page_find_head_request_locked(NFS_I(inode), page);
 		if (req == NULL)
 			goto out_unlock;
 
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 	 * dropped page.
 	 */
 	do {
-		req = nfs_page_find_request(page);
+		req = nfs_page_find_head_request(page);
 		if (req == NULL)
 			return 0;
 		l_ctx = req->wb_lock_context;
@@ -1569,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 	struct nfs_page *req;
 	int ret = 0;
 
-	for (;;) {
-		wait_on_page_writeback(page);
-		req = nfs_page_find_request(page);
-		if (req == NULL)
-			break;
-		if (nfs_lock_request(req)) {
-			nfs_clear_request_commit(req);
-			nfs_inode_remove_request(req);
-			/*
-			 * In case nfs_inode_remove_request has marked the
-			 * page as being dirty
-			 */
-			cancel_dirty_page(page, PAGE_CACHE_SIZE);
-			nfs_unlock_and_release_request(req);
-			break;
-		}
-		ret = nfs_wait_on_request(req);
-		nfs_release_request(req);
-		if (ret < 0)
-			break;
+	wait_on_page_writeback(page);
+
+	/* blocking call to cancel all requests and join to a single (head)
+	 * request */
+	req = nfs_lock_and_join_requests(page, false);
+
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
+	} else if (req) {
+		/* all requests from this page have been cancelled by
+		 * nfs_lock_and_join_requests, so just remove the head
+		 * request from the inode / page_private pointer and
+		 * release it */
+		nfs_inode_remove_request(req);
+		/*
+		 * In case nfs_inode_remove_request has marked the
+		 * page as being dirty
+		 */
+		cancel_dirty_page(page, PAGE_CACHE_SIZE);
+		nfs_unlock_and_release_request(req);
	}
+
 	return ret;
 }
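The core data structure behind this patch is the "page group": a circular, singly linked list of nfs_page requests (wb_this_page) that share a common head (wb_head) and cover contiguous, non-overlapping byte ranges of one page. The stand-alone C sketch below models only the join step of nfs_lock_and_join_requests under those stated assumptions; struct req and join_group are simplified illustrations (no locking, refcounting, or commit-list handling), not kernel API.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for struct nfs_page: only the fields the join uses */
struct req {
	struct req *wb_head;       /* head request of the page group */
	struct req *wb_this_page;  /* circular list, terminates at head */
	unsigned int wb_offset;    /* byte offset within the page */
	unsigned int wb_bytes;     /* byte count */
};

/* model of the join step: walk the group, check the contiguity invariant,
 * accumulate the byte count, then unlink every subrequest so the head
 * covers the whole former range */
static void join_group(struct req *head)
{
	unsigned int total_bytes = 0;
	struct req *subreq = head;

	do {
		/* subrequests are contiguous, non-overlapping and in order;
		 * the patch asserts the same invariant with WARN_ON_ONCE() */
		assert(subreq->wb_offset == head->wb_offset + total_bytes);
		total_bytes += subreq->wb_bytes;
		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* unlink and free subrequests; the real code defers this to
	 * nfs_destroy_unlinked_subrequests after dropping its locks */
	while (head->wb_this_page != head) {
		struct req *victim = head->wb_this_page;

		head->wb_this_page = victim->wb_this_page;
		victim->wb_head = victim;          /* detach from old group */
		victim->wb_this_page = victim;
		free(victim);
	}

	head->wb_bytes = total_bytes;  /* head now covers the whole range */
}

int main(void)
{
	/* a group of three contiguous 1 KiB requests */
	struct req *head = malloc(sizeof(*head));
	struct req *s1 = malloc(sizeof(*s1));
	struct req *s2 = malloc(sizeof(*s2));

	*head = (struct req){ .wb_head = head, .wb_this_page = s1,
			      .wb_offset = 0, .wb_bytes = 1024 };
	*s1 = (struct req){ .wb_head = head, .wb_this_page = s2,
			    .wb_offset = 1024, .wb_bytes = 1024 };
	*s2 = (struct req){ .wb_head = head, .wb_this_page = head,
			    .wb_offset = 2048, .wb_bytes = 1024 };

	join_group(head);
	printf("joined: offset=%u bytes=%u\n", head->wb_offset, head->wb_bytes);
	free(head);
	return 0;
}

Running this prints "joined: offset=0 bytes=3072", mirroring how the patched head request grows to cover the range of the former group.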
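The locking strategy is also worth isolating: nfs_lock_and_join_requests trylocks each request in turn, and on contention unrolls every lock taken during that pass (nfs_unroll_locks_and_wait), waits for the contended request, and restarts from the try_again label. Below is a hedged sketch of that pattern using POSIX mutexes as a stand-in for the per-request bit lock; the req_lock array and lock_all helper are purely illustrative, not anything from the patch itself.

#include <pthread.h>
#include <stdio.h>

#define NREQS 3

/* stand-in for the per-request lock taken by nfs_lock_request() */
static pthread_mutex_t req_lock[NREQS] = {
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
};

/* trylock every request; on contention, unroll all locks taken this
 * pass, block until the contended lock is free, then retry from scratch,
 * mirroring the try_again loop / nfs_unroll_locks_and_wait pairing */
static int lock_all(int nonblock)
{
	int i, contended;

try_again:
	for (i = 0; i < NREQS; i++) {
		if (pthread_mutex_trylock(&req_lock[i]) != 0) {
			contended = i;

			/* relinquish all the locks grabbed this run */
			for (i = 0; i < contended; i++)
				pthread_mutex_unlock(&req_lock[i]);

			if (nonblock)
				return -1;  /* the kernel code returns -EAGAIN */

			/* wait for the holder to drop it, then retry; the
			 * kernel waits on the request rather than relocking */
			pthread_mutex_lock(&req_lock[contended]);
			pthread_mutex_unlock(&req_lock[contended]);
			goto try_again;
		}
	}
	return 0;  /* all requests locked */
}

int main(void)
{
	if (lock_all(0) == 0)
		printf("locked all %d requests\n", NREQS);
	for (int i = 0; i < NREQS; i++)
		pthread_mutex_unlock(&req_lock[i]);
	return 0;
}

Unrolling before waiting is what keeps the scheme deadlock-free: no thread ever sleeps while holding a partial set of request locks, which is the same reason the patch releases the page-group bit lock and inode spin lock before nfs_wait_on_request.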