Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r--	fs/nfs/write.c	465
1 file changed, 224 insertions(+), 241 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b1af5dee5e0a..babebbccae2a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -102,10 +102,8 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		p->rw_mode = FMODE_WRITE;
-	}
+	memset(p, 0, sizeof(*p));
+	p->rw_mode = FMODE_WRITE;
 	return p;
 }
 
@@ -147,11 +145,12 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc)
 		kref_put(&ioc->refcount, nfs_io_completion_release);
 }
 
-static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
+static struct nfs_page *
+nfs_page_private_request(struct page *page)
 {
-	ctx->error = error;
-	smp_wmb();
-	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+	if (!PagePrivate(page))
+		return NULL;
+	return (struct nfs_page *)page_private(page);
 }
 
 /*
@@ -162,21 +161,41 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
  * returns matching head request with reference held, or NULL if not found.
  */
 static struct nfs_page *
-nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_private_request(struct page *page)
 {
-	struct nfs_page *req = NULL;
-
-	if (PagePrivate(page))
-		req = (struct nfs_page *)page_private(page);
-	else if (unlikely(PageSwapCache(page)))
-		req = nfs_page_search_commits_for_head_request_locked(nfsi,
-			page);
+	struct address_space *mapping = page_file_mapping(page);
+	struct nfs_page *req;
 
+	if (!PagePrivate(page))
+		return NULL;
+	spin_lock(&mapping->private_lock);
+	req = nfs_page_private_request(page);
 	if (req) {
 		WARN_ON_ONCE(req->wb_head != req);
 		kref_get(&req->wb_kref);
 	}
+	spin_unlock(&mapping->private_lock);
+	return req;
+}
+
+static struct nfs_page *
+nfs_page_find_swap_request(struct page *page)
+{
+	struct inode *inode = page_file_mapping(page)->host;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *req = NULL;
+
+	if (!PageSwapCache(page))
+		return NULL;
+	mutex_lock(&nfsi->commit_mutex);
+	if (PageSwapCache(page)) {
+		req = nfs_page_search_commits_for_head_request_locked(nfsi,
+			page);
+		if (req) {
+			WARN_ON_ONCE(req->wb_head != req);
+			kref_get(&req->wb_kref);
+		}
+	}
+	mutex_unlock(&nfsi->commit_mutex);
 	return req;
 }
 
@@ -187,12 +206,11 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
  */
 static struct nfs_page *nfs_page_find_head_request(struct page *page)
 {
-	struct inode *inode = page_file_mapping(page)->host;
-	struct nfs_page *req = NULL;
+	struct nfs_page *req;
 
-	spin_lock(&inode->i_lock);
-	req = nfs_page_find_head_request_locked(NFS_I(inode), page);
-	spin_unlock(&inode->i_lock);
+	req = nfs_page_find_private_request(page);
+	if (!req)
+		req = nfs_page_find_swap_request(page);
 	return req;
 }
 
@@ -241,9 +259,6 @@ nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
 {
 	struct nfs_page *req;
 
-	WARN_ON_ONCE(head != head->wb_head);
-	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
-
 	req = head;
 	do {
 		if (page_offset >= req->wb_pgbase &&
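
The two lookup helpers introduced above replace one i_lock-protected lookup with a lockless PagePrivate()/PageSwapCache() test, followed by a re-check and kref_get() under a finer-grained lock. A userspace sketch of that check-lock-recheck-and-reference pattern (illustrative names and types only, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct request {
	atomic_int refcount;
};

struct page_slot {
	pthread_mutex_t private_lock;	/* stands in for mapping->private_lock */
	struct request *private;	/* stands in for page_private(page) */
};

/* Return the attached request with a reference held, or NULL. */
static struct request *find_private_request(struct page_slot *slot)
{
	struct request *req = NULL;

	/* Racy fast-path check, like the PagePrivate() test: a stale
	 * answer is fine because we re-check under the lock. */
	if (!slot->private)
		return NULL;
	pthread_mutex_lock(&slot->private_lock);
	if (slot->private) {
		req = slot->private;
		atomic_fetch_add(&req->refcount, 1);	/* like kref_get() */
	}
	pthread_mutex_unlock(&slot->private_lock);
	return req;
}
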
@@ -269,20 +284,17 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
 	unsigned int pos = 0;
 	unsigned int len = nfs_page_length(req->wb_page);
 
-	nfs_page_group_lock(req, false);
+	nfs_page_group_lock(req);
 
-	do {
+	for (;;) {
 		tmp = nfs_page_group_search_locked(req->wb_head, pos);
-		if (tmp) {
-			/* no way this should happen */
-			WARN_ON_ONCE(tmp->wb_pgbase != pos);
-			pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
-		}
-	} while (tmp && pos < len);
+		if (!tmp)
+			break;
+		pos = tmp->wb_pgbase + tmp->wb_bytes;
+	}
 
 	nfs_page_group_unlock(req);
-	WARN_ON_ONCE(pos > len);
-	return pos == len;
+	return pos >= len;
 }
 
 /* We can set the PG_uptodate flag if we see that a write request
@@ -333,8 +345,11 @@ static void nfs_end_page_writeback(struct nfs_page *req)
 {
 	struct inode *inode = page_file_mapping(req->wb_page)->host;
 	struct nfs_server *nfss = NFS_SERVER(inode);
+	bool is_done;
 
-	if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+	is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
+	nfs_unlock_request(req);
+	if (!is_done)
 		return;
 
 	end_page_writeback(req->wb_page);
@@ -342,22 +357,6 @@ static void nfs_end_page_writeback(struct nfs_page *req)
 		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 }
 
-
-/* nfs_page_group_clear_bits
- *   @req - an nfs request
- * clears all page group related bits from @req
- */
-static void
-nfs_page_group_clear_bits(struct nfs_page *req)
-{
-	clear_bit(PG_TEARDOWN, &req->wb_flags);
-	clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
-	clear_bit(PG_UPTODATE, &req->wb_flags);
-	clear_bit(PG_WB_END, &req->wb_flags);
-	clear_bit(PG_REMOVE, &req->wb_flags);
-}
-
-
 /*
  * nfs_unroll_locks_and_wait -  unlock all newly locked reqs and wait on @req
 *
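
nfs_end_page_writeback() now drops the request lock unconditionally, while nfs_page_group_sync_on_bit(req, PG_WB_END) still guarantees that exactly one sub-request, the last to finish, ends writeback on the page. A loose analogue of that "last one out" rule, using an atomic countdown instead of the kernel's per-group bit array (assumed simplified types):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page_group {
	atomic_int remaining;		/* sub-requests still in writeback */
};

/* True for exactly one caller per group: the last finisher. */
static bool group_writeback_done(struct page_group *grp)
{
	return atomic_fetch_sub(&grp->remaining, 1) == 1;
}

static void end_subrequest(struct page_group *grp)
{
	bool is_done = group_writeback_done(grp);

	/* this sub-request is unlocked here in all cases ... */
	if (is_done)
		printf("last sub-request: end page writeback\n");
}
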
@@ -366,43 +365,24 @@ nfs_page_group_clear_bits(struct nfs_page *req)
  * @inode - inode associated with request page group, must be holding inode lock
  * @head  - head request of page group, must be holding head lock
  * @req   - request that couldn't lock and needs to wait on the req bit lock
- * @nonblock - if true, don't actually wait
  *
- * NOTE: this must be called holding page_group bit lock and inode spin lock
- *       and BOTH will be released before returning.
+ * NOTE: this must be called holding page_group bit lock
+ *       which will be released before returning.
  *
  * returns 0 on success, < 0 on error.
  */
-static int
-nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
-			  struct nfs_page *req, bool nonblock)
-	__releases(&inode->i_lock)
+static void
+nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
+			  struct nfs_page *req)
 {
 	struct nfs_page *tmp;
-	int ret;
 
 	/* relinquish all the locks successfully grabbed this run */
-	for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
-		nfs_unlock_request(tmp);
-
-	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
-
-	/* grab a ref on the request that will be waited on */
-	kref_get(&req->wb_kref);
-
-	nfs_page_group_unlock(head);
-	spin_unlock(&inode->i_lock);
-
-	/* release ref from nfs_page_find_head_request_locked */
-	nfs_release_request(head);
-
-	if (!nonblock)
-		ret = nfs_wait_on_request(req);
-	else
-		ret = -EAGAIN;
-	nfs_release_request(req);
-
-	return ret;
+	for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+		if (!kref_read(&tmp->wb_kref))
+			continue;
+		nfs_unlock_and_release_request(tmp);
+	}
 }
 
 /*
@@ -417,7 +397,8 @@ nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
  */
 static void
 nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
-				 struct nfs_page *old_head)
+				 struct nfs_page *old_head,
+				 struct inode *inode)
 {
 	while (destroy_list) {
 		struct nfs_page *subreq = destroy_list;
@@ -428,33 +409,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
 		WARN_ON_ONCE(old_head != subreq->wb_head);
 
 		/* make sure old group is not used */
-		subreq->wb_head = subreq;
 		subreq->wb_this_page = subreq;
 
-		/* subreq is now totally disconnected from page group or any
-		 * write / commit lists. last chance to wake any waiters */
-		nfs_unlock_request(subreq);
+		clear_bit(PG_REMOVE, &subreq->wb_flags);
 
-		if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
-			/* release ref on old head request */
-			nfs_release_request(old_head);
+		/* Note: races with nfs_page_group_destroy() */
+		if (!kref_read(&subreq->wb_kref)) {
+			/* Check if we raced with nfs_page_group_destroy() */
+			if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
+				nfs_free_request(subreq);
+			continue;
+		}
 
-			nfs_page_group_clear_bits(subreq);
+		subreq->wb_head = subreq;
 
-			/* release the PG_INODE_REF reference */
-			if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
-				nfs_release_request(subreq);
-			else
-				WARN_ON_ONCE(1);
-		} else {
-			WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
-			/* zombie requests have already released the last
-			 * reference and were waiting on the rest of the
-			 * group to complete. Since it's no longer part of a
-			 * group, simply free the request */
-			nfs_page_group_clear_bits(subreq);
-			nfs_free_request(subreq);
+		if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
+			nfs_release_request(subreq);
+			atomic_long_dec(&NFS_I(inode)->nrequests);
 		}
+
+		/* subreq is now totally disconnected from page group or any
+		 * write / commit lists. last chance to wake any waiters */
+		nfs_unlock_and_release_request(subreq);
 	}
 }
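
Both nfs_unroll_locks() and the rewritten teardown loop treat a request whose kref_read() is zero as already dying and leave it to (or finish) the group teardown. That is only safe together with the get-unless-zero idiom used in the locking loop further down; a self-contained C11 sketch of that idiom (an analogue of the kernel's kref_get_unless_zero(), not the real implementation):

#include <stdatomic.h>
#include <stdbool.h>

struct ref {
	atomic_int count;
};

/*
 * Take a reference only if the object is still live. Once the count
 * has reached zero the object is on its way to being freed and must
 * not be revived.
 */
static bool ref_get_unless_zero(struct ref *r)
{
	int old = atomic_load(&r->count);

	while (old != 0) {
		/* on failure, 'old' is reloaded with the current value */
		if (atomic_compare_exchange_weak(&r->count, &old, old + 1))
			return true;
	}
	return false;	/* lost the race with the final put */
}
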
@@ -464,7 +440,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
  *                              operations for this page.
  *
  * @page - the page used to lookup the "page group" of nfs_page structures
- * @nonblock - if true, don't block waiting for request locks
  *
  * This function joins all sub requests to the head request by first
  * locking all requests in the group, cancelling any pending operations
@@ -478,7 +453,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
  * error was encountered.
  */
 static struct nfs_page *
-nfs_lock_and_join_requests(struct page *page, bool nonblock)
+nfs_lock_and_join_requests(struct page *page)
 {
 	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *head, *subreq;
@@ -487,43 +462,63 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock)
 	int ret;
 
 try_again:
-	total_bytes = 0;
-
-	WARN_ON_ONCE(destroy_list);
-
-	spin_lock(&inode->i_lock);
-
 	/*
 	 * A reference is taken only on the head request which acts as a
 	 * reference to the whole page group - the group will not be destroyed
 	 * until the head reference is released.
 	 */
-	head = nfs_page_find_head_request_locked(NFS_I(inode), page);
-
-	if (!head) {
-		spin_unlock(&inode->i_lock);
+	head = nfs_page_find_head_request(page);
+	if (!head)
 		return NULL;
-	}
 
-	/* holding inode lock, so always make a non-blocking call to try the
-	 * page group lock */
-	ret = nfs_page_group_lock(head, true);
-	if (ret < 0) {
-		spin_unlock(&inode->i_lock);
+	/* lock the page head first in order to avoid an ABBA inefficiency */
+	if (!nfs_lock_request(head)) {
+		ret = nfs_wait_on_request(head);
+		nfs_release_request(head);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		goto try_again;
+	}
 
-		if (!nonblock && ret == -EAGAIN) {
-			nfs_page_group_lock_wait(head);
-			nfs_release_request(head);
-			goto try_again;
-		}
+	/* Ensure that nobody removed the request before we locked it */
+	if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
+		nfs_unlock_and_release_request(head);
+		goto try_again;
+	}
 
-		nfs_release_request(head);
+	ret = nfs_page_group_lock(head);
+	if (ret < 0) {
+		nfs_unlock_and_release_request(head);
 		return ERR_PTR(ret);
 	}
 
 	/* lock each request in the page group */
-	subreq = head;
-	do {
+	total_bytes = head->wb_bytes;
+	for (subreq = head->wb_this_page; subreq != head;
+			subreq = subreq->wb_this_page) {
+
+		if (!kref_get_unless_zero(&subreq->wb_kref)) {
+			if (subreq->wb_offset == head->wb_offset + total_bytes)
+				total_bytes += subreq->wb_bytes;
+			continue;
+		}
+
+		while (!nfs_lock_request(subreq)) {
+			/*
+			 * Unlock page to allow nfs_page_group_sync_on_bit()
+			 * to succeed
+			 */
+			nfs_page_group_unlock(head);
+			ret = nfs_wait_on_request(subreq);
+			if (!ret)
+				ret = nfs_page_group_lock(head);
+			if (ret < 0) {
+				nfs_unroll_locks(inode, head, subreq);
+				nfs_release_request(subreq);
+				nfs_unlock_and_release_request(head);
+				return ERR_PTR(ret);
+			}
+		}
		/*
 		 * Subrequests are always contiguous, non overlapping
 		 * and in order - but may be repeated (mirrored writes).
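
The locking loop above may not sleep on a sub-request while holding the page-group bit lock, so it drops the group lock, waits, re-takes the group lock, and retries the trylock. A compressed pthread sketch of that drop-wait-retake shape (hypothetical stand-in types; the kref-based lifetime handling is elided):

#include <pthread.h>

struct subreq {
	pthread_mutex_t lock;
};

struct group {
	pthread_mutex_t lock;	/* stands in for the page-group bit lock */
};

/* Called with grp->lock held; returns with both locks held. */
static void lock_subreq(struct group *grp, struct subreq *sub)
{
	while (pthread_mutex_trylock(&sub->lock) != 0) {
		/* never block on the sub-request with the group lock held */
		pthread_mutex_unlock(&grp->lock);
		pthread_mutex_lock(&sub->lock);	  /* blocking wait ... */
		pthread_mutex_unlock(&sub->lock); /* ... we only waited */
		pthread_mutex_lock(&grp->lock);
	}
}
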
@@ -535,24 +530,12 @@ try_again:
 			    ((subreq->wb_offset + subreq->wb_bytes) >
 			     (head->wb_offset + total_bytes)))) {
 			nfs_page_group_unlock(head);
-			spin_unlock(&inode->i_lock);
+			nfs_unroll_locks(inode, head, subreq);
+			nfs_unlock_and_release_request(subreq);
+			nfs_unlock_and_release_request(head);
 			return ERR_PTR(-EIO);
 		}
-
-		if (!nfs_lock_request(subreq)) {
-			/* releases page group bit lock and
-			 * inode spin lock and all references */
-			ret = nfs_unroll_locks_and_wait(inode, head,
-				subreq, nonblock);
-
-			if (ret == 0)
-				goto try_again;
-
-			return ERR_PTR(ret);
-		}
-
-		subreq = subreq->wb_this_page;
-	} while (subreq != head);
+	}
 
 	/* Now that all requests are locked, make sure they aren't on any list.
 	 * Commit list removal accounting is done after locks are dropped */
@@ -573,34 +556,30 @@ try_again:
 		head->wb_bytes = total_bytes;
 	}
 
-	/*
-	 * prepare head request to be added to new pgio descriptor
-	 */
-	nfs_page_group_clear_bits(head);
-
-	/*
-	 * some part of the group was still on the inode list - otherwise
-	 * the group wouldn't be involved in async write.
-	 * grab a reference for the head request, iff it needs one.
-	 */
-	if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+	/* Postpone destruction of this request */
+	if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
+		set_bit(PG_INODE_REF, &head->wb_flags);
 		kref_get(&head->wb_kref);
+		atomic_long_inc(&NFS_I(inode)->nrequests);
+	}
 
 	nfs_page_group_unlock(head);
 
-	/* drop lock to clean uprequests on destroy list */
-	spin_unlock(&inode->i_lock);
+	nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
 
-	nfs_destroy_unlinked_subrequests(destroy_list, head);
+	/* Did we lose a race with nfs_inode_remove_request()? */
+	if (!(PagePrivate(page) || PageSwapCache(page))) {
+		nfs_unlock_and_release_request(head);
+		return NULL;
+	}
 
-	/* still holds ref on head from nfs_page_find_head_request_locked
+	/* still holds ref on head from nfs_page_find_head_request
 	 * and still has lock on head from lock loop */
 	return head;
 }
 
 static void nfs_write_error_remove_page(struct nfs_page *req)
 {
-	nfs_unlock_request(req);
 	nfs_end_page_writeback(req);
 	generic_error_remove_page(page_file_mapping(req->wb_page),
 				  req->wb_page);
@@ -624,12 +603,12 @@ nfs_error_is_fatal_on_server(int err)
  * May return an error if the user signalled nfs_wait_on_request().
  */
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
-				struct page *page, bool nonblock)
+				struct page *page)
 {
 	struct nfs_page *req;
 	int ret = 0;
 
-	req = nfs_lock_and_join_requests(page, nonblock);
+	req = nfs_lock_and_join_requests(page);
 	if (!req)
 		goto out;
 	ret = PTR_ERR(req);
@@ -672,7 +651,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
 	int ret;
 
 	nfs_pageio_cond_complete(pgio, page_index(page));
-	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
+	ret = nfs_page_async_flush(pgio, page);
 	if (ret == -EAGAIN) {
 		redirty_page_for_writepage(wbc, page);
 		ret = 0;
@@ -759,6 +738,7 @@ out_err:
  */
 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
+	struct address_space *mapping = page_file_mapping(req->wb_page);
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	WARN_ON_ONCE(req->wb_this_page != req);
@@ -766,27 +746,30 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	/* Lock the request! */
 	nfs_lock_request(req);
 
-	spin_lock(&inode->i_lock);
-	if (!nfsi->nrequests &&
-	    NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
-		inode->i_version++;
 	/*
 	 * Swap-space should not get truncated. Hence no need to plug the race
 	 * with invalidate/truncate.
 	 */
+	spin_lock(&mapping->private_lock);
+	if (!nfs_have_writebacks(inode) &&
+	    NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) {
+		spin_lock(&inode->i_lock);
+		inode->i_version++;
+		spin_unlock(&inode->i_lock);
+	}
 	if (likely(!PageSwapCache(req->wb_page))) {
 		set_bit(PG_MAPPED, &req->wb_flags);
 		SetPagePrivate(req->wb_page);
 		set_page_private(req->wb_page, (unsigned long)req);
 	}
-	nfsi->nrequests++;
+	spin_unlock(&mapping->private_lock);
+	atomic_long_inc(&nfsi->nrequests);
 	/* this a head request for a page group - mark it as having an
 	 * extra reference so sub groups can follow suit.
 	 * This flag also informs pgio layer when to bump nrequests when
 	 * adding subrequests. */
 	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
 	kref_get(&req->wb_kref);
-	spin_unlock(&inode->i_lock);
 }
 
 /*
@@ -794,25 +777,22 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
-	struct inode *inode = d_inode(req->wb_context->dentry);
+	struct address_space *mapping = page_file_mapping(req->wb_page);
+	struct inode *inode = mapping->host;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *head;
 
+	atomic_long_dec(&nfsi->nrequests);
 	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
 		head = req->wb_head;
 
-		spin_lock(&inode->i_lock);
+		spin_lock(&mapping->private_lock);
 		if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
 			set_page_private(head->wb_page, 0);
 			ClearPagePrivate(head->wb_page);
 			clear_bit(PG_MAPPED, &head->wb_flags);
 		}
-		nfsi->nrequests--;
-		spin_unlock(&inode->i_lock);
-	} else {
-		spin_lock(&inode->i_lock);
-		nfsi->nrequests--;
-		spin_unlock(&inode->i_lock);
+		spin_unlock(&mapping->private_lock);
 	}
 
 	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
@@ -868,7 +848,8 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
  * number of outstanding requests requiring a commit as well as
  * the MM page stats.
 *
- * The caller must hold cinfo->inode->i_lock, and the nfs_page lock.
+ * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
+ * nfs_page lock.
 */
 void
 nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
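
nfs_inode_add_request() and nfs_inode_remove_request() now confine mapping->private_lock to the attach and detach of page->private, while nrequests becomes a plain atomic counter that readers can consult without any lock. A sketch of that split (C11 atomics, simplified stand-in types):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct inode_stub {
	atomic_long nrequests;		/* formerly guarded by i_lock */
};

struct mapping_stub {
	pthread_mutex_t private_lock;	/* guards only the pointer attach */
	void *page_private;
};

static void add_request(struct inode_stub *inode,
			struct mapping_stub *mapping, void *req)
{
	/* short critical section: just the page->private attach */
	pthread_mutex_lock(&mapping->private_lock);
	mapping->page_private = req;
	pthread_mutex_unlock(&mapping->private_lock);

	/* bookkeeping no longer needs a lock at all */
	atomic_fetch_add(&inode->nrequests, 1);
}

static bool have_writebacks(struct inode_stub *inode)
{
	return atomic_load(&inode->nrequests) != 0;
}
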
@@ -876,7 +857,7 @@ nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
 {
 	set_bit(PG_CLEAN, &req->wb_flags);
 	nfs_list_add_request(req, dst);
-	cinfo->mds->ncommit++;
+	atomic_long_inc(&cinfo->mds->ncommit);
 }
 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
 
@@ -896,9 +877,9 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
 void
 nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
-	spin_lock(&cinfo->inode->i_lock);
+	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
-	spin_unlock(&cinfo->inode->i_lock);
+	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 	if (req->wb_page)
 		nfs_mark_page_unstable(req->wb_page, cinfo);
 }
@@ -922,7 +903,7 @@ nfs_request_remove_commit_list(struct nfs_page *req,
 	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
 		return;
 	nfs_list_remove_request(req);
-	cinfo->mds->ncommit--;
+	atomic_long_dec(&cinfo->mds->ncommit);
 }
 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
 
@@ -967,7 +948,7 @@ nfs_clear_page_commit(struct page *page)
 		    WB_RECLAIMABLE);
 }
 
-/* Called holding inode (/cinfo) lock */
+/* Called holding the request lock on @req */
 static void
 nfs_clear_request_commit(struct nfs_page *req)
 {
@@ -976,9 +957,11 @@ nfs_clear_request_commit(struct nfs_page *req)
 		struct nfs_commit_info cinfo;
 
 		nfs_init_cinfo_from_inode(&cinfo, inode);
+		mutex_lock(&NFS_I(inode)->commit_mutex);
 		if (!pnfs_clear_request_commit(req, &cinfo)) {
 			nfs_request_remove_commit_list(req, &cinfo);
 		}
+		mutex_unlock(&NFS_I(inode)->commit_mutex);
 		nfs_clear_page_commit(req->wb_page);
 	}
 }
@@ -1023,7 +1006,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 remove_req:
 		nfs_inode_remove_request(req);
 next:
-		nfs_unlock_request(req);
 		nfs_end_page_writeback(req);
 		nfs_release_request(req);
 	}
@@ -1035,10 +1017,10 @@ out:
 unsigned long
 nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
-	return cinfo->mds->ncommit;
+	return atomic_long_read(&cinfo->mds->ncommit);
 }
 
-/* cinfo->inode->i_lock held by caller */
+/* NFS_I(cinfo->inode)->commit_mutex held by caller */
 int
 nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 		     struct nfs_commit_info *cinfo, int max)
@@ -1046,20 +1028,37 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 	struct nfs_page *req, *tmp;
 	int ret = 0;
 
+restart:
 	list_for_each_entry_safe(req, tmp, src, wb_list) {
-		if (!nfs_lock_request(req))
-			continue;
 		kref_get(&req->wb_kref);
-		if (cond_resched_lock(&cinfo->inode->i_lock))
-			list_safe_reset_next(req, tmp, wb_list);
+		if (!nfs_lock_request(req)) {
+			int status;
+
+			/* Prevent deadlock with nfs_lock_and_join_requests */
+			if (!list_empty(dst)) {
+				nfs_release_request(req);
+				continue;
+			}
+			/* Ensure we make progress to prevent livelock */
+			mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+			status = nfs_wait_on_request(req);
+			nfs_release_request(req);
+			mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+			if (status < 0)
+				break;
+			goto restart;
+		}
 		nfs_request_remove_commit_list(req, cinfo);
+		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 		nfs_list_add_request(req, dst);
 		ret++;
 		if ((ret == max) && !cinfo->dreq)
 			break;
+		cond_resched();
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
 
 /*
 * nfs_scan_commit - Scan an inode for commit requests
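
The rewritten nfs_scan_commit_list() sleeps on a busy request only while dst is still empty, and restarts the scan after every wait because the list may have changed while commit_mutex was dropped. A userspace analogue of that restartable scan (singly linked list and pthread locks; reference counting on the slept-on entry is elided):

#include <pthread.h>
#include <stddef.h>

struct req {
	struct req *next;
	pthread_mutex_t lock;
};

/* Caller holds *commit_mutex; moves lockable entries from *src to *dst. */
static int scan_list(pthread_mutex_t *commit_mutex,
		     struct req **src, struct req **dst)
{
	int moved = 0;

restart:
	for (struct req **pp = src; *pp; ) {
		struct req *r = *pp;

		if (pthread_mutex_trylock(&r->lock) != 0) {
			if (*dst) {		/* progress made: don't sleep */
				pp = &r->next;
				continue;
			}
			/* drop the big lock before blocking on the entry */
			pthread_mutex_unlock(commit_mutex);
			pthread_mutex_lock(&r->lock);	/* wait ... */
			pthread_mutex_unlock(&r->lock);	/* ... only waited */
			pthread_mutex_lock(commit_mutex);
			goto restart;	/* list may have changed meanwhile */
		}
		*pp = r->next;		/* unlink from *src and push onto */
		r->next = *dst;		/* *dst, still holding r->lock,   */
		*dst = r;		/* as the kernel code does        */
		moved++;
	}
	return moved;
}
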
@@ -1076,15 +1075,17 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
 {
 	int ret = 0;
 
-	spin_lock(&cinfo->inode->i_lock);
-	if (cinfo->mds->ncommit > 0) {
+	if (!atomic_long_read(&cinfo->mds->ncommit))
+		return 0;
+	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+	if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
 		const int max = INT_MAX;
 
 		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
 					   cinfo, max);
 		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
 	}
-	spin_unlock(&cinfo->inode->i_lock);
+	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 	return ret;
 }
 
@@ -1105,43 +1106,21 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 	unsigned int end;
 	int error;
 
-	if (!PagePrivate(page))
-		return NULL;
-
 	end = offset + bytes;
 
-	spin_lock(&inode->i_lock);
-
-	for (;;) {
-		req = nfs_page_find_head_request_locked(NFS_I(inode), page);
-		if (req == NULL)
-			goto out_unlock;
-
-		/* should be handled by nfs_flush_incompatible */
-		WARN_ON_ONCE(req->wb_head != req);
-		WARN_ON_ONCE(req->wb_this_page != req);
-
-		rqend = req->wb_offset + req->wb_bytes;
-		/*
-		 * Tell the caller to flush out the request if
-		 * the offsets are non-contiguous.
-		 * Note: nfs_flush_incompatible() will already
-		 * have flushed out requests having wrong owners.
-		 */
-		if (offset > rqend
-		    || end < req->wb_offset)
-			goto out_flushme;
-
-		if (nfs_lock_request(req))
-			break;
+	req = nfs_lock_and_join_requests(page);
+	if (IS_ERR_OR_NULL(req))
+		return req;
 
-		/* The request is locked, so wait and then retry */
-		spin_unlock(&inode->i_lock);
-		error = nfs_wait_on_request(req);
-		nfs_release_request(req);
-		if (error != 0)
-			goto out_err;
-		spin_lock(&inode->i_lock);
-	}
+	rqend = req->wb_offset + req->wb_bytes;
+	/*
+	 * Tell the caller to flush out the request if
+	 * the offsets are non-contiguous.
+	 * Note: nfs_flush_incompatible() will already
+	 * have flushed out requests having wrong owners.
	 */
+	if (offset > rqend || end < req->wb_offset)
+		goto out_flushme;
 
 	/* Okay, the request matches. Update the region */
 	if (offset < req->wb_offset) {
@@ -1152,17 +1131,17 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 		req->wb_bytes = end - req->wb_offset;
 	else
 		req->wb_bytes = rqend - req->wb_offset;
-out_unlock:
-	if (req)
-		nfs_clear_request_commit(req);
-	spin_unlock(&inode->i_lock);
 	return req;
 out_flushme:
-	spin_unlock(&inode->i_lock);
-	nfs_release_request(req);
+	/*
+	 * Note: we mark the request dirty here because
+	 * nfs_lock_and_join_requests() cannot preserve
+	 * commit flags, so we have to replay the write.
+	 */
+	nfs_mark_request_dirty(req);
+	nfs_unlock_and_release_request(req);
 	error = nfs_wb_page(inode, page);
-out_err:
-	return ERR_PTR(error);
+	return (error < 0) ? ERR_PTR(error) : NULL;
 }
 
 /*
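
The region update in nfs_try_to_update_request() merges the new write [offset, offset + bytes) into the existing request only when the two ranges touch or overlap; otherwise the caller flushes. The arithmetic is easy to verify in isolation (self-contained sketch with a simplified range struct):

#include <assert.h>
#include <stdbool.h>

struct byte_range {
	unsigned int offset;	/* like req->wb_offset */
	unsigned int bytes;	/* like req->wb_bytes */
};

/* Merge [offset, offset + bytes) into *r; false means "flush instead". */
static bool try_merge(struct byte_range *r, unsigned int offset,
		      unsigned int bytes)
{
	unsigned int end = offset + bytes;
	unsigned int rqend = r->offset + r->bytes;

	if (offset > rqend || end < r->offset)
		return false;			/* disjoint ranges */

	if (offset < r->offset)
		r->offset = offset;		/* grow to the left */
	if (end > rqend)
		r->bytes = end - r->offset;	/* grow to the right */
	else
		r->bytes = rqend - r->offset;
	return true;
}

int main(void)
{
	struct byte_range r = { .offset = 100, .bytes = 50 };	/* [100,150) */

	assert(try_merge(&r, 150, 10));		/* touching: now [100,160) */
	assert(r.offset == 100 && r.bytes == 60);
	assert(!try_merge(&r, 200, 4));		/* gap: caller must flush */
	return 0;
}
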
@@ -1227,8 +1206,6 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 		l_ctx = req->wb_lock_context;
 		do_flush = req->wb_page != page ||
 			!nfs_match_open_context(req->wb_context, ctx);
-		/* for now, flush if more than 1 request in page_group */
-		do_flush |= req->wb_this_page != req;
 		if (l_ctx && flctx &&
 		    !(list_empty_careful(&flctx->flc_posix) &&
 		      list_empty_careful(&flctx->flc_flock))) {
@@ -1399,6 +1376,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
 	task_setup_data->priority = priority;
 	rpc_ops->write_setup(hdr, msg);
+	trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
+				 hdr->args.stable);
 
 	nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client,
 				 &task_setup_data->rpc_client, msg, hdr);
@@ -1412,7 +1391,6 @@ static void nfs_redirty_request(struct nfs_page *req)
 {
 	nfs_mark_request_dirty(req);
 	set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
-	nfs_unlock_request(req);
 	nfs_end_page_writeback(req);
 	nfs_release_request(req);
 }
@@ -1452,7 +1430,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 		pg_ops = server->pnfs_curr_ld->pg_write_ops;
 #endif
 	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
-			server->wsize, ioflags, GFP_NOIO);
+			server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
@@ -1557,7 +1535,10 @@ static int nfs_writeback_done(struct rpc_task *task,
 	status = NFS_PROTO(inode)->write_done(task, hdr);
 	if (status != 0)
 		return status;
+
 	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
+	trace_nfs_writeback_done(inode, task->tk_status,
+				 hdr->args.offset, hdr->res.verf);
 
 	if (hdr->res.verf->committed < hdr->args.stable &&
 	    task->tk_status >= 0) {
@@ -1686,6 +1667,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
 	};
 	/* Set up the initial task struct.  */
 	nfs_ops->commit_setup(data, &msg);
+	trace_nfs_initiate_commit(data);
 
 	dprintk("NFS: initiated commit call\n");
 
@@ -1810,6 +1792,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 
 	/* Call the NFS version-specific code */
 	NFS_PROTO(data->inode)->commit_done(task, data);
+	trace_nfs_commit_done(data);
 }
 
 static void nfs_commit_release_pages(struct nfs_commit_data *data)
@@ -1934,7 +1917,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret = 0;
 
 	/* no commits means nothing needs to be done */
-	if (!nfsi->commit_info.ncommit)
+	if (!atomic_long_read(&nfsi->commit_info.ncommit))
 		return ret;
 
 	if (wbc->sync_mode == WB_SYNC_NONE) {
@@ -2015,7 +1998,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 
 	/* blocking call to cancel all requests and join to a single (head)
 	 * request */
-	req = nfs_lock_and_join_requests(page, false);
+	req = nfs_lock_and_join_requests(page);
 	if (IS_ERR(req)) {
 		ret = PTR_ERR(req);
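
Finally, nfs_scan_commit() and nfs_write_inode() both read the ncommit counter locklessly and return early before touching commit_mutex, re-checking under the mutex before doing any work. A sketch of that optimistic fast path (C11 atomics; the second read matters because the counter may change between the two checks):

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t commit_mutex = PTHREAD_MUTEX_INITIALIZER;
static atomic_long ncommit;

static long scan_commit(void)
{
	long ret = 0;

	/* lock-free fast path: nothing to commit, skip the mutex */
	if (atomic_load(&ncommit) == 0)
		return 0;

	pthread_mutex_lock(&commit_mutex);
	/* re-check: another thread may have drained the list meanwhile */
	if (atomic_load(&ncommit) > 0) {
		/* ... scan and move commit requests here ... */
		ret = atomic_load(&ncommit);
	}
	pthread_mutex_unlock(&commit_mutex);
	return ret;
}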