From 89a09141df6ac1c3821fbe44ca8384eb37692965 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Mar 2007 13:38:26 -0800 Subject: [PATCH] nfs: fix congestion control The current NFS client congestion logic is severly broken, it marks the backing device congested during each nfs_writepages() call but doesn't mirror this in nfs_writepage() which makes for deadlocks. Also it implements its own waitqueue. Replace this by a more regular congestion implementation that puts a cap on the number of active writeback pages and uses the bdi congestion waitqueue. Also always use an interruptible wait since it makes sense to be able to SIGKILL the process even for mounts without 'intr'. Signed-off-by: Peter Zijlstra Acked-by: Trond Myklebust Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 116 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 42 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index febdade91670..2867e6b7096f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct page *, unsigned int, unsigned int); static void nfs_mark_request_dirty(struct nfs_page *req); -static int nfs_wait_on_write_congestion(struct address_space *, int); static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; @@ -48,8 +48,6 @@ static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; -static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); - struct nfs_write_data *nfs_commit_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); @@ -210,6 +208,40 @@ static int wb_priority(struct writeback_control *wbc) return 0; } +/* + * NFS congestion control + */ + +int nfs_congestion_kb; + +#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) +#define NFS_CONGESTION_OFF_THRESH \ + (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) + +static void nfs_set_page_writeback(struct page *page) +{ + if (!test_set_page_writeback(page)) { + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + if (atomic_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) + set_bdi_congested(&nfss->backing_dev_info, WRITE); + } +} + +static void nfs_end_page_writeback(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + end_page_writeback(page); + if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { + clear_bdi_congested(&nfss->backing_dev_info, WRITE); + congestion_end(WRITE); + } +} + /* * Find an associated nfs write request, and prepare to flush it out * Returns 1 if there was no write request, or if the request was @@ -247,7 +279,7 @@ static int nfs_page_mark_flush(struct page *page) spin_unlock(req_lock); if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { nfs_mark_request_dirty(req); - set_page_writeback(page); + nfs_set_page_writeback(page); } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); @@ -302,13 +334,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) return err; } -/* - * Note: causes nfs_update_request() to block on the assumption - * that the writeback is generated due to memory pressure. - */ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; struct inode *inode = mapping->host; int err; @@ -317,20 +344,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = generic_writepages(mapping, wbc); if (err) return err; - while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { - if (wbc->nonblocking) - return 0; - nfs_wait_on_write_congestion(mapping, 0); - } err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); if (err < 0) goto out; nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); err = 0; out: - clear_bit(BDI_write_congested, &bdi->state); - wake_up_all(&nfs_write_congestion); - congestion_end(WRITE); return err; } @@ -360,7 +379,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) } /* - * Insert a write request into an inode + * Remove a write request from an inode */ static void nfs_inode_remove_request(struct nfs_page *req) { @@ -531,10 +550,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un } #endif -static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) +static int nfs_wait_on_write_congestion(struct address_space *mapping) { + struct inode *inode = mapping->host; struct backing_dev_info *bdi = mapping->backing_dev_info; - DEFINE_WAIT(wait); int ret = 0; might_sleep(); @@ -542,31 +561,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; - nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT); - if (intr) { - struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); + do { + struct rpc_clnt *clnt = NFS_CLIENT(inode); sigset_t oldset; rpc_clnt_sigmask(clnt, &oldset); - prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE); - if (bdi_write_congested(bdi)) { - if (signalled()) - ret = -ERESTARTSYS; - else - schedule(); - } + ret = congestion_wait_interruptible(WRITE, HZ/10); rpc_clnt_sigunmask(clnt, &oldset); - } else { - prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE); - if (bdi_write_congested(bdi)) - schedule(); - } - finish_wait(&nfs_write_congestion, &wait); + if (ret == -ERESTARTSYS) + break; + ret = 0; + } while (bdi_write_congested(bdi)); + return ret; } - /* * Try to update any existing write request, or create one if there is none. * In order to match, the request's credentials must match those of @@ -577,14 +588,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; unsigned long rqend, end; end = offset + bytes; - if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR)) + if (nfs_wait_on_write_congestion(mapping)) return ERR_PTR(-ERESTARTSYS); for (;;) { /* Loop over all inode entries and see if we find @@ -727,7 +739,7 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - end_page_writeback(req->wb_page); + nfs_end_page_writeback(req->wb_page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (!PageError(req->wb_page)) { @@ -1042,12 +1054,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); req->wb_context->error = task->tk_status; - end_page_writeback(page); + nfs_end_page_writeback(page); nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; } - end_page_writeback(page); + nfs_end_page_writeback(page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { @@ -1514,6 +1526,26 @@ int __init nfs_init_writepagecache(void) if (nfs_commit_mempool == NULL) return -ENOMEM; + /* + * NFS congestion size, scale with available memory. + * + * 64MB: 8192k + * 128MB: 11585k + * 256MB: 16384k + * 512MB: 23170k + * 1GB: 32768k + * 2GB: 46340k + * 4GB: 65536k + * 8GB: 92681k + * 16GB: 131072k + * + * This allows larger machines to have larger/more transfers. + * Limit the default to 256M + */ + nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + if (nfs_congestion_kb > 256*1024) + nfs_congestion_kb = 256*1024; + return 0; } -- cgit From 5a6d41b32a17ca902ef50fdfa170d7f23264bad5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Apr 2007 19:10:12 -0400 Subject: NFS: Ensure PG_writeback is cleared when writeback fails If the writebacks are cancelled via nfs_cancel_dirty_list, or due to the memory allocation failing in nfs_flush_one/nfs_flush_multi, then we must ensure that the PG_writeback flag is cleared. Also ensure that we actually own the PG_writeback flag whenever we schedule a new writeback by making nfs_set_page_writeback() return the value of test_set_page_writeback(). The PG_writeback page flag ends up replacing the functionality of the PG_FLUSHING nfs_page flag, so we rip that out too. Signed-off-by: Trond Myklebust Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 22 +++++++++++++++------- include/linux/nfs_page.h | 1 - 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2867e6b7096f..e5d7cac569aa 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -218,9 +218,11 @@ int nfs_congestion_kb; #define NFS_CONGESTION_OFF_THRESH \ (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) -static void nfs_set_page_writeback(struct page *page) +static int nfs_set_page_writeback(struct page *page) { - if (!test_set_page_writeback(page)) { + int ret = test_set_page_writeback(page); + + if (!ret) { struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); @@ -228,6 +230,7 @@ static void nfs_set_page_writeback(struct page *page) NFS_CONGESTION_ON_THRESH) set_bdi_congested(&nfss->backing_dev_info, WRITE); } + return ret; } static void nfs_end_page_writeback(struct page *page) @@ -277,10 +280,8 @@ static int nfs_page_mark_flush(struct page *page) spin_lock(req_lock); } spin_unlock(req_lock); - if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { + if (nfs_set_page_writeback(page) == 0) nfs_mark_request_dirty(req); - nfs_set_page_writeback(page); - } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; @@ -424,7 +425,6 @@ nfs_mark_request_dirty(struct nfs_page *req) static void nfs_redirty_request(struct nfs_page *req) { - clear_bit(PG_FLUSHING, &req->wb_flags); __set_page_dirty_nobuffers(req->wb_page); } @@ -434,7 +434,11 @@ nfs_redirty_request(struct nfs_page *req) static inline int nfs_dirty_request(struct nfs_page *req) { - return test_bit(PG_FLUSHING, &req->wb_flags) == 0; + struct page *page = req->wb_page; + + if (page == NULL) + return 0; + return !PageWriteback(req->wb_page); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -500,6 +504,7 @@ static void nfs_cancel_dirty_list(struct list_head *head) while(!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); nfs_clear_page_writeback(req); } @@ -890,6 +895,7 @@ out_bad: list_del(&data->pages); nfs_writedata_release(data); } + nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); nfs_clear_page_writeback(req); return -ENOMEM; @@ -935,6 +941,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); nfs_clear_page_writeback(req); } @@ -970,6 +977,7 @@ out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); nfs_clear_page_writeback(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 2e555d49c9b7..d111be639140 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -31,7 +31,6 @@ #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 #define PG_NEED_FLUSH 3 -#define PG_FLUSHING 4 struct nfs_inode; struct nfs_page { -- cgit From eb4cac10d9f7b006da842e2d37414d13e1333781 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 15 Apr 2007 16:21:49 -0400 Subject: NFS: Fix a list corruption problem We must remove the request from whatever list it is currently on before we can add it to the dirty list. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e5d7cac569aa..ad2e91b4904f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -280,8 +280,10 @@ static int nfs_page_mark_flush(struct page *page) spin_lock(req_lock); } spin_unlock(req_lock); - if (nfs_set_page_writeback(page) == 0) + if (nfs_set_page_writeback(page) == 0) { + nfs_list_remove_request(req); nfs_mark_request_dirty(req); + } ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; -- cgit From 8e821cad12e80cd1a8a3fbadf91f62f17f32549e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:34 -0400 Subject: NFS: clean up the unstable write code Get rid of the inlined #ifdefs. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 117 ++++++++++++++++++++++++++++------------------- include/linux/nfs_page.h | 30 ------------ 2 files changed, 71 insertions(+), 76 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ad2e91b4904f..3ed4feb8c856 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -460,6 +460,43 @@ nfs_mark_request_commit(struct nfs_page *req) inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return data->verf.committed != NFS_FILE_SYNC; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_mark_request_commit(req); + return 1; + } + if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { + nfs_redirty_request(req); + return 1; + } + return 0; +} +#else +static inline void +nfs_mark_request_commit(struct nfs_page *req) +{ +} + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return 0; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + return 0; +} #endif /* @@ -746,26 +783,12 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - nfs_end_page_writeback(req->wb_page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (!PageError(req->wb_page)) { - if (NFS_NEED_RESCHED(req)) { - nfs_redirty_request(req); - goto out; - } else if (NFS_NEED_COMMIT(req)) { - nfs_mark_request_commit(req); - goto out; - } - } - nfs_inode_remove_request(req); - -out: - nfs_clear_commit(req); - nfs_clear_reschedule(req); -#else - nfs_inode_remove_request(req); -#endif + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } @@ -1008,22 +1031,28 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) nfs_set_pageerror(page); req->wb_context->error = task->tk_status; dprintk(", error = %d\n", task->tk_status); - } else { -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (data->verf.committed < NFS_FILE_SYNC) { - if (!NFS_NEED_COMMIT(req)) { - nfs_defer_commit(req); - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - dprintk(" defer commit\n"); - } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { - nfs_defer_reschedule(req); - dprintk(" server reboot detected\n"); - } - } else -#endif - dprintk(" OK\n"); + goto out; } + if (nfs_write_need_commit(data)) { + spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + + spin_lock(req_lock); + if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { + /* Do nothing we need to resend the writes */ + } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + dprintk(" defer commit\n"); + } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { + set_bit(PG_NEED_RESCHED, &req->wb_flags); + clear_bit(PG_NEED_COMMIT, &req->wb_flags); + dprintk(" server reboot detected\n"); + } + spin_unlock(req_lock); + } else + dprintk(" OK\n"); + +out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req); } @@ -1064,25 +1093,21 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); req->wb_context->error = task->tk_status; - nfs_end_page_writeback(page); - nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); - goto next; + goto remove_request; } - nfs_end_page_writeback(page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { - nfs_inode_remove_request(req); - dprintk(" OK\n"); + if (nfs_write_need_commit(data)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + nfs_mark_request_commit(req); + nfs_end_page_writeback(page); + dprintk(" marked for commit\n"); goto next; } - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req); - dprintk(" marked for commit\n"); -#else + dprintk(" OK\n"); +remove_request: + nfs_end_page_writeback(page); nfs_inode_remove_request(req); -#endif next: nfs_clear_page_writeback(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d111be639140..16b0266b14fd 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -49,8 +49,6 @@ struct nfs_page { }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -#define NFS_NEED_COMMIT(req) (test_bit(PG_NEED_COMMIT,&(req)->wb_flags)) -#define NFS_NEED_RESCHED(req) (test_bit(PG_NEED_RESCHED,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, @@ -121,34 +119,6 @@ nfs_list_remove_request(struct nfs_page *req) req->wb_list_head = NULL; } -static inline int -nfs_defer_commit(struct nfs_page *req) -{ - return !test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags); -} - -static inline void -nfs_clear_commit(struct nfs_page *req) -{ - smp_mb__before_clear_bit(); - clear_bit(PG_NEED_COMMIT, &req->wb_flags); - smp_mb__after_clear_bit(); -} - -static inline int -nfs_defer_reschedule(struct nfs_page *req) -{ - return !test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags); -} - -static inline void -nfs_clear_reschedule(struct nfs_page *req) -{ - smp_mb__before_clear_bit(); - clear_bit(PG_NEED_RESCHED, &req->wb_flags); - smp_mb__after_clear_bit(); -} - static inline struct nfs_page * nfs_list_entry(struct list_head *head) { -- cgit From 6d677e3504cd173b2ff7fc393ee4241b3c0f92a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:40 -0400 Subject: NFS: Don't clear PG_writeback until after we've processed unstable writes Ensure that we don't release the PG_writeback lock until after the page has either been redirtied, or queued on the nfs_inode 'commit' list. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3ed4feb8c856..8e9424651bc6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -920,8 +920,8 @@ out_bad: list_del(&data->pages); nfs_writedata_release(data); } - nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); return -ENOMEM; } @@ -966,8 +966,8 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } return -ENOMEM; @@ -1002,8 +1002,8 @@ out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_end_page_writeback(req->wb_page); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } return error; -- cgit From 612c9384fd0486686699f7d49b774f0c7a79c511 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:45 -0400 Subject: NFS: Fix the 'desynchronized value of nfs_i.ncommit' error Redirtying a request that is already marked for commit will screw up the accounting for NR_UNSTABLE_NFS as well as nfs_i.ncommit. Ensure that all requests on the commit queue are labelled with the PG_NEED_COMMIT flag, and avoid moving them onto the dirty list inside nfs_page_mark_flush(). Also inline nfs_mark_request_dirty() into nfs_page_mark_flush() for atomicity reasons. Avoid dropping the spinlock until we're done marking the request in the radix tree and have added it to the ->dirty list. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8e9424651bc6..ce5b4a9f2d8b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -38,7 +38,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct page *, unsigned int, unsigned int); -static void nfs_mark_request_dirty(struct nfs_page *req); static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; @@ -255,7 +254,8 @@ static void nfs_end_page_writeback(struct page *page) static int nfs_page_mark_flush(struct page *page) { struct nfs_page *req; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + spinlock_t *req_lock = &nfsi->req_lock; int ret; spin_lock(req_lock); @@ -279,11 +279,23 @@ static int nfs_page_mark_flush(struct page *page) return ret; spin_lock(req_lock); } - spin_unlock(req_lock); + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + /* This request is marked for commit */ + spin_unlock(req_lock); + nfs_unlock_request(req); + return 1; + } if (nfs_set_page_writeback(page) == 0) { nfs_list_remove_request(req); - nfs_mark_request_dirty(req); - } + /* add the request to the inode's dirty list. */ + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_add_request(req, &nfsi->dirty); + nfsi->ndirty++; + spin_unlock(req_lock); + __mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES); + } else + spin_unlock(req_lock); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; @@ -406,24 +418,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfs_release_request(req); } -/* - * Add a request to the inode's dirty list. - */ -static void -nfs_mark_request_dirty(struct nfs_page *req) -{ - struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - - spin_lock(&nfsi->req_lock); - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfs_list_add_request(req, &nfsi->dirty); - nfsi->ndirty++; - spin_unlock(&nfsi->req_lock); - __mark_inode_dirty(inode, I_DIRTY_PAGES); -} - static void nfs_redirty_request(struct nfs_page *req) { @@ -438,7 +432,7 @@ nfs_dirty_request(struct nfs_page *req) { struct page *page = req->wb_page; - if (page == NULL) + if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) return 0; return !PageWriteback(req->wb_page); } @@ -456,6 +450,7 @@ nfs_mark_request_commit(struct nfs_page *req) spin_lock(&nfsi->req_lock); nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; + set_bit(PG_NEED_COMMIT, &(req)->wb_flags); spin_unlock(&nfsi->req_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); @@ -470,7 +465,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) static inline int nfs_reschedule_unstable_write(struct nfs_page *req) { - if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { nfs_mark_request_commit(req); return 1; } @@ -557,6 +552,7 @@ static void nfs_cancel_commit_list(struct list_head *head) req = nfs_list_entry(head->next); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); nfs_list_remove_request(req); + clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); nfs_inode_remove_request(req); nfs_unlock_request(req); } @@ -1295,6 +1291,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); + clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", -- cgit From 2b82f190c81bf1524447c021df4e9ce8ef379bd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Apr 2007 16:12:50 -0400 Subject: NFS: Fix race in nfs_set_page_dirty Protect nfs_set_page_dirty() against races with nfs_inode_add_request. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce5b4a9f2d8b..797558941745 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -388,6 +388,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); + if (PageDirty(req->wb_page)) + set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -407,6 +409,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); + if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) + __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); @@ -1527,15 +1531,22 @@ int nfs_wb_page(struct inode *inode, struct page* page) int nfs_set_page_dirty(struct page *page) { + spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; struct nfs_page *req; + int ret; - req = nfs_page_find_request(page); + spin_lock(req_lock); + req = nfs_page_find_request_locked(page); if (req != NULL) { /* Mark any existing write requests for flushing */ - set_bit(PG_NEED_FLUSH, &req->wb_flags); + ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); + spin_unlock(req_lock); nfs_release_request(req); + return ret; } - return __set_page_dirty_nobuffers(page); + ret = __set_page_dirty_nobuffers(page); + spin_unlock(req_lock); + return ret; } -- cgit