Merge tag 'v3.2-rc7' into staging/for_v3.3

Linux 3.2-rc7 * tag 'v3.2-rc7': (1304 commits) Linux 3.2-rc7 netfilter: xt_connbytes: handle negation correctly Btrfs: call d_instantiate after all ops are setup Btrfs: fix worker lock misuse in find_worker net: relax rcvbuf limits rps: fix insufficient bounds checking in store_rps_dev_flow_table_cnt() net: introduce DST_NOPEER dst flag mqprio: Avoid panic if no options are provided bridge: provide a mtu() method for fake_dst_ops md/bitmap: It is OK to clear bits during recovery. md: don't give up looking for spares on first failure-to-add md/raid5: ensure correct assessment of drives during degraded reshape. md/linear: fix hot-add of devices to linear arrays. sparc64: Fix MSIQ HV call ordering in pci_sun4v_msiq_build_irq(). pata_of_platform: Add missing CONFIG_OF_IRQ dependency. ipv4: using prefetch requires including prefetch.h VFS: Fix race between CPU hotplug and lglocks vfs: __read_cache_page should use gfp argument rather than GFP_KERNEL USB: Fix usb/isp1760 build on sparc net: Add a flow_cache_flush_deferred function ... Conflicts: drivers/media/common/tuners/tda18218.c drivers/media/video/omap3isp/ispccdc.c drivers/staging/media/as102/as102_drv.h
author: Mauro Carvalho Chehab <mchehab@redhat.com> 2011-12-30 13:59:37 -0200
committer: Mauro Carvalho Chehab <mchehab@redhat.com> 2011-12-30 13:59:37 -0200
commit: b4d48c942c17ce3d3a330ad91e109e522bc97378 (patch)
tree: 3365292f3a5a502edb51492d011fd326c930ca40 /mm/page-writeback.c
parent: 1a5cd29631a6b75e49e6ad8a770ab9d69cda0fa2 (diff)
parent: 5f0a6e2d503896062f641639dacfe5055c2f593b (diff)
1 files changed, 35 insertions, 20 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a3278f005230..50f08241f981 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
  *
  */
 static struct prop_descriptor vm_completions;
-static struct prop_descriptor vm_dirties;
 
 /*
  * couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
 {
 	int shift = calc_period_shift();
 	prop_change_shift(&vm_completions, shift);
-	prop_change_shift(&vm_dirties, shift);
 
 	writeback_set_ratelimit();
 }
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 
-void task_dirty_inc(struct task_struct *tsk)
-{
-	prop_inc_single(&vm_dirties, &tsk->dirties);
-}
-
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
@@ -418,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
  *
  * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In the other normal situations, it acts more gently by throttling the tasks
+ * more (rather than completely block them) when the bdi dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
@@ -601,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	 */
 	if (unlikely(bdi_thresh > thresh))
 		bdi_thresh = thresh;
+	/*
+	 * It's very possible that bdi_thresh is close to 0 not because the
+	 * device is slow, but that it has remained inactive for long time.
+	 * Honour such devices a reasonable good (hopefully IO efficient)
+	 * threshold, so that the occasional writes won't be blocked and active
+	 * writes can rampup the threshold quickly.
+	 */
 	bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
 	/*
 	 * scale global setpoint to bdi's:
@@ -984,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
 	 *
 	 * 8 serves as the safety ratio.
 	 */
-	if (bdi_dirty)
-		t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+	t = min(t, bdi_dirty * HZ / (8 * bw + 1));
 
 	/*
 	 * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1133,17 +1137,30 @@ pause:
 					  pages_dirtied,
 					  pause,
 					  start_time);
-		__set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
-		dirty_thresh = hard_dirty_limit(dirty_thresh);
 		/*
-		 * max-pause area. If dirty exceeded but still within this
-		 * area, no need to sleep for more than 200ms: (a) 8 pages per
-		 * 200ms is typically more than enough to curb heavy dirtiers;
-		 * (b) the pause time limit makes the dirtiers more responsive.
+		 * This is typically equal to (nr_dirty < dirty_thresh) and can
+		 * also keep "1000+ dd on a slow USB stick" under control.
 		 */
-		if (nr_dirty < dirty_thresh)
+		if (task_ratelimit)
+			break;
+
+		/*
+		 * In the case of an unresponding NFS server and the NFS dirty
+		 * pages exceeds dirty_thresh, give the other good bdi's a pipe
+		 * to go through, so that tasks on them still remain responsive.
+		 *
+		 * In theory 1 page is enough to keep the comsumer-producer
+		 * pipe going: the flusher cleans 1 page => the task dirties 1
+		 * more page. However bdi_dirty has accounting errors.  So use
+		 * the larger and more IO friendly bdi_stat_error.
+		 */
+		if (bdi_dirty <= bdi_stat_error(bdi))
+			break;
+
+		if (fatal_signal_pending(current))
 			break;
 	}
 
@@ -1395,7 +1412,6 @@ void __init page_writeback_init(void)
 
 	shift = calc_period_shift();
 	prop_descriptor_init(&vm_completions, shift);
-	prop_descriptor_init(&vm_dirties, shift);
 }
 
 /**
@@ -1724,7 +1740,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_zone_page_state(page, NR_DIRTIED);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
-		task_dirty_inc(current);
 		task_io_account_write(PAGE_CACHE_SIZE);
 	}
 }
author	Mauro Carvalho Chehab <mchehab@redhat.com>	2011-12-30 13:59:37 -0200
committer	Mauro Carvalho Chehab <mchehab@redhat.com>	2011-12-30 13:59:37 -0200
commit	b4d48c942c17ce3d3a330ad91e109e522bc97378 (patch)
tree	3365292f3a5a502edb51492d011fd326c930ca40 /mm/page-writeback.c
parent	1a5cd29631a6b75e49e6ad8a770ab9d69cda0fa2 (diff)
parent	5f0a6e2d503896062f641639dacfe5055c2f593b (diff)