Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 154
1 file changed, 67 insertions(+), 87 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4be518d4e68a..ae052b5e3315 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -921,6 +921,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 	     iter = mem_cgroup_iter(NULL, iter, NULL))
 
 /**
+ * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
+ * @memcg: hierarchy root
+ * @fn: function to call for each task
+ * @arg: argument passed to @fn
+ *
+ * This function iterates over tasks attached to @memcg or to any of its
+ * descendants and calls @fn for each task. If @fn returns a non-zero
+ * value, the function breaks the iteration loop and returns the value.
+ * Otherwise, it will iterate over all tasks and return 0.
+ *
+ * This function must not be called for the root memory cgroup.
+ */
+int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+			  int (*fn)(struct task_struct *, void *), void *arg)
+{
+	struct mem_cgroup *iter;
+	int ret = 0;
+
+	BUG_ON(memcg == root_mem_cgroup);
+
+	for_each_mem_cgroup_tree(iter, memcg) {
+		struct css_task_iter it;
+		struct task_struct *task;
+
+		css_task_iter_start(&iter->css, &it);
+		while (!ret && (task = css_task_iter_next(&it)))
+			ret = fn(task, arg);
+		css_task_iter_end(&it);
+		if (ret) {
+			mem_cgroup_iter_break(memcg, iter);
+			break;
+		}
+	}
+	return ret;
+}
+
+/**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
  * @zone: zone of the page
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
 	unsigned long limit;
 
@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		.gfp_mask = gfp_mask,
 		.order = order,
 	};
-	struct mem_cgroup *iter;
-	unsigned long chosen_points = 0;
-	unsigned long totalpages;
-	unsigned int points = 0;
-	struct task_struct *chosen = NULL;
+	bool ret;
 
 	mutex_lock(&oom_lock);
-
-	/*
-	 * If current has a pending SIGKILL or is exiting, then automatically
-	 * select it.  The goal is to allow it to allocate so that it may
-	 * quickly exit and free its memory.
-	 */
-	if (task_will_free_mem(current)) {
-		mark_oom_victim(current);
-		wake_oom_reaper(current);
-		goto unlock;
-	}
-
-	check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
-	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
-	for_each_mem_cgroup_tree(iter, memcg) {
-		struct css_task_iter it;
-		struct task_struct *task;
-
-		css_task_iter_start(&iter->css, &it);
-		while ((task = css_task_iter_next(&it))) {
-			switch (oom_scan_process_thread(&oc, task)) {
-			case OOM_SCAN_SELECT:
-				if (chosen)
-					put_task_struct(chosen);
-				chosen = task;
-				chosen_points = ULONG_MAX;
-				get_task_struct(chosen);
-				/* fall through */
-			case OOM_SCAN_CONTINUE:
-				continue;
-			case OOM_SCAN_ABORT:
-				css_task_iter_end(&it);
-				mem_cgroup_iter_break(memcg, iter);
-				if (chosen)
-					put_task_struct(chosen);
-				/* Set a dummy value to return "true". */
-				chosen = (void *) 1;
-				goto unlock;
-			case OOM_SCAN_OK:
-				break;
-			};
-			points = oom_badness(task, memcg, NULL, totalpages);
-			if (!points || points < chosen_points)
-				continue;
-			/* Prefer thread group leaders for display purposes */
-			if (points == chosen_points &&
-			    thread_group_leader(chosen))
-				continue;
-
-			if (chosen)
-				put_task_struct(chosen);
-			chosen = task;
-			chosen_points = points;
-			get_task_struct(chosen);
-		}
-		css_task_iter_end(&it);
-	}
-
-	if (chosen) {
-		points = chosen_points * 1000 / totalpages;
-		oom_kill_process(&oc, chosen, points, totalpages,
-				 "Memory cgroup out of memory");
-	}
-unlock:
+	ret = out_of_memory(&oc);
 	mutex_unlock(&oom_lock);
-	return chosen;
+	return ret;
 }
 
 #if MAX_NUMNODES > 1
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (!memcg)
 		return false;
 
-	if (!handle || oom_killer_disabled)
+	if (!handle)
 		goto cleanup;
 
 	owait.memcg = memcg;
@@ -2969,16 +2939,16 @@ static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit)
 		/*
 		 * The active flag needs to be written after the static_key
 		 * update. This is what guarantees that the socket activation
-		 * function is the last one to run. See sock_update_memcg() for
-		 * details, and note that we don't mark any socket as belonging
-		 * to this memcg until that flag is up.
+		 * function is the last one to run. See mem_cgroup_sk_alloc()
+		 * for details, and note that we don't mark any socket as
+		 * belonging to this memcg until that flag is up.
 		 *
 		 * We need to do this, because static_keys will span multiple
 		 * sites, but we can't control their order. If we mark a socket
 		 * as accounted, but the accounting functions are not patched in
 		 * yet, we'll lose accounting.
 		 *
-		 * We never race with the readers in sock_update_memcg(),
+		 * We never race with the readers in mem_cgroup_sk_alloc(),
 		 * because when this value change, the code to process it is not
 		 * patched in yet.
 		 */
@@ -4092,11 +4062,13 @@ static DEFINE_IDR(mem_cgroup_idr);
 
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
+	VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
 	atomic_add(n, &memcg->id.ref);
 }
 
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
+	VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
 	if (atomic_sub_and_test(n, &memcg->id.ref)) {
 		idr_remove(&mem_cgroup_idr, memcg->id.id);
 		memcg->id.id = 0;
@@ -4285,8 +4257,10 @@ fail:
 
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
 	/* Online state pins memcg ID, memcg ID pins CSS */
-	mem_cgroup_id_get(mem_cgroup_from_css(css));
+	atomic_set(&memcg->id.ref, 1);
 	css_get(css);
 	return 0;
 }
@@ -4434,7 +4408,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 	 * Because lookup_swap_cache() updates some statistics counter,
 	 * we call find_get_page() with swapper_space directly.
 	 */
-	page = find_get_page(swap_address_space(ent), ent.val);
+	page = find_get_page(swap_address_space(ent), swp_offset(ent));
 	if (do_memsw_account())
 		entry->val = ent.val;
 
@@ -4472,7 +4446,8 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 			swp_entry_t swp = radix_to_swp_entry(page);
 			if (do_memsw_account())
 				*entry = swp;
-			page = find_get_page(swap_address_space(swp), swp.val);
+			page = find_get_page(swap_address_space(swp),
+					     swp_offset(swp));
 		}
 	} else
 		page = find_get_page(mapping, pgoff);
@@ -4707,7 +4682,8 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 		.mm = mm,
 	};
 	down_read(&mm->mmap_sem);
-	walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk);
+	walk_page_range(0, mm->highest_vm_end,
+			&mem_cgroup_count_precharge_walk);
 	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
@@ -4995,7 +4971,8 @@ retry:
 	 * When we have consumed all precharges and failed in doing
 	 * additional charge, the page walk just aborts.
 	 */
-	walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
+	walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+
 	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
 }
@@ -5674,11 +5651,15 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
 EXPORT_SYMBOL(memcg_sockets_enabled_key);
 
-void sock_update_memcg(struct sock *sk)
+void mem_cgroup_sk_alloc(struct sock *sk)
 {
 	struct mem_cgroup *memcg;
 
-	/* Socket cloning can throw us here with sk_cgrp already
+	if (!mem_cgroup_sockets_enabled)
+		return;
+
+	/*
+	 * Socket cloning can throw us here with sk_memcg already
 	 * filled. It won't however, necessarily happen from
 	 * process context. So the test for root memcg given
 	 * the current task's memcg won't help us in this case.
@@ -5703,12 +5684,11 @@ void sock_update_memcg(struct sock *sk)
 out:
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL(sock_update_memcg);
 
-void sock_release_memcg(struct sock *sk)
+void mem_cgroup_sk_free(struct sock *sk)
 {
-	WARN_ON(!sk->sk_memcg);
-	css_put(&sk->sk_memcg->css);
+	if (sk->sk_memcg)
+		css_put(&sk->sk_memcg->css);
 }
 
 /**
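For readers following the new mem_cgroup_scan_tasks() helper, here is a minimal
hypothetical caller showing the intended pattern: walk every task attached
anywhere in a memcg hierarchy and stop early on a non-zero callback return. The
callback count_task() and the wrapper memcg_count_tasks() are invented for
illustration and are not part of this patch.

#include <linux/memcontrol.h>
#include <linux/sched.h>

/* Illustration only: a made-up callback for mem_cgroup_scan_tasks(). */
static int count_task(struct task_struct *task, void *arg)
{
	unsigned int *count = arg;

	(*count)++;
	return 0;	/* a non-zero return would abort the walk */
}

/*
 * Count the tasks attached to @memcg or any of its descendants. The
 * helper BUG()s on the root memcg, so callers must never pass it.
 */
static unsigned int memcg_count_tasks(struct mem_cgroup *memcg)
{
	unsigned int count = 0;

	mem_cgroup_scan_tasks(memcg, count_task, &count);
	return count;
}

The real consumer in this series is the generic OOM killer: the rewritten
mem_cgroup_out_of_memory() above now just takes oom_lock and delegates to
out_of_memory(&oc), which selects a victim by scanning the hierarchy through
this iterator instead of the open-coded loop that the patch removes.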
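A note on the two find_get_page() hunks: swap-cache lookups switch from keying
pages by the raw swp_entry_t value to keying them by the swap offset, matching
how pages are now inserted into the per-type swap address spaces. A sketch of
the before/after lookup, with hypothetical wrapper names for illustration:

#include <linux/swap.h>
#include <linux/pagemap.h>

/* Old scheme: page index was the whole encoded entry value. */
static struct page *swap_cache_lookup_old(swp_entry_t ent)
{
	return find_get_page(swap_address_space(ent), ent.val);
}

/* New scheme: swap_address_space() picks the backing address_space,
 * and the page index within it is the swap offset alone. */
static struct page *swap_cache_lookup_new(swp_entry_t ent)
{
	return find_get_page(swap_address_space(ent), swp_offset(ent));
}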