diff options
| author | Corinna Vinschen <xda@vinschen.de> | 2019-04-16 10:32:37 +0200 |
|---|---|---|
| committer | Moyster <oysterized@gmail.com> | 2019-07-19 00:08:32 +0200 |
| commit | 33c8b08c2bbe034fc01b346b1e6ce94913efb8ef (patch) | |
| tree | 54013c4e0059e7fbe47f563ca60097044f6ad52b | |
| parent | 23a81af2839a25b6a48484b261661a17132b72d2 (diff) | |
mm/oom_kill: squashed reverts to a stable state
Revert "mm, oom: fix use-after-free in oom_kill_process"
This reverts commit e1bebdeedb497f03d426c85a89c3807c7e75268d.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm,oom: make oom_killer_disable() killable"
This reverts commit 65a7400a432639aa8d5e572f30687fbca204b6f8.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm: oom_kill: don't ignore oom score on exiting tasks"
This reverts commit d60dae46b27a8f381e4a7ad9dde870faa49fa5f1.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm/oom_kill.c: avoid attempting to kill init sharing same memory"
This reverts commit 10773c0325259d6640b93c0694b5598ddf84939f.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "CHROMIUM: DROP: mm/oom_kill: Double-check before killing a child in our place"
This reverts commit 2bdd9a2042a0e12d96c545773d9d8038c920f813.
Revert "mm/oom_kill: fix the wrong task->mm == mm checks in oom_kill_process()"
This reverts commit 419a313435b31821e4d045ca4b7ea1cc5fa02035.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm/oom_kill: cleanup the "kill sharing same memory" loop"
This reverts commit afda78c6de38f9f66eba0955153b380d540d8276.
Revert "mm/oom_kill: remove the wrong fatal_signal_pending() check in oom_kill_process()"
This reverts commit acde9c2ace298b249c06ec5b0b971c333449dc09.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm, oom: remove task_lock protecting comm printing"
This reverts commit 9a9ca142d250ec9de1215284857f4528c6ddb080.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm/oom_kill.c: suppress unnecessary "sharing same memory" message"
This reverts commit 1aa2960f7c70d65b1481f805ac73b988faff6747.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm/oom_kill.c: reverse the order of setting TIF_MEMDIE and sending SIGKILL"
This reverts commit f028aedfcfd2e2bb98921b98d3ae183387ab8fed.
Revert "mm, oom: remove unnecessary variable"
This reverts commit 54b0b58224146d68a11bccb5e64683ab3029373a.
Revert "mm/oom_kill.c: print points as unsigned int"
This reverts commit 603f975a6d4f0b56c7f6df7889ef2a704eca94a3.
Signed-off-by: Corinna Vinschen <xda@vinschen.de>
Revert "mm: oom_kill: simplify OOM killer locking"
This reverts commit 7951a52ed35d162063fa08b27894e302fd716ccd.
Revert "mm: oom_kill: remove unnecessary locking in exit_oom_victim()"
This reverts commit f0739b25ac884682865d6aae7485e79489107bfb.
Revert "mm: oom_kill: generalize OOM progress waitqueue"
This reverts commit eb4b1243c72ba0b392bbe05dbf9f91959f70eb18.
Revert "mm: oom_kill: switch test-and-clear of known TIF_MEMDIE to clear"
This reverts commit e611f16275c3642cb8a6345ff2470926fef52110.
Revert "mm: oom_kill: clean up victim marking and exiting interfaces"
This reverts commit c6fada01b9370e3d7603b4ad8c26b56759174667.
Revert "mm: oom_kill: remove unnecessary locking in oom_enable()"
This reverts commit 5dd152d7351b3805f59b2b1f624722ab2f3c5fd8.
Revert "oom, PM: make OOM detection in the freezer path raceless"
This reverts commit 5fc5b1ddee5404a7629dd7045f54eaf8941bc11c.
| -rw-r--r-- | drivers/staging/android/lowmemorykiller.c | 2 |
| -rw-r--r-- | drivers/tty/sysrq.c | 7 |
| -rw-r--r-- | include/linux/cpuset.h | 4 |
| -rw-r--r-- | include/linux/oom.h | 26 |
| -rw-r--r-- | kernel/cpuset.c | 18 |
| -rw-r--r-- | kernel/exit.c | 4 |
| -rw-r--r-- | kernel/power/process.c | 50 |
| -rw-r--r-- | mm/memcontrol.c | 22 |
| -rw-r--r-- | mm/oom_kill.c | 259 |
| -rw-r--r-- | mm/page_alloc.c | 25 |
10 files changed, 215 insertions, 202 deletions
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 8251171ca..eef102355 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -218,7 +218,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) * infrastructure. There is no real reason why the selected * task should have access to the memory reserves. */ - mark_oom_victim(selected); + mark_tsk_oom_victim(selected); send_sig(SIGKILL, selected, 0); rem -= selected_tasksize; } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 70247e699..240384965 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -353,11 +353,8 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - mutex_lock(&oom_lock); - if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), - GFP_KERNEL, 0, NULL, true)) - pr_info("OOM request ignored because killer is disabled\n"); - mutex_unlock(&oom_lock); + out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL, + 0, NULL, true); } static DECLARE_WORK(moom_work, moom_callback); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 660ccb535..7bc96dce8 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -86,7 +86,7 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); -extern void cpuset_print_current_mems_allowed(void); +extern void cpuset_print_task_mems_allowed(struct task_struct *p); /* * get_mems_allowed is required when making decisions involving mems_allowed @@ -219,7 +219,7 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_print_current_mems_allowed(void) +static inline void cpuset_print_task_mems_allowed(struct task_struct *p) { } diff --git a/include/linux/oom.h b/include/linux/oom.h index acabe09da..e31b58f75 100644 --- a/include/linux/oom.h 
+++ b/include/linux/oom.h @@ -32,8 +32,6 @@ enum oom_scan_t { /* Thread is the potential origin of an oom condition; kill first on oom */ #define OOM_FLAG_ORIGIN ((__force oom_flags_t)0x1) -extern struct mutex oom_lock; - static inline void set_current_oom_origin(void) { current->signal->oom_flags |= OOM_FLAG_ORIGIN; @@ -49,7 +47,9 @@ static inline bool oom_task_origin(const struct task_struct *p) return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); } -extern void mark_oom_victim(struct task_struct *tsk); +extern void mark_tsk_oom_victim(struct task_struct *tsk); + +extern void unmark_oom_victim(void); extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, @@ -62,6 +62,9 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message); +extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); +extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); + extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, int order, const nodemask_t *nodemask); @@ -69,17 +72,22 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill); -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, +extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *mask, bool force_kill); - -extern void exit_oom_victim(void); - extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); extern bool oom_killer_disabled; -extern bool oom_killer_disable(void); -extern void oom_killer_enable(void); + +static inline void oom_killer_disable(void) +{ + oom_killer_disabled = true; +} + +static inline void oom_killer_enable(void) +{ + oom_killer_disabled = false; +} extern struct task_struct 
*find_lock_task_mm(struct task_struct *p); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f3a417c1d..ab0be4565 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2601,26 +2601,28 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, #define CPUSET_NODELIST_LEN (256) /** - * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed + * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed + * @task: pointer to task_struct of some task. * - * Description: Prints current's name, cpuset name, and cached copy of its - * mems_allowed to the kernel log. + * Description: Prints @task's name, cpuset name, and cached copy of its + * mems_allowed to the kernel log. Must hold task_lock(task) to allow + * dereferencing task_cs(task). */ -void cpuset_print_current_mems_allowed(void) +void cpuset_print_task_mems_allowed(struct task_struct *tsk) { /* Statically allocated to prevent using excess stack. */ static char cpuset_nodelist[CPUSET_NODELIST_LEN]; static DEFINE_SPINLOCK(cpuset_buffer_lock); - struct cgroup *cgrp = task_cs(current)->css.cgroup; + struct cgroup *cgrp = task_cs(tsk)->css.cgroup; rcu_read_lock(); spin_lock(&cpuset_buffer_lock); nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, - current->mems_allowed); - pr_info("%s cpuset=%s mems_allowed=%s\n", - current->comm, cgroup_name(cgrp), cpuset_nodelist); + tsk->mems_allowed); + printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", + tsk->comm, cgroup_name(cgrp), cpuset_nodelist); spin_unlock(&cpuset_buffer_lock); rcu_read_unlock(); diff --git a/kernel/exit.c b/kernel/exit.c index d9153652a..dffb92e9b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -458,10 +458,8 @@ static void exit_mm(struct task_struct *tsk) enter_lazy_tlb(mm, current); task_unlock(tsk); mm_update_next_owner(mm); - mmput(mm); - if (test_thread_flag(TIF_MEMDIE)) - exit_oom_victim(); + unmark_oom_victim(); } static struct task_struct *find_alive_thread(struct task_struct *p) diff --git 
a/kernel/power/process.c b/kernel/power/process.c index 899e07479..3bd3a1185 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -118,6 +118,30 @@ static int try_to_freeze_tasks(bool user_only) return todo ? -EBUSY : 0; } +static bool __check_frozen_processes(void) +{ + struct task_struct *g, *p; + + for_each_process_thread(g, p) + if (p != current && !freezer_should_skip(p) && !frozen(p)) + return false; + + return true; +} + +/* + * Returns true if all freezable tasks (except for current) are frozen already + */ +static bool check_frozen_processes(void) +{ + bool ret; + + read_lock(&tasklist_lock); + ret = __check_frozen_processes(); + read_unlock(&tasklist_lock); + return ret; +} + /** * freeze_processes - Signal user space processes to enter the refrigerator. * The current thread will not be frozen. The same process that calls @@ -128,6 +152,7 @@ static int try_to_freeze_tasks(bool user_only) int freeze_processes(void) { int error; + int oom_kills_saved; error = __usermodehelper_disable(UMH_FREEZING); if (error) @@ -142,22 +167,29 @@ int freeze_processes(void) pm_wakeup_clear(); pr_info("Freezing user space processes ... "); pm_freezing = true; + oom_kills_saved = oom_kills_count(); error = try_to_freeze_tasks(true); if (!error) { __usermodehelper_set_disable_depth(UMH_DISABLED); - pr_cont("done."); + oom_killer_disable(); + + /* + * There might have been an OOM kill while we were + * freezing tasks and the killed task might be still + * on the way out so we have to double check for race. + */ + if (oom_kills_count() != oom_kills_saved && + !check_frozen_processes()) { + __usermodehelper_set_disable_depth(UMH_ENABLED); + pr_cont("OOM in progress."); + error = -EBUSY; + } else { + pr_cont("done."); + } } pr_cont("\n"); BUG_ON(in_atomic()); - /* - * Now that the whole userspace is frozen we need to disbale - * the OOM killer to disallow any further interference with - * killable tasks. 
- */ - if (!error && !oom_killer_disable()) - error = -EBUSY; - if (error) thaw_processes(); return error; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b843aeeb3..f58bcb213 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1807,16 +1807,14 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, unsigned int points = 0; struct task_struct *chosen = NULL; - mutex_lock(&oom_lock); - /* * If current has a pending SIGKILL or is exiting, then automatically * select it. The goal is to allow it to allocate so that it may * quickly exit and free its memory. */ if (fatal_signal_pending(current) || task_will_free_mem(current)) { - mark_oom_victim(current); - goto unlock; + mark_tsk_oom_victim(current); + return; } check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); @@ -1844,7 +1842,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, mem_cgroup_iter_break(memcg, iter); if (chosen) put_task_struct(chosen); - goto unlock; + return; case OOM_SCAN_OK: break; }; @@ -1865,13 +1863,11 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, cgroup_iter_end(cgroup, &it); } - if (chosen) { - points = chosen_points * 1000 / totalpages; - oom_kill_process(chosen, gfp_mask, order, points, totalpages, - memcg, NULL, "Memory cgroup out of memory"); - } -unlock: - mutex_unlock(&oom_lock); + if (!chosen) + return; + points = chosen_points * 1000 / totalpages; + oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg, + NULL, "Memory cgroup out of memory"); } static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, @@ -2266,7 +2262,7 @@ bool mem_cgroup_oom_synchronize(bool handle) if (!memcg) return false; - if (!handle || oom_killer_disabled) + if (!handle) goto cleanup; owait.memcg = memcg; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ffb71de7c..3b7e5c947 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -42,8 +42,7 @@ int sysctl_panic_on_oom; int 
sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; - -DEFINE_MUTEX(oom_lock); +static DEFINE_SPINLOCK(zone_scan_lock); static unsigned long last_victim; @@ -300,6 +299,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task, if (oom_task_origin(task)) return OOM_SCAN_SELECT; + if (task_will_free_mem(task) && !force_kill) + return OOM_SCAN_ABORT; + return OOM_SCAN_OK; } @@ -399,11 +401,13 @@ void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, struct mem_cgroup *memcg, const nodemask_t *nodemask) { + task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " "oom_score_adj=%hd\n", current->comm, gfp_mask, order, current->signal->oom_score_adj); - cpuset_print_current_mems_allowed(); + cpuset_print_task_mems_allowed(current); + task_unlock(current); dump_stack(); if (memcg) mem_cgroup_print_oom_info(memcg, p); @@ -414,26 +418,30 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, } /* - * Number of OOM victims in flight + * Number of OOM killer invocations (including memcg OOM killer). + * Primarily used by PM freezer to check for potential races with + * OOM killed frozen task. */ -static atomic_t oom_victims = ATOMIC_INIT(0); -static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); +static atomic_t oom_kills = ATOMIC_INIT(0); -bool oom_killer_disabled __read_mostly; +int oom_kills_count(void) +{ + return atomic_read(&oom_kills); +} + +void note_oom_kill(void) +{ + atomic_inc(&oom_kills); +} /** - * mark_oom_victim - mark the given task as OOM victim + * mark_tsk_oom_victim - marks the given taks as OOM victim. * @tsk: task to mark - * - * Has to be called with oom_lock held and never after - * oom has been disabled already. 
*/ -void mark_oom_victim(struct task_struct *tsk) +void mark_tsk_oom_victim(struct task_struct *tsk) { - WARN_ON(oom_killer_disabled); - /* OOM killer might race with memcg OOM */ - if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE)) - return; + set_tsk_thread_flag(tsk, TIF_MEMDIE); + /* * Make sure that the task is woken up from uninterruptible sleep * if it is frozen because OOM killer wouldn't be able to free @@ -441,73 +449,14 @@ void mark_oom_victim(struct task_struct *tsk) * that TIF_MEMDIE tasks should be ignored. */ __thaw_task(tsk); - atomic_inc(&oom_victims); } /** - * exit_oom_victim - note the exit of an OOM victim + * unmark_oom_victim - unmarks the current task as OOM victim. */ -void exit_oom_victim(void) +void unmark_oom_victim(void) { clear_thread_flag(TIF_MEMDIE); - - if (!atomic_dec_return(&oom_victims)) - wake_up_all(&oom_victims_wait); -} - -/** - * oom_killer_disable - disable OOM killer - * - * Forces all page allocations to fail rather than trigger OOM killer. - * Will block and wait until all OOM victims are killed. - * - * The function cannot be called when there are runnable user tasks because - * the userspace would see unexpected allocation failures as a result. Any - * new usage of this function should be consulted with MM people. - * - * Returns true if successful and false if the OOM killer cannot be - * disabled. - */ -bool oom_killer_disable(void) -{ - /* - * Make sure to not race with an ongoing OOM killer. Check that the - * current is not killed (possibly due to sharing the victim's memory). - */ - if (mutex_lock_killable(&oom_lock)) - return false; - oom_killer_disabled = true; - mutex_unlock(&oom_lock); - - wait_event(oom_victims_wait, !atomic_read(&oom_victims)); - - return true; -} - -/** - * oom_killer_enable - enable OOM killer - */ -void oom_killer_enable(void) -{ - oom_killer_disabled = false; -} - -/* - * task->mm can be NULL if the task is the exited group leader. 
So to - * determine whether the task is using a particular mm, we examine all the - * task's threads: if one of those is using this mm then this task was also - * using it. - */ -static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) -{ - struct task_struct *t; - - for_each_thread(p, t) { - struct mm_struct *t_mm = READ_ONCE(t->mm); - if (t_mm) - return t_mm == mm; - } - return false; } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -534,7 +483,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, */ task_lock(p); if (p->mm && task_will_free_mem(p)) { - mark_oom_victim(p); + mark_tsk_oom_victim(p); task_unlock(p); last_victim = jiffies; put_task_struct(p); @@ -545,8 +494,10 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, if (__ratelimit(&oom_rs)) dump_header(p, gfp_mask, order, memcg, nodemask); - pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", + task_lock(p); + pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n", message, task_pid_nr(p), p->comm, points); + task_unlock(p); /* * If any of p's children has a different mm and is eligible for kill, @@ -555,17 +506,9 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * still freeing memory. */ read_lock(&tasklist_lock); - - /* - * The task 'p' might have already exited before reaching here. The - * put_task_struct() will free task_struct 'p' while the loop still try - * to access the field of 'p', so, get an extra reference. 
- */ - get_task_struct(p); for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; - enum oom_scan_t scan_result; /*LCH add for race condition*/ if (task_will_free_mem(p)) { @@ -578,16 +521,8 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, return; } - if (process_shares_mm(child, p->mm)) - continue; - - /* Make sure no objections to killing the child */ - scan_result = oom_scan_process_thread(child, totalpages, - nodemask, false); - if (scan_result == OOM_SCAN_CONTINUE || - scan_result == OOM_SCAN_ABORT) + if (child->mm == p->mm) continue; - /* * oom_badness() returns 0 if the thread is unkillable */ @@ -601,7 +536,6 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, } } } - put_task_struct(p); read_unlock(&tasklist_lock); p = find_lock_task_mm(victim); @@ -616,13 +550,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* mm cannot safely be dereferenced after task_unlock(victim) */ mm = victim->mm; - /* - * We should send SIGKILL before setting TIF_MEMDIE in order to prevent - * the OOM victim from depleting the memory reserves from the user - * space under its control. - */ - do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); - mark_oom_victim(victim); + mark_tsk_oom_victim(victim); pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), K(get_mm_counter(victim->mm, MM_ANONPAGES)), @@ -639,22 +567,22 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * pending fatal signal. 
*/ rcu_read_lock(); - for_each_process(p) { - if (!process_shares_mm(p, mm)) - continue; - if (same_thread_group(p, victim)) - continue; - if (unlikely(p->flags & PF_KTHREAD)) - continue; - if (is_global_init(p)) - continue; - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - continue; - do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); - } + for_each_process(p) + if (p->mm == mm && !same_thread_group(p, victim) && + !(p->flags & PF_KTHREAD)) { + if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + continue; + + task_lock(p); /* Protect ->comm from prctl() */ + pr_err("Kill process %d (%s) sharing same memory\n", + task_pid_nr(p), p->comm); + task_unlock(p); + do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); + } rcu_read_unlock(); last_victim = jiffies; + do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); put_task_struct(victim); } #undef K @@ -695,8 +623,54 @@ int unregister_oom_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_oom_notifier); +/* + * Try to acquire the OOM killer lock for the zones in zonelist. Returns zero + * if a parallel OOM killing is already taking place that includes a zone in + * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. + */ +bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask) +{ + struct zoneref *z; + struct zone *zone; + bool ret = true; + + spin_lock(&zone_scan_lock); + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) + if (zone_is_oom_locked(zone)) { + ret = false; + goto out; + } + + /* + * Lock each zone in the zonelist under zone_scan_lock so a parallel + * call to oom_zonelist_trylock() doesn't succeed when it shouldn't. 
+ */ + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) + zone_set_flag(zone, ZONE_OOM_LOCKED); + +out: + spin_unlock(&zone_scan_lock); + return ret; +} + +/* + * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed + * allocation attempts with zonelists containing them may now recall the OOM + * killer, if necessary. + */ +void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) +{ + struct zoneref *z; + struct zone *zone; + + spin_lock(&zone_scan_lock); + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) + zone_clear_flag(zone, ZONE_OOM_LOCKED); + spin_unlock(&zone_scan_lock); +} + /** - * __out_of_memory - kill the "best" process when we run out of memory + * out_of_memory - kill the "best" process when we run out of memory * @zonelist: zonelist pointer * @gfp_mask: memory allocation flags * @order: amount of memory being requested as a power of 2 @@ -708,8 +682,8 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, - int order, nodemask_t *nodemask, bool force_kill) +void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, + int order, nodemask_t *nodemask, bool force_kill) { const nodemask_t *mpol_mask; struct task_struct *p; @@ -717,9 +691,7 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, unsigned long freed = 0; unsigned int uninitialized_var(points); enum oom_constraint constraint = CONSTRAINT_NONE; - - if (oom_killer_disabled) - return false; + int killed = 0; #ifdef CONFIG_MT_ENG_BUILD //void add_kmem_status_oom_counter(void); @@ -729,7 +701,7 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) /* Got some memory back in the last second. 
*/ - return true; + return; /* * If current has a pending SIGKILL or is exiting, then automatically @@ -741,9 +713,9 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, */ if (current->mm && (fatal_signal_pending(current) || task_will_free_mem(current))) { - mark_oom_victim(current); + mark_tsk_oom_victim(current); last_victim = jiffies; - return true; + return; } /* @@ -762,7 +734,7 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, nodemask, "Out of memory (oom_kill_allocating_task)"); - return true; + goto out; } p = select_bad_process(&points, totalpages, mpol_mask, force_kill); @@ -774,13 +746,15 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, if (PTR_ERR(p) != -1UL) { oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, nodemask, "Out of memory"); - /* - * Give the killed process a good chance to exit before trying - * to allocate memory again. - */ - schedule_timeout_killable(1); + killed = 1; } - return true; +out: + /* + * Give the killed threads a good chance of exiting before trying to + * allocate memory again. + */ + if (killed) + schedule_timeout_killable(1); } /* @@ -790,21 +764,14 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, */ void pagefault_out_of_memory(void) { - if (mem_cgroup_oom_synchronize(true)) - return; + struct zonelist *zonelist; - if (!mutex_trylock(&oom_lock)) + if (mem_cgroup_oom_synchronize(true)) return; - if (!out_of_memory(NULL, 0, 0, NULL, false)) { - /* - * There shouldn't be any user tasks runnable while the - * OOM killer is disabled, so the current task has to - * be a racing OOM victim for which oom_killer_disable() - * is waiting for. 
- */ - WARN_ON(test_thread_flag(TIF_MEMDIE)); + zonelist = node_zonelist(first_memory_node, GFP_KERNEL); + if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) { + out_of_memory(NULL, 0, 0, NULL, false); + oom_zonelist_unlock(zonelist, GFP_KERNEL); } - - mutex_unlock(&oom_lock); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4025d9979..82a296ec2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -281,6 +281,8 @@ void __meminit set_pageblock_mobility(struct page *page, int mobility) } #endif +bool oom_killer_disabled __read_mostly; + #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { @@ -2529,17 +2531,28 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, *did_some_progress = 0; + if (oom_killer_disabled) + return NULL; + /* - * Acquire the oom lock. If that fails, somebody else is - * making progress for us. + * Acquire the per-zone oom lock for each zone. If that + * fails, somebody else is making progress for us. */ - if (!mutex_trylock(&oom_lock)) { + if (!oom_zonelist_trylock(zonelist, gfp_mask)) { *did_some_progress = 1; schedule_timeout_uninterruptible(1); return NULL; } /* + * PM-freezer should be notified that there might be an OOM killer on + * its way to kill and wake somebody up. This is too early and we might + * end up not killing anything but false positives are acceptable. + * See freeze_processes. + */ + note_oom_kill(); + + /* * Go through the zonelist yet one more time, keep very high watermark * here, this is only to catch a parallel oom killing, we must fail if * we're still under heavy pressure. 
@@ -2575,10 +2588,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - if (out_of_memory(zonelist, gfp_mask, order, nodemask, false)) - *did_some_progress = 1; + out_of_memory(zonelist, gfp_mask, order, nodemask, false); + *did_some_progress = 1; out: - mutex_unlock(&oom_lock); + oom_zonelist_unlock(zonelist, gfp_mask); return page; } |
