From 88e64759a0643e835a6bf63cf2489d62d33f5a29 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 17 Feb 2016 10:48:40 +0100 Subject: mm: vmpressure: dynamic window sizing. The window size used for calculating vm pressure events was previously fixed at 512 pages. The window size has a big impact on the rate of notifications sent off to userspace, in particular when using the "low" level. On machines with a lot of memory, a fixed 512-page window is likely too small, resulting in an excessive rate of notifications. On the other hand, if the window size is too big, we might delay memory pressure events for too long, especially at critical levels of memory pressure. This patch attempts to address that problem with two changes. The first change is to calculate the window size based on the machine size, quite similar to how the vm watermarks are being calculated. This reduces the chance of false positives at any pressure level. Using the machine size only makes sense on the root cgroup though; for non-root cgroups, their hard memory limit is used to calculate the window size. If no hard memory limit is set, we fall back to the default window size that was previously used. The second change is based on an idea from Johannes Weiner, to only report medium and low pressure levels once every X windows that we scan. This reduces the frequency with which we report low/medium pressure levels, but at the same time will still report critical memory pressure immediately. 
Change-Id: Ieffca055b2fb6aa27ae0179e0a588e6fcb173a61 Signed-off-by: Martijn Coenen --- include/linux/vmpressure.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be07734..b7a1c907a 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -8,10 +8,39 @@ #include #include +enum vmpressure_levels { + VMPRESSURE_LOW = 0, + VMPRESSURE_MEDIUM, + VMPRESSURE_CRITICAL, + VMPRESSURE_NUM_LEVELS, +}; + struct vmpressure { + /* + * The window size is the number of scanned pages before + * we try to analyze scanned/reclaimed ratio. Using small window + * sizes can cause a lot of false positives, but too big window size will + * delay the notifications. + * + * In order to reduce the amount of false positives for low and medium + * levels, those levels aren't reported until we've seen multiple + * windows at those respective pressure levels. This makes + * sure that we don't delay notifications when encountering critical + * levels of memory pressure, but also don't spam userspace in case + * nothing serious is going on. The number of windows seen at each + * pressure level is kept in nr_windows below. + * + * For the root mem cgroup, the window size is computed based on the + * total amount of pages available in the system. For non-root cgroups, + * we compute the window size based on the hard memory limit, or if + * that is not set, we fall back to the default window size. + */ + unsigned long window_size; + /* The number of windows we've seen each pressure level occur for */ + unsigned int nr_windows[VMPRESSURE_NUM_LEVELS]; unsigned long scanned; unsigned long reclaimed; - /* The lock is used to keep the scanned/reclaimed above in sync. */ + /* The lock is used to keep the members above in sync. */ struct mutex sr_lock; /* The list of vmpressure_event structs. 
*/ @@ -29,10 +58,12 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, unsigned long scanned, unsigned long reclaimed); extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); -extern void vmpressure_init(struct vmpressure *vmpr); +extern void vmpressure_init(struct vmpressure *vmpr, bool is_root); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); +extern void vmpressure_update_mem_limit(struct mem_cgroup *memcg, + unsigned long new_limit); extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, struct eventfd_ctx *eventfd, const char *args); -- cgit v1.2.3