| author | DerTeufel <dominik-kassel@gmx.de> | 2015-10-19 18:11:08 +0200 |
|---|---|---|
| committer | Mister Oyster <oysterized@gmail.com> | 2016-12-11 12:54:02 +0100 |
| commit | e72fb82ad04600042d5641b186f08952678a21ae | |
| tree | 719b2500b07405edb80aff02bbb89b8317a52b48 | |
| parent | d81785e180c802798c523411571c2a3c647c23a5 | |
zsmalloc: upstream sync
| -rw-r--r-- | drivers/staging/zsmalloc/Kconfig | 14 |
| -rw-r--r-- | drivers/staging/zsmalloc/zsmalloc-main.c | 97 |
| -rw-r--r-- | drivers/staging/zsmalloc/zsmalloc.h | 11 |
3 files changed, 88 insertions, 34 deletions
```diff
diff --git a/drivers/staging/zsmalloc/Kconfig b/drivers/staging/zsmalloc/Kconfig
index 7fab03229..9d1f2a24a 100644
--- a/drivers/staging/zsmalloc/Kconfig
+++ b/drivers/staging/zsmalloc/Kconfig
@@ -1,5 +1,6 @@
 config ZSMALLOC
 	bool "Memory allocator for compressed pages"
+	depends on MMU
 	default n
 	help
 	  zsmalloc is a slab-based memory allocator designed to store
@@ -8,3 +9,16 @@ config ZSMALLOC
 	  non-standard allocator interface where a handle, not a pointer, is
 	  returned by an alloc(). This handle must be mapped in order to
 	  access the allocated space.
+
+config PGTABLE_MAPPING
+	bool "Use page table mapping to access object in zsmalloc"
+	depends on ZSMALLOC
+	help
+	  By default, zsmalloc uses a copy-based object mapping method to
+	  access allocations that span two pages. However, if a particular
+	  architecture (ex, ARM) performs VM mapping faster than copying,
+	  then you should select this. This causes zsmalloc to use page table
+	  mapping rather than copying for object mapping.
+
+	  You can check speed with zsmalloc benchmark[1].
+	  [1] https://github.com/spartacus06/zsmalloc
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 0d9f4c4a7..7660c87d8 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -10,16 +10,14 @@
  * Released under the terms of GNU General Public License Version 2.0
  */
 
-/*
- * This allocator is designed for use with zcache and zram. Thus, the
- * allocator is supposed to work well under low memory conditions. In
- * particular, it never attempts higher order page allocation which is
- * very likely to fail under memory pressure. On the other hand, if we
- * just use single (0-order) pages, it would suffer from very high
- * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
- * an entire page. This was one of the major issues with its predecessor
- * (xvmalloc).
+/*
+ * This allocator is designed for use with zram. Thus, the allocator is
+ * supposed to work well under low memory conditions. In particular, it
+ * never attempts higher order page allocation which is very likely to
+ * fail under memory pressure. On the other hand, if we just use single
+ * (0-order) pages, it would suffer from very high fragmentation --
+ * any object of size PAGE_SIZE/2 or larger would occupy an entire page.
+ * This was one of the major issues with its predecessor (xvmalloc).
  *
  * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
  * and links them together using various 'struct page' fields. These linked
@@ -27,6 +25,21 @@
  * page boundaries. The code refers to these linked pages as a single entity
  * called zspage.
  *
+ * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
+ * since this satisfies the requirements of all its current users (in the
+ * worst case, page is incompressible and is thus stored "as-is" i.e. in
+ * uncompressed form). For allocation requests larger than this size, failure
+ * is returned (see zs_malloc).
+ *
+ * Additionally, zs_malloc() does not return a dereferenceable pointer.
+ * Instead, it returns an opaque handle (unsigned long) which encodes actual
+ * location of the allocated object. The reason for this indirection is that
+ * zsmalloc does not keep zspages permanently mapped since that would cause
+ * issues on 32-bit systems where the VA region for kernel space mappings
+ * is very small. So, before using the allocated memory, the object has to
+ * be mapped using zs_map_object() to get a usable pointer and subsequently
+ * unmapped using zs_unmap_object().
+ *
  * Following is how we use various fields and flags of underlying
  * struct page(s) to form a zspage.
  *
@@ -67,7 +80,6 @@
 #include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/highmem.h>
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <asm/tlbflush.h>
@@ -98,7 +110,7 @@
 
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
- * as single (void *) handle value.
+ * as single (unsigned long) handle value.
  *
  * Note that object index <obj_idx> is relative to system
  * page <PFN> it is stored in, so for each sub-page belonging
@@ -119,7 +131,7 @@
 #endif
 #endif
 #define _PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
-#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - 1)
+#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS)
 #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
@@ -218,19 +230,8 @@ struct zs_pool {
 #define CLASS_IDX_MASK	((1 << CLASS_IDX_BITS) - 1)
 #define FULLNESS_MASK	((1 << FULLNESS_BITS) - 1)
 
-/*
- * By default, zsmalloc uses a copy-based object mapping method to access
- * allocations that span two pages. However, if a particular architecture
- * performs VM mapping faster than copying, then it should be added here
- * so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use
- * page table mapping rather than copying for object mapping.
-*/
-#if defined(CONFIG_ARM) && !defined(MODULE)
-#define USE_PGTABLE_MAPPING
-#endif
-
 struct mapping_area {
-#ifdef USE_PGTABLE_MAPPING
+#ifdef CONFIG_PGTABLE_MAPPING
 	struct vm_struct *vm; /* vm area for mapping object that span pages */
 #else
 	char *vm_buf; /* copy buffer for objects that span pages */
@@ -275,6 +276,13 @@ static void set_zspage_mapping(struct page *page, unsigned int class_idx,
 	page->mapping = (struct address_space *)m;
 }
 
+/*
+ * zsmalloc divides the pool into various size classes where each
+ * class maintains a list of zspages where each zspage is divided
+ * into equal sized chunks. Each allocation falls into one of these
+ * classes depending on its size. This function returns index of the
+ * size class which has chunk size big enough to hold the given size.
+ */
 static int get_size_class_index(int size)
 {
 	int idx = 0;
@@ -286,6 +294,13 @@ static int get_size_class_index(int size)
 	return idx;
 }
 
+/*
+ * For each size class, zspages are divided into different groups
+ * depending on how "full" they are. This was done so that we could
+ * easily find empty or nearly empty zspages when we try to shrink
+ * the pool (not yet implemented). This function returns fullness
+ * status of the given page.
+ */
 static enum fullness_group get_fullness_group(struct page *page)
 {
 	int inuse, max_objects;
@@ -307,6 +322,12 @@ static enum fullness_group get_fullness_group(struct page *page)
 	return fg;
 }
 
+/*
+ * Each size class maintains various freelists and zspages are assigned
+ * to one of these freelists based on the number of live objects they
+ * have. This function inserts the given zspage into the freelist
+ * identified by <class, fullness_group>.
+ */
 static void insert_zspage(struct page *page, struct size_class *class,
 				enum fullness_group fullness)
 {
@@ -324,6 +345,10 @@ static void insert_zspage(struct page *page, struct size_class *class,
 	*head = page;
 }
 
+/*
+ * This function removes the given zspage from the freelist identified
+ * by <class, fullness_group>.
+ */
 static void remove_zspage(struct page *page, struct size_class *class,
 				enum fullness_group fullness)
 {
@@ -345,6 +370,15 @@ static void remove_zspage(struct page *page, struct size_class *class,
 	list_del_init(&page->lru);
 }
 
+/*
+ * Each size class maintains zspages in different fullness groups depending
+ * on the number of live objects they contain. When allocating or freeing
+ * objects, the fullness status of the page can change, say, from ALMOST_FULL
+ * to ALMOST_EMPTY when freeing an object. This function checks if such
+ * a status change has occurred for the given page and accordingly moves the
+ * page from the freelist of the old fullness group to that of the new
+ * fullness group.
+ */
 static enum fullness_group fix_fullness_group(struct zs_pool *pool,
 						struct page *page)
 {
@@ -423,7 +457,7 @@ static struct page *get_next_page(struct page *page)
 	if (is_last_page(page))
 		next = NULL;
 	else if (is_first_page(page))
-		next = (struct page *)page->private;
+		next = (struct page *)page_private(page);
 	else
 		next = list_entry(page->lru.next, struct page, lru);
 
@@ -590,7 +624,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 			first_page->inuse = 0;
 		}
 		if (i == 1)
-			first_page->private = (unsigned long)page;
+			set_page_private(first_page, (unsigned long)page);
 		if (i >= 1)
 			page->first_page = first_page;
 		if (i >= 2)
@@ -631,7 +665,7 @@ static struct page *find_get_zspage(struct size_class *class)
 	return page;
 }
 
-#ifdef USE_PGTABLE_MAPPING
+#ifdef CONFIG_PGTABLE_MAPPING
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
@@ -669,7 +703,7 @@ static inline void __zs_unmap_object(struct mapping_area *area,
 	unmap_kernel_range(addr, PAGE_SIZE * 2);
 }
 
-#else /* USE_PGTABLE_MAPPING */
+#else /* CONFIG_PGTABLE_MAPPING */
 
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
@@ -747,7 +781,7 @@ out:
 	pagefault_enable();
 }
 
-#endif /* USE_PGTABLE_MAPPING */
+#endif /* CONFIG_PGTABLE_MAPPING */
 
 static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
 				void *pcpu)
@@ -853,8 +887,7 @@ void zs_destroy_pool(struct zs_pool *pool)
 		for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
 			if (class->fullness_list[fg]) {
-				pr_info("Freeing non-empty class with size "
-					"%db, fullness group %d\n",
+				pr_info("Freeing non-empty class with size %db, fullness group %d\n",
 					class->size, fg);
 			}
 		}
@@ -977,7 +1010,7 @@ EXPORT_SYMBOL_GPL(zs_free);
  * against nested mappings.
  *
  * This function returns with preemption and page faults disabled.
-*/
+ */
 void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 			enum zs_mapmode mm)
 {
diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h
index 46dbd0558..c2eb174b9 100644
--- a/drivers/staging/zsmalloc/zsmalloc.h
+++ b/drivers/staging/zsmalloc/zsmalloc.h
@@ -18,12 +18,19 @@
 /*
  * zsmalloc mapping modes
  *
- * NOTE: These only make a difference when a mapped object spans pages
-*/
+ * NOTE: These only make a difference when a mapped object spans pages.
+ * They also have no effect when PGTABLE_MAPPING is selected.
+ */
 enum zs_mapmode {
 	ZS_MM_RW, /* normal read-write mapping */
 	ZS_MM_RO, /* read-only (no copy-out at unmap time) */
 	ZS_MM_WO /* write-only (no copy-in at map time) */
+	/*
+	 * NOTE: ZS_MM_WO should only be used for initializing new
+	 * (uninitialized) allocations. Partial writes to already
+	 * initialized allocations should use ZS_MM_RW to preserve the
+	 * existing data.
+	 */
 };
 
 struct zs_pool;
```
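The header comment this patch adds to zsmalloc-main.c spells out the opaque-handle contract, and the new ZS_MM_WO note restricts write-only mappings to fresh allocations. Putting the two together gives the usage pattern below. This is a minimal caller sketch, not part of the patch: `store_and_load()` and its parameters are made-up names, and it assumes the staging-era signatures (`zs_create_pool(gfp_t)`, `zs_malloc(pool, size)`).

```c
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include "zsmalloc.h"	/* drivers/staging/zsmalloc/zsmalloc.h */

static int store_and_load(const void *src, size_t len)
{
	struct zs_pool *pool;
	unsigned long handle;
	void *obj;

	pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
	if (!pool)
		return -ENOMEM;

	/* Returns 0 for len == 0 or len > PAGE_SIZE (see zs_malloc). */
	handle = zs_malloc(pool, len);
	if (!handle) {
		zs_destroy_pool(pool);
		return -ENOMEM;
	}

	/*
	 * The handle is not a dereferenceable pointer; it must be mapped
	 * first. ZS_MM_WO is safe here because the whole object is being
	 * initialized. zs_map_object() returns with preemption and page
	 * faults disabled, so do not sleep before zs_unmap_object().
	 */
	obj = zs_map_object(pool, handle, ZS_MM_WO);
	memcpy(obj, src, len);
	zs_unmap_object(pool, handle);

	/* A later read maps the same handle read-only (no copy-out). */
	obj = zs_map_object(pool, handle, ZS_MM_RO);
	/* ... consume len bytes at obj ... */
	zs_unmap_object(pool, handle);

	zs_free(pool, handle);
	zs_destroy_pool(pool);
	return 0;
}
```

Whether copy-based buffers or page-table remapping backs the mapping (the new CONFIG_PGTABLE_MAPPING option) is invisible to this caller; only the mode argument's copy-in/copy-out semantics change, and only for objects that straddle a page boundary.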
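The OBJ_INDEX_BITS hunk widens the object-index field by one bit: the PFN takes the high bits of the handle and the object index the low bits, per the "(<PFN>, <obj_idx>)" comment. A sketch of the packing as if placed inside zsmalloc-main.c, using the patch's macros; `encode_handle()`/`decode_handle()` are illustrative names (the file's real helpers, obj_location_to_handle() and obj_handle_to_location(), operate on a struct page rather than a raw PFN):

```c
/*
 * Handle layout after this patch:
 *
 *   BITS_PER_LONG-1 ... OBJ_INDEX_BITS | OBJ_INDEX_BITS-1 ... 0
 *   +---------------------------------+------------------------+
 *   |               PFN               |        obj_idx         |
 *   +---------------------------------+------------------------+
 */
static unsigned long encode_handle(unsigned long pfn, unsigned long obj_idx)
{
	return (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);
}

static void decode_handle(unsigned long handle,
			  unsigned long *pfn, unsigned long *obj_idx)
{
	*pfn = handle >> OBJ_INDEX_BITS;
	*obj_idx = handle & OBJ_INDEX_MASK;
}
```

Dropping the "- 1" hands the previously reserved bit to the index field, so OBJ_INDEX_MASK now covers one more bit per page.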
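The new comment above get_size_class_index() describes the size-to-class lookup, but the diff elides the function body. Given the file's constants (classes spaced ZS_SIZE_CLASS_DELTA bytes apart, starting at ZS_MIN_ALLOC_SIZE), the mapping amounts to the following sketch; treat the body as illustrative rather than quoted from the tree:

```c
#include <linux/kernel.h>	/* DIV_ROUND_UP */

static int size_to_class_index(int size)
{
	int idx = 0;

	/* Sizes at or below the minimum all share class 0. */
	if (size > ZS_MIN_ALLOC_SIZE)
		idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
				   ZS_SIZE_CLASS_DELTA);
	return idx;
}
```

Rounding up means an allocation always lands in a class whose chunk size is at least as large as the request, at the cost of some internal fragmentation within the chunk.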
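Likewise, the comment added above get_fullness_group() explains the grouping while the body stays elided. A sketch of the classification as if placed inside zsmalloc-main.c, using the file's enum fullness_group values; the 3/4 threshold mirrors the staging code's fullness_threshold_frac divisor, but treat the exact fraction as an assumption:

```c
static enum fullness_group classify_zspage(int inuse, int max_objects)
{
	if (inuse == 0)
		return ZS_EMPTY;
	if (inuse == max_objects)
		return ZS_FULL;
	/* Up to roughly 3/4 occupied still counts as "almost empty". */
	if (inuse <= 3 * max_objects / 4)
		return ZS_ALMOST_EMPTY;
	return ZS_ALMOST_FULL;
}
```

fix_fullness_group() then compares this classification before and after an alloc or free and, via remove_zspage()/insert_zspage(), moves the zspage between the per-class freelists when the group changes.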
