diff options
| author | Jaegeuk Kim <jaegeuk@kernel.org> | 2017-08-06 22:09:00 -0700 |
|---|---|---|
| committer | Mister Oyster <oysterized@gmail.com> | 2017-10-04 15:32:45 +0200 |
| commit | 0d9128e4ccfa31ef2ef1c797ab9710a74d7ebc4b (patch) | |
| tree | 61e8e595c49c002f78b5920a75839a82fc75e0a5 | |
| parent | df547bfceda4e17af41d2bbb54a4debef9d0860c (diff) | |
f2fs: introduce gc_urgent mode for background GC
This patch adds a sysfs entry to control urgent mode for background GC.
If this is set, background GC thread conducts GC with gc_urgent_sleep_time
all the time.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
| -rw-r--r-- | Documentation/ABI/testing/sysfs-fs-f2fs | 12 | ||||
| -rw-r--r-- | Documentation/filesystems/f2fs.txt | 141 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 17 | ||||
| -rw-r--r-- | fs/f2fs/gc.h | 4 | ||||
| -rw-r--r-- | fs/f2fs/sysfs.c | 9 |
5 files changed, 173 insertions, 10 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 1c3dc27c9..cecb93af8 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -139,3 +139,15 @@ Date: June 2017 Contact: "Chao Yu" <yuchao0@huawei.com> Description: Controls current reserved blocks in system. + +What: /sys/fs/f2fs/<disk>/gc_urgent +Date: August 2017 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Do background GC agressively + +What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time +Date: August 2017 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Controls sleep time of GC urgent mode diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index eac67c969..5a7fd0677 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -18,8 +18,8 @@ according to its internal geometry or flash memory management scheme, namely FTL F2FS and its tools support various parameters not only for configuring on-disk layout, but also for selecting allocation and cleaning algorithms. -The file system formatting tool, "mkfs.f2fs", is available from the following -git tree: +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). >> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git For reporting bugs and sending patches, please use the following mailing list: @@ -102,8 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage collection, triggered in background when I/O subsystem is idle. If background_gc=on, it will turn on the garbage collection and if background_gc=off, garbage collection - will be turned off. If background_gc=sync, it will turn - on synchronous garbage collection running in background. + will be truned off. Default value for this option is on. So garbage collection is on by default. disable_roll_forward Disable the roll-forward recovery routine @@ -177,6 +176,95 @@ f2fs. Each file shows the whole f2fs information. - current memory footprint consumed by f2fs. ================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2f2 file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in table below. + +Files in /sys/fs/f2fs/<devname> +(see also Documentation/ABI/testing/sysfs-fs-f2fs) +.............................................................................. + File Content + + gc_max_sleep_time This tuning parameter controls the maximum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_min_sleep_time This tuning parameter controls the minimum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_no_gc_sleep_time This tuning parameter controls the default sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_idle This parameter controls the selection of victim + policy for garbage collection. Setting gc_idle = 0 + (default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach + & setting gc_idle = 2 will select the greedy aproach. + + gc_urgent This parameter controls triggering background GCs + urgently or not. Setting gc_urgent = 0 [default] + makes back to default behavior, while if it is set + to 1, background thread starts to do GC by given + gc_urgent_sleep_time interval. + + gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. + 500 ms is set by default. See above gc_urgent. + + reclaim_segments This parameter controls the number of prefree + segments to be reclaimed. If the number of prefree + segments is larger than the number of segments + in the proportion to the percentage over total + volume size, f2fs tries to conduct checkpoint to + reclaim the prefree segments to free segments. + By default, 5% over total # of segments. + + max_small_discards This parameter controls the number of discard + commands that consist small blocks less than 2MB. + The candidates to be discarded are cached until + checkpoint is triggered, and issued during the + checkpoint. By default, it is disabled with 0. + + ipu_policy This parameter controls the policy of in-place + updates in f2fs. There are five policies: + 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, + 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, + 0x10: F2FS_IPU_FSYNC. + + min_ipu_util This parameter controls the threshold to trigger + in-place-updates. The number indicates percentage + of the filesystem utilization, and used by + F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. + + min_fsync_blocks This parameter controls the threshold to trigger + in-place-updates when F2FS_IPU_FSYNC mode is set. + The number indicates the number of dirty pages + when fsync needs to flush on its call path. If + the number is less than this value, it triggers + in-place-updates. + + max_victim_search This parameter controls the number of trials to + find a victim segment when conducting SSR and + cleaning operations. The default value is 4096 + which covers 8GB block address range. + + dir_level This parameter controls the directory level to + support large directory. If a directory has a + number of files, it can reduce the file lookup + latency by increasing this dir_level value. + Otherwise, it needs to decrease this value to + reduce the space overhead. The default value is 0. + + ram_thresh This parameter controls the memory footprint used + by free nids and cached nat entries. By default, + 10 is set, which indicates 10 MB / 1 GB RAM. + +================================================================================ USAGE ================================================================================ @@ -193,8 +281,12 @@ USAGE # mkfs.f2fs -l label /dev/block_device # mount -t f2fs /dev/block_device /mnt/f2fs -Format options --------------- +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. + +The options consist of: -l [label] : Give a volume label, up to 512 unicode name. -a [0 or 1] : Split start location of each area for heap-based allocation. 1 is set by default, which performs this. @@ -208,6 +300,37 @@ Format options -t [0 or 1] : Disable discard command or not. 1 is set by default, which conducts discard. +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. + +The options consist of: + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information reconized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. + +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + ================================================================================ DESIGN ================================================================================ @@ -390,9 +513,11 @@ The number of blocks and buckets are determined by, # of blocks in level #n = | `- 4, Otherwise - ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + ,- 2^(n + dir_level), + | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), + Otherwise When F2FS finds a file name in a directory, at first a hash value of the file name is calculated. Then, F2FS scans the hash table in level #0 to find the diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ceb5ba21d..e71c508fc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -35,9 +35,14 @@ static int gc_thread_func(void *data) set_freezable(); do { wait_event_interruptible_timeout(*wq, - kthread_should_stop() || freezing(current), + kthread_should_stop() || freezing(current) || + gc_th->gc_wake, msecs_to_jiffies(wait_ms)); + /* give it a try one time */ + if (gc_th->gc_wake) + gc_th->gc_wake = 0; + if (try_to_freeze()) continue; if (kthread_should_stop()) @@ -74,6 +79,11 @@ static int gc_thread_func(void *data) if (!mutex_trylock(&sbi->gc_mutex)) goto next; + if (gc_th->gc_urgent) { + wait_ms = gc_th->urgent_sleep_time; + goto do_gc; + } + if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); @@ -84,7 +94,7 @@ static int gc_thread_func(void *data) decrease_sleep_time(gc_th, &wait_ms); else increase_sleep_time(gc_th, &wait_ms); - +do_gc: stat_inc_bggc_count(sbi); /* if return value is not zero, no victim was selected */ @@ -115,11 +125,14 @@ int start_gc_thread(struct f2fs_sb_info *sbi) goto out; } + gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; gc_th->gc_idle = 0; + gc_th->gc_urgent = 0; + gc_th->gc_wake= 0; sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index a993967dc..57a9000ce 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,6 +13,7 @@ * whether IO subsystem is idle * or not */ +#define DEF_GC_THREAD_URGENT_SLEEP_TIME 500 /* 500 ms */ #define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ @@ -27,12 +28,15 @@ struct f2fs_gc_kthread { wait_queue_head_t gc_wait_queue_head; /* for gc sleep time */ + unsigned int urgent_sleep_time; unsigned int min_sleep_time; unsigned int max_sleep_time; unsigned int no_gc_sleep_time; /* for changing gc mode */ unsigned int gc_idle; + unsigned int gc_urgent; + unsigned int gc_wake; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ce1314669..fbbe6e036 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -177,6 +177,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); + if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); + } return count; } @@ -256,10 +260,13 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .id = _id, \ } +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, + urgent_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); @@ -297,10 +304,12 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_urgent_sleep_time), ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_idle), + ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), |
