aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaegeuk Kim <jaegeuk@kernel.org>2017-08-06 22:09:00 -0700
committerMister Oyster <oysterized@gmail.com>2017-10-04 15:32:45 +0200
commit0d9128e4ccfa31ef2ef1c797ab9710a74d7ebc4b (patch)
tree61e8e595c49c002f78b5920a75839a82fc75e0a5
parentdf547bfceda4e17af41d2bbb54a4debef9d0860c (diff)
f2fs: introduce gc_urgent mode for background GC
This patch adds a sysfs entry to control urgent mode for background GC. If this is set, background GC thread conducts GC with gc_urgent_sleep_time all the time. Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs12
-rw-r--r--Documentation/filesystems/f2fs.txt141
-rw-r--r--fs/f2fs/gc.c17
-rw-r--r--fs/f2fs/gc.h4
-rw-r--r--fs/f2fs/sysfs.c9
5 files changed, 173 insertions, 10 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 1c3dc27c9..cecb93af8 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -139,3 +139,15 @@ Date: June 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
Controls current reserved blocks in system.
+
+What: /sys/fs/f2fs/<disk>/gc_urgent
+Date: August 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Do background GC agressively
+
+What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
+Date: August 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls sleep time of GC urgent mode
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index eac67c969..5a7fd0677 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -18,8 +18,8 @@ according to its internal geometry or flash memory management scheme, namely FTL
F2FS and its tools support various parameters not only for configuring on-disk
layout, but also for selecting allocation and cleaning algorithms.
-The file system formatting tool, "mkfs.f2fs", is available from the following
-git tree:
+The following git tree provides the file system formatting tool (mkfs.f2fs),
+a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
For reporting bugs and sending patches, please use the following mailing list:
@@ -102,8 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
collection and if background_gc=off, garbage collection
- will be turned off. If background_gc=sync, it will turn
- on synchronous garbage collection running in background.
+ will be truned off.
Default value for this option is on. So garbage
collection is on by default.
disable_roll_forward Disable the roll-forward recovery routine
@@ -177,6 +176,95 @@ f2fs. Each file shows the whole f2fs information.
- current memory footprint consumed by f2fs.
================================================================================
+SYSFS ENTRIES
+================================================================================
+
+Information about mounted f2f2 file systems can be found in
+/sys/fs/f2fs. Each mounted filesystem will have a directory in
+/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda).
+The files in each per-device directory are shown in table below.
+
+Files in /sys/fs/f2fs/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-f2fs)
+..............................................................................
+ File Content
+
+ gc_max_sleep_time This tuning parameter controls the maximum sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_min_sleep_time This tuning parameter controls the minimum sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_no_gc_sleep_time This tuning parameter controls the default sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_idle This parameter controls the selection of victim
+ policy for garbage collection. Setting gc_idle = 0
+ (default) will disable this option. Setting
+ gc_idle = 1 will select the Cost Benefit approach
+ & setting gc_idle = 2 will select the greedy aproach.
+
+ gc_urgent This parameter controls triggering background GCs
+ urgently or not. Setting gc_urgent = 0 [default]
+ makes back to default behavior, while if it is set
+ to 1, background thread starts to do GC by given
+ gc_urgent_sleep_time interval.
+
+ gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
+ 500 ms is set by default. See above gc_urgent.
+
+ reclaim_segments This parameter controls the number of prefree
+ segments to be reclaimed. If the number of prefree
+ segments is larger than the number of segments
+ in the proportion to the percentage over total
+ volume size, f2fs tries to conduct checkpoint to
+ reclaim the prefree segments to free segments.
+ By default, 5% over total # of segments.
+
+ max_small_discards This parameter controls the number of discard
+ commands that consist small blocks less than 2MB.
+ The candidates to be discarded are cached until
+ checkpoint is triggered, and issued during the
+ checkpoint. By default, it is disabled with 0.
+
+ ipu_policy This parameter controls the policy of in-place
+ updates in f2fs. There are five policies:
+ 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,
+ 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL,
+ 0x10: F2FS_IPU_FSYNC.
+
+ min_ipu_util This parameter controls the threshold to trigger
+ in-place-updates. The number indicates percentage
+ of the filesystem utilization, and used by
+ F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
+
+ min_fsync_blocks This parameter controls the threshold to trigger
+ in-place-updates when F2FS_IPU_FSYNC mode is set.
+ The number indicates the number of dirty pages
+ when fsync needs to flush on its call path. If
+ the number is less than this value, it triggers
+ in-place-updates.
+
+ max_victim_search This parameter controls the number of trials to
+ find a victim segment when conducting SSR and
+ cleaning operations. The default value is 4096
+ which covers 8GB block address range.
+
+ dir_level This parameter controls the directory level to
+ support large directory. If a directory has a
+ number of files, it can reduce the file lookup
+ latency by increasing this dir_level value.
+ Otherwise, it needs to decrease this value to
+ reduce the space overhead. The default value is 0.
+
+ ram_thresh This parameter controls the memory footprint used
+ by free nids and cached nat entries. By default,
+ 10 is set, which indicates 10 MB / 1 GB RAM.
+
+================================================================================
USAGE
================================================================================
@@ -193,8 +281,12 @@ USAGE
# mkfs.f2fs -l label /dev/block_device
# mount -t f2fs /dev/block_device /mnt/f2fs
-Format options
---------------
+mkfs.f2fs
+---------
+The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem,
+which builds a basic on-disk layout.
+
+The options consist of:
-l [label] : Give a volume label, up to 512 unicode name.
-a [0 or 1] : Split start location of each area for heap-based allocation.
1 is set by default, which performs this.
@@ -208,6 +300,37 @@ Format options
-t [0 or 1] : Disable discard command or not.
1 is set by default, which conducts discard.
+fsck.f2fs
+---------
+The fsck.f2fs is a tool to check the consistency of an f2fs-formatted
+partition, which examines whether the filesystem metadata and user-made data
+are cross-referenced correctly or not.
+Note that, initial version of the tool does not fix any inconsistency.
+
+The options consist of:
+ -d debug level [default:0]
+
+dump.f2fs
+---------
+The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
+file. Each file is dump_ssa and dump_sit.
+
+The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
+It shows on-disk inode information reconized by a given inode number, and is
+able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
+./dump_sit respectively.
+
+The options consist of:
+ -d debug level [default:0]
+ -i inode no (hex)
+ -s [SIT dump segno from #1~#2 (decimal), for all 0~-1]
+ -a [SSA dump segno from #1~#2 (decimal), for all 0~-1]
+
+Examples:
+# dump.f2fs -i [ino] /dev/sdx
+# dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
+# dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+
================================================================================
DESIGN
================================================================================
@@ -390,9 +513,11 @@ The number of blocks and buckets are determined by,
# of blocks in level #n = |
`- 4, Otherwise
- ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
+ ,- 2^(n + dir_level),
+ | if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
# of buckets in level #n = |
- `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise
+ `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+ Otherwise
When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ceb5ba21d..e71c508fc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -35,9 +35,14 @@ static int gc_thread_func(void *data)
set_freezable();
do {
wait_event_interruptible_timeout(*wq,
- kthread_should_stop() || freezing(current),
+ kthread_should_stop() || freezing(current) ||
+ gc_th->gc_wake,
msecs_to_jiffies(wait_ms));
+ /* give it a try one time */
+ if (gc_th->gc_wake)
+ gc_th->gc_wake = 0;
+
if (try_to_freeze())
continue;
if (kthread_should_stop())
@@ -74,6 +79,11 @@ static int gc_thread_func(void *data)
if (!mutex_trylock(&sbi->gc_mutex))
goto next;
+ if (gc_th->gc_urgent) {
+ wait_ms = gc_th->urgent_sleep_time;
+ goto do_gc;
+ }
+
if (!is_idle(sbi)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
@@ -84,7 +94,7 @@ static int gc_thread_func(void *data)
decrease_sleep_time(gc_th, &wait_ms);
else
increase_sleep_time(gc_th, &wait_ms);
-
+do_gc:
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
@@ -115,11 +125,14 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
goto out;
}
+ gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
gc_th->gc_idle = 0;
+ gc_th->gc_urgent = 0;
+ gc_th->gc_wake= 0;
sbi->gc_thread = gc_th;
init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index a993967dc..57a9000ce 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -13,6 +13,7 @@
* whether IO subsystem is idle
* or not
*/
+#define DEF_GC_THREAD_URGENT_SLEEP_TIME 500 /* 500 ms */
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
@@ -27,12 +28,15 @@ struct f2fs_gc_kthread {
wait_queue_head_t gc_wait_queue_head;
/* for gc sleep time */
+ unsigned int urgent_sleep_time;
unsigned int min_sleep_time;
unsigned int max_sleep_time;
unsigned int no_gc_sleep_time;
/* for changing gc mode */
unsigned int gc_idle;
+ unsigned int gc_urgent;
+ unsigned int gc_wake;
};
struct gc_inode_list {
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index ce1314669..fbbe6e036 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -177,6 +177,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
f2fs_reset_iostat(sbi);
+ if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
+ wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
+ }
return count;
}
@@ -256,10 +260,13 @@ static struct f2fs_attr f2fs_attr_##_name = { \
.id = _id, \
}
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
+ urgent_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
@@ -297,10 +304,12 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
+ ATTR_LIST(gc_urgent_sleep_time),
ATTR_LIST(gc_min_sleep_time),
ATTR_LIST(gc_max_sleep_time),
ATTR_LIST(gc_no_gc_sleep_time),
ATTR_LIST(gc_idle),
+ ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),