From 243e5241015fb19a20ffa33bce1a42d00e73cced Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 2 Feb 2015 00:37:00 -0500 Subject: vfs: add support for a lazytime mount option Add a new mount option which enables a new "lazytime" mode. This mode causes atime, mtime, and ctime updates to only be made to the in-memory version of the inode. The on-disk times will only get updated when (a) if the inode needs to be updated for some non-time related change, (b) if userspace calls fsync(), syncfs() or sync(), or (c) just before an undeleted inode is evicted from memory. This is OK according to POSIX because there are no guarantees after a crash unless userspace explicitly requests via a fsync(2) call. For workloads which feature a large number of random write to a preallocated file, the lazytime mount option significantly reduces writes to the inode table. The repeated 4k writes to a single block will result in undesirable stress on flash devices and SMR disk drives. Even on conventional HDD's, the repeated writes to the inode table block will trigger Adjacent Track Interference (ATI) remediation latencies, which very negatively impact long tail latencies --- which is a very big deal for web serving tiers (for example). Google-Bug-Id: 18297052 Signed-off-by: Theodore Ts'o Signed-off-by: Al Viro --- include/trace/events/writeback.h | 60 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) (limited to 'include/trace/events') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 464ea82e1..7db42317b 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -17,6 +17,8 @@ {I_FREEING, "I_FREEING"}, \ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ + {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ + {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \ {I_REFERENCED, "I_REFERENCED"} \ ) @@ -67,6 +69,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, TP_STRUCT__entry ( __array(char, name, 32) __field(unsigned long, ino) + __field(unsigned long, state) __field(unsigned long, flags) ), @@ -77,16 +80,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, strncpy(__entry->name, bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); __entry->ino = inode->i_ino; + __entry->state = inode->i_state; __entry->flags = flags; ), - TP_printk("bdi %s: ino=%lu flags=%s", + TP_printk("bdi %s: ino=%lu state=%s flags=%s", __entry->name, __entry->ino, + show_inode_state(__entry->state), show_inode_state(__entry->flags) ) ); +DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, TP_PROTO(struct inode *inode, int flags), @@ -597,6 +609,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_ARGS(inode, wbc, nr_to_write) ); +DECLARE_EVENT_CLASS(writeback_lazytime_template, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field(unsigned long, ino ) + __field(unsigned long, state ) + __field( __u16, mode ) + __field(unsigned long, dirtied_when ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->mode = inode->i_mode; + __entry->dirtied_when = inode->dirtied_when; + ), + + TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, __entry->dirtied_when, + show_inode_state(__entry->state), __entry->mode) +); + +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ -- cgit v1.2.3