diff options
| -rw-r--r-- | block/Kconfig.iosched | 11 | ||||
| -rw-r--r-- | block/Makefile | 1 | ||||
| -rw-r--r-- | block/fiops-iosched.c | 556 |
3 files changed, 568 insertions, 0 deletions
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index f3416a344..d2f21031e 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -78,6 +78,13 @@ config IOSCHED_SIOPLUS basic merging, trying to keep a minimum overhead. It is aimed mainly for aleatory access devices (eg: flash devices). +config IOSCHED_FIOPS + tristate "IOPS based I/O scheduler" + default y + ---help--- + This is an IOPS based I/O scheduler. It will try to distribute + IOPS equally among all processes in the system. It's mainly for + Flash based storage. choice prompt "Default I/O scheduler" @@ -111,6 +118,9 @@ choice config DEFAULT_ZEN bool "ZEN" if IOSCHED_ZEN=y + config DEFAULT_FIOPS + bool "FIOPS" if IOSCHED_FIOPS=y + config DEFAULT_NOOP bool "No-op" @@ -122,6 +132,7 @@ config DEFAULT_IOSCHED default "row" if DEFAULT_ROW default "sio" if DEFAULT_SIO default "cfq" if DEFAULT_CFQ + default "fiops" if DEFAULT_FIOPS default "noop" if DEFAULT_NOOP default "zen" if DEFAULT_ZEN default "sioplus" if DEFAULT_SIOPLUS diff --git a/block/Makefile b/block/Makefile index de1a4b3b5..2c2ddfb0a 100644 --- a/block/Makefile +++ b/block/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_IOSCHED_ZEN) += zen-iosched.o obj-$(CONFIG_IOSCHED_SIOPLUS) += sioplus-iosched.o +obj-$(CONFIG_IOSCHED_FIOPS) += fiops-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o diff --git a/block/fiops-iosched.c b/block/fiops-iosched.c new file mode 100644 index 000000000..9db7f9368 --- /dev/null +++ b/block/fiops-iosched.c @@ -0,0 +1,556 @@ +/* + * IOPS based IO scheduler. Based on CFQ. + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> + * Shaohua Li <shli@kernel.org> + */ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/elevator.h> +#include <linux/jiffies.h> +#include <linux/rbtree.h> +#include <linux/ioprio.h> +#include "blk.h" + +#define VIOS_SCALE_SHIFT 10 +#define VIOS_SCALE (1 << VIOS_SCALE_SHIFT) + +struct fiops_rb_root { + struct rb_root rb; + struct rb_node *left; + unsigned count; + + u64 min_vios; +}; +#define FIOPS_RB_ROOT (struct fiops_rb_root) { .rb = RB_ROOT} + +struct fiops_data { + struct request_queue *queue; + + struct fiops_rb_root service_tree; + + unsigned int busy_queues; + unsigned int in_flight[2]; + + struct work_struct unplug_work; +}; + +struct fiops_ioc { + struct io_cq icq; + + unsigned int flags; + struct fiops_data *fiopsd; + struct rb_node rb_node; + u64 vios; /* key in service_tree */ + struct fiops_rb_root *service_tree; + + unsigned int in_flight; + + struct rb_root sort_list; + struct list_head fifo; + + pid_t pid; +}; + +#define ioc_service_tree(ioc) (&((ioc)->fiopsd->service_tree)) +#define RQ_CIC(rq) icq_to_cic((rq)->elv.icq) + +enum ioc_state_flags { + FIOPS_IOC_FLAG_on_rr = 0, /* on round-robin busy list */ +}; + +#define FIOPS_IOC_FNS(name) \ +static inline void fiops_mark_ioc_##name(struct fiops_ioc *ioc) \ +{ \ + ioc->flags |= (1 << FIOPS_IOC_FLAG_##name); \ +} \ +static inline void fiops_clear_ioc_##name(struct fiops_ioc *ioc) \ +{ \ + ioc->flags &= ~(1 << FIOPS_IOC_FLAG_##name); \ +} \ +static inline int fiops_ioc_##name(const struct fiops_ioc *ioc) \ +{ \ + return ((ioc)->flags & (1 << FIOPS_IOC_FLAG_##name)) != 0; \ +} + +FIOPS_IOC_FNS(on_rr); +#undef FIOPS_IOC_FNS + +static inline struct fiops_ioc *icq_to_cic(struct io_cq *icq) +{ + /* cic->icq is the first member, %NULL will convert to %NULL */ + return container_of(icq, struct fiops_ioc, icq); +} + +static inline struct fiops_ioc *fiops_cic_lookup(struct fiops_data *fiopsd, + struct io_context *ioc) +{ + if (ioc) + return icq_to_cic(ioc_lookup_icq(ioc, fiopsd->queue)); + return NULL; +} + +/* + * The below is leftmost cache rbtree addon + */ +static struct fiops_ioc *fiops_rb_first(struct fiops_rb_root *root) +{ + /* Service tree is empty */ + if (!root->count) + return NULL; + + if (!root->left) + root->left = rb_first(&root->rb); + + if (root->left) + return rb_entry(root->left, struct fiops_ioc, rb_node); + + return NULL; +} + +static void rb_erase_init(struct rb_node *n, struct rb_root *root) +{ + rb_erase(n, root); + RB_CLEAR_NODE(n); +} + +static void fiops_rb_erase(struct rb_node *n, struct fiops_rb_root *root) +{ + if (root->left == n) + root->left = NULL; + rb_erase_init(n, &root->rb); + --root->count; +} + +static inline u64 max_vios(u64 min_vios, u64 vios) +{ + s64 delta = (s64)(vios - min_vios); + if (delta > 0) + min_vios = vios; + + return min_vios; +} + +static void fiops_update_min_vios(struct fiops_rb_root *service_tree) +{ + struct fiops_ioc *ioc; + + ioc = fiops_rb_first(service_tree); + if (!ioc) + return; + service_tree->min_vios = max_vios(service_tree->min_vios, ioc->vios); +} + +/* + * The fiopsd->service_trees holds all pending fiops_ioc's that have + * requests waiting to be processed. It is sorted in the order that + * we will service the queues. + */ +static void fiops_service_tree_add(struct fiops_data *fiopsd, + struct fiops_ioc *ioc) +{ + struct rb_node **p, *parent; + struct fiops_ioc *__ioc; + struct fiops_rb_root *service_tree = ioc_service_tree(ioc); + u64 vios; + int left; + + /* New added IOC */ + if (RB_EMPTY_NODE(&ioc->rb_node)) + vios = max_vios(service_tree->min_vios, ioc->vios); + else { + vios = ioc->vios; + /* ioc->service_tree might not equal to service_tree */ + fiops_rb_erase(&ioc->rb_node, ioc->service_tree); + ioc->service_tree = NULL; + } + + left = 1; + parent = NULL; + ioc->service_tree = service_tree; + p = &service_tree->rb.rb_node; + while (*p) { + struct rb_node **n; + + parent = *p; + __ioc = rb_entry(parent, struct fiops_ioc, rb_node); + + /* + * sort by key, that represents service time. + */ + if (vios < __ioc->vios) + n = &(*p)->rb_left; + else { + n = &(*p)->rb_right; + left = 0; + } + + p = n; + } + + if (left) + service_tree->left = &ioc->rb_node; + + ioc->vios = vios; + rb_link_node(&ioc->rb_node, parent, p); + rb_insert_color(&ioc->rb_node, &service_tree->rb); + service_tree->count++; + + fiops_update_min_vios(service_tree); +} + +/* + * Update ioc's position in the service tree. + */ +static void fiops_resort_rr_list(struct fiops_data *fiopsd, + struct fiops_ioc *ioc) +{ + /* + * Resorting requires the ioc to be on the RR list already. + */ + if (fiops_ioc_on_rr(ioc)) + fiops_service_tree_add(fiopsd, ioc); +} + +/* + * add to busy list of queues for service, trying to be fair in ordering + * the pending list according to last request service + */ +static void fiops_add_ioc_rr(struct fiops_data *fiopsd, struct fiops_ioc *ioc) +{ + BUG_ON(fiops_ioc_on_rr(ioc)); + fiops_mark_ioc_on_rr(ioc); + + fiopsd->busy_queues++; + + fiops_resort_rr_list(fiopsd, ioc); +} + +/* + * Called when the ioc no longer has requests pending, remove it from + * the service tree. + */ +static void fiops_del_ioc_rr(struct fiops_data *fiopsd, struct fiops_ioc *ioc) +{ + BUG_ON(!fiops_ioc_on_rr(ioc)); + fiops_clear_ioc_on_rr(ioc); + + if (!RB_EMPTY_NODE(&ioc->rb_node)) { + fiops_rb_erase(&ioc->rb_node, ioc->service_tree); + ioc->service_tree = NULL; + } + + BUG_ON(!fiopsd->busy_queues); + fiopsd->busy_queues--; +} + +/* + * rb tree support functions + */ +static void fiops_del_rq_rb(struct request *rq) +{ + struct fiops_ioc *ioc = RQ_CIC(rq); + + elv_rb_del(&ioc->sort_list, rq); +} + +static void fiops_add_rq_rb(struct request *rq) +{ + struct fiops_ioc *ioc = RQ_CIC(rq); + struct fiops_data *fiopsd = ioc->fiopsd; + + elv_rb_add(&ioc->sort_list, rq); + + if (!fiops_ioc_on_rr(ioc)) + fiops_add_ioc_rr(fiopsd, ioc); +} + +static void fiops_reposition_rq_rb(struct fiops_ioc *ioc, struct request *rq) +{ + elv_rb_del(&ioc->sort_list, rq); + fiops_add_rq_rb(rq); +} + +static void fiops_remove_request(struct request *rq) +{ + list_del_init(&rq->queuelist); + fiops_del_rq_rb(rq); +} + +static u64 fiops_scaled_vios(struct fiops_data *fiopsd, + struct fiops_ioc *ioc, struct request *rq) +{ + return VIOS_SCALE; +} + +/* return vios dispatched */ +static u64 fiops_dispatch_request(struct fiops_data *fiopsd, + struct fiops_ioc *ioc) +{ + struct request *rq; + struct request_queue *q = fiopsd->queue; + + rq = rq_entry_fifo(ioc->fifo.next); + + fiops_remove_request(rq); + elv_dispatch_add_tail(q, rq); + + fiopsd->in_flight[rq_is_sync(rq)]++; + ioc->in_flight++; + + return fiops_scaled_vios(fiopsd, ioc, rq); +} + +static int fiops_forced_dispatch(struct fiops_data *fiopsd) +{ + struct fiops_ioc *ioc; + int dispatched = 0; + + while ((ioc = fiops_rb_first(&fiopsd->service_tree)) != NULL) { + while (!list_empty(&ioc->fifo)) { + fiops_dispatch_request(fiopsd, ioc); + dispatched++; + } + if (fiops_ioc_on_rr(ioc)) + fiops_del_ioc_rr(fiopsd, ioc); + } + return dispatched; +} + +static struct fiops_ioc *fiops_select_ioc(struct fiops_data *fiopsd) +{ + struct fiops_ioc *ioc; + + if (RB_EMPTY_ROOT(&fiopsd->service_tree.rb)) + return NULL; + ioc = fiops_rb_first(&fiopsd->service_tree); + return ioc; +} + +static void fiops_charge_vios(struct fiops_data *fiopsd, + struct fiops_ioc *ioc, u64 vios) +{ + struct fiops_rb_root *service_tree = ioc->service_tree; + ioc->vios += vios; + + if (RB_EMPTY_ROOT(&ioc->sort_list)) + fiops_del_ioc_rr(fiopsd, ioc); + else + fiops_resort_rr_list(fiopsd, ioc); + + fiops_update_min_vios(service_tree); +} + +static int fiops_dispatch_requests(struct request_queue *q, int force) +{ + struct fiops_data *fiopsd = q->elevator->elevator_data; + struct fiops_ioc *ioc; + u64 vios; + + if (unlikely(force)) + return fiops_forced_dispatch(fiopsd); + + ioc = fiops_select_ioc(fiopsd); + if (!ioc) + return 0; + + vios = fiops_dispatch_request(fiopsd, ioc); + + fiops_charge_vios(fiopsd, ioc, vios); + return 1; +} + +static void fiops_insert_request(struct request_queue *q, struct request *rq) +{ + struct fiops_ioc *ioc = RQ_CIC(rq); + + list_add_tail(&rq->queuelist, &ioc->fifo); + + fiops_add_rq_rb(rq); +} + +/* + * scheduler run of queue, if there are requests pending and no one in the + * driver that will restart queueing + */ +static inline void fiops_schedule_dispatch(struct fiops_data *fiopsd) +{ + if (fiopsd->busy_queues) + kblockd_schedule_work(fiopsd->queue, &fiopsd->unplug_work); +} + +static void fiops_completed_request(struct request_queue *q, struct request *rq) +{ + struct fiops_data *fiopsd = q->elevator->elevator_data; + struct fiops_ioc *ioc = RQ_CIC(rq); + + fiopsd->in_flight[rq_is_sync(rq)]--; + ioc->in_flight--; + + if (fiopsd->in_flight[0] + fiopsd->in_flight[1] == 0) + fiops_schedule_dispatch(fiopsd); +} + +static struct request * +fiops_find_rq_fmerge(struct fiops_data *fiopsd, struct bio *bio) +{ + struct task_struct *tsk = current; + struct fiops_ioc *cic; + + cic = fiops_cic_lookup(fiopsd, tsk->io_context); + + if (cic) { + sector_t sector = bio->bi_sector + bio_sectors(bio); + + return elv_rb_find(&cic->sort_list, sector); + } + + return NULL; +} + +static int fiops_merge(struct request_queue *q, struct request **req, + struct bio *bio) +{ + struct fiops_data *fiopsd = q->elevator->elevator_data; + struct request *__rq; + + __rq = fiops_find_rq_fmerge(fiopsd, bio); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + + return ELEVATOR_NO_MERGE; +} + +static void fiops_merged_request(struct request_queue *q, struct request *req, + int type) +{ + if (type == ELEVATOR_FRONT_MERGE) { + struct fiops_ioc *ioc = RQ_CIC(req); + + fiops_reposition_rq_rb(ioc, req); + } +} + +static void +fiops_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct fiops_ioc *ioc = RQ_CIC(rq); + struct fiops_data *fiopsd = q->elevator->elevator_data; + + fiops_remove_request(next); + + ioc = RQ_CIC(next); + /* + * all requests of this task are merged to other tasks, delete it + * from the service tree. + */ + if (fiops_ioc_on_rr(ioc) && RB_EMPTY_ROOT(&ioc->sort_list)) + fiops_del_ioc_rr(fiopsd, ioc); +} + +static int fiops_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + struct fiops_data *fiopsd = q->elevator->elevator_data; + struct fiops_ioc *cic; + + /* + * Lookup the ioc that this bio will be queued with. Allow + * merge only if rq is queued there. + */ + cic = fiops_cic_lookup(fiopsd, current->io_context); + + return cic == RQ_CIC(rq); +} + +static void fiops_exit_queue(struct elevator_queue *e) +{ + struct fiops_data *fiopsd = e->elevator_data; + + cancel_work_sync(&fiopsd->unplug_work); + + kfree(fiopsd); +} + +static void fiops_kick_queue(struct work_struct *work) +{ + struct fiops_data *fiopsd = + container_of(work, struct fiops_data, unplug_work); + struct request_queue *q = fiopsd->queue; + + spin_lock_irq(q->queue_lock); + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); +} + +static void *fiops_init_queue(struct request_queue *q) +{ + struct fiops_data *fiopsd; + + fiopsd = kzalloc_node(sizeof(*fiopsd), GFP_KERNEL, q->node); + if (!fiopsd) + return NULL; + + fiopsd->queue = q; + + fiopsd->service_tree = FIOPS_RB_ROOT; + + INIT_WORK(&fiopsd->unplug_work, fiops_kick_queue); + + return fiopsd; +} + +static void fiops_init_icq(struct io_cq *icq) +{ + struct fiops_data *fiopsd = icq->q->elevator->elevator_data; + struct fiops_ioc *ioc = icq_to_cic(icq); + + RB_CLEAR_NODE(&ioc->rb_node); + INIT_LIST_HEAD(&ioc->fifo); + ioc->sort_list = RB_ROOT; + + ioc->fiopsd = fiopsd; + + ioc->pid = current->pid; +} + +static struct elevator_type iosched_fiops = { + .ops = { + .elevator_merge_fn = fiops_merge, + .elevator_merged_fn = fiops_merged_request, + .elevator_merge_req_fn = fiops_merged_requests, + .elevator_allow_merge_fn = fiops_allow_merge, + .elevator_dispatch_fn = fiops_dispatch_requests, + .elevator_add_req_fn = fiops_insert_request, + .elevator_completed_req_fn = fiops_completed_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_init_icq_fn = fiops_init_icq, + .elevator_init_fn = fiops_init_queue, + .elevator_exit_fn = fiops_exit_queue, + }, + .icq_size = sizeof(struct fiops_ioc), + .icq_align = __alignof__(struct fiops_ioc), + .elevator_name = "fiops", + .elevator_owner = THIS_MODULE, +}; + +static int __init fiops_init(void) +{ + return elv_register(&iosched_fiops); +} + +static void __exit fiops_exit(void) +{ + elv_unregister(&iosched_fiops); +} + +module_init(fiops_init); +module_exit(fiops_exit); + +MODULE_AUTHOR("Jens Axboe, Shaohua Li <shli@kernel.org>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IOPS based IO scheduler"); |
