From: Nick Piggin This really should get into 2.6 sometime. Not only because people want to have fun switching them around, but because different queues have different needs: flash devices, nbd may want noop, a database raid deadline, AS for your system disk. Anyway, q->elevator is now dynamically allocated. Allocation and freeing are handled by elevator.c. kobject / sysfs stuff is also handled there. A side effect is it introduces blk_wait_free_list which can be used for drivers to wait for the queue to become empty. I think this was needed for safe disk hotplug. Documentation/block/biodoc.txt | 17 +++ drivers/block/as-iosched.c | 125 ++++++++++++++------------ drivers/block/cfq-iosched.c | 78 ++++++++-------- drivers/block/deadline-iosched.c | 53 ++++++----- drivers/block/elevator.c | 91 +++++++++++++----- drivers/block/ll_rw_blk.c | 187 +++++++++++++++++++++++++++++++++------ include/linux/blkdev.h | 21 +++- include/linux/elevator.h | 10 +- 8 files changed, 413 insertions(+), 169 deletions(-) diff -puN Documentation/block/biodoc.txt~elv-select Documentation/block/biodoc.txt --- 25/Documentation/block/biodoc.txt~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/Documentation/block/biodoc.txt 2003-10-17 19:07:35.000000000 -0700 @@ -969,10 +969,23 @@ elevator_set_req_fn elevator_put_req_fn Must be used to allocate and free any elevator specific storate for a request. -elevator_init_fn -elevator_exit_fn Allocate and free any elevator specific storage +elevator_alloc_fn +elevator_release_fn Allocate and free any elevator specific storage for a queue. +elevator_init_fn +elevator_exit_fn Initialise and shut down an elevator with an + associated queue. init must not fail - failing + routines must be performed in elevator_alloc. + Queue will be empty before exit is called and + no future requests will be inserted. + +4.1a Calling order for startup and shutdown functions. 
+elevator_alloc_fn +elevator_init_fn +elevator_exit_fn +elevator_release_fn + 4.2 I/O scheduler implementation The generic i/o scheduler algorithm attempts to sort/merge/batch requests for optimal disk scan and request servicing performance (based on generic diff -puN drivers/block/as-iosched.c~elv-select drivers/block/as-iosched.c --- 25/drivers/block/as-iosched.c~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/drivers/block/as-iosched.c 2003-10-17 19:07:35.000000000 -0700 @@ -606,7 +606,7 @@ static void as_antic_stop(struct as_data static void as_antic_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); @@ -906,7 +906,7 @@ void update_write_batch(struct as_data * */ static void as_completed_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); struct as_io_context *aic; @@ -979,7 +979,7 @@ static void as_remove_queued_request(req { struct as_rq *arq = RQ_DATA(rq); const int data_dir = arq->is_sync; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; WARN_ON(arq->state != AS_RQ_QUEUED); @@ -1305,7 +1305,7 @@ fifo_expired: static struct request *as_next_request(request_queue_t *q) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct request *rq = NULL; /* @@ -1412,7 +1412,7 @@ static void as_add_request(struct as_dat */ static void as_requeue_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (arq) { @@ -1434,7 +1434,7 @@ static void as_requeue_request(request_q static void 
as_insert_request(request_queue_t *q, struct request *rq, int where) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); /* barriers must flush the reorder queue */ @@ -1475,7 +1475,7 @@ as_insert_request(request_queue_t *q, st */ static int as_queue_empty(request_queue_t *q) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; if (!list_empty(&ad->fifo_list[REQ_ASYNC]) || !list_empty(&ad->fifo_list[REQ_SYNC]) @@ -1514,7 +1514,7 @@ as_latter_request(request_queue_t *q, st static int as_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; int ret; @@ -1569,7 +1569,7 @@ out_insert: static void as_merged_request(request_queue_t *q, struct request *req) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); /* @@ -1614,7 +1614,7 @@ static void as_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); struct as_rq *anext = RQ_DATA(next); @@ -1700,7 +1700,7 @@ static void as_work_handler(void *data) static void as_put_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (!arq) { @@ -1714,7 +1714,7 @@ static void as_put_request(request_queue static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); if 
(arq) { @@ -1735,7 +1735,7 @@ static int as_set_request(request_queue_ static int as_may_queue(request_queue_t *q, int rw) { int ret = 0; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { @@ -1748,54 +1748,18 @@ static int as_may_queue(request_queue_t return ret; } -static void as_exit(request_queue_t *q, elevator_t *e) -{ - struct as_data *ad = e->elevator_data; - - del_timer_sync(&ad->antic_timer); - kblockd_flush(); - - BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); - BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); - - mempool_destroy(ad->arq_pool); - put_io_context(ad->io_context); - kfree(ad->hash); - kfree(ad); -} - /* * initialize elevator private data (as_data), and alloc a arq for * each request on the free lists */ -static int as_init(request_queue_t *q, elevator_t *e) +static void as_init(request_queue_t *q, elevator_t *e) { - struct as_data *ad; + struct as_data *ad = e->elevator_data; int i; - if (!arq_pool) - return -ENOMEM; - - ad = kmalloc(sizeof(*ad), GFP_KERNEL); - if (!ad) - return -ENOMEM; - memset(ad, 0, sizeof(*ad)); - + q->elevator = e; ad->q = q; /* Identify what queue the data belongs to */ - ad->hash = kmalloc(sizeof(struct list_head)*AS_HASH_ENTRIES,GFP_KERNEL); - if (!ad->hash) { - kfree(ad); - return -ENOMEM; - } - - ad->arq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, arq_pool); - if (!ad->arq_pool) { - kfree(ad->hash); - kfree(ad); - return -ENOMEM; - } - /* anticipatory scheduling helpers */ ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; @@ -1815,7 +1779,6 @@ static int as_init(request_queue_t *q, e ad->antic_expire = default_antic_expire; ad->batch_expire[REQ_SYNC] = default_read_batch_expire; ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; - e->elevator_data = ad; ad->current_batch_expires = jiffies 
+ ad->batch_expire[REQ_SYNC]; ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; @@ -1823,8 +1786,58 @@ static int as_init(request_queue_t *q, e ad->write_batch_count = 2; ad->new_success = 512; +} - return 0; +static void as_exit(request_queue_t *q, elevator_t *e) +{ + struct as_data *ad = e->elevator_data; + + BUG_ON(!as_queue_empty(ad->q)); + put_io_context(ad->io_context); +} + +static int as_alloc(elevator_t *e) +{ + struct as_data *ad; + + if (!arq_pool) + return -ENOMEM; + + ad = kmalloc(sizeof(*ad), GFP_KERNEL); + if (!ad) + return -ENOMEM; + memset(ad, 0, sizeof(*ad)); + + ad->hash = kmalloc(sizeof(struct list_head)*AS_HASH_ENTRIES,GFP_KERNEL); + if (!ad->hash) { + kfree(ad); + return -ENOMEM; + } + + ad->arq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, + mempool_free_slab, arq_pool); + if (!ad->arq_pool) { + kfree(ad->hash); + kfree(ad); + return -ENOMEM; + } + + e->elevator_data = ad; + + return 0; +} + +static void as_release(elevator_t *e) +{ + struct as_data *ad = e->elevator_data; + + del_timer_sync(&ad->antic_timer); + kblockd_flush(); + + mempool_destroy(ad->arq_pool); + kfree(ad->hash); + + kfree(ad); } /* @@ -1925,7 +1938,7 @@ static struct attribute *default_attrs[] NULL, }; -#define to_as(atr) container_of((atr), struct as_fs_entry, attr) +#define to_as(ATR) container_of((ATR), struct as_fs_entry, ATR) static ssize_t as_attr_show(struct kobject *kobj, struct attribute *attr, char *page) @@ -1992,6 +2005,8 @@ elevator_t iosched_as = { .elevator_may_queue_fn = as_may_queue, .elevator_init_fn = as_init, .elevator_exit_fn = as_exit, + .elevator_alloc_fn = as_alloc, + .elevator_release_fn = as_release, .elevator_ktype = &as_ktype, .elevator_name = "anticipatory", diff -puN drivers/block/cfq-iosched.c~elv-select drivers/block/cfq-iosched.c --- 25/drivers/block/cfq-iosched.c~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/drivers/block/cfq-iosched.c 2003-10-17 19:07:35.000000000 -0700 @@ -238,7 +238,7 @@ out: static void 
cfq_remove_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); if (crq) { @@ -259,7 +259,7 @@ static void cfq_remove_request(request_q static int cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; int ret; @@ -297,7 +297,7 @@ out_insert: static void cfq_merged_request(request_queue_t *q, struct request *req) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(req); cfq_del_crq_hash(crq); @@ -393,7 +393,7 @@ restart: static struct request *cfq_next_request(request_queue_t *q) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct request *rq; if (!list_empty(cfqd->dispatch)) { @@ -483,7 +483,7 @@ static void cfq_enqueue(struct cfq_data static void cfq_insert_request(request_queue_t *q, struct request *rq, int where) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); switch (where) { @@ -514,7 +514,7 @@ cfq_insert_request(request_queue_t *q, s static int cfq_queue_empty(request_queue_t *q) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list)) return 1; @@ -548,7 +548,7 @@ cfq_latter_request(request_queue_t *q, s static int cfq_may_queue(request_queue_t *q, int rw) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; int ret = 1; @@ -573,7 +573,7 @@ out: static void cfq_put_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = 
q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); if (crq) { @@ -587,7 +587,7 @@ static void cfq_put_request(request_queu static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = mempool_alloc(cfqd->crq_pool, gfp_mask); if (crq) { @@ -602,28 +602,14 @@ static int cfq_set_request(request_queue return 1; } -static void cfq_exit(request_queue_t *q, elevator_t *e) -{ - struct cfq_data *cfqd = e->elevator_data; - - e->elevator_data = NULL; - mempool_destroy(cfqd->crq_pool); - kfree(cfqd->crq_hash); - kfree(cfqd->cfq_hash); - kfree(cfqd); -} - -static int cfq_init(request_queue_t *q, elevator_t *e) +static int cfq_alloc(elevator_t *e) { struct cfq_data *cfqd; - int i; cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); if (!cfqd) return -ENOMEM; - memset(cfqd, 0, sizeof(*cfqd)); - INIT_LIST_HEAD(&cfqd->rr_list); cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); if (!cfqd->crq_hash) @@ -637,13 +623,43 @@ static int cfq_init(request_queue_t *q, if (!cfqd->crq_pool) goto out_crqpool; + e->elevator_data = cfqd; + + return 0; +out_crqpool: + kfree(cfqd->cfq_hash); +out_cfqhash: + kfree(cfqd->crq_hash); +out_crqhash: + kfree(cfqd); + return -ENOMEM; + +} + +static void cfq_release(elevator_t *e) +{ + struct cfq_data *cfqd = e->elevator_data; + + e->elevator_data = NULL; + mempool_destroy(cfqd->crq_pool); + kfree(cfqd->crq_hash); + kfree(cfqd->cfq_hash); + kfree(cfqd); +} + +static void cfq_init(request_queue_t *q, elevator_t *e) +{ + struct cfq_data *cfqd = e->elevator_data; + int i; + + INIT_LIST_HEAD(&cfqd->rr_list); + for (i = 0; i < CFQ_MHASH_ENTRIES; i++) INIT_LIST_HEAD(&cfqd->crq_hash[i]); for (i = 0; i < CFQ_QHASH_ENTRIES; i++) INIT_LIST_HEAD(&cfqd->cfq_hash[i]); cfqd->dispatch = &q->queue_head; - e->elevator_data = cfqd; /* * 
just set it to some high value, we want anyone to be able to queue @@ -651,15 +667,6 @@ static int cfq_init(request_queue_t *q, */ cfqd->max_queued = q->nr_requests; q->nr_requests = 8192; - - return 0; -out_crqpool: - kfree(cfqd->cfq_hash); -out_cfqhash: - kfree(cfqd->crq_hash); -out_crqhash: - kfree(cfqd); - return -ENOMEM; } static int __init cfq_slab_setup(void) @@ -701,7 +708,8 @@ elevator_t iosched_cfq = { .elevator_put_req_fn = cfq_put_request, .elevator_may_queue_fn = cfq_may_queue, .elevator_init_fn = cfq_init, - .elevator_exit_fn = cfq_exit, + .elevator_alloc_fn = cfq_alloc, + .elevator_release_fn = cfq_release, }; EXPORT_SYMBOL(iosched_cfq); diff -puN drivers/block/deadline-iosched.c~elv-select drivers/block/deadline-iosched.c --- 25/drivers/block/deadline-iosched.c~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/drivers/block/deadline-iosched.c 2003-10-17 19:07:35.000000000 -0700 @@ -289,7 +289,7 @@ deadline_find_first_drq(struct deadline_ static inline void deadline_add_request(struct request_queue *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); const int data_dir = rq_data_dir(drq->request); @@ -317,7 +317,7 @@ static void deadline_remove_request(requ struct deadline_rq *drq = RQ_DATA(rq); if (drq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; list_del_init(&drq->fifo); deadline_remove_merge_hints(q, drq); @@ -328,7 +328,7 @@ static void deadline_remove_request(requ static int deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct request *__rq; int ret; @@ -383,7 +383,7 @@ out_insert: static void deadline_merged_request(request_queue_t *q, struct request *req) { - struct deadline_data *dd = 
q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); /* @@ -407,7 +407,7 @@ static void deadline_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); struct deadline_rq *dnext = RQ_DATA(next); @@ -604,7 +604,7 @@ dispatch_request: static struct request *deadline_next_request(request_queue_t *q) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct request *rq; /* @@ -625,7 +625,7 @@ dispatch: static void deadline_insert_request(request_queue_t *q, struct request *rq, int where) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; /* barriers must flush the reorder queue */ if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) @@ -653,7 +653,7 @@ deadline_insert_request(request_queue_t static int deadline_queue_empty(request_queue_t *q) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; if (!list_empty(&dd->fifo_list[WRITE]) || !list_empty(&dd->fifo_list[READ]) @@ -687,7 +687,7 @@ deadline_latter_request(request_queue_t return NULL; } -static void deadline_exit(request_queue_t *q, elevator_t *e) +static void deadline_release(elevator_t *e) { struct deadline_data *dd = e->elevator_data; @@ -699,14 +699,9 @@ static void deadline_exit(request_queue_ kfree(dd); } -/* - * initialize elevator private data (deadline_data), and alloc a drq for - * each request on the free lists - */ -static int deadline_init(request_queue_t *q, elevator_t *e) +static int deadline_alloc(elevator_t *e) { struct deadline_data *dd; - int i; if (!drq_pool) return -ENOMEM; @@ -722,13 +717,30 @@ static int deadline_init(request_queue_t return -ENOMEM; } 
- dd->drq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, drq_pool); + dd->drq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, + mempool_free_slab, drq_pool); if (!dd->drq_pool) { kfree(dd->hash); kfree(dd); return -ENOMEM; } + e->elevator_data = dd; + + return 0; +} + +/* + * initialize elevator private data (deadline_data), and alloc a drq for + * each request on the free lists + */ +static void deadline_init(request_queue_t *q, elevator_t *e) +{ + struct deadline_data *dd = e->elevator_data; + int i; + + q->elevator = e; + for (i = 0; i < DL_HASH_ENTRIES; i++) INIT_LIST_HEAD(&dd->hash[i]); @@ -742,13 +754,11 @@ static int deadline_init(request_queue_t dd->writes_starved = writes_starved; dd->front_merges = 1; dd->fifo_batch = fifo_batch; - e->elevator_data = dd; - return 0; } static void deadline_put_request(request_queue_t *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); if (drq) { @@ -760,7 +770,7 @@ static void deadline_put_request(request static int deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq; drq = mempool_alloc(dd->drq_pool, gfp_mask); @@ -931,7 +941,8 @@ elevator_t iosched_deadline = { .elevator_set_req_fn = deadline_set_request, .elevator_put_req_fn = deadline_put_request, .elevator_init_fn = deadline_init, - .elevator_exit_fn = deadline_exit, + .elevator_alloc_fn = deadline_alloc, + .elevator_release_fn = deadline_release, .elevator_ktype = &deadline_ktype, .elevator_name = "deadline", diff -puN drivers/block/elevator.c~elv-select drivers/block/elevator.c --- 25/drivers/block/elevator.c~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/drivers/block/elevator.c 2003-10-17 19:07:35.000000000 -0700 @@ -89,29 +89,60 @@ 
inline int elv_try_last_merge(request_qu /* * general block -> elevator interface starts here */ -int elevator_init(request_queue_t *q, elevator_t *type) +void elevator_init(request_queue_t *q, elevator_t *e) { - elevator_t *e = &q->elevator; - - memcpy(e, type, sizeof(*e)); + q->elevator = e; INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; if (e->elevator_init_fn) - return e->elevator_init_fn(q, e); - - return 0; + e->elevator_init_fn(q, e); } void elevator_exit(request_queue_t *q) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; + + BUG_ON(q->rq.count[READ] || q->rq.count[WRITE]); if (e->elevator_exit_fn) e->elevator_exit_fn(q, e); } +elevator_t *elevator_alloc(elevator_t *type) +{ + elevator_t *e = kmalloc(sizeof(*type), GFP_KERNEL); + + if (e == NULL) + goto out_err; + + memcpy(e, type, sizeof(*e)); + + if (e->elevator_alloc_fn) + if (e->elevator_alloc_fn(e)) + goto out_alloc; + + return e; + +out_alloc: + kfree(e); +out_err: + return NULL; +} + +void elevator_release(struct kobject *kobj) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + + printk(KERN_INFO "releasing %s io scheduler\n", e->elevator_name); + + if (e->elevator_release_fn) + e->elevator_release_fn(e); + + kfree(e); +} + int elevator_global_init(void) { return 0; @@ -119,7 +150,7 @@ int elevator_global_init(void) int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_merge_fn) return e->elevator_merge_fn(q, req, bio); @@ -129,7 +160,7 @@ int elv_merge(request_queue_t *q, struct void elv_merged_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_merged_fn) e->elevator_merged_fn(q, rq); @@ -138,7 +169,7 @@ void elv_merged_request(request_queue_t void elv_merge_requests(request_queue_t *q, struct request *rq, struct request *next) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; 
if (q->last_merge == next) q->last_merge = NULL; @@ -153,8 +184,8 @@ void elv_requeue_request(request_queue_t * if iosched has an explicit requeue hook, then use that. otherwise * just put the request at the front of the queue */ - if (q->elevator.elevator_requeue_req_fn) - q->elevator.elevator_requeue_req_fn(q, rq); + if (q->elevator->elevator_requeue_req_fn) + q->elevator->elevator_requeue_req_fn(q, rq); else __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } @@ -165,7 +196,7 @@ void __elv_add_request(request_queue_t * if (plug) blk_plug_device(q); - q->elevator.elevator_add_req_fn(q, rq, where); + q->elevator->elevator_add_req_fn(q, rq, where); } void elv_add_request(request_queue_t *q, struct request *rq, int where, @@ -180,7 +211,7 @@ void elv_add_request(request_queue_t *q, static inline struct request *__elv_next_request(request_queue_t *q) { - return q->elevator.elevator_next_req_fn(q); + return q->elevator->elevator_next_req_fn(q); } struct request *elv_next_request(request_queue_t *q) @@ -225,7 +256,7 @@ struct request *elv_next_request(request void elv_remove_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; /* * the main clearing point for q->last_merge is on retrieval of @@ -243,7 +274,7 @@ void elv_remove_request(request_queue_t int elv_queue_empty(request_queue_t *q) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_queue_empty_fn) return e->elevator_queue_empty_fn(q); @@ -255,7 +286,7 @@ struct request *elv_latter_request(reque { struct list_head *next; - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_latter_req_fn) return e->elevator_latter_req_fn(q, rq); @@ -271,7 +302,7 @@ struct request *elv_former_request(reque { struct list_head *prev; - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_former_req_fn) return e->elevator_former_req_fn(q, rq); @@ -285,7 +316,7 @@ struct request 
*elv_former_request(reque int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_set_req_fn) return e->elevator_set_req_fn(q, rq, gfp_mask); @@ -296,7 +327,7 @@ int elv_set_request(request_queue_t *q, void elv_put_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_put_req_fn) e->elevator_put_req_fn(q, rq); @@ -304,7 +335,7 @@ void elv_put_request(request_queue_t *q, int elv_may_queue(request_queue_t *q, int rw) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_may_queue_fn) return e->elevator_may_queue_fn(q, rw); @@ -314,24 +345,32 @@ int elv_may_queue(request_queue_t *q, in void elv_completed_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (e->elevator_completed_req_fn) e->elevator_completed_req_fn(q, rq); } +static struct kobj_type default_ktype = { + .release = &elevator_release, +}; + int elv_register_queue(struct request_queue *q) { elevator_t *e; - e = &q->elevator; + e = q->elevator; e->kobj.parent = kobject_get(&q->kobj); if (!e->kobj.parent) return -EBUSY; snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - e->kobj.ktype = e->elevator_ktype; + if (e->elevator_ktype) { + e->elevator_ktype->release = &elevator_release; + e->kobj.ktype = e->elevator_ktype; + } else + e->kobj.ktype = &default_ktype; return kobject_register(&e->kobj); } @@ -339,7 +378,7 @@ int elv_register_queue(struct request_qu void elv_unregister_queue(struct request_queue *q) { if (q) { - elevator_t * e = &q->elevator; + elevator_t *e = q->elevator; kobject_unregister(&e->kobj); kobject_put(&q->kobj); } diff -puN drivers/block/ll_rw_blk.c~elv-select drivers/block/ll_rw_blk.c --- 25/drivers/block/ll_rw_blk.c~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/drivers/block/ll_rw_blk.c 2003-10-17 
19:07:35.000000000 -0700 @@ -1259,6 +1259,45 @@ out: EXPORT_SYMBOL(blk_run_queues); /** + * blk_wait_free_list + * @q: the request queue to wait on + * + * Description: + * Synchronously wait until all requests have been emptied out of the queue. + * Must be called with the queue marked QUEUE_FLAG_DEAD, or + * blk_set_queue_drain. + **/ +static void blk_wait_free_list(request_queue_t *q) +{ + DEFINE_WAIT(wait); + struct request_list *rl = &q->rq; + + if (!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) + && !blk_queue_drain(q)) { + WARN_ON(1); + /* It might be racy to set this here. Caller should be fixed */ + set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); + } + + prepare_to_wait(&rl->empty, &wait, TASK_UNINTERRUPTIBLE); + + if (rl->count[READ] || rl->count[WRITE] + || waitqueue_active(&rl->wait[READ]) + || waitqueue_active(&rl->wait[WRITE]) ) { + + spin_unlock_irq(q->queue_lock); + wake_up_all(&q->rq.wait[READ]); + wake_up_all(&q->rq.wait[WRITE]); + io_schedule(); + spin_lock_irq(q->queue_lock); + } + + finish_wait(&rl->empty, &wait); + + WARN_ON(rl->count[READ] || rl->count[WRITE]); +} + +/** * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed * @q: the request queue to be released * @@ -1280,7 +1319,8 @@ void blk_cleanup_queue(request_queue_t * if (!atomic_dec_and_test(&q->refcnt)) return; - elevator_exit(q); + if (q->elevator) + elevator_exit(q); del_timer_sync(&q->unplug_timer); kblockd_flush(); @@ -1303,8 +1343,10 @@ static int blk_init_free_list(request_qu rl->count[READ] = rl->count[WRITE] = 0; init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[WRITE]); + init_waitqueue_head(&rl->empty); - rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); + rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, + mempool_free_slab, request_cachep); if (!rl->rq_pool) return -ENOMEM; @@ -1328,30 +1370,38 @@ static elevator_t *chosen_elevator = #error "You must have at least 1 I/O 
scheduler selected" #endif -#if defined(CONFIG_IOSCHED_AS) || defined(CONFIG_IOSCHED_DEADLINE) || defined (CONFIG_IOSCHED_NOOP) -static int __init elevator_setup(char *str) +elevator_t *str_to_elv(const char *str) { #ifdef CONFIG_IOSCHED_DEADLINE - if (!strcmp(str, "deadline")) - chosen_elevator = &iosched_deadline; + if (!strncmp(str, "deadline", strlen("deadline"))) + return &iosched_deadline; #endif #ifdef CONFIG_IOSCHED_AS - if (!strcmp(str, "as")) - chosen_elevator = &iosched_as; + if (!strncmp(str, "as", strlen("as"))) + return &iosched_as; #endif #ifdef CONFIG_IOSCHED_CFQ - if (!strcmp(str, "cfq")) - chosen_elevator = &iosched_cfq; + if (!strncmp(str, "cfq", strlen("cfq"))) + return &iosched_cfq; #endif #ifdef CONFIG_IOSCHED_NOOP - if (!strcmp(str, "noop")) - chosen_elevator = &elevator_noop; + if (!strncmp(str, "noop", strlen("noop"))) + return &elevator_noop; #endif + + return NULL; +} + +static int __init elevator_setup(char *str) +{ + elevator_t *e = str_to_elv(str); + if (e != NULL) + chosen_elevator = e; + return 1; } __setup("elevator=", elevator_setup); -#endif /* CONFIG_IOSCHED_AS || CONFIG_IOSCHED_DEADLINE || CONFIG_IOSCHED_NOOP */ request_queue_t *blk_alloc_queue(int gfp_mask) { @@ -1402,7 +1452,7 @@ EXPORT_SYMBOL(blk_alloc_queue); request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { request_queue_t *q; - static int printed; + elevator_t *e; q = blk_alloc_queue(GFP_KERNEL); if (!q) @@ -1411,14 +1461,12 @@ request_queue_t *blk_init_queue(request_ if (blk_init_free_list(q)) goto out_init; - if (!printed) { - printed = 1; - printk("Using %s io scheduler\n", chosen_elevator->elevator_name); - } - - if (elevator_init(q, chosen_elevator)) + e = elevator_alloc(chosen_elevator); + if (!e) goto out_elv; + elevator_init(q, e); + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; @@ -1530,6 +1578,10 @@ static void freed_request(request_queue_ if (!waitqueue_active(&rl->wait[rw])) 
blk_clear_queue_full(q, rw); } + if ( unlikely(waitqueue_active(&rl->empty)) ) { + if (rl->count[READ] == 0 && rl->count[WRITE] == 0) + wake_up_all(&rl->empty); + } } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) @@ -1543,6 +1595,11 @@ static struct request *get_request(reque struct io_context *ioc = get_io_context(gfp_mask); spin_lock_irq(q->queue_lock); + if (blk_queue_drain(q)) { + spin_unlock_irq(q->queue_lock); + goto out; + } + if (rl->count[rw]+1 >= q->nr_requests) { /* * The queue will fill after this allocation, so set it as @@ -2796,6 +2853,79 @@ queue_var_store(unsigned long *var, cons return count; } +static ssize_t queue_elevator_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%s\n", q->elevator->elevator_name); +} + +static ssize_t +queue_elevator_store(struct request_queue *q, const char *page, size_t count) +{ + elevator_t *type, *elv; + unsigned long flags; + static DECLARE_MUTEX(switch_mutex); + + down(&switch_mutex); + + type = str_to_elv(page); + if (type == NULL) { + goto out; + } + + elv = elevator_alloc(type); + if (!elv) { + goto out; + } + + spin_lock_irqsave(q->queue_lock, flags); + + /* Wait for the request list to empty */ + blk_set_queue_drain(q); + blk_wait_free_list(q); + + /* Stop old elevator */ + elevator_exit(q); + + /* Unlock here should be OK. The elevator should not be entered because + * the queue is drained, and blocked... */ + spin_unlock_irqrestore(q->queue_lock, flags); + elv_unregister_queue(q); + spin_lock_irqsave(q->queue_lock, flags); + + /* Start new one */ + elevator_init(q, elv); + printk(KERN_INFO "elevator_init %s\n", q->elevator->elevator_name); + + spin_unlock_irqrestore(q->queue_lock, flags); + if (elv_register_queue(q)) { + /* + * Can't do much about it now... failure should not cause the + * device to stop working or future elevator selection to stop + * working though. 
+ */ + printk(KERN_INFO "elv_register_queue failed\n"); + WARN_ON(1); + } + spin_lock_irqsave(q->queue_lock, flags); + + /* Unblock the request list and wake waiters */ + blk_clear_queue_drain(q); + wake_up_all(&q->rq.wait[READ]); + wake_up_all(&q->rq.wait[WRITE]); + spin_unlock_irqrestore(q->queue_lock, flags); + + blk_run_queue(q); +out: + up(&switch_mutex); + return count; +} + +static struct queue_sysfs_entry queue_elevator_entry = { + .attr = {.name = "io_scheduler", .mode = S_IRUGO | S_IWUSR }, + .show = queue_elevator_show, + .store = queue_elevator_store, +}; + static ssize_t queue_requests_show(struct request_queue *q, char *page) { return queue_var_show(q->nr_requests, (page)); @@ -2844,6 +2974,7 @@ static struct queue_sysfs_entry queue_re static struct attribute *default_attrs[] = { &queue_requests_entry.attr, + &queue_elevator_entry.attr, NULL, }; @@ -2900,16 +3031,19 @@ int blk_register_queue(struct gendisk *d return -EBUSY; snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); - q->kobj.ktype = &queue_ktype; + if (q->elevator) + q->kobj.ktype = &queue_ktype; ret = kobject_register(&q->kobj); if (ret < 0) return ret; - ret = elv_register_queue(q); - if (ret) { - kobject_unregister(&q->kobj); - return ret; + if (q->elevator) { + ret = elv_register_queue(q); + if (ret) { + kobject_unregister(&q->kobj); + return ret; + } } return 0; @@ -2920,7 +3054,8 @@ void blk_unregister_queue(struct gendisk request_queue_t *q = disk->queue; if (q) { - elv_unregister_queue(q); + if (q->elevator) + elv_unregister_queue(q); kobject_unregister(&q->kobj); kobject_put(&disk->kobj); diff -puN include/linux/blkdev.h~elv-select include/linux/blkdev.h --- 25/include/linux/blkdev.h~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/include/linux/blkdev.h 2003-10-17 19:07:35.000000000 -0700 @@ -80,6 +80,7 @@ struct request_list { int count[2]; mempool_t *rq_pool; wait_queue_head_t wait[2]; + wait_queue_head_t empty; }; /* @@ -272,7 +273,7 @@ struct request_queue */ struct 
list_head queue_head; struct request *last_merge; - elevator_t elevator; + elevator_t *elevator; /* * the queue request freelist, one for reads and one for writes @@ -366,7 +367,8 @@ struct request_queue #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_READFULL 3 /* write queue has been filled */ #define QUEUE_FLAG_WRITEFULL 4 /* read queue has been filled */ -#define QUEUE_FLAG_DEAD 5 /* queue being torn down */ +#define QUEUE_FLAG_DRAIN 5 /* queue being drained */ +#define QUEUE_FLAG_DEAD 6 /* queue being torn down */ #define blk_queue_plugged(q) !list_empty(&(q)->plug_list) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) @@ -406,6 +408,21 @@ static inline void blk_clear_queue_full( clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); } +static inline int blk_queue_drain(struct request_queue *q) +{ + return test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); +} + +static inline void blk_set_queue_drain(struct request_queue *q) +{ + set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); +} + +static inline void blk_clear_queue_drain(struct request_queue *q) +{ + clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); +} + /* * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may diff -puN include/linux/elevator.h~elv-select include/linux/elevator.h --- 25/include/linux/elevator.h~elv-select 2003-10-17 19:07:35.000000000 -0700 +++ 25-akpm/include/linux/elevator.h 2003-10-17 19:07:35.000000000 -0700 @@ -21,8 +21,10 @@ typedef int (elevator_may_queue_fn) (req typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); -typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); +typedef void (elevator_init_fn) (request_queue_t *, elevator_t *); typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); +typedef int (elevator_alloc_fn) (elevator_t *); +typedef void (elevator_release_fn) (elevator_t *); struct elevator_s { @@ 
-48,6 +50,8 @@ struct elevator_s elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; + elevator_alloc_fn *elevator_alloc_fn; + elevator_release_fn *elevator_release_fn; void *elevator_data; @@ -99,8 +103,10 @@ extern elevator_t iosched_as; */ extern elevator_t iosched_cfq; -extern int elevator_init(request_queue_t *, elevator_t *); +extern void elevator_init(request_queue_t *, elevator_t *); extern void elevator_exit(request_queue_t *); +extern elevator_t *elevator_alloc(elevator_t *); +extern void elevator_release(struct kobject *); extern inline int elv_rq_merge_ok(struct request *, struct bio *); extern inline int elv_try_merge(struct request *, struct bio *); extern inline int elv_try_last_merge(request_queue_t *, struct bio *); _