Merge branch 'linus' into tracing/core

Merge reason: tracing/core was on a .30-rc1 base and was missing out on
              on a handful of tracing fixes present in .30-rc5-almost.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2009-05-07 11:17:13 +02:00
1603 changed files with 32881 additions and 29607 deletions

View File

@@ -17,9 +17,6 @@
#include <linux/rbtree.h>
#include <linux/interrupt.h>
#define REQ_SYNC 1
#define REQ_ASYNC 0
/*
* See Documentation/block/as-iosched.txt
*/
@@ -93,7 +90,7 @@ struct as_data {
struct list_head fifo_list[2];
struct request *next_rq[2]; /* next in sort order */
sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
sector_t last_sector[2]; /* last SYNC & ASYNC sectors */
unsigned long exit_prob; /* probability a task will exit while
being waited on */
@@ -109,7 +106,7 @@ struct as_data {
unsigned long last_check_fifo[2];
int changed_batch; /* 1: waiting for old batch to end */
int new_batch; /* 1: waiting on first read complete */
int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */
int batch_data_dir; /* current batch SYNC / ASYNC */
int write_batch_count; /* max # of reqs in a write batch */
int current_write_count; /* how many requests left this batch */
int write_batch_idled; /* has the write batch gone idle? */
@@ -554,7 +551,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
if (aic == NULL)
return;
if (data_dir == REQ_SYNC) {
if (data_dir == BLK_RW_SYNC) {
unsigned long in_flight = atomic_read(&aic->nr_queued)
+ atomic_read(&aic->nr_dispatched);
spin_lock(&aic->lock);
@@ -811,7 +808,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq)
*/
static void update_write_batch(struct as_data *ad)
{
unsigned long batch = ad->batch_expire[REQ_ASYNC];
unsigned long batch = ad->batch_expire[BLK_RW_ASYNC];
long write_time;
write_time = (jiffies - ad->current_batch_expires) + batch;
@@ -855,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
kblockd_schedule_work(q, &ad->antic_work);
ad->changed_batch = 0;
if (ad->batch_data_dir == REQ_SYNC)
if (ad->batch_data_dir == BLK_RW_SYNC)
ad->new_batch = 1;
}
WARN_ON(ad->nr_dispatched == 0);
@@ -869,7 +866,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
update_write_batch(ad);
ad->current_batch_expires = jiffies +
ad->batch_expire[REQ_SYNC];
ad->batch_expire[BLK_RW_SYNC];
ad->new_batch = 0;
}
@@ -960,7 +957,7 @@ static inline int as_batch_expired(struct as_data *ad)
if (ad->changed_batch || ad->new_batch)
return 0;
if (ad->batch_data_dir == REQ_SYNC)
if (ad->batch_data_dir == BLK_RW_SYNC)
/* TODO! add a check so a complete fifo gets written? */
return time_after(jiffies, ad->current_batch_expires);
@@ -986,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
*/
ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
if (data_dir == REQ_SYNC) {
if (data_dir == BLK_RW_SYNC) {
struct io_context *ioc = RQ_IOC(rq);
/* In case we have to anticipate after this */
copy_io_context(&ad->io_context, &ioc);
@@ -1025,41 +1022,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
static int as_dispatch_request(struct request_queue *q, int force)
{
struct as_data *ad = q->elevator->elevator_data;
const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]);
const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]);
struct request *rq;
if (unlikely(force)) {
/*
* Forced dispatch, accounting is useless. Reset
* accounting states and dump fifo_lists. Note that
* batch_data_dir is reset to REQ_SYNC to avoid
* batch_data_dir is reset to BLK_RW_SYNC to avoid
* screwing write batch accounting as write batch
* accounting occurs on W->R transition.
*/
int dispatched = 0;
ad->batch_data_dir = REQ_SYNC;
ad->batch_data_dir = BLK_RW_SYNC;
ad->changed_batch = 0;
ad->new_batch = 0;
while (ad->next_rq[REQ_SYNC]) {
as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
while (ad->next_rq[BLK_RW_SYNC]) {
as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]);
dispatched++;
}
ad->last_check_fifo[REQ_SYNC] = jiffies;
ad->last_check_fifo[BLK_RW_SYNC] = jiffies;
while (ad->next_rq[REQ_ASYNC]) {
as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
while (ad->next_rq[BLK_RW_ASYNC]) {
as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]);
dispatched++;
}
ad->last_check_fifo[REQ_ASYNC] = jiffies;
ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
return dispatched;
}
/* Signal that the write batch was uncontended, so we can't time it */
if (ad->batch_data_dir == REQ_ASYNC && !reads) {
if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) {
if (ad->current_write_count == 0 || !writes)
ad->write_batch_idled = 1;
}
@@ -1076,8 +1073,8 @@ static int as_dispatch_request(struct request_queue *q, int force)
*/
rq = ad->next_rq[ad->batch_data_dir];
if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
if (as_fifo_expired(ad, REQ_SYNC))
if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) {
if (as_fifo_expired(ad, BLK_RW_SYNC))
goto fifo_expired;
if (as_can_anticipate(ad, rq)) {
@@ -1090,7 +1087,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
/* we have a "next request" */
if (reads && !writes)
ad->current_batch_expires =
jiffies + ad->batch_expire[REQ_SYNC];
jiffies + ad->batch_expire[BLK_RW_SYNC];
goto dispatch_request;
}
}
@@ -1101,20 +1098,20 @@ static int as_dispatch_request(struct request_queue *q, int force)
*/
if (reads) {
BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC]));
BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC]));
if (writes && ad->batch_data_dir == REQ_SYNC)
if (writes && ad->batch_data_dir == BLK_RW_SYNC)
/*
* Last batch was a read, switch to writes
*/
goto dispatch_writes;
if (ad->batch_data_dir == REQ_ASYNC) {
if (ad->batch_data_dir == BLK_RW_ASYNC) {
WARN_ON(ad->new_batch);
ad->changed_batch = 1;
}
ad->batch_data_dir = REQ_SYNC;
rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
ad->batch_data_dir = BLK_RW_SYNC;
rq = rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next);
ad->last_check_fifo[ad->batch_data_dir] = jiffies;
goto dispatch_request;
}
@@ -1125,9 +1122,9 @@ static int as_dispatch_request(struct request_queue *q, int force)
if (writes) {
dispatch_writes:
BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC]));
BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC]));
if (ad->batch_data_dir == REQ_SYNC) {
if (ad->batch_data_dir == BLK_RW_SYNC) {
ad->changed_batch = 1;
/*
@@ -1137,11 +1134,11 @@ dispatch_writes:
*/
ad->new_batch = 0;
}
ad->batch_data_dir = REQ_ASYNC;
ad->batch_data_dir = BLK_RW_ASYNC;
ad->current_write_count = ad->write_batch_count;
ad->write_batch_idled = 0;
rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next);
ad->last_check_fifo[REQ_ASYNC] = jiffies;
rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next);
ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
goto dispatch_request;
}
@@ -1164,9 +1161,9 @@ fifo_expired:
if (ad->nr_dispatched)
return 0;
if (ad->batch_data_dir == REQ_ASYNC)
if (ad->batch_data_dir == BLK_RW_ASYNC)
ad->current_batch_expires = jiffies +
ad->batch_expire[REQ_ASYNC];
ad->batch_expire[BLK_RW_ASYNC];
else
ad->new_batch = 1;
@@ -1238,8 +1235,8 @@ static int as_queue_empty(struct request_queue *q)
{
struct as_data *ad = q->elevator->elevator_data;
return list_empty(&ad->fifo_list[REQ_ASYNC])
&& list_empty(&ad->fifo_list[REQ_SYNC]);
return list_empty(&ad->fifo_list[BLK_RW_ASYNC])
&& list_empty(&ad->fifo_list[BLK_RW_SYNC]);
}
static int
@@ -1346,8 +1343,8 @@ static void as_exit_queue(struct elevator_queue *e)
del_timer_sync(&ad->antic_timer);
cancel_work_sync(&ad->antic_work);
BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC]));
BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC]));
put_io_context(ad->io_context);
kfree(ad);
@@ -1372,18 +1369,18 @@ static void *as_init_queue(struct request_queue *q)
init_timer(&ad->antic_timer);
INIT_WORK(&ad->antic_work, as_work_handler);
INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
ad->sort_list[REQ_SYNC] = RB_ROOT;
ad->sort_list[REQ_ASYNC] = RB_ROOT;
ad->fifo_expire[REQ_SYNC] = default_read_expire;
ad->fifo_expire[REQ_ASYNC] = default_write_expire;
INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]);
INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]);
ad->sort_list[BLK_RW_SYNC] = RB_ROOT;
ad->sort_list[BLK_RW_ASYNC] = RB_ROOT;
ad->fifo_expire[BLK_RW_SYNC] = default_read_expire;
ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire;
ad->antic_expire = default_antic_expire;
ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire;
ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire;
ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC];
ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10;
if (ad->write_batch_count < 2)
ad->write_batch_count = 2;
@@ -1432,11 +1429,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
struct as_data *ad = e->elevator_data; \
return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
}
SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]);
SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]);
SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[BLK_RW_SYNC]);
SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]);
SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]);
SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]);
SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
@@ -1451,13 +1448,14 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
*(__PTR) = msecs_to_jiffies(*(__PTR)); \
return ret; \
}
STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX);
STORE_FUNCTION(as_write_expire_store,
&ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX);
STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
STORE_FUNCTION(as_read_batch_expire_store,
&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
&ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX);
STORE_FUNCTION(as_write_batch_expire_store,
&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
&ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX);
#undef STORE_FUNCTION
#define AS_ATTR(name) \

View File

@@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
return -ENXIO;
bio = bio_alloc(GFP_KERNEL, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = bio_end_empty_barrier;
bio->bi_private = &wait;
bio->bi_bdev = bdev;

View File

@@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
}
static struct request *
blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
blk_rq_init(q, rq);
rq->cmd_flags = rw | REQ_ALLOCED;
rq->cmd_flags = flags | REQ_ALLOCED;
if (priv) {
if (unlikely(elv_set_request(q, rq, gfp_mask))) {
@@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (priv)
rl->elvpriv++;
if (blk_queue_io_stat(q))
rw_flags |= REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);
rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);

View File

@@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
elv_merge_requests(q, req, next);
blk_account_io_merge(req);
/*
* 'next' is going away, so update stats accordingly
*/
blk_account_io_merge(next);
req->ioprio = ioprio_best(req->ioprio, next->ioprio);
if (blk_rq_cpu_valid(next))

View File

@@ -156,26 +156,28 @@ EXPORT_SYMBOL(blk_queue_make_request);
/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
* @dma_addr: bus address limit
* @q: the request queue for the device
* @dma_mask: the maximum address the device can handle
*
* Description:
* Different hardware can have different requirements as to what pages
* it can do I/O directly to. A low level driver can call
* blk_queue_bounce_limit to have lower memory pages allocated as bounce
* buffers for doing I/O to pages residing above @dma_addr.
* buffers for doing I/O to pages residing above @dma_mask.
**/
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
{
unsigned long b_pfn = dma_addr >> PAGE_SHIFT;
unsigned long b_pfn = dma_mask >> PAGE_SHIFT;
int dma = 0;
q->bounce_gfp = GFP_NOIO;
#if BITS_PER_LONG == 64
/* Assume anything <= 4GB can be handled by IOMMU.
Actually some IOMMUs can handle everything, but I don't
know of a way to test this here. */
if (b_pfn < (min_t(u64, 0x100000000UL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
/*
* Assume anything <= 4GB can be handled by IOMMU. Actually
* some IOMMUs can handle everything, but I don't know of a
* way to test this here.
*/
if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
dma = 1;
q->bounce_pfn = max_low_pfn;
#else

View File

@@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
ssize_t ret = queue_var_store(&stats, page, count);
spin_lock_irq(q->queue_lock);
elv_quisce_start(q);
if (stats)
queue_flag_set(QUEUE_FLAG_IO_STAT, q);
else
queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
elv_quisce_end(q);
spin_unlock_irq(q->queue_lock);
return ret;

View File

@@ -211,6 +211,12 @@ void blk_abort_queue(struct request_queue *q)
struct request *rq, *tmp;
LIST_HEAD(list);
/*
* Not a request based block device, nothing to abort
*/
if (!q->request_fn)
return;
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
@@ -224,6 +230,13 @@ void blk_abort_queue(struct request_queue *q)
list_for_each_entry_safe(rq, tmp, &list, timeout_list)
blk_abort_request(rq);
/*
* Occasionally, blk_abort_request() will return without
* deleting the element from the list. Make sure we add those back
* instead of leaving them on the local stack list.
*/
list_splice(&list, &q->timeout_list);
spin_unlock_irqrestore(q->queue_lock, flags);
}

View File

@@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q);
int blk_dev_init(void);
void elv_quisce_start(struct request_queue *q);
void elv_quisce_end(struct request_queue *q);
void elv_quiesce_start(struct request_queue *q);
void elv_quiesce_end(struct request_queue *q);
/*
@@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)
static inline int blk_do_io_stat(struct request *rq)
{
struct gendisk *disk = rq->rq_disk;
if (!disk || !disk->queue)
return 0;
return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
return rq->rq_disk && blk_rq_io_stat(rq);
}
#endif

View File

@@ -56,9 +56,6 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
#define ASYNC (0)
#define SYNC (1)
#define sample_valid(samples) ((samples) > 80)
/*
@@ -83,6 +80,14 @@ struct cfq_data {
* rr list of queues with requests and the count of them
*/
struct cfq_rb_root service_tree;
/*
* Each priority tree is sorted by next_request position. These
* trees are used when determining if two or more queues are
* interleaving requests (see cfq_close_cooperator).
*/
struct rb_root prio_trees[CFQ_PRIO_LISTS];
unsigned int busy_queues;
/*
* Used to track any pending rt requests so we can pre-empt current
@@ -147,6 +152,10 @@ struct cfq_queue {
struct rb_node rb_node;
/* service_tree key */
unsigned long rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
struct rb_root *p_root;
/* sorted list of pending requests */
struct rb_root sort_list;
/* if fifo isn't expired, next request to serve */
@@ -185,6 +194,7 @@ enum cfqq_state_flags {
CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
CFQ_CFQQ_FLAG_sync, /* synchronous queue */
CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */
};
#define CFQ_CFQQ_FNS(name) \
@@ -211,6 +221,7 @@ CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
CFQ_CFQQ_FNS(coop);
#undef CFQ_CFQQ_FNS
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
@@ -419,13 +430,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
return NULL;
}
static void rb_erase_init(struct rb_node *n, struct rb_root *root)
{
rb_erase(n, root);
RB_CLEAR_NODE(n);
}
static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
{
if (root->left == n)
root->left = NULL;
rb_erase(n, &root->rb);
RB_CLEAR_NODE(n);
rb_erase_init(n, &root->rb);
}
/*
@@ -470,8 +485,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
* requests waiting to be processed. It is sorted in the order that
* we will service the queues.
*/
static void cfq_service_tree_add(struct cfq_data *cfqd,
struct cfq_queue *cfqq, int add_front)
static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
int add_front)
{
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
@@ -544,6 +559,67 @@ static void cfq_service_tree_add(struct cfq_data *cfqd,
rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
}
static struct cfq_queue *
cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
sector_t sector, struct rb_node **ret_parent,
struct rb_node ***rb_link)
{
struct rb_node **p, *parent;
struct cfq_queue *cfqq = NULL;
parent = NULL;
p = &root->rb_node;
while (*p) {
struct rb_node **n;
parent = *p;
cfqq = rb_entry(parent, struct cfq_queue, p_node);
/*
* Sort strictly based on sector. Smallest to the left,
* largest to the right.
*/
if (sector > cfqq->next_rq->sector)
n = &(*p)->rb_right;
else if (sector < cfqq->next_rq->sector)
n = &(*p)->rb_left;
else
break;
p = n;
cfqq = NULL;
}
*ret_parent = parent;
if (rb_link)
*rb_link = p;
return cfqq;
}
static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
if (cfqq->p_root) {
rb_erase(&cfqq->p_node, cfqq->p_root);
cfqq->p_root = NULL;
}
if (cfq_class_idle(cfqq))
return;
if (!cfqq->next_rq)
return;
cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector,
&parent, &p);
if (!__cfqq) {
rb_link_node(&cfqq->p_node, parent, p);
rb_insert_color(&cfqq->p_node, cfqq->p_root);
} else
cfqq->p_root = NULL;
}
/*
* Update cfqq's position in the service tree.
*/
@@ -552,8 +628,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
/*
* Resorting requires the cfqq to be on the RR list already.
*/
if (cfq_cfqq_on_rr(cfqq))
if (cfq_cfqq_on_rr(cfqq)) {
cfq_service_tree_add(cfqd, cfqq, 0);
cfq_prio_tree_add(cfqd, cfqq);
}
}
/*
@@ -584,6 +662,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
if (!RB_EMPTY_NODE(&cfqq->rb_node))
cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
if (cfqq->p_root) {
rb_erase(&cfqq->p_node, cfqq->p_root);
cfqq->p_root = NULL;
}
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
@@ -613,7 +695,7 @@ static void cfq_add_rq_rb(struct request *rq)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
struct request *__alias;
struct request *__alias, *prev;
cfqq->queued[rq_is_sync(rq)]++;
@@ -630,7 +712,15 @@ static void cfq_add_rq_rb(struct request *rq)
/*
* check if this request is a better next-serve candidate
*/
prev = cfqq->next_rq;
cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
/*
* adjust priority tree position, if ->next_rq changes
*/
if (prev != cfqq->next_rq)
cfq_prio_tree_add(cfqd, cfqq);
BUG_ON(!cfqq->next_rq);
}
@@ -843,11 +933,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
/*
* Get and set a new active queue for service.
*/
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
struct cfq_queue *cfqq;
if (!cfqq) {
cfqq = cfq_get_next_queue(cfqd);
if (cfqq)
cfq_clear_cfqq_coop(cfqq);
}
cfqq = cfq_get_next_queue(cfqd);
__cfq_set_active_queue(cfqd, cfqq);
return cfqq;
}
@@ -861,28 +955,100 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
return cfqd->last_position - rq->sector;
}
#define CIC_SEEK_THR 8 * 1024
#define CIC_SEEKY(cic) ((cic)->seek_mean > CIC_SEEK_THR)
static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
{
struct cfq_io_context *cic = cfqd->active_cic;
sector_t sdist = cic->seek_mean;
if (!sample_valid(cic->seek_samples))
return 0;
sdist = CIC_SEEK_THR;
return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
return cfq_dist_from_last(cfqd, rq) <= sdist;
}
static int cfq_close_cooperator(struct cfq_data *cfq_data,
struct cfq_queue *cfqq)
static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
struct cfq_queue *cur_cfqq)
{
struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
struct rb_node *parent, *node;
struct cfq_queue *__cfqq;
sector_t sector = cfqd->last_position;
if (RB_EMPTY_ROOT(root))
return NULL;
/*
* First, if we find a request starting at the end of the last
* request, choose it.
*/
__cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
if (__cfqq)
return __cfqq;
/*
* If the exact sector wasn't found, the parent of the NULL leaf
* will contain the closest sector.
*/
__cfqq = rb_entry(parent, struct cfq_queue, p_node);
if (cfq_rq_close(cfqd, __cfqq->next_rq))
return __cfqq;
if (__cfqq->next_rq->sector < sector)
node = rb_next(&__cfqq->p_node);
else
node = rb_prev(&__cfqq->p_node);
if (!node)
return NULL;
__cfqq = rb_entry(node, struct cfq_queue, p_node);
if (cfq_rq_close(cfqd, __cfqq->next_rq))
return __cfqq;
return NULL;
}
/*
* cfqd - obvious
* cur_cfqq - passed in so that we don't decide that the current queue is
* closely cooperating with itself.
*
* So, basically we're assuming that that cur_cfqq has dispatched at least
* one request, and that cfqd->last_position reflects a position on the disk
* associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
* assumption.
*/
static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
struct cfq_queue *cur_cfqq,
int probe)
{
struct cfq_queue *cfqq;
/*
* A valid cfq_io_context is necessary to compare requests against
* the seek_mean of the current cfqq.
*/
if (!cfqd->active_cic)
return NULL;
/*
* We should notice if some of the queues are cooperating, eg
* working closely on the same area of the disk. In that case,
* we can group them together and don't waste time idling.
*/
return 0;
}
cfqq = cfqq_close(cfqd, cur_cfqq);
if (!cfqq)
return NULL;
#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
if (cfq_cfqq_coop(cfqq))
return NULL;
if (!probe)
cfq_mark_cfqq_coop(cfqq);
return cfqq;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
@@ -920,13 +1086,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
if (!cic || !atomic_read(&cic->ioc->nr_tasks))
return;
/*
* See if this prio level has a good candidate
*/
if (cfq_close_cooperator(cfqd, cfqq) &&
(sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
return;
cfq_mark_cfqq_wait_request(cfqq);
/*
@@ -939,7 +1098,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
cfq_log(cfqd, "arm_idle: %lu", sl);
cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
}
/*
@@ -1003,7 +1162,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq;
struct cfq_queue *cfqq, *new_cfqq = NULL;
cfqq = cfqd->active_queue;
if (!cfqq)
@@ -1036,6 +1195,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
if (!RB_EMPTY_ROOT(&cfqq->sort_list))
goto keep_queue;
/*
* If another queue has a request waiting within our mean seek
* distance, let it run. The expire code will check for close
* cooperators and put the close queue at the front of the service
* tree.
*/
new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0);
if (new_cfqq)
goto expire;
/*
* No requests pending. If the active queue still has requests in
* flight or is idling for a new request, allow either of these
@@ -1050,7 +1219,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
expire:
cfq_slice_expired(cfqd, 0);
new_queue:
cfqq = cfq_set_active_queue(cfqd);
cfqq = cfq_set_active_queue(cfqd, new_cfqq);
keep_queue:
return cfqq;
}
@@ -1333,14 +1502,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
if (ioc->ioc_data == cic)
rcu_assign_pointer(ioc->ioc_data, NULL);
if (cic->cfqq[ASYNC]) {
cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
cic->cfqq[ASYNC] = NULL;
if (cic->cfqq[BLK_RW_ASYNC]) {
cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
cic->cfqq[BLK_RW_ASYNC] = NULL;
}
if (cic->cfqq[SYNC]) {
cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
cic->cfqq[SYNC] = NULL;
if (cic->cfqq[BLK_RW_SYNC]) {
cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
cic->cfqq[BLK_RW_SYNC] = NULL;
}
}
@@ -1449,17 +1618,18 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
spin_lock_irqsave(cfqd->queue->queue_lock, flags);
cfqq = cic->cfqq[ASYNC];
cfqq = cic->cfqq[BLK_RW_ASYNC];
if (cfqq) {
struct cfq_queue *new_cfqq;
new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC);
new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
GFP_ATOMIC);
if (new_cfqq) {
cic->cfqq[ASYNC] = new_cfqq;
cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
cfq_put_queue(cfqq);
}
}
cfqq = cic->cfqq[SYNC];
cfqq = cic->cfqq[BLK_RW_SYNC];
if (cfqq)
cfq_mark_cfqq_prio_changed(cfqq);
@@ -1510,6 +1680,7 @@ retry:
}
RB_CLEAR_NODE(&cfqq->rb_node);
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);
atomic_set(&cfqq->ref, 0);
@@ -1745,7 +1916,9 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
sector_t sdist;
u64 total;
if (cic->last_request_pos < rq->sector)
if (!cic->last_request_pos)
sdist = 0;
else if (cic->last_request_pos < rq->sector)
sdist = rq->sector - cic->last_request_pos;
else
sdist = cic->last_request_pos - rq->sector;
@@ -1905,10 +2078,20 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* Remember that we saw a request from this process, but
* don't start queuing just yet. Otherwise we risk seeing lots
* of tiny requests, because we disrupt the normal plugging
* and merging.
* and merging. If the request is already larger than a single
* page, let it rip immediately. For that case we assume that
* merging is already done. Ditto for a busy system that
* has other work pending, don't risk delaying until the
* idle timer unplug to continue working.
*/
if (cfq_cfqq_wait_request(cfqq))
if (cfq_cfqq_wait_request(cfqq)) {
if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
cfqd->busy_queues > 1) {
del_timer(&cfqd->idle_slice_timer);
blk_start_queueing(cfqd->queue);
}
cfq_mark_cfqq_must_dispatch(cfqq);
}
} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
/*
* not the active queue - expire current slice if it is
@@ -1992,16 +2175,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
* or if we want to idle in case it has no pending requests.
*/
if (cfqd->active_queue == cfqq) {
const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
if (cfq_cfqq_slice_new(cfqq)) {
cfq_set_prio_slice(cfqd, cfqq);
cfq_clear_cfqq_slice_new(cfqq);
}
/*
* If there are no requests waiting in this queue, and
* there are other queues ready to issue requests, AND
* those other queues are issuing requests within our
* mean seek distance, give them a chance to run instead
* of idling.
*/
if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
cfq_slice_expired(cfqd, 1);
else if (sync && !rq_noidle(rq) &&
RB_EMPTY_ROOT(&cfqq->sort_list)) {
else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) &&
sync && !rq_noidle(rq))
cfq_arm_slice_timer(cfqd);
}
}
if (!cfqd->rq_in_driver)
@@ -2062,7 +2253,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
if (!cic)
return ELV_MQUEUE_MAY;
cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
cfq_init_prio_data(cfqq, cic->ioc);
cfq_prio_boost(cfqq);
@@ -2152,11 +2343,10 @@ static void cfq_kick_queue(struct work_struct *work)
struct cfq_data *cfqd =
container_of(work, struct cfq_data, unplug_work);
struct request_queue *q = cfqd->queue;
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irq(q->queue_lock);
blk_start_queueing(q);
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irq(q->queue_lock);
}
/*
@@ -2263,12 +2453,22 @@ static void cfq_exit_queue(struct elevator_queue *e)
static void *cfq_init_queue(struct request_queue *q)
{
struct cfq_data *cfqd;
int i;
cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
if (!cfqd)
return NULL;
cfqd->service_tree = CFQ_RB_ROOT;
/*
* Not strictly needed (since RB_ROOT just clears the node and we
* zeroed cfqd on alloc), but better be safe in case someone decides
* to add magic to the rb code
*/
for (i = 0; i < CFQ_PRIO_LISTS; i++)
cfqd->prio_trees[i] = RB_ROOT;
INIT_LIST_HEAD(&cfqd->cic_list);
cfqd->queue = q;

View File

@@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q)
/*
* Call with queue lock held, interrupts disabled
*/
void elv_quisce_start(struct request_queue *q)
void elv_quiesce_start(struct request_queue *q)
{
queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
@@ -607,7 +607,7 @@ void elv_quisce_start(struct request_queue *q)
}
}
void elv_quisce_end(struct request_queue *q)
void elv_quiesce_end(struct request_queue *q)
{
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
}
@@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
* Turn on BYPASS and drain all requests w/ elevator private data
*/
spin_lock_irq(q->queue_lock);
elv_quisce_start(q);
elv_quiesce_start(q);
/*
* Remember old elevator.
@@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
*/
elevator_exit(old_elevator);
spin_lock_irq(q->queue_lock);
elv_quisce_end(q);
elv_quiesce_end(q);
spin_unlock_irq(q->queue_lock);
blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);

View File

@@ -98,7 +98,7 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
if (flags & DISK_PITER_REVERSE)
piter->idx = ptbl->len - 1;
else if (flags & DISK_PITER_INCL_PART0)
else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
piter->idx = 0;
else
piter->idx = 1;
@@ -134,7 +134,8 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
/* determine iteration parameters */
if (piter->flags & DISK_PITER_REVERSE) {
inc = -1;
if (piter->flags & DISK_PITER_INCL_PART0)
if (piter->flags & (DISK_PITER_INCL_PART0 |
DISK_PITER_INCL_EMPTY_PART0))
end = -1;
else
end = 0;
@@ -150,7 +151,10 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
if (!part->nr_sects &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
continue;
get_device(part_to_dev(part));
@@ -1011,7 +1015,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"\n\n");
*/
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
cpu = part_stat_lock();
part_round_stats(cpu, hd);

View File

@@ -146,8 +146,6 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blk_ioc_discard_endio;
bio->bi_bdev = bdev;

View File

@@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
struct bio *bio)
{
int ret = 0;
int r, ret = 0;
/*
* fill in all the output members
@@ -242,7 +242,9 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
ret = -EFAULT;
}
blk_rq_unmap_user(bio);
r = blk_rq_unmap_user(bio);
if (!ret)
ret = r;
blk_put_request(rq);
return ret;
@@ -288,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
if (hdr->iovec_count) {
const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
size_t iov_data_len;
struct sg_iovec *iov;
iov = kmalloc(size, GFP_KERNEL);
@@ -302,8 +305,18 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
goto out;
}
/* SG_IO howto says that the shorter of the two wins */
iov_data_len = iov_length((struct iovec *)iov,
hdr->iovec_count);
if (hdr->dxfer_len < iov_data_len) {
hdr->iovec_count = iov_shorten((struct iovec *)iov,
hdr->iovec_count,
hdr->dxfer_len);
iov_data_len = hdr->dxfer_len;
}
ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count,
hdr->dxfer_len, GFP_KERNEL);
iov_data_len, GFP_KERNEL);
kfree(iov);
} else if (hdr->dxfer_len)
ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,