/*
 * Linux io_uring support.
 *
 * Copyright (C) 2009 IBM, Corp.
 * Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2019 Aarushi Mehta
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <liburing.h>
#include "qemu/aio.h"
#include "block/block.h"
#include "block/raw-aio.h"
#include "qemu/coroutine.h"
#include "system/block-backend.h"
#include "trace.h"

typedef struct {
    Coroutine *co;
    QEMUIOVector *qiov;
    uint64_t offset;
    ssize_t ret;
    int type;
    int fd;
    BdrvRequestFlags flags;

    /*
     * Buffered reads may require resubmission, see
     * luring_resubmit_short_read().
     */
    int total_read;
    QEMUIOVector resubmit_qiov;

    CqeHandler cqe_handler;
} LuringRequest;
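
/*
 * Request lifecycle (summary, as inferred from the code below): a
 * LuringRequest lives on the stack of the coroutine in luring_co_submit().
 * aio_add_sqe() invokes luring_prep_sqe() to fill in an SQE for the request,
 * and luring_cqe_handler() runs once the CQE arrives, storing the result in
 * req->ret and waking the coroutine.
 */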

static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
{
    LuringRequest *req = opaque;
    QEMUIOVector *qiov = req->qiov;
    uint64_t offset = req->offset;
    int fd = req->fd;
    BdrvRequestFlags flags = req->flags;

    switch (req->type) {
    case QEMU_AIO_WRITE:
    {
        int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;

        if (luring_flags != 0 || qiov->niov > 1) {
#ifdef HAVE_IO_URING_PREP_WRITEV2
            io_uring_prep_writev2(sqe, fd, qiov->iov,
                                  qiov->niov, offset, luring_flags);
#else
            /*
             * FUA should only be enabled with HAVE_IO_URING_PREP_WRITEV2, see
             * luring_has_fua().
             */
            assert(luring_flags == 0);

            io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
#endif
        } else {
            /* The man page says non-vectored is faster than vectored */
            struct iovec *iov = qiov->iov;

            io_uring_prep_write(sqe, fd, iov->iov_base, iov->iov_len, offset);
        }
        break;
    }
    case QEMU_AIO_ZONE_APPEND:
        io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
        break;
    case QEMU_AIO_READ:
    {
        if (req->resubmit_qiov.iov != NULL) {
            qiov = &req->resubmit_qiov;
        }
        if (qiov->niov > 1) {
            io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
                                offset + req->total_read);
        } else {
            /* The man page says non-vectored is faster than vectored */
            struct iovec *iov = qiov->iov;

            io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
                               offset + req->total_read);
        }
        break;
    }
    case QEMU_AIO_FLUSH:
        io_uring_prep_fsync(sqe, fd, IORING_FSYNC_DATASYNC);
        break;
    default:
        fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n",
                __func__, req->type);
        abort();
    }
}
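
/*
 * Note on the callback pattern above (descriptive comment, not part of the
 * original file): aio_add_sqe() hands luring_prep_sqe() an already-reserved
 * SQE slot, so the callback only fills it in and cannot fail. A
 * single-buffer write without BDRV_REQ_FUA becomes a non-vectored
 * IORING_OP_WRITE, while a FUA write uses io_uring_prep_writev2() with
 * RWF_DSYNC so that completion implies the data reached stable storage.
 */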

/**
 * luring_resubmit_short_read:
 *
 * Short reads are rare but may occur. The remaining read request needs to be
 * resubmitted.
 */
static void luring_resubmit_short_read(LuringRequest *req, int nread)
{
    QEMUIOVector *resubmit_qiov;
    size_t remaining;

    trace_luring_resubmit_short_read(req, nread);

    /* Update read position */
    req->total_read += nread;
    remaining = req->qiov->size - req->total_read;

    /* Shorten qiov */
    resubmit_qiov = &req->resubmit_qiov;
    if (resubmit_qiov->iov == NULL) {
        qemu_iovec_init(resubmit_qiov, req->qiov->niov);
    } else {
        qemu_iovec_reset(resubmit_qiov);
    }
    qemu_iovec_concat(resubmit_qiov, req->qiov, req->total_read, remaining);

    aio_add_sqe(luring_prep_sqe, req, &req->cqe_handler);
}
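
/*
 * Worked example (illustrative numbers only): a 64 KiB read that completes
 * with nread=4096 leaves total_read=4096 and remaining=61440. The resubmit
 * qiov is rebuilt to cover bytes [4096, 65536) of the original qiov and the
 * request is reissued at offset + total_read (see the QEMU_AIO_READ case in
 * luring_prep_sqe()).
 */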

static void luring_cqe_handler(CqeHandler *cqe_handler)
{
    LuringRequest *req = container_of(cqe_handler, LuringRequest, cqe_handler);
    int ret = cqe_handler->cqe.res;

    trace_luring_cqe_handler(req, ret);

    if (ret < 0) {
        /*
         * Only writev/readv/fsync requests on regular files or host block
         * devices are submitted. Therefore -EAGAIN is not expected but it's
         * known to happen sometimes with Linux SCSI. Submit again and hope
         * the request completes successfully.
         *
         * For more information, see:
         * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
         *
         * If the code is changed to submit other types of requests in the
         * future, then this workaround may need to be extended to deal with
         * genuine -EAGAIN results that should not be resubmitted
         * immediately.
         */
        if (ret == -EINTR || ret == -EAGAIN) {
            aio_add_sqe(luring_prep_sqe, req, &req->cqe_handler);
            return;
        }
    } else if (req->qiov) {
        /* total_read is non-zero only for resubmitted read requests */
        int total_bytes = ret + req->total_read;

        if (total_bytes == req->qiov->size) {
            ret = 0;
        } else {
            /* Short Read/Write */
            if (req->type == QEMU_AIO_READ) {
                if (ret > 0) {
                    luring_resubmit_short_read(req, ret);
                    return;
                }

                /* Pad with zeroes */
                qemu_iovec_memset(req->qiov, total_bytes, 0,
                                  req->qiov->size - total_bytes);
                ret = 0;
            } else {
                ret = -ENOSPC;
            }
        }
    }

    req->ret = ret;
    qemu_iovec_destroy(&req->resubmit_qiov);

    /*
     * If the coroutine is already entered it must be in luring_co_submit() and
     * will notice req->ret has been filled in when it eventually runs later.
     * Coroutines cannot be entered recursively so avoid doing that!
     */
    if (!qemu_coroutine_entered(req->co)) {
        aio_co_wake(req->co);
    }
}

int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd,
                                  uint64_t offset, QEMUIOVector *qiov,
                                  int type, BdrvRequestFlags flags)
{
    LuringRequest req = {
        .co = qemu_coroutine_self(),
        .qiov = qiov,
        .ret = -EINPROGRESS,
        .type = type,
        .fd = fd,
        .offset = offset,
        .flags = flags,
    };

    req.cqe_handler.cb = luring_cqe_handler;

    trace_luring_co_submit(bs, &req, fd, offset, qiov ? qiov->size : 0, type);
    aio_add_sqe(luring_prep_sqe, &req, &req.cqe_handler);

    if (req.ret == -EINPROGRESS) {
        qemu_coroutine_yield();
    }
    return req.ret;
}
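
/*
 * Usage sketch (hypothetical caller, not part of this file): a block driver
 * running in coroutine context could issue a read along these lines:
 *
 *     static int coroutine_fn my_co_preadv(BlockDriverState *bs, int fd,
 *                                          uint64_t offset, QEMUIOVector *qiov)
 *     {
 *         return luring_co_submit(bs, fd, offset, qiov, QEMU_AIO_READ, 0);
 *     }
 *
 * Only the calling coroutine yields while waiting for the CQE; the event
 * loop thread keeps running, and luring_cqe_handler() wakes the coroutine
 * via aio_co_wake() once req.ret has been filled in.
 */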

/* Report whether FUA writes are supported (requires io_uring_prep_writev2()) */
bool luring_has_fua(void)
{
#ifdef HAVE_IO_URING_PREP_WRITEV2
    return true;
#else
    return false;
#endif
}