mirror of
https://github.com/qemu/qemu.git
synced 2026-04-05 22:00:58 +00:00
Merge tag 'for-upstream' of https://gitlab.com/kmwolf/qemu into staging
Block layer patches - linux-aio/io-uring: Resubmit tails of short reads/writes - curl: Fix memory leak # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCgAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmnC3hgRHGt3b2xmQHJl # ZGhhdC5jb20ACgkQfwmycsiPL9Ygxw//bxGIqnT2r9dSz8PzDfSGID21Ido7ypYP # bUKUIPXtho85v6IbRCzKkgLXezmpoI/cbcsvnVmFbZW6Ab8ZSZxyEsZNCOQkxL34 # TwwWVI1pXtwXcfJFTb2EhE0HpHa2YQQ7N4Mg0RTcPqctywcjYs2Fz/O3cDstTEYx # gkwpORdt4KJ4dbaRbm3UbDGzJyjWs8bZd+m7kXaJCCE1vZsmO3p1zbIkTGXt+PQJ # Rmcg0O8kAEejTIEctOX0BYgyvCxYmhfOzDmmZwAVrlKmaeemsE8umRUCvmXAimu5 # JuKxgNJgag8xbVO41nykU0qwh0uerCOSiOwAXwh4U23MY/zBnPsbI0W985XbXCyB # DAs2bs/GHb/z8kjhV/GzrBxpol9k2wdecy2Mgkihe4qEhffyxUsj4cz+XkN0CAZj # /vZjCJO4FDN2zQqTQfwttU9A8pmkT9YLBdHOEMkTEWxvDjwIS8heBvB1EABW0F4J # Bhy6z87jiWHILxdnMrmZ61UasN1GgP/fdbtFP5bhXN1LzOekfLYyfRb1B9Imwx67 # d1K3XsW2BD/ETQByUpiC/nouE6LWe5afJhKvTfg+y2L1CgAtw4jBBqpoQfALeYhv # n2aCd69TJvarlAY9Rv2CjbQxUuouFxPzrjoS12AKW5on4iQCdCVHCywqlQIMTBbl # kwzdLdUAEGw= # =HiKg # -----END PGP SIGNATURE----- # gpg: Signature made Tue Mar 24 18:55:20 2026 GMT # gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6 # gpg: issuer "kwolf@redhat.com" # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full] # Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6 * tag 'for-upstream' of https://gitlab.com/kmwolf/qemu: io-uring: Resubmit tails of short writes linux-aio: Resubmit tails of short reads/writes linux-aio: Put all parameters into qemu_laiocb block/curl: free s->password in cleanup paths Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
@@ -903,6 +903,7 @@ out_noclean:
|
||||
g_free(s->cookie);
|
||||
g_free(s->url);
|
||||
g_free(s->username);
|
||||
g_free(s->password);
|
||||
g_free(s->proxyusername);
|
||||
g_free(s->proxypassword);
|
||||
if (s->sockets) {
|
||||
@@ -1014,6 +1015,7 @@ static void curl_close(BlockDriverState *bs)
|
||||
g_free(s->cookie);
|
||||
g_free(s->url);
|
||||
g_free(s->username);
|
||||
g_free(s->password);
|
||||
g_free(s->proxyusername);
|
||||
g_free(s->proxypassword);
|
||||
}
|
||||
|
||||
@@ -27,10 +27,10 @@ typedef struct {
|
||||
BdrvRequestFlags flags;
|
||||
|
||||
/*
|
||||
* Buffered reads may require resubmission, see
|
||||
* luring_resubmit_short_read().
|
||||
* Short reads/writes require resubmission, see
|
||||
* luring_resubmit_short_io().
|
||||
*/
|
||||
int total_read;
|
||||
int total_done;
|
||||
QEMUIOVector resubmit_qiov;
|
||||
|
||||
CqeHandler cqe_handler;
|
||||
@@ -40,10 +40,14 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
|
||||
{
|
||||
LuringRequest *req = opaque;
|
||||
QEMUIOVector *qiov = req->qiov;
|
||||
uint64_t offset = req->offset;
|
||||
uint64_t offset = req->offset + req->total_done;
|
||||
int fd = req->fd;
|
||||
BdrvRequestFlags flags = req->flags;
|
||||
|
||||
if (req->resubmit_qiov.iov) {
|
||||
qiov = &req->resubmit_qiov;
|
||||
}
|
||||
|
||||
switch (req->type) {
|
||||
case QEMU_AIO_WRITE:
|
||||
{
|
||||
@@ -73,17 +77,12 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
|
||||
break;
|
||||
case QEMU_AIO_READ:
|
||||
{
|
||||
if (req->resubmit_qiov.iov != NULL) {
|
||||
qiov = &req->resubmit_qiov;
|
||||
}
|
||||
if (qiov->niov > 1) {
|
||||
io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
|
||||
offset + req->total_read);
|
||||
io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov, offset);
|
||||
} else {
|
||||
/* The man page says non-vectored is faster than vectored */
|
||||
struct iovec *iov = qiov->iov;
|
||||
io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
|
||||
offset + req->total_read);
|
||||
io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len, offset);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -98,21 +97,26 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
|
||||
}
|
||||
|
||||
/**
|
||||
* luring_resubmit_short_read:
|
||||
* luring_resubmit_short_io:
|
||||
*
|
||||
* Short reads are rare but may occur. The remaining read request needs to be
|
||||
* resubmitted.
|
||||
* Short reads and writes are rare but may occur. The remaining request needs
|
||||
* to be resubmitted.
|
||||
*
|
||||
* For example, short reads can be reproduced by a FUSE export deliberately
|
||||
* executing short reads. The tail of short writes is generally resubmitted by
|
||||
* io-uring in the kernel, but if that resubmission encounters an I/O error, the
|
||||
* already submitted portion will be returned as a short write.
|
||||
*/
|
||||
static void luring_resubmit_short_read(LuringRequest *req, int nread)
|
||||
static void luring_resubmit_short_io(LuringRequest *req, int ndone)
|
||||
{
|
||||
QEMUIOVector *resubmit_qiov;
|
||||
size_t remaining;
|
||||
|
||||
trace_luring_resubmit_short_read(req, nread);
|
||||
trace_luring_resubmit_short_io(req, ndone);
|
||||
|
||||
/* Update read position */
|
||||
req->total_read += nread;
|
||||
remaining = req->qiov->size - req->total_read;
|
||||
/* Update I/O position */
|
||||
req->total_done += ndone;
|
||||
remaining = req->qiov->size - req->total_done;
|
||||
|
||||
/* Shorten qiov */
|
||||
resubmit_qiov = &req->resubmit_qiov;
|
||||
@@ -121,7 +125,7 @@ static void luring_resubmit_short_read(LuringRequest *req, int nread)
|
||||
} else {
|
||||
qemu_iovec_reset(resubmit_qiov);
|
||||
}
|
||||
qemu_iovec_concat(resubmit_qiov, req->qiov, req->total_read, remaining);
|
||||
qemu_iovec_concat(resubmit_qiov, req->qiov, req->total_done, remaining);
|
||||
|
||||
aio_add_sqe(luring_prep_sqe, req, &req->cqe_handler);
|
||||
}
|
||||
@@ -153,31 +157,35 @@ static void luring_cqe_handler(CqeHandler *cqe_handler)
|
||||
return;
|
||||
}
|
||||
} else if (req->qiov) {
|
||||
/* total_read is non-zero only for resubmitted read requests */
|
||||
int total_bytes = ret + req->total_read;
|
||||
/* total_done is non-zero only for resubmitted requests */
|
||||
int total_bytes = ret + req->total_done;
|
||||
|
||||
if (total_bytes == req->qiov->size) {
|
||||
ret = 0;
|
||||
} else {
|
||||
} else if (ret > 0 && (req->type == QEMU_AIO_READ ||
|
||||
req->type == QEMU_AIO_WRITE)) {
|
||||
/* Short Read/Write */
|
||||
if (req->type == QEMU_AIO_READ) {
|
||||
if (ret > 0) {
|
||||
luring_resubmit_short_read(req, ret);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Pad with zeroes */
|
||||
qemu_iovec_memset(req->qiov, total_bytes, 0,
|
||||
req->qiov->size - total_bytes);
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
luring_resubmit_short_io(req, ret);
|
||||
return;
|
||||
} else if (req->type == QEMU_AIO_READ) {
|
||||
/* Read ret == 0: EOF, pad with zeroes */
|
||||
qemu_iovec_memset(req->qiov, total_bytes, 0,
|
||||
req->qiov->size - total_bytes);
|
||||
ret = 0;
|
||||
} else {
|
||||
/*
|
||||
* Normal write ret == 0 means ENOSPC.
|
||||
* For zone-append, we treat any 0 <= ret < qiov->size as ENOSPC,
|
||||
* too, because resubmitting the tail seems a little unsafe.
|
||||
*/
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
}
|
||||
|
||||
req->ret = ret;
|
||||
qemu_iovec_destroy(&req->resubmit_qiov);
|
||||
if (req->resubmit_qiov.iov) {
|
||||
qemu_iovec_destroy(&req->resubmit_qiov);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the coroutine is already entered it must be in luring_co_submit() and
|
||||
|
||||
@@ -41,9 +41,19 @@ struct qemu_laiocb {
|
||||
LinuxAioState *ctx;
|
||||
struct iocb iocb;
|
||||
ssize_t ret;
|
||||
off_t offset;
|
||||
size_t nbytes;
|
||||
QEMUIOVector *qiov;
|
||||
bool is_read;
|
||||
|
||||
/* For handling short reads/writes */
|
||||
size_t total_done;
|
||||
QEMUIOVector resubmit_qiov;
|
||||
|
||||
int fd;
|
||||
int type;
|
||||
BdrvRequestFlags flags;
|
||||
|
||||
uint64_t dev_max_batch;
|
||||
QSIMPLEQ_ENTRY(qemu_laiocb) next;
|
||||
};
|
||||
|
||||
@@ -68,28 +78,61 @@ struct LinuxAioState {
|
||||
};
|
||||
|
||||
static void ioq_submit(LinuxAioState *s);
|
||||
static int laio_do_submit(struct qemu_laiocb *laiocb);
|
||||
|
||||
/*
 * Build a single ssize_t result from a completed io_event: ev->res2 is
 * widened to 64 bits, shifted left by 32 and OR-ed with ev->res.
 */
static inline ssize_t io_event_ret(struct io_event *ev)
|
||||
{
|
||||
return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retry tail of short requests.
*
* Adds @done to laiocb->total_done, then rebuilds laiocb->resubmit_qiov so
* that it covers the bytes of laiocb->qiov from total_done up to
* laiocb->nbytes, and hands the request to laio_do_submit() again.
*
* Returns: the return value of laio_do_submit().
|
||||
*/
|
||||
static int laio_resubmit_short_io(struct qemu_laiocb *laiocb, size_t done)
|
||||
{
|
||||
QEMUIOVector *resubmit_qiov = &laiocb->resubmit_qiov;
|
||||
|
||||
/* Account for the bytes that the previous submission already completed */
laiocb->total_done += done;
|
||||
|
||||
/*
 * A NULL iov means resubmit_qiov has not been set up yet; otherwise it is
 * reused after being reset.
 */
if (!resubmit_qiov->iov) {
|
||||
qemu_iovec_init(resubmit_qiov, laiocb->qiov->niov);
|
||||
} else {
|
||||
qemu_iovec_reset(resubmit_qiov);
|
||||
}
|
||||
/* Fill resubmit_qiov with the not-yet-completed tail of the original qiov */
qemu_iovec_concat(resubmit_qiov, laiocb->qiov,
|
||||
laiocb->total_done, laiocb->nbytes - laiocb->total_done);
|
||||
|
||||
return laio_do_submit(laiocb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Completes an AIO request.
|
||||
*/
|
||||
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
|
||||
{
|
||||
int ret;
|
||||
ssize_t ret;
|
||||
|
||||
ret = laiocb->ret;
|
||||
if (ret != -ECANCELED) {
|
||||
if (ret == laiocb->nbytes) {
|
||||
if (ret == laiocb->nbytes - laiocb->total_done) {
|
||||
ret = 0;
|
||||
} else if (ret > 0 && (laiocb->type == QEMU_AIO_READ ||
|
||||
laiocb->type == QEMU_AIO_WRITE)) {
|
||||
ret = laio_resubmit_short_io(laiocb, ret);
|
||||
if (!ret) {
|
||||
return;
|
||||
}
|
||||
} else if (ret >= 0) {
|
||||
/* Short reads mean EOF, pad with zeros. */
|
||||
if (laiocb->is_read) {
|
||||
qemu_iovec_memset(laiocb->qiov, ret, 0,
|
||||
laiocb->qiov->size - ret);
|
||||
/*
|
||||
* For normal reads and writes, we only get here if ret == 0, which
|
||||
* means EOF for reads and ENOSPC for writes.
|
||||
* For zone-append, we get here with any ret >= 0, which we just
|
||||
* treat as ENOSPC, too (safer than resubmitting, probably, but not
|
||||
* 100 % clear).
|
||||
*/
|
||||
if (laiocb->type == QEMU_AIO_READ) {
|
||||
qemu_iovec_memset(laiocb->qiov, laiocb->total_done, 0,
|
||||
laiocb->qiov->size - laiocb->total_done);
|
||||
} else {
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
@@ -97,6 +140,9 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
|
||||
}
|
||||
|
||||
laiocb->ret = ret;
|
||||
if (laiocb->resubmit_qiov.iov) {
|
||||
qemu_iovec_destroy(&laiocb->resubmit_qiov);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the coroutine is already entered it must be in ioq_submit() and
|
||||
@@ -367,23 +413,27 @@ static void laio_deferred_fn(void *opaque)
|
||||
}
|
||||
}
|
||||
|
||||
static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
|
||||
int type, BdrvRequestFlags flags,
|
||||
uint64_t dev_max_batch)
|
||||
static int laio_do_submit(struct qemu_laiocb *laiocb)
|
||||
{
|
||||
LinuxAioState *s = laiocb->ctx;
|
||||
struct iocb *iocbs = &laiocb->iocb;
|
||||
QEMUIOVector *qiov = laiocb->qiov;
|
||||
int fd = laiocb->fd;
|
||||
off_t offset = laiocb->offset + laiocb->total_done;
|
||||
|
||||
switch (type) {
|
||||
if (laiocb->resubmit_qiov.iov) {
|
||||
qiov = &laiocb->resubmit_qiov;
|
||||
}
|
||||
|
||||
switch (laiocb->type) {
|
||||
case QEMU_AIO_WRITE:
|
||||
#ifdef HAVE_IO_PREP_PWRITEV2
|
||||
{
|
||||
int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
|
||||
int laio_flags = (laiocb->flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
|
||||
io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
|
||||
}
|
||||
#else
|
||||
assert(flags == 0);
|
||||
assert(laiocb->flags == 0);
|
||||
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
|
||||
#endif
|
||||
break;
|
||||
@@ -399,7 +449,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
|
||||
/* Currently Linux kernel does not support other operations */
|
||||
default:
|
||||
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
|
||||
__func__, type);
|
||||
__func__, laiocb->type);
|
||||
return -EIO;
|
||||
}
|
||||
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
|
||||
@@ -407,7 +457,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
|
||||
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
|
||||
s->io_q.in_queue++;
|
||||
if (!s->io_q.blocked) {
|
||||
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
|
||||
if (s->io_q.in_queue >= laio_max_batch(s, laiocb->dev_max_batch)) {
|
||||
ioq_submit(s);
|
||||
} else {
|
||||
defer_call(laio_deferred_fn, s);
|
||||
@@ -425,14 +475,18 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
|
||||
AioContext *ctx = qemu_get_current_aio_context();
|
||||
struct qemu_laiocb laiocb = {
|
||||
.co = qemu_coroutine_self(),
|
||||
.offset = offset,
|
||||
.nbytes = qiov ? qiov->size : 0,
|
||||
.ctx = aio_get_linux_aio(ctx),
|
||||
.ret = -EINPROGRESS,
|
||||
.is_read = (type == QEMU_AIO_READ),
|
||||
.qiov = qiov,
|
||||
.fd = fd,
|
||||
.type = type,
|
||||
.flags = flags,
|
||||
.dev_max_batch = dev_max_batch,
|
||||
};
|
||||
|
||||
ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
|
||||
ret = laio_do_submit(&laiocb);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "
|
||||
# io_uring.c
|
||||
luring_cqe_handler(void *req, int ret) "req %p ret %d"
|
||||
luring_co_submit(void *bs, void *req, int fd, uint64_t offset, size_t nbytes, int type) "bs %p req %p fd %d offset %" PRId64 " nbytes %zd type %d"
|
||||
luring_resubmit_short_read(void *req, int nread) "req %p nread %d"
|
||||
luring_resubmit_short_io(void *req, int ndone) "req %p ndone %d"
|
||||
|
||||
# qcow2.c
|
||||
qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
|
||||
|
||||
Reference in New Issue
Block a user