Merge tag 'for-upstream' of https://gitlab.com/kmwolf/qemu into staging

Block layer patches

- linux-aio/io-uring: Resubmit tails of short reads/writes
- curl: Fix memory leak

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCgAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmnC3hgRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9Ygxw//bxGIqnT2r9dSz8PzDfSGID21Ido7ypYP
# bUKUIPXtho85v6IbRCzKkgLXezmpoI/cbcsvnVmFbZW6Ab8ZSZxyEsZNCOQkxL34
# TwwWVI1pXtwXcfJFTb2EhE0HpHa2YQQ7N4Mg0RTcPqctywcjYs2Fz/O3cDstTEYx
# gkwpORdt4KJ4dbaRbm3UbDGzJyjWs8bZd+m7kXaJCCE1vZsmO3p1zbIkTGXt+PQJ
# Rmcg0O8kAEejTIEctOX0BYgyvCxYmhfOzDmmZwAVrlKmaeemsE8umRUCvmXAimu5
# JuKxgNJgag8xbVO41nykU0qwh0uerCOSiOwAXwh4U23MY/zBnPsbI0W985XbXCyB
# DAs2bs/GHb/z8kjhV/GzrBxpol9k2wdecy2Mgkihe4qEhffyxUsj4cz+XkN0CAZj
# /vZjCJO4FDN2zQqTQfwttU9A8pmkT9YLBdHOEMkTEWxvDjwIS8heBvB1EABW0F4J
# Bhy6z87jiWHILxdnMrmZ61UasN1GgP/fdbtFP5bhXN1LzOekfLYyfRb1B9Imwx67
# d1K3XsW2BD/ETQByUpiC/nouE6LWe5afJhKvTfg+y2L1CgAtw4jBBqpoQfALeYhv
# n2aCd69TJvarlAY9Rv2CjbQxUuouFxPzrjoS12AKW5on4iQCdCVHCywqlQIMTBbl
# kwzdLdUAEGw=
# =HiKg
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue Mar 24 18:55:20 2026 GMT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://gitlab.com/kmwolf/qemu:
  io-uring: Resubmit tails of short writes
  linux-aio: Resubmit tails of short reads/writes
  linux-aio: Put all parameters into qemu_laiocb
  block/curl: free s->password in cleanup paths

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2026-04-05 22:00:58 +00:00 · 2026-03-25 09:16:13 +00:00
parent 512b794b10 cf9cdaea6e
commit 007b29752e
4 changed files with 119 additions and 55 deletions
--- a/block/curl.c
+++ b/block/curl.c
@@ -903,6 +903,7 @@ out_noclean:
    g_free(s->cookie);
    g_free(s->url);
    g_free(s->username);
+    g_free(s->password);
    g_free(s->proxyusername);
    g_free(s->proxypassword);
    if (s->sockets) {
@@ -1014,6 +1015,7 @@ static void curl_close(BlockDriverState *bs)
    g_free(s->cookie);
    g_free(s->url);
    g_free(s->username);
+    g_free(s->password);
    g_free(s->proxyusername);
    g_free(s->proxypassword);
 }
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -27,10 +27,10 @@ typedef struct {
    BdrvRequestFlags flags;

    /*
-     * Buffered reads may require resubmission, see
-     * luring_resubmit_short_read().
+     * Short reads/writes require resubmission, see
+     * luring_resubmit_short_io().
     */
-    int total_read;
+    int total_done;
    QEMUIOVector resubmit_qiov;

    CqeHandler cqe_handler;
@@ -40,10 +40,14 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
 {
    LuringRequest *req = opaque;
    QEMUIOVector *qiov = req->qiov;
-    uint64_t offset = req->offset;
+    uint64_t offset = req->offset + req->total_done;
    int fd = req->fd;
    BdrvRequestFlags flags = req->flags;

+    if (req->resubmit_qiov.iov) {
+        qiov = &req->resubmit_qiov;
+    }
+
    switch (req->type) {
    case QEMU_AIO_WRITE:
    {
@@ -73,17 +77,12 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
        break;
    case QEMU_AIO_READ:
    {
-        if (req->resubmit_qiov.iov != NULL) {
-            qiov = &req->resubmit_qiov;
-        }
        if (qiov->niov > 1) {
-            io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
-                                offset + req->total_read);
+            io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov, offset);
        } else {
            /* The man page says non-vectored is faster than vectored */
            struct iovec *iov = qiov->iov;
-            io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
-                               offset + req->total_read);
+            io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len, offset);
        }
        break;
    }
@@ -98,21 +97,26 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
 }

 /**
- * luring_resubmit_short_read:
+ * luring_resubmit_short_io:
 *
- * Short reads are rare but may occur. The remaining read request needs to be
- * resubmitted.
+ * Short reads and writes are rare but may occur.  The remaining request needs
+ * to be resubmitted.
+ *
+ * For example, short reads can be reproduced by a FUSE export deliberately
+ * executing short reads.  The tail of short writes is generally resubmitted by
+ * io-uring in the kernel, but if that resubmission encounters an I/O error, the
+ * already submitted portion will be returned as a short write.
 */
-static void luring_resubmit_short_read(LuringRequest *req, int nread)
+static void luring_resubmit_short_io(LuringRequest *req, int ndone)
 {
    QEMUIOVector *resubmit_qiov;
    size_t remaining;

-    trace_luring_resubmit_short_read(req, nread);
+    trace_luring_resubmit_short_io(req, ndone);

-    /* Update read position */
-    req->total_read += nread;
-    remaining = req->qiov->size - req->total_read;
+    /* Update I/O position */
+    req->total_done += ndone;
+    remaining = req->qiov->size - req->total_done;

    /* Shorten qiov */
    resubmit_qiov = &req->resubmit_qiov;
@@ -121,7 +125,7 @@ static void luring_resubmit_short_read(LuringRequest *req, int nread)
    } else {
        qemu_iovec_reset(resubmit_qiov);
    }
-    qemu_iovec_concat(resubmit_qiov, req->qiov, req->total_read, remaining);
+    qemu_iovec_concat(resubmit_qiov, req->qiov, req->total_done, remaining);

    aio_add_sqe(luring_prep_sqe, req, &req->cqe_handler);
 }
@@ -153,31 +157,35 @@ static void luring_cqe_handler(CqeHandler *cqe_handler)
            return;
        }
    } else if (req->qiov) {
-        /* total_read is non-zero only for resubmitted read requests */
-        int total_bytes = ret + req->total_read;
+        /* total_done is non-zero only for resubmitted requests */
+        int total_bytes = ret + req->total_done;

        if (total_bytes == req->qiov->size) {
            ret = 0;
-        } else {
+        } else if (ret > 0 && (req->type == QEMU_AIO_READ ||
+                               req->type == QEMU_AIO_WRITE)) {
            /* Short Read/Write */
-            if (req->type == QEMU_AIO_READ) {
-                if (ret > 0) {
-                    luring_resubmit_short_read(req, ret);
-                    return;
-                }
-
-                /* Pad with zeroes */
-                qemu_iovec_memset(req->qiov, total_bytes, 0,
-                                  req->qiov->size - total_bytes);
-                ret = 0;
-            } else {
-                ret = -ENOSPC;
-            }
+            luring_resubmit_short_io(req, ret);
+            return;
+        } else if (req->type == QEMU_AIO_READ) {
+            /* Read ret == 0: EOF, pad with zeroes */
+            qemu_iovec_memset(req->qiov, total_bytes, 0,
+                              req->qiov->size - total_bytes);
+            ret = 0;
+        } else {
+            /*
+             * Normal write ret == 0 means ENOSPC.
+             * For zone-append, we treat any 0 <= ret < qiov->size as ENOSPC,
+             * too, because resubmitting the tail seems a little unsafe.
+             */
+            ret = -ENOSPC;
        }
    }

    req->ret = ret;
-    qemu_iovec_destroy(&req->resubmit_qiov);
+    if (req->resubmit_qiov.iov) {
+        qemu_iovec_destroy(&req->resubmit_qiov);
+    }

    /*
     * If the coroutine is already entered it must be in luring_co_submit() and
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -41,9 +41,19 @@ struct qemu_laiocb {
    LinuxAioState *ctx;
    struct iocb iocb;
    ssize_t ret;
+    off_t offset;
    size_t nbytes;
    QEMUIOVector *qiov;
-    bool is_read;
+
+    /* For handling short reads/writes */
+    size_t total_done;
+    QEMUIOVector resubmit_qiov;
+
+    int fd;
+    int type;
+    BdrvRequestFlags flags;
+
+    uint64_t dev_max_batch;
    QSIMPLEQ_ENTRY(qemu_laiocb) next;
 };

@@ -68,28 +78,61 @@ struct LinuxAioState {
 };

 static void ioq_submit(LinuxAioState *s);
+static int laio_do_submit(struct qemu_laiocb *laiocb);

 static inline ssize_t io_event_ret(struct io_event *ev)
 {
    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
 }

+/**
+ * Retry tail of short requests.
+ */
+static int laio_resubmit_short_io(struct qemu_laiocb *laiocb, size_t done)
+{
+    QEMUIOVector *resubmit_qiov = &laiocb->resubmit_qiov;
+
+    laiocb->total_done += done;
+
+    if (!resubmit_qiov->iov) {
+        qemu_iovec_init(resubmit_qiov, laiocb->qiov->niov);
+    } else {
+        qemu_iovec_reset(resubmit_qiov);
+    }
+    qemu_iovec_concat(resubmit_qiov, laiocb->qiov,
+                      laiocb->total_done, laiocb->nbytes - laiocb->total_done);
+
+    return laio_do_submit(laiocb);
+}
+
 /*
 * Completes an AIO request.
 */
 static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
 {
-    int ret;
+    ssize_t ret;

    ret = laiocb->ret;
    if (ret != -ECANCELED) {
-        if (ret == laiocb->nbytes) {
+        if (ret == laiocb->nbytes - laiocb->total_done) {
            ret = 0;
+        } else if (ret > 0 && (laiocb->type == QEMU_AIO_READ ||
+                               laiocb->type == QEMU_AIO_WRITE)) {
+            ret = laio_resubmit_short_io(laiocb, ret);
+            if (!ret) {
+                return;
+            }
        } else if (ret >= 0) {
-            /* Short reads mean EOF, pad with zeros. */
-            if (laiocb->is_read) {
-                qemu_iovec_memset(laiocb->qiov, ret, 0,
-                    laiocb->qiov->size - ret);
+            /*
+             * For normal reads and writes, we only get here if ret == 0, which
+             * means EOF for reads and ENOSPC for writes.
+             * For zone-append, we get here with any ret >= 0, which we just
+             * treat as ENOSPC, too (safer than resubmitting, probably, but not
+             * 100 % clear).
+             */
+            if (laiocb->type == QEMU_AIO_READ) {
+                qemu_iovec_memset(laiocb->qiov, laiocb->total_done, 0,
+                                  laiocb->qiov->size - laiocb->total_done);
            } else {
                ret = -ENOSPC;
            }
@@ -97,6 +140,9 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
    }

    laiocb->ret = ret;
+    if (laiocb->resubmit_qiov.iov) {
+        qemu_iovec_destroy(&laiocb->resubmit_qiov);
+    }

    /*
     * If the coroutine is already entered it must be in ioq_submit() and
@@ -367,23 +413,27 @@ static void laio_deferred_fn(void *opaque)
    }
 }

-static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, BdrvRequestFlags flags,
-                          uint64_t dev_max_batch)
+static int laio_do_submit(struct qemu_laiocb *laiocb)
 {
    LinuxAioState *s = laiocb->ctx;
    struct iocb *iocbs = &laiocb->iocb;
    QEMUIOVector *qiov = laiocb->qiov;
+    int fd = laiocb->fd;
+    off_t offset = laiocb->offset + laiocb->total_done;

-    switch (type) {
+    if (laiocb->resubmit_qiov.iov) {
+        qiov = &laiocb->resubmit_qiov;
+    }
+
+    switch (laiocb->type) {
    case QEMU_AIO_WRITE:
 #ifdef HAVE_IO_PREP_PWRITEV2
    {
-        int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        int laio_flags = (laiocb->flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
        io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
    }
 #else
-        assert(flags == 0);
+        assert(laiocb->flags == 0);
        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
 #endif
        break;
@@ -399,7 +449,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
    /* Currently Linux kernel does not support other operations */
    default:
        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
-                        __func__, type);
+                        __func__, laiocb->type);
        return -EIO;
    }
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@@ -407,7 +457,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
    s->io_q.in_queue++;
    if (!s->io_q.blocked) {
-        if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
+        if (s->io_q.in_queue >= laio_max_batch(s, laiocb->dev_max_batch)) {
            ioq_submit(s);
        } else {
            defer_call(laio_deferred_fn, s);
@@ -425,14 +475,18 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
    AioContext *ctx = qemu_get_current_aio_context();
    struct qemu_laiocb laiocb = {
        .co         = qemu_coroutine_self(),
+        .offset     = offset,
        .nbytes     = qiov ? qiov->size : 0,
        .ctx        = aio_get_linux_aio(ctx),
        .ret        = -EINPROGRESS,
-        .is_read    = (type == QEMU_AIO_READ),
        .qiov       = qiov,
+        .fd         = fd,
+        .type       = type,
+        .flags      = flags,
+        .dev_max_batch = dev_max_batch,
    };

-    ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
+    ret = laio_do_submit(&laiocb);
    if (ret < 0) {
        return ret;
    }
--- a/block/trace-events
+++ b/block/trace-events
@@ -64,7 +64,7 @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "
 # io_uring.c
 luring_cqe_handler(void *req, int ret) "req %p ret %d"
 luring_co_submit(void *bs, void *req, int fd, uint64_t offset, size_t nbytes, int type) "bs %p req %p fd %d offset %" PRId64 " nbytes %zd type %d"
-luring_resubmit_short_read(void *req, int nread) "req %p nread %d"
+luring_resubmit_short_io(void *req, int ndone) "req %p ndone %d"

 # qcow2.c
 qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"