mirror of
https://github.com/qemu/qemu.git
synced 2026-04-05 22:00:58 +00:00
linux-aio: Resubmit tails of short reads/writes
Short reads/writes can happen. One way to reproduce them is via our
FUSE export, with the diff below applied. The diff is escaped with an
"escaped " prefix because git-am will apply plain diffs that you put in
commit messages, and I would rather avoid breaking FUSE accidentally via
this patch; strip the prefix (%s/escaped //) to apply it:
escaped diff --git a/block/export/fuse.c b/block/export/fuse.c
escaped index a2a478d293..67dc50a412 100644
escaped --- a/block/export/fuse.c
escaped +++ b/block/export/fuse.c
@@ -828,7 +828,7 @@ static ssize_t coroutine_fn GRAPH_RDLOCK
fuse_co_init(FuseExport *exp, struct fuse_init_out *out,
const struct fuse_init_in_compat *in)
{
- const uint32_t supported_flags = FUSE_ASYNC_READ | FUSE_ASYNC_DIO;
+ const uint32_t supported_flags = FUSE_ASYNC_READ;
if (in->major != 7) {
error_report("FUSE major version mismatch: We have 7, but kernel has %"
@@ -1060,6 +1060,8 @@ fuse_co_read(FuseExport *exp, void **bufptr, uint64_t offset, uint32_t size)
void *buf;
int ret;
+ size = MIN(size, 4096);
+
/* Limited by max_read, should not happen */
if (size > FUSE_MAX_READ_BYTES) {
return -EINVAL;
@@ -1110,6 +1112,8 @@ fuse_co_write(FuseExport *exp, struct fuse_write_out *out,
int64_t blk_len;
int ret;
+ size = MIN(size, 4096);
+
QEMU_BUILD_BUG_ON(FUSE_MAX_WRITE_BYTES > BDRV_REQUEST_MAX_BYTES);
/* Limited by max_write, should not happen */
if (size > FUSE_MAX_WRITE_BYTES) {
Then:
$ ./qemu-img create -f raw test.raw 8k
Formatting 'test.raw', fmt=raw size=8192
$ ./qemu-io -f raw -c 'write -P 42 0 8k' test.raw
wrote 8192/8192 bytes at offset 0
8 KiB, 1 ops; 00.00 sec (64.804 MiB/sec and 8294.9003 ops/sec)
$ hexdump -C test.raw
00000000 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a |****************|
*
00002000
With aio=threads, short I/O works:
$ storage-daemon/qemu-storage-daemon \
--blockdev file,node-name=test,filename=test.raw \
--export fuse,id=exp,node-name=test,mountpoint=test.raw,writable=true
Other shell:
$ ./qemu-io --image-opts -c 'read -P 42 0 8k' \
driver=file,filename=test.raw,cache.direct=on,aio=threads
read 8192/8192 bytes at offset 0
8 KiB, 1 ops; 00.00 sec (36.563 MiB/sec and 4680.0923 ops/sec)
$ ./qemu-io --image-opts -c 'write -P 23 0 8k' \
driver=file,filename=test.raw,cache.direct=on,aio=threads
wrote 8192/8192 bytes at offset 0
8 KiB, 1 ops; 00.00 sec (35.995 MiB/sec and 4607.2970 ops/sec)
$ hexdump -C test.raw
00000000 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 |................|
*
00002000
But with aio=native, it does not:
$ ./qemu-io --image-opts -c 'read -P 23 0 8k' \
driver=file,filename=test.raw,cache.direct=on,aio=native
Pattern verification failed at offset 0, 8192 bytes
read 8192/8192 bytes at offset 0
8 KiB, 1 ops; 00.00 sec (86.155 MiB/sec and 11027.7900 ops/sec)
$ ./qemu-io --image-opts -c 'write -P 42 0 8k' \
driver=file,filename=test.raw,cache.direct=on,aio=native
write failed: No space left on device
$ hexdump -C test.raw
00000000 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a |****************|
*
00001000 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 |................|
*
00002000
This patch fixes that.
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-ID: <20260324084338.37453-3-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
committed by
Kevin Wolf
parent
cc03b62df4
commit
7eca3d4883
@@ -45,6 +45,10 @@ struct qemu_laiocb {
|
||||
size_t nbytes;
|
||||
QEMUIOVector *qiov;
|
||||
|
||||
/* For handling short reads/writes */
|
||||
size_t total_done;
|
||||
QEMUIOVector resubmit_qiov;
|
||||
|
||||
int fd;
|
||||
int type;
|
||||
BdrvRequestFlags flags;
|
||||
@@ -74,28 +78,61 @@ struct LinuxAioState {
|
||||
};
|
||||
|
||||
static void ioq_submit(LinuxAioState *s);
|
||||
static int laio_do_submit(struct qemu_laiocb *laiocb);
|
||||
|
||||
/*
 * Build the ssize_t completion value of @ev: ev->res2 is converted to
 * uint64_t and shifted left by 32 bits, OR-ed with ev->res, and the
 * combined value is cast to ssize_t.
 *
 * NOTE(review): the field types of struct io_event are not visible here;
 * confirm them before relying on how the two fields combine.
 */
static inline ssize_t io_event_ret(struct io_event *ev)
|
||||
{
|
||||
return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retry tail of short requests.
*
* @laiocb: request whose latest attempt completed only partially
* @done: byte count of that attempt; it is added to laiocb->total_done
*
* Rebuilds laiocb->resubmit_qiov from laiocb->qiov so that it describes
* the range from total_done up to nbytes, then passes the request to
* laio_do_submit() again.
*
* Returns: the return value of laio_do_submit(). The completion handler
* returns early when this is 0 and otherwise uses it as the request's
* result.
|
||||
*/
|
||||
static int laio_resubmit_short_io(struct qemu_laiocb *laiocb, size_t done)
|
||||
{
|
||||
QEMUIOVector *resubmit_qiov = &laiocb->resubmit_qiov;
|
||||
|
||||
/* Fold the bytes completed by the latest attempt into the running total. */
laiocb->total_done += done;
|
||||
|
||||
/*
 * A NULL iov serves as the "resubmit_qiov not set up yet" marker: set it
 * up on the first retry, reuse it on later ones.
 * NOTE(review): this relies on resubmit_qiov starting out zeroed when the
 * request is created, which is not visible here -- confirm.
 */
if (!resubmit_qiov->iov) {
|
||||
qemu_iovec_init(resubmit_qiov, laiocb->qiov->niov);
|
||||
} else {
|
||||
qemu_iovec_reset(resubmit_qiov);
|
||||
}
|
||||
/*
 * Add the not-yet-done part of the original vector: start at total_done,
 * length nbytes - total_done.
 * NOTE(review): argument meaning is inferred from this call; confirm
 * against the declaration of qemu_iovec_concat().
 */
qemu_iovec_concat(resubmit_qiov, laiocb->qiov,
|
||||
laiocb->total_done, laiocb->nbytes - laiocb->total_done);
|
||||
|
||||
/*
 * laio_do_submit() uses resubmit_qiov once its iov is set and starts at
 * laiocb->offset + laiocb->total_done.
 */
return laio_do_submit(laiocb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Completes an AIO request.
|
||||
*/
|
||||
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
|
||||
{
|
||||
int ret;
|
||||
ssize_t ret;
|
||||
|
||||
ret = laiocb->ret;
|
||||
if (ret != -ECANCELED) {
|
||||
if (ret == laiocb->nbytes) {
|
||||
if (ret == laiocb->nbytes - laiocb->total_done) {
|
||||
ret = 0;
|
||||
} else if (ret > 0 && (laiocb->type == QEMU_AIO_READ ||
|
||||
laiocb->type == QEMU_AIO_WRITE)) {
|
||||
ret = laio_resubmit_short_io(laiocb, ret);
|
||||
if (!ret) {
|
||||
return;
|
||||
}
|
||||
} else if (ret >= 0) {
|
||||
/* Short reads mean EOF, pad with zeros. */
|
||||
/*
|
||||
* For normal reads and writes, we only get here if ret == 0, which
|
||||
* means EOF for reads and ENOSPC for writes.
|
||||
* For zone-append, we get here with any ret >= 0, which we just
|
||||
* treat as ENOSPC, too (safer than resubmitting, probably, but not
|
||||
* 100 % clear).
|
||||
*/
|
||||
if (laiocb->type == QEMU_AIO_READ) {
|
||||
qemu_iovec_memset(laiocb->qiov, ret, 0,
|
||||
laiocb->qiov->size - ret);
|
||||
qemu_iovec_memset(laiocb->qiov, laiocb->total_done, 0,
|
||||
laiocb->qiov->size - laiocb->total_done);
|
||||
} else {
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
@@ -103,6 +140,9 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
|
||||
}
|
||||
|
||||
laiocb->ret = ret;
|
||||
if (laiocb->resubmit_qiov.iov) {
|
||||
qemu_iovec_destroy(&laiocb->resubmit_qiov);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the coroutine is already entered it must be in ioq_submit() and
|
||||
@@ -379,7 +419,11 @@ static int laio_do_submit(struct qemu_laiocb *laiocb)
|
||||
struct iocb *iocbs = &laiocb->iocb;
|
||||
QEMUIOVector *qiov = laiocb->qiov;
|
||||
int fd = laiocb->fd;
|
||||
off_t offset = laiocb->offset;
|
||||
off_t offset = laiocb->offset + laiocb->total_done;
|
||||
|
||||
if (laiocb->resubmit_qiov.iov) {
|
||||
qiov = &laiocb->resubmit_qiov;
|
||||
}
|
||||
|
||||
switch (laiocb->type) {
|
||||
case QEMU_AIO_WRITE:
|
||||
|
||||
Reference in New Issue
Block a user