block: add bdrv_graph_wrlock_drained() convenience wrapper

Many write-locked sections are also drained sections. A new
bdrv_graph_wrlock_drained() wrapper around bdrv_graph_wrlock() is
introduced, which will begin a drained section first. A global
variable is used so bdrv_graph_wrunlock() knows if it also needs
to end such a drained section. Both the aio_poll() call in
bdrv_graph_wrlock() and the aio_bh_poll() in bdrv_graph_wrunlock()
can re-enter a write-locked section. While for the latter, ending the
drain could be moved to before the call, the former requires that the
variable is a counter and not just a boolean.

Since the wrapper calls bdrv_drain_all_begin(), which must be called
with the graph unlocked, mark the wrapper as GRAPH_UNLOCKED too.

The switch to the new helpers was generated with the following
commands and then manually checked:
find . -name '*.c' -exec sed -i -z 's/bdrv_drain_all_begin();\n\s*bdrv_graph_wrlock();/bdrv_graph_wrlock_drained();/g' {} ';'
find . -name '*.c' -exec sed -i -z 's/bdrv_graph_wrunlock();\n\s*bdrv_drain_all_end();/bdrv_graph_wrunlock();/g' {} ';'

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Message-ID: <20250530151125.955508-25-f.ebner@proxmox.com>
[kwolf: Removed redundant GRAPH_UNLOCKED]
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Fiona Ebner
2025-05-30 17:11:01 +02:00
committed by Kevin Wolf
parent 502f00c51a
commit 6b89e851fa
19 changed files with 90 additions and 141 deletions

View File

@@ -498,12 +498,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_copy_set_speed(bcs, speed);
/* Required permissions are taken by copy-before-write filter target */
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
return &job->common;

View File

@@ -281,11 +281,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
ret = 0;
fail_log:
if (ret < 0) {
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->log_file);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
s->log_file = NULL;
qemu_mutex_destroy(&s->mutex);
}
@@ -298,12 +296,10 @@ static void blk_log_writes_close(BlockDriverState *bs)
{
BDRVBlkLogWritesState *s = bs->opaque;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->log_file);
s->log_file = NULL;
bdrv_graph_wrunlock();
bdrv_drain_all_end();
qemu_mutex_destroy(&s->mutex);
}

View File

@@ -151,12 +151,10 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
bdrv_graph_wrunlock();
bdrv_drain_all_end();
}
static int64_t coroutine_fn GRAPH_RDLOCK

View File

@@ -889,11 +889,9 @@ void blk_remove_bs(BlockBackend *blk)
root = blk->root;
blk->root = NULL;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_root_unref_child(root);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
}
/*
@@ -906,8 +904,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
GLOBAL_STATE_CODE();
bdrv_ref(bs);
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) {
blk->disable_perm = true;
@@ -922,7 +919,6 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
perm, shared_perm, blk, errp);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
if (blk->root == NULL) {
return -EPERM;
}

View File

@@ -392,8 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
* this is the responsibility of the interface (i.e. whoever calls
* commit_start()).
*/
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
s->base_overlay = bdrv_find_overlay(top, base);
assert(s->base_overlay);
@@ -425,21 +424,18 @@ void commit_start(const char *job_id, BlockDriverState *bs,
iter_shared_perms, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
s->chain_frozen = true;
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
if (ret < 0) {
goto fail;

View File

@@ -33,6 +33,17 @@ static QemuMutex aio_context_list_lock;
/* Written and read with atomic operations. */
static int has_writer;
/*
* Many write-locked sections are also drained sections. There is a convenience
* wrapper bdrv_graph_wrlock_drained() which begins a drained section before
* acquiring the lock. This variable here is used so bdrv_graph_wrunlock() knows
* if it also needs to end such a drained section. It needs to be a counter,
* because the aio_poll() call in bdrv_graph_wrlock() might re-enter
* bdrv_graph_wrlock_drained(). And note that aio_bh_poll() in
* bdrv_graph_wrunlock() might also re-enter a write-locked section.
*/
static int wrlock_quiesced_counter;
/*
* A reader coroutine could move from an AioContext to another.
* If this happens, there is no problem from the point of view of
@@ -112,8 +123,14 @@ void no_coroutine_fn bdrv_graph_wrlock(void)
assert(!qatomic_read(&has_writer));
assert(!qemu_in_coroutine());
/* Make sure that constantly arriving new I/O doesn't cause starvation */
bdrv_drain_all_begin_nopoll();
bool need_drain = wrlock_quiesced_counter == 0;
if (need_drain) {
/*
* Make sure that constantly arriving new I/O doesn't cause starvation
*/
bdrv_drain_all_begin_nopoll();
}
/*
* reader_count == 0: this means writer will read has_reader as 1
@@ -139,7 +156,18 @@ void no_coroutine_fn bdrv_graph_wrlock(void)
smp_mb();
} while (reader_count() >= 1);
bdrv_drain_all_end();
if (need_drain) {
bdrv_drain_all_end();
}
}
/*
 * Convenience wrapper around bdrv_graph_wrlock(): begins a drained section
 * with bdrv_drain_all_begin(), records that in wrlock_quiesced_counter and
 * then takes the graph write lock.
 *
 * The matching bdrv_graph_wrunlock() ends the drained section again when it
 * sees a non-zero wrlock_quiesced_counter.
 */
void no_coroutine_fn bdrv_graph_wrlock_drained(void)
{
GLOBAL_STATE_CODE();
bdrv_drain_all_begin();
/*
 * A non-zero counter makes bdrv_graph_wrlock() skip its own
 * bdrv_drain_all_begin_nopoll()/bdrv_drain_all_end() pair and tells
 * bdrv_graph_wrunlock() to call bdrv_drain_all_end().
 */
wrlock_quiesced_counter++;
bdrv_graph_wrlock();
}
void no_coroutine_fn bdrv_graph_wrunlock(void)
@@ -168,6 +196,12 @@ void no_coroutine_fn bdrv_graph_wrunlock(void)
* progress.
*/
aio_bh_poll(qemu_get_aio_context());
if (wrlock_quiesced_counter > 0) {
bdrv_drain_all_end();
wrlock_quiesced_counter--;
}
}
void coroutine_fn bdrv_graph_co_rdlock(void)

View File

@@ -2014,15 +2014,13 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
BLK_PERM_CONSISTENT_READ,
errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
@@ -2068,19 +2066,16 @@ static BlockJob *mirror_start_job(
iter_shared_perms, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
QTAILQ_INIT(&s->ops_in_flight);

View File

@@ -2823,11 +2823,9 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file)
if (close_data_file && has_data_file(bs)) {
GLOBAL_STATE_CODE();
bdrv_graph_rdunlock_main_loop();
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->data_file);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
s->data_file = NULL;
bdrv_graph_rdlock_main_loop();
}

View File

@@ -1037,8 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
close_exit:
/* cleanup on error */
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
for (i = 0; i < s->num_children; i++) {
if (!opened[i]) {
continue;
@@ -1046,7 +1045,6 @@ close_exit:
bdrv_unref_child(bs, s->children[i]);
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
g_free(s->children);
g_free(opened);
exit:
@@ -1059,13 +1057,11 @@ static void quorum_close(BlockDriverState *bs)
BDRVQuorumState *s = bs->opaque;
int i;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
for (i = 0; i < s->num_children; i++) {
bdrv_unref_child(bs, s->children[i]);
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
g_free(s->children);
}

View File

@@ -540,8 +540,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
return;
}
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_ref(hidden_disk->bs);
s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
@@ -550,7 +549,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
if (local_err) {
error_propagate(errp, local_err);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
return;
}
@@ -561,7 +559,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
if (local_err) {
error_propagate(errp, local_err);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
return;
}
@@ -574,14 +571,12 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
!check_top_bs(top_bs, bs)) {
error_setg(errp, "No top_bs or it is invalid");
bdrv_graph_wrunlock();
bdrv_drain_all_end();
reopen_backing_file(bs, false, NULL);
return;
}
bdrv_op_block_all(top_bs, s->blocker);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
s->backup_job = backup_job_create(
NULL, s->secondary_disk->bs, s->hidden_disk->bs,
@@ -656,14 +651,12 @@ static void replication_done(void *opaque, int ret)
if (ret == 0) {
s->stage = BLOCK_REPLICATION_DONE;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->secondary_disk);
s->secondary_disk = NULL;
bdrv_unref_child(bs, s->hidden_disk);
s->hidden_disk = NULL;
bdrv_graph_wrunlock();
bdrv_drain_all_end();
s->error = 0;
} else {

View File

@@ -291,11 +291,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
}
/* .bdrv_open() will re-attach it */
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, fallback);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
memset(bs->opaque, 0, drv->instance_size);

View File

@@ -371,12 +371,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached.
*/
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
if (block_job_add_bdrv(&s->common, "active node", bs, 0,
basic_flags | BLK_PERM_WRITE, errp)) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
@@ -397,12 +395,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
basic_flags, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
bdrv_drain_all_end();
goto fail;
}
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
s->base_overlay = base_overlay;
s->above_base = above_base;

View File

@@ -271,8 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
BDRVVmdkState *s = bs->opaque;
VmdkExtent *e;
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
for (i = 0; i < s->num_extents; i++) {
e = &s->extents[i];
g_free(e->l1_table);
@@ -284,7 +283,6 @@ static void vmdk_free_extents(BlockDriverState *bs)
}
}
bdrv_graph_wrunlock();
bdrv_drain_all_end();
g_free(s->extents);
}
@@ -1249,11 +1247,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
bdrv_graph_rdunlock_main_loop();
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
goto out;
}
@@ -1270,11 +1266,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
g_free(buf);
if (ret) {
bdrv_graph_rdunlock_main_loop();
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
goto out;
}
@@ -1283,11 +1277,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
if (ret) {
bdrv_graph_rdunlock_main_loop();
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
goto out;
}
@@ -1295,11 +1287,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
bdrv_graph_rdunlock_main_loop();
bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
ret = -ENOTSUP;
goto out;