mirror of
https://github.com/qemu/qemu.git
synced 2026-05-18 11:26:37 +00:00
accel/mshv: Remove remap overlapping mappings code
This change removes userland code that worked around a restriction
in the mshv driver in the 6.18 kernel: regions from userland
couldn't be mapped to multiple regions in the kernel. We maintained a
shadow mapping table in qemu and used a heuristic to swap in a requested
region in case of UNMAPPED_GPA exits.
However, this heuristic wasn't reliable in all cases, since HyperV
behaviour is not 100% consistent across versions. HyperV itself doesn't
prohibit mapping a region at multiple places in the guest, so the
restriction has been removed in the mshv driver.
Hence we can remove the remapping code. Effectively this will mandate a
6.19 kernel if the workload attempts to map e.g. the BIOS to multiple
regions. I still think it's the right call to remove this logic:
- The workaround only seems to work reliably with a certain revision
of HyperV as a nested hypervisor.
- We expect Direct Virtualization (L1VH) to be the main platform for
the mshv accelerator, which also requires a 6.19 kernel.
This reverts commit efc4093358.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
Acked-by: Wei Liu (Microsoft) <wei.liu@kernel.org>
Tested-by: Mohamed Mediouni <mohamed@unpredictable.fr>
Link: https://lore.kernel.org/r/20260113153708.448968-1-magnuskulke@linux.microsoft.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
committed by
Paolo Bonzini
parent
69345f948a
commit
626e5dc999
406
accel/mshv/mem.c
406
accel/mshv/mem.c
@@ -11,9 +11,7 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/lockable.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/rcu.h"
|
||||
#include "linux/mshv.h"
|
||||
#include "system/address-spaces.h"
|
||||
#include "system/mshv.h"
|
||||
@@ -22,137 +20,6 @@
|
||||
#include <sys/ioctl.h>
|
||||
#include "trace.h"
|
||||
|
||||
typedef struct SlotsRCUReclaim {
|
||||
struct rcu_head rcu;
|
||||
GList *old_head;
|
||||
MshvMemorySlot *removed_slot;
|
||||
} SlotsRCUReclaim;
|
||||
|
||||
static void rcu_reclaim_slotlist(struct rcu_head *rcu)
|
||||
{
|
||||
SlotsRCUReclaim *r = container_of(rcu, SlotsRCUReclaim, rcu);
|
||||
g_list_free(r->old_head);
|
||||
g_free(r->removed_slot);
|
||||
g_free(r);
|
||||
}
|
||||
|
||||
static void publish_slots(GList *new_head, GList *old_head,
|
||||
MshvMemorySlot *removed_slot)
|
||||
{
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
|
||||
assert(manager);
|
||||
qatomic_store_release(&manager->slots, new_head);
|
||||
|
||||
SlotsRCUReclaim *r = g_new(SlotsRCUReclaim, 1);
|
||||
r->old_head = old_head;
|
||||
r->removed_slot = removed_slot;
|
||||
|
||||
call_rcu1(&r->rcu, rcu_reclaim_slotlist);
|
||||
}
|
||||
|
||||
/* Needs to be called with mshv_state->msm.mutex held */
|
||||
static int remove_slot(MshvMemorySlot *slot)
|
||||
{
|
||||
GList *old_head, *new_head;
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
|
||||
assert(manager);
|
||||
old_head = qatomic_load_acquire(&manager->slots);
|
||||
|
||||
if (!g_list_find(old_head, slot)) {
|
||||
error_report("slot requested for removal not found");
|
||||
return -1;
|
||||
}
|
||||
|
||||
new_head = g_list_copy(old_head);
|
||||
new_head = g_list_remove(new_head, slot);
|
||||
manager->n_slots--;
|
||||
|
||||
publish_slots(new_head, old_head, slot);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Needs to be called with mshv_state->msm.mutex held */
|
||||
static MshvMemorySlot *append_slot(uint64_t gpa, uint64_t userspace_addr,
|
||||
uint64_t size, bool readonly)
|
||||
{
|
||||
GList *old_head, *new_head;
|
||||
MshvMemorySlot *slot;
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
|
||||
assert(manager);
|
||||
|
||||
old_head = qatomic_load_acquire(&manager->slots);
|
||||
|
||||
if (manager->n_slots >= MSHV_MAX_MEM_SLOTS) {
|
||||
error_report("no free memory slots available");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
slot = g_new0(MshvMemorySlot, 1);
|
||||
slot->guest_phys_addr = gpa;
|
||||
slot->userspace_addr = userspace_addr;
|
||||
slot->memory_size = size;
|
||||
slot->readonly = readonly;
|
||||
|
||||
new_head = g_list_copy(old_head);
|
||||
new_head = g_list_append(new_head, slot);
|
||||
manager->n_slots++;
|
||||
|
||||
publish_slots(new_head, old_head, NULL);
|
||||
|
||||
return slot;
|
||||
}
|
||||
|
||||
static int slot_overlaps(const MshvMemorySlot *slot1,
|
||||
const MshvMemorySlot *slot2)
|
||||
{
|
||||
uint64_t start_1 = slot1->userspace_addr,
|
||||
start_2 = slot2->userspace_addr;
|
||||
size_t len_1 = slot1->memory_size,
|
||||
len_2 = slot2->memory_size;
|
||||
|
||||
if (slot1 == slot2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ranges_overlap(start_1, len_1, start_2, len_2) ? 0 : -1;
|
||||
}
|
||||
|
||||
static bool is_mapped(MshvMemorySlot *slot)
|
||||
{
|
||||
/* Subsequent reads of mapped field see a fully-initialized slot */
|
||||
return qatomic_load_acquire(&slot->mapped);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find slot that is:
|
||||
* - overlapping in userspace
|
||||
* - currently mapped in the guest
|
||||
*
|
||||
* Needs to be called with mshv_state->msm.mutex or RCU read lock held.
|
||||
*/
|
||||
static MshvMemorySlot *find_overlap_mem_slot(GList *head, MshvMemorySlot *slot)
|
||||
{
|
||||
GList *found;
|
||||
MshvMemorySlot *overlap_slot;
|
||||
|
||||
found = g_list_find_custom(head, slot, (GCompareFunc) slot_overlaps);
|
||||
|
||||
if (!found) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
overlap_slot = found->data;
|
||||
if (!overlap_slot || !is_mapped(overlap_slot)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return overlap_slot;
|
||||
}
|
||||
|
||||
static int set_guest_memory(int vm_fd,
|
||||
const struct mshv_user_mem_region *region)
|
||||
{
|
||||
@@ -160,169 +27,38 @@ static int set_guest_memory(int vm_fd,
|
||||
|
||||
ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
|
||||
if (ret < 0) {
|
||||
error_report("failed to set guest memory: %s", strerror(errno));
|
||||
return -1;
|
||||
error_report("failed to set guest memory");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int map_or_unmap(int vm_fd, const MshvMemorySlot *slot, bool map)
|
||||
static int map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, bool map)
|
||||
{
|
||||
struct mshv_user_mem_region region = {0};
|
||||
|
||||
region.guest_pfn = slot->guest_phys_addr >> MSHV_PAGE_SHIFT;
|
||||
region.size = slot->memory_size;
|
||||
region.userspace_addr = slot->userspace_addr;
|
||||
region.guest_pfn = mr->guest_phys_addr >> MSHV_PAGE_SHIFT;
|
||||
region.size = mr->memory_size;
|
||||
region.userspace_addr = mr->userspace_addr;
|
||||
|
||||
if (!map) {
|
||||
region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP);
|
||||
trace_mshv_unmap_memory(slot->userspace_addr, slot->guest_phys_addr,
|
||||
slot->memory_size);
|
||||
trace_mshv_unmap_memory(mr->userspace_addr, mr->guest_phys_addr,
|
||||
mr->memory_size);
|
||||
return set_guest_memory(vm_fd, ®ion);
|
||||
}
|
||||
|
||||
region.flags = BIT(MSHV_SET_MEM_BIT_EXECUTABLE);
|
||||
if (!slot->readonly) {
|
||||
if (!mr->readonly) {
|
||||
region.flags |= BIT(MSHV_SET_MEM_BIT_WRITABLE);
|
||||
}
|
||||
|
||||
trace_mshv_map_memory(slot->userspace_addr, slot->guest_phys_addr,
|
||||
slot->memory_size);
|
||||
trace_mshv_map_memory(mr->userspace_addr, mr->guest_phys_addr,
|
||||
mr->memory_size);
|
||||
return set_guest_memory(vm_fd, ®ion);
|
||||
}
|
||||
|
||||
static int slot_matches_region(const MshvMemorySlot *slot1,
|
||||
const MshvMemorySlot *slot2)
|
||||
{
|
||||
return (slot1->guest_phys_addr == slot2->guest_phys_addr &&
|
||||
slot1->userspace_addr == slot2->userspace_addr &&
|
||||
slot1->memory_size == slot2->memory_size) ? 0 : -1;
|
||||
}
|
||||
|
||||
/* Needs to be called with mshv_state->msm.mutex held */
|
||||
static MshvMemorySlot *find_mem_slot_by_region(uint64_t gpa, uint64_t size,
|
||||
uint64_t userspace_addr)
|
||||
{
|
||||
MshvMemorySlot ref_slot = {
|
||||
.guest_phys_addr = gpa,
|
||||
.userspace_addr = userspace_addr,
|
||||
.memory_size = size,
|
||||
};
|
||||
GList *found;
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
|
||||
assert(manager);
|
||||
found = g_list_find_custom(manager->slots, &ref_slot,
|
||||
(GCompareFunc) slot_matches_region);
|
||||
|
||||
return found ? found->data : NULL;
|
||||
}
|
||||
|
||||
static int slot_covers_gpa(const MshvMemorySlot *slot, uint64_t *gpa_p)
|
||||
{
|
||||
uint64_t gpa_offset, gpa = *gpa_p;
|
||||
|
||||
gpa_offset = gpa - slot->guest_phys_addr;
|
||||
return (slot->guest_phys_addr <= gpa && gpa_offset < slot->memory_size)
|
||||
? 0 : -1;
|
||||
}
|
||||
|
||||
/* Needs to be called with mshv_state->msm.mutex or RCU read lock held */
|
||||
static MshvMemorySlot *find_mem_slot_by_gpa(GList *head, uint64_t gpa)
|
||||
{
|
||||
GList *found;
|
||||
MshvMemorySlot *slot;
|
||||
|
||||
trace_mshv_find_slot_by_gpa(gpa);
|
||||
|
||||
found = g_list_find_custom(head, &gpa, (GCompareFunc) slot_covers_gpa);
|
||||
if (found) {
|
||||
slot = found->data;
|
||||
trace_mshv_found_slot(slot->userspace_addr, slot->guest_phys_addr,
|
||||
slot->memory_size);
|
||||
return slot;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Needs to be called with mshv_state->msm.mutex held */
|
||||
static void set_mapped(MshvMemorySlot *slot, bool mapped)
|
||||
{
|
||||
/* prior writes to mapped field becomes visible before readers see slot */
|
||||
qatomic_store_release(&slot->mapped, mapped);
|
||||
}
|
||||
|
||||
MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa)
|
||||
{
|
||||
MshvMemorySlot *gpa_slot, *overlap_slot;
|
||||
GList *head;
|
||||
int ret;
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
|
||||
/* fast path, called often by unmapped_gpa vm exit */
|
||||
WITH_RCU_READ_LOCK_GUARD() {
|
||||
assert(manager);
|
||||
head = qatomic_load_acquire(&manager->slots);
|
||||
/* return early if no slot is found */
|
||||
gpa_slot = find_mem_slot_by_gpa(head, gpa);
|
||||
if (gpa_slot == NULL) {
|
||||
return MshvRemapNoMapping;
|
||||
}
|
||||
|
||||
/* return early if no overlapping slot is found */
|
||||
overlap_slot = find_overlap_mem_slot(head, gpa_slot);
|
||||
if (overlap_slot == NULL) {
|
||||
return MshvRemapNoOverlap;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We'll modify the mapping list, so we need to upgrade to mutex and
|
||||
* recheck.
|
||||
*/
|
||||
assert(manager);
|
||||
QEMU_LOCK_GUARD(&manager->mutex);
|
||||
|
||||
/* return early if no slot is found */
|
||||
gpa_slot = find_mem_slot_by_gpa(manager->slots, gpa);
|
||||
if (gpa_slot == NULL) {
|
||||
return MshvRemapNoMapping;
|
||||
}
|
||||
|
||||
/* return early if no overlapping slot is found */
|
||||
overlap_slot = find_overlap_mem_slot(manager->slots, gpa_slot);
|
||||
if (overlap_slot == NULL) {
|
||||
return MshvRemapNoOverlap;
|
||||
}
|
||||
|
||||
/* unmap overlapping slot */
|
||||
ret = map_or_unmap(vm_fd, overlap_slot, false);
|
||||
if (ret < 0) {
|
||||
error_report("failed to unmap overlap region");
|
||||
abort();
|
||||
}
|
||||
set_mapped(overlap_slot, false);
|
||||
warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
|
||||
overlap_slot->userspace_addr,
|
||||
overlap_slot->guest_phys_addr,
|
||||
overlap_slot->memory_size);
|
||||
|
||||
/* map region for gpa */
|
||||
ret = map_or_unmap(vm_fd, gpa_slot, true);
|
||||
if (ret < 0) {
|
||||
error_report("failed to map new region");
|
||||
abort();
|
||||
}
|
||||
set_mapped(gpa_slot, true);
|
||||
warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
|
||||
gpa_slot->userspace_addr, gpa_slot->guest_phys_addr,
|
||||
gpa_slot->memory_size);
|
||||
|
||||
return MshvRemapOk;
|
||||
}
|
||||
|
||||
static int handle_unmapped_mmio_region_read(uint64_t gpa, uint64_t size,
|
||||
uint8_t *data)
|
||||
{
|
||||
@@ -388,97 +124,20 @@ int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int tracked_unmap(int vm_fd, uint64_t gpa, uint64_t size,
|
||||
uint64_t userspace_addr)
|
||||
static int set_memory(const MshvMemoryRegion *mshv_mr, bool add)
|
||||
{
|
||||
int ret;
|
||||
MshvMemorySlot *slot;
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
int ret = 0;
|
||||
|
||||
assert(manager);
|
||||
|
||||
QEMU_LOCK_GUARD(&manager->mutex);
|
||||
|
||||
slot = find_mem_slot_by_region(gpa, size, userspace_addr);
|
||||
if (!slot) {
|
||||
trace_mshv_skip_unset_mem(userspace_addr, gpa, size);
|
||||
/* no work to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!is_mapped(slot)) {
|
||||
/* remove slot, no need to unmap */
|
||||
return remove_slot(slot);
|
||||
}
|
||||
|
||||
ret = map_or_unmap(vm_fd, slot, false);
|
||||
if (ret < 0) {
|
||||
error_report("failed to unmap memory region");
|
||||
return ret;
|
||||
}
|
||||
return remove_slot(slot);
|
||||
}
|
||||
|
||||
static int tracked_map(int vm_fd, uint64_t gpa, uint64_t size, bool readonly,
|
||||
uint64_t userspace_addr)
|
||||
{
|
||||
MshvMemorySlot *slot, *overlap_slot;
|
||||
int ret;
|
||||
MshvMemorySlotManager *manager = &mshv_state->msm;
|
||||
|
||||
assert(manager);
|
||||
|
||||
QEMU_LOCK_GUARD(&manager->mutex);
|
||||
|
||||
slot = find_mem_slot_by_region(gpa, size, userspace_addr);
|
||||
if (slot) {
|
||||
error_report("memory region already mapped at gpa=0x%lx, "
|
||||
"userspace_addr=0x%lx, size=0x%lx",
|
||||
slot->guest_phys_addr, slot->userspace_addr,
|
||||
slot->memory_size);
|
||||
if (!mshv_mr) {
|
||||
error_report("Invalid mshv_mr");
|
||||
return -1;
|
||||
}
|
||||
|
||||
slot = append_slot(gpa, userspace_addr, size, readonly);
|
||||
|
||||
overlap_slot = find_overlap_mem_slot(manager->slots, slot);
|
||||
if (overlap_slot) {
|
||||
trace_mshv_remap_attempt(slot->userspace_addr,
|
||||
slot->guest_phys_addr,
|
||||
slot->memory_size);
|
||||
warn_report("attempt to map region [0x%lx-0x%lx], while "
|
||||
"[0x%lx-0x%lx] is already mapped in the guest",
|
||||
userspace_addr, userspace_addr + size - 1,
|
||||
overlap_slot->userspace_addr,
|
||||
overlap_slot->userspace_addr +
|
||||
overlap_slot->memory_size - 1);
|
||||
|
||||
/* do not register mem slot in hv, but record for later swap-in */
|
||||
set_mapped(slot, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = map_or_unmap(vm_fd, slot, true);
|
||||
if (ret < 0) {
|
||||
error_report("failed to map memory region");
|
||||
return -1;
|
||||
}
|
||||
set_mapped(slot, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_memory(uint64_t gpa, uint64_t size, bool readonly,
|
||||
uint64_t userspace_addr, bool add)
|
||||
{
|
||||
int vm_fd = mshv_state->vm;
|
||||
|
||||
if (add) {
|
||||
return tracked_map(vm_fd, gpa, size, readonly, userspace_addr);
|
||||
}
|
||||
|
||||
return tracked_unmap(vm_fd, gpa, size, userspace_addr);
|
||||
trace_mshv_set_memory(add, mshv_mr->guest_phys_addr,
|
||||
mshv_mr->memory_size,
|
||||
mshv_mr->userspace_addr, mshv_mr->readonly,
|
||||
ret);
|
||||
return map_or_unmap(mshv_state->vm, mshv_mr, add);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -514,9 +173,7 @@ void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
|
||||
bool writable = !area->readonly && !area->rom_device;
|
||||
hwaddr start_addr, mr_offset, size;
|
||||
void *ram;
|
||||
|
||||
size = align_section(section, &start_addr);
|
||||
trace_mshv_set_phys_mem(add, section->mr->name, start_addr);
|
||||
MshvMemoryRegion mshv_mr = {0};
|
||||
|
||||
size = align_section(section, &start_addr);
|
||||
trace_mshv_set_phys_mem(add, section->mr->name, start_addr);
|
||||
@@ -543,21 +200,14 @@ void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
|
||||
|
||||
ram = memory_region_get_ram_ptr(area) + mr_offset;
|
||||
|
||||
ret = set_memory(start_addr, size, !writable, (uint64_t)ram, add);
|
||||
mshv_mr.guest_phys_addr = start_addr;
|
||||
mshv_mr.memory_size = size;
|
||||
mshv_mr.readonly = !writable;
|
||||
mshv_mr.userspace_addr = (uint64_t)ram;
|
||||
|
||||
ret = set_memory(&mshv_mr, add);
|
||||
if (ret < 0) {
|
||||
error_report("failed to set memory region");
|
||||
error_report("Failed to set memory region");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void mshv_init_memory_slot_manager(MshvState *mshv_state)
|
||||
{
|
||||
MshvMemorySlotManager *manager;
|
||||
|
||||
assert(mshv_state);
|
||||
manager = &mshv_state->msm;
|
||||
|
||||
manager->n_slots = 0;
|
||||
manager->slots = NULL;
|
||||
qemu_mutex_init(&manager->mutex);
|
||||
}
|
||||
|
||||
@@ -437,8 +437,6 @@ static int mshv_init(AccelState *as, MachineState *ms)
|
||||
|
||||
mshv_init_msicontrol();
|
||||
|
||||
mshv_init_memory_slot_manager(s);
|
||||
|
||||
ret = create_vm(mshv_fd, &vm_fd);
|
||||
if (ret < 0) {
|
||||
close(mshv_fd);
|
||||
|
||||
@@ -26,8 +26,3 @@ mshv_map_memory(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%
|
||||
mshv_unmap_memory(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
|
||||
mshv_set_phys_mem(bool add, const char *name, uint64_t gpa) "\tadd=%d name=%s gpa=0x%010" PRIx64
|
||||
mshv_handle_mmio(uint64_t gva, uint64_t gpa, uint64_t size, uint8_t access_type) "\tgva=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%" PRIx64 " access_type=%d"
|
||||
|
||||
mshv_found_slot(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
|
||||
mshv_skip_unset_mem(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
|
||||
mshv_remap_attempt(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
|
||||
mshv_find_slot_by_gpa(uint64_t gpa) "\tgpa=0x%010" PRIx64
|
||||
|
||||
Reference in New Issue
Block a user