mirror of
https://github.com/qemu/qemu.git
synced 2026-02-04 02:24:51 +00:00
linux-user: Add syscall dispatch support
This commit adds support for the `prctl(PR_SET_SYSCALL_USER_DISPATCH)` function in the Linux userspace emulator.

It is implemented as a fully host-independent function, by forcing a SIGSYS early during syscall handling, if the PC is outside the allowed range. Since disabled SUD is indistinguishable from enabled SUD with always-allowed region length == ~0, this encoding is used instead of introducing a new flag.

Tested on [uglendix][1], will probably also apply to software like tiny-wine, rpcsx, limbo, lazypoline, vicar, sysfail and endokernel, to name a few.

[1]: https://sr.ht/~arusekk/uglendix

Signed-off-by: Arusekk <floss@arusekk.pl>
Message-ID: <20250711225226.14652-1-floss@arusekk.pl>
[rth: Split out is_vdso_sigreturn region matching and other minor tweaks.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
committed by
Richard Henderson
parent
2c75137623
commit
c8e5aed246
@@ -228,6 +228,8 @@ void init_task_state(TaskState *ts)
|
||||
ts->start_boottime += bt.tv_nsec * (uint64_t) ticks_per_sec /
|
||||
NANOSECONDS_PER_SECOND;
|
||||
}
|
||||
|
||||
ts->sys_dispatch_len = -1;
|
||||
}
|
||||
|
||||
CPUArchState *cpu_copy(CPUArchState *env)
|
||||
|
||||
@@ -155,6 +155,11 @@ struct TaskState {
|
||||
/* This thread's sigaltstack, if it has one */
|
||||
struct target_sigaltstack sigaltstack_used;
|
||||
|
||||
/* This thread's SYSCALL_USER_DISPATCH state, len=~0 means disabled */
|
||||
vaddr sys_dispatch;
|
||||
vaddr sys_dispatch_selector;
|
||||
abi_ulong sys_dispatch_len;
|
||||
|
||||
/* Start time of task after system boot in clock ticks */
|
||||
uint64_t start_boottime;
|
||||
};
|
||||
|
||||
@@ -28,6 +28,11 @@ extern abi_ulong default_rt_sigreturn;
|
||||
extern abi_ulong vdso_sigreturn_region_start;
|
||||
extern abi_ulong vdso_sigreturn_region_end;
|
||||
|
||||
/*
 * Tell whether @pc falls inside the vDSO sigreturn region, i.e. the
 * half-open range [vdso_sigreturn_region_start, vdso_sigreturn_region_end).
 */
static inline bool is_vdso_sigreturn(abi_ulong pc)
{
    if (pc < vdso_sigreturn_region_start) {
        return false;
    }
    return pc < vdso_sigreturn_region_end;
}
|
||||
|
||||
void setup_sigtramp(abi_ulong tramp_page);
|
||||
|
||||
int on_sig_stack(unsigned long sp);
|
||||
|
||||
@@ -6344,6 +6344,10 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
|
||||
#endif
|
||||
#ifndef PR_SET_SYSCALL_USER_DISPATCH
|
||||
# define PR_SET_SYSCALL_USER_DISPATCH 59
|
||||
# define PR_SYS_DISPATCH_OFF 0
|
||||
# define PR_SYS_DISPATCH_ON 1
|
||||
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
|
||||
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
|
||||
#endif
|
||||
#ifndef PR_SME_SET_VL
|
||||
# define PR_SME_SET_VL 63
|
||||
@@ -6398,6 +6402,36 @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
|
||||
#define do_prctl_sme_set_vl do_prctl_inval1
|
||||
#endif
|
||||
|
||||
/*
 * Backend for prctl(PR_SET_SYSCALL_USER_DISPATCH, arg2, arg3, arg4, arg5).
 *
 * @arg2 selects the operation:
 *   PR_SYS_DISPATCH_OFF: @arg3, @arg4 and @arg5 must all be zero.  The
 *       task's sys_dispatch_len is set to -1.
 *   PR_SYS_DISPATCH_ON:  @arg3 is stored as sys_dispatch, @arg4 as
 *       sys_dispatch_len and @arg5 as sys_dispatch_selector.
 *
 * Returns 0 on success, -TARGET_EINVAL for an unknown operation or
 * rejected arguments, and -TARGET_EFAULT when a non-zero @arg5 fails the
 * one-byte VERIFY_READ access check.
 */
static abi_long do_prctl_syscall_user_dispatch(CPUArchState *env,
                                               abi_ulong arg2, abi_ulong arg3,
                                               abi_ulong arg4, abi_ulong arg5)
{
    CPUState *cs = env_cpu(env);
    TaskState *task = get_task_state(cs);

    if (arg2 == PR_SYS_DISPATCH_OFF) {
        /* Switching off accepts no further arguments. */
        if (arg3 != 0 || arg4 != 0 || arg5 != 0) {
            return -TARGET_EINVAL;
        }
        task->sys_dispatch_len = -1;
        return 0;
    }

    if (arg2 != PR_SYS_DISPATCH_ON) {
        return -TARGET_EINVAL;
    }

    /* With a non-zero start, reject an empty or wrapping region. */
    if (arg3 != 0 && arg3 + arg4 <= arg3) {
        return -TARGET_EINVAL;
    }

    /* The selector is optional; when given, one byte must be readable. */
    if (arg5 != 0 && !access_ok(cs, VERIFY_READ, arg5, 1)) {
        return -TARGET_EFAULT;
    }

    task->sys_dispatch = arg3;
    task->sys_dispatch_len = arg4;
    task->sys_dispatch_selector = arg5;
    return 0;
}
|
||||
|
||||
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
|
||||
abi_long arg3, abi_long arg4, abi_long arg5)
|
||||
{
|
||||
@@ -6473,6 +6507,9 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
|
||||
case PR_SET_UNALIGN:
|
||||
return do_prctl_set_unalign(env, arg2);
|
||||
|
||||
case PR_SET_SYSCALL_USER_DISPATCH:
|
||||
return do_prctl_syscall_user_dispatch(env, arg2, arg3, arg4, arg5);
|
||||
|
||||
case PR_CAP_AMBIENT:
|
||||
case PR_CAPBSET_READ:
|
||||
case PR_CAPBSET_DROP:
|
||||
@@ -6527,7 +6564,6 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
|
||||
case PR_SET_MM:
|
||||
case PR_GET_SECCOMP:
|
||||
case PR_SET_SECCOMP:
|
||||
case PR_SET_SYSCALL_USER_DISPATCH:
|
||||
case PR_GET_THP_DISABLE:
|
||||
case PR_SET_THP_DISABLE:
|
||||
case PR_GET_TSC:
|
||||
@@ -13897,12 +13933,46 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Decide whether the syscall being entered must be diverted to the guest
 * via a signal instead of being executed.
 *
 * Returns false when the syscall should proceed normally:
 *   - ts->sys_dispatch_len is -1;
 *   - the current PC lies within sys_dispatch_len bytes of ts->sys_dispatch;
 *   - the PC is inside the vDSO sigreturn region;
 *   - a selector is configured and its byte reads
 *     SYSCALL_DISPATCH_FILTER_ALLOW.
 *
 * Returns true after queueing a signal:
 *   - TARGET_SIGSEGV if the selector byte cannot be read;
 *   - plain TARGET_SIGSYS if the selector byte is neither ALLOW nor BLOCK;
 *   - TARGET_SIGSYS with si_code TARGET_SYS_USER_DISPATCH and the PC
 *     otherwise (selector says BLOCK, or no selector is configured).
 */
static bool sys_dispatch(CPUState *cpu, TaskState *ts)
{
    abi_ptr insn_pc;
    uint8_t selector;

    if (likely(ts->sys_dispatch_len == -1)) {
        return false;
    }

    insn_pc = cpu->cc->get_pc(cpu);

    /* The unsigned subtraction makes this a single range check. */
    if (likely(insn_pc - ts->sys_dispatch < ts->sys_dispatch_len) ||
        unlikely(is_vdso_sigreturn(insn_pc))) {
        return false;
    }

    if (likely(ts->sys_dispatch_selector)) {
        if (get_user_u8(selector, ts->sys_dispatch_selector)) {
            force_sig(TARGET_SIGSEGV);
            return true;
        }

        switch (selector) {
        case SYSCALL_DISPATCH_FILTER_ALLOW:
            return false;
        case SYSCALL_DISPATCH_FILTER_BLOCK:
            break;
        default:
            /* Any other selector value is treated as invalid. */
            force_sig(TARGET_SIGSYS);
            return true;
        }
    }

    force_sig_fault(TARGET_SIGSYS, TARGET_SYS_USER_DISPATCH, insn_pc);
    return true;
}
|
||||
|
||||
abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
abi_long arg2, abi_long arg3, abi_long arg4,
|
||||
abi_long arg5, abi_long arg6, abi_long arg7,
|
||||
abi_long arg8)
|
||||
{
|
||||
CPUState *cpu = env_cpu(cpu_env);
|
||||
TaskState *ts = get_task_state(cpu);
|
||||
abi_long ret;
|
||||
|
||||
#ifdef DEBUG_ERESTARTSYS
|
||||
@@ -13919,6 +13989,10 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (sys_dispatch(cpu, ts)) {
|
||||
return -QEMU_ESIGRETURN;
|
||||
}
|
||||
|
||||
record_syscall_start(cpu, num, arg1,
|
||||
arg2, arg3, arg4, arg5, arg6, arg7, arg8);
|
||||
|
||||
|
||||
@@ -689,6 +689,12 @@ typedef struct target_siginfo {
|
||||
#define TARGET_TRAP_HWBKPT (4) /* hardware breakpoint/watchpoint */
|
||||
#define TARGET_TRAP_UNK (5) /* undiagnosed trap */
|
||||
|
||||
/*
|
||||
* SIGSYS si_codes
|
||||
*/
|
||||
#define TARGET_SYS_SECCOMP (1) /* seccomp triggered */
|
||||
#define TARGET_SYS_USER_DISPATCH (2) /* syscall user dispatch triggered */
|
||||
|
||||
/*
|
||||
* SIGEMT si_codes
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user