mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-28 19:06:51 +01:00
This was done entirely with mindless brute force, using
git grep -l '\<k[vmz]*alloc_objs*(.*, GFP_KERNEL)' |
xargs sed -i 's/\(alloc_objs*(.*\), GFP_KERNEL)/\1)/'
to convert the new alloc_obj() users that had a simple GFP_KERNEL
argument to just drop that argument.
Note that due to the extreme simplicity of the scripting, any slightly
more complex cases spread over multiple lines would not be triggered:
they definitely exist, but this covers the vast bulk of the cases, and
the resulting diff is also then easier to check automatically.
For the same reason the 'flex' versions will be done as a separate
conversion.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
519 lines
13 KiB
C
519 lines
13 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include <linux/syscalls.h>
|
|
#include <linux/time_namespace.h>
|
|
|
|
#include "futex.h"
|
|
|
|
/*
|
|
* Support for robust futexes: the kernel cleans up held futexes at
|
|
* thread exit time.
|
|
*
|
|
* Implementation: user-space maintains a per-thread list of locks it
|
|
* is holding. Upon do_exit(), the kernel carefully walks this list,
|
|
* and marks all locks that are owned by this thread with the
|
|
* FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
|
|
* always manipulated with the lock held, so the list is private and
|
|
* per-thread. Userspace also maintains a per-thread 'list_op_pending'
|
|
* field, to allow the kernel to clean up if the thread dies after
|
|
* acquiring the lock, but just before it could have added itself to
|
|
* the list. There can only be one such pending lock.
|
|
*/
|
|
|
|
/**
|
|
* sys_set_robust_list() - Set the robust-futex list head of a task
|
|
* @head: pointer to the list-head
|
|
* @len: length of the list-head, as userspace expects
|
|
*/
|
|
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	int ret = -EINVAL;

	/*
	 * Exactly one list-head size is known to the kernel; the pointer is
	 * only recorded for the calling task when @len matches it.
	 */
	if (!unlikely(len != sizeof(*head))) {
		current->robust_list = head;
		ret = 0;
	}

	return ret;
}
|
|
|
|
/*
 * Return the robust-list head pointer stored in @p: the compat one when
 * @compat is set and CONFIG_COMPAT is enabled, the native one otherwise.
 */
static inline void __user *futex_task_robust_list(struct task_struct *p, bool compat)
{
	void __user *head;

#ifdef CONFIG_COMPAT
	if (compat)
		head = p->compat_robust_list;
	else
#endif
		head = p->robust_list;

	return head;
}
|
|
|
|
/*
 * Look up the task for @pid (the calling task when @pid is zero) and return
 * its native or compat robust-list head pointer. Failures are reported as an
 * ERR_PTR(): -ESRCH when no task is found, the down_read_killable() error, or
 * -EPERM when ptrace_may_access() denies access.
 */
static void __user *futex_get_robust_list_common(int pid, bool compat)
{
	struct task_struct *task = current;
	void __user *result;
	int err;

	/* The lookup runs under RCU; a task reference is taken before leaving it. */
	scoped_guard(rcu) {
		if (pid) {
			task = find_task_by_vpid(pid);
			if (!task)
				return (void __user *)ERR_PTR(-ESRCH);
		}
		get_task_struct(task);
	}

	/*
	 * exec_update_lock serializes against a concurrent exec(), so that
	 * ptrace_may_access() is checked against stable credentials.
	 */
	err = down_read_killable(&task->signal->exec_update_lock);
	if (err) {
		result = (void __user *)ERR_PTR(err);
		goto out_put;
	}

	if (ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
		result = futex_task_robust_list(task, compat);
	else
		result = (void __user *)ERR_PTR(-EPERM);

	up_read(&task->signal->exec_update_lock);
out_put:
	put_task_struct(task);
	return result;
}
|
|
|
|
/**
|
|
* sys_get_robust_list() - Get the robust-futex list head of a task
|
|
* @pid: pid of the process [zero for current task]
|
|
* @head_ptr: pointer to a list-head pointer, the kernel fills it in
|
|
* @len_ptr: pointer to a length field, the kernel fills in the header size
|
|
*/
|
|
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	long ret = -EFAULT;

	head = futex_get_robust_list_common(pid, false);
	if (IS_ERR(head))
		return PTR_ERR(head);

	/* The header size is written first; the head pointer only if that worked. */
	if (!put_user(sizeof(*head), len_ptr))
		ret = put_user(head, head_ptr);

	return ret;
}
|
|
|
|
/*
 * Dispatch a futex operation to its handler.
 *
 * Returns the handler's result, or -ENOSYS when the command is unknown or
 * when FLAGS_CLOCKRT is set for a command that does not accept it.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = futex_to_flags(op);

	/* FLAGS_CLOCKRT is only accepted together with these three commands. */
	if ((flags & FLAGS_CLOCKRT) &&
	    cmd != FUTEX_WAIT_BITSET &&
	    cmd != FUTEX_WAIT_REQUEUE_PI &&
	    cmd != FUTEX_LOCK_PI2)
		return -ENOSYS;

	switch (cmd) {
	case FUTEX_WAIT:
		/* Plain wait: same as the bitset variant with every bit set. */
		return futex_wait(uaddr, flags, val, timeout,
				  FUTEX_BITSET_MATCH_ANY);
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		/* Plain wake: same as the bitset variant with every bit set. */
		return futex_wake(uaddr, flags, val, FUTEX_BITSET_MATCH_ANY);
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		/* FUTEX_LOCK_PI always runs with FLAGS_CLOCKRT set. */
		return futex_lock_pi(uaddr, flags | FLAGS_CLOCKRT, timeout, 0);
	case FUTEX_LOCK_PI2:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		/* The caller's val3 is ignored here; every bit is matched. */
		return futex_wait_requeue_pi(uaddr, flags, val, timeout,
					     FUTEX_BITSET_MATCH_ANY, uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1);
	default:
		break;
	}

	return -ENOSYS;
}
|
|
|
|
/* True for the futex commands whose utime argument is parsed as a timeout. */
static __always_inline bool futex_cmd_has_timeout(u32 cmd)
{
	return cmd == FUTEX_WAIT ||
	       cmd == FUTEX_LOCK_PI ||
	       cmd == FUTEX_LOCK_PI2 ||
	       cmd == FUTEX_WAIT_BITSET ||
	       cmd == FUTEX_WAIT_REQUEUE_PI;
}
|
|
|
|
static __always_inline int
|
|
futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
|
|
{
|
|
if (!timespec64_valid(ts))
|
|
return -EINVAL;
|
|
|
|
*t = timespec64_to_ktime(*ts);
|
|
if (cmd == FUTEX_WAIT)
|
|
*t = ktime_add_safe(ktime_get(), *t);
|
|
else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
|
|
*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
|
|
return 0;
|
|
}
|
|
|
|
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		const struct __kernel_timespec __user *, utime,
		u32 __user *, uaddr2, u32, val3)
{
	const int cmd = op & FUTEX_CMD_MASK;
	struct timespec64 ts;
	ktime_t *tp = NULL;
	ktime_t t;

	/* utime is only parsed as a timeout for the commands that take one. */
	if (utime && futex_cmd_has_timeout(cmd)) {
		int ret;

		/*
		 * A should_fail_futex() hit and a failed timespec copy both
		 * yield -EFAULT.
		 */
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))) ||
		    get_timespec64(&ts, utime))
			return -EFAULT;

		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;

		tp = &t;
	}

	/* utime is also handed over, cast to an integer, as do_futex()'s val2. */
	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
|
|
|
|
/**
|
|
* futex_parse_waitv - Parse a waitv array from userspace
|
|
* @futexv: Kernel side list of waiters to be filled
|
|
* @uwaitv: Userspace list to be parsed
|
|
* @nr_futexes: Length of futexv
|
|
* @wake: Wake to call when futex is woken
|
|
* @wake_data: Data for the wake handler
|
|
*
|
|
* Return: Error code on failure, 0 on success
|
|
*/
|
|
int futex_parse_waitv(struct futex_vector *futexv,
|
|
struct futex_waitv __user *uwaitv,
|
|
unsigned int nr_futexes, futex_wake_fn *wake,
|
|
void *wake_data)
|
|
{
|
|
struct futex_waitv aux;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < nr_futexes; i++) {
|
|
unsigned int flags;
|
|
|
|
if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
|
|
return -EFAULT;
|
|
|
|
if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved)
|
|
return -EINVAL;
|
|
|
|
flags = futex2_to_flags(aux.flags);
|
|
if (!futex_flags_valid(flags))
|
|
return -EINVAL;
|
|
|
|
if (!futex_validate_input(flags, aux.val))
|
|
return -EINVAL;
|
|
|
|
futexv[i].w.flags = flags;
|
|
futexv[i].w.val = aux.val;
|
|
futexv[i].w.uaddr = aux.uaddr;
|
|
futexv[i].q = futex_q_init;
|
|
futexv[i].q.wake = wake;
|
|
futexv[i].q.wake_data = wake_data;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read the user-supplied @timeout and arm @to with it for @clockid.
 *
 * Returns 0 on success or when @timeout is NULL (in which case @to is left
 * untouched), -EINVAL for a clock other than CLOCK_REALTIME/CLOCK_MONOTONIC
 * or an invalid timespec, and -EFAULT when the timespec cannot be copied in.
 */
static int futex2_setup_timeout(struct __kernel_timespec __user *timeout,
				clockid_t clockid, struct hrtimer_sleeper *to)
{
	int flag_clkid, flag_init;
	struct timespec64 ts;
	ktime_t time;
	int ret;

	if (!timeout)
		return 0;

	switch (clockid) {
	case CLOCK_REALTIME:
		flag_clkid = FLAGS_CLOCKRT;
		flag_init = FUTEX_CLOCK_REALTIME;
		break;
	case CLOCK_MONOTONIC:
		flag_clkid = 0;
		flag_init = 0;
		break;
	default:
		return -EINVAL;
	}

	if (get_timespec64(&ts, timeout))
		return -EFAULT;

	/*
	 * futex_waitv has no opcode of its own, so FUTEX_WAIT_BITSET is
	 * passed instead: it uses an absolute timeout as well.
	 */
	ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
	if (ret)
		return ret;

	futex_setup_timer(&time, to, flag_clkid, 0);
	return 0;
}
|
|
|
|
static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to)
|
|
{
|
|
hrtimer_cancel(&to->timer);
|
|
destroy_hrtimer_on_stack(&to->timer);
|
|
}
|
|
|
|
/**
|
|
* sys_futex_waitv - Wait on a list of futexes
|
|
* @waiters: List of futexes to wait on
|
|
* @nr_futexes: Number of entries in @waiters
|
|
* @flags: Must be zero; this syscall supports no flags for now
|
|
* @timeout: Optional absolute timeout.
|
|
* @clockid: Clock to be used for the timeout, realtime or monotonic.
|
|
*
|
|
* Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
|
|
* if a futex_wake() is performed at any uaddr. The syscall returns immediately
|
|
* if any waiter has *uaddr != val. *timeout is an optional timeout value for
|
|
* the operation. Each waiter has individual flags. The `flags` argument for
|
|
* the syscall should be used solely for specifying the timeout as realtime, if
|
|
* needed. Flags for private futexes, sizes, etc. should be used on the
|
|
* individual flags of each waiter.
|
|
*
|
|
* Returns the array index of one of the woken futexes. No further information
|
|
* is provided: any number of other futexes may also have been woken by the
|
|
* same event, and if more than one futex was woken, the returned index may
|
|
* refer to any one of them. (It is not necessarily the futex with the
|
|
* smallest index, nor the one most recently woken, nor...)
|
|
*/
|
|
|
|
SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
		unsigned int, nr_futexes, unsigned int, flags,
		struct __kernel_timespec __user *, timeout, clockid_t, clockid)
{
	struct hrtimer_sleeper *top = NULL;
	struct hrtimer_sleeper to;
	struct futex_vector *futexv;
	int ret;

	/*
	 * No flags are supported for now, and the waiter array must be
	 * present, non-empty and at most FUTEX_WAITV_MAX entries long.
	 */
	if (flags || !waiters || !nr_futexes || nr_futexes > FUTEX_WAITV_MAX)
		return -EINVAL;

	/* The sleeper is only set up, and later destroyed, when a timeout is given. */
	if (timeout) {
		ret = futex2_setup_timeout(timeout, clockid, &to);
		if (ret)
			return ret;
		top = &to;
	}

	futexv = kzalloc_objs(*futexv, nr_futexes);
	if (!futexv) {
		ret = -ENOMEM;
		goto out_timer;
	}

	ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark,
				NULL);
	if (ret == 0)
		ret = futex_wait_multiple(futexv, nr_futexes, top);

	kfree(futexv);

out_timer:
	if (top)
		futex2_destroy_timeout(top);

	return ret;
}
|
|
|
|
/*
|
|
* sys_futex_wake - Wake a number of futexes
|
|
* @uaddr: Address of the futex(es) to wake
|
|
* @mask: bitmask
|
|
* @nr: Number of the futexes to wake
|
|
* @flags: FUTEX2 flags
|
|
*
|
|
* Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the
|
|
* futex2 family of calls.
|
|
*/
|
|
|
|
SYSCALL_DEFINE4(futex_wake,
|
|
void __user *, uaddr,
|
|
unsigned long, mask,
|
|
int, nr,
|
|
unsigned int, flags)
|
|
{
|
|
if (flags & ~FUTEX2_VALID_MASK)
|
|
return -EINVAL;
|
|
|
|
flags = futex2_to_flags(flags);
|
|
if (!futex_flags_valid(flags))
|
|
return -EINVAL;
|
|
|
|
if (!futex_validate_input(flags, mask))
|
|
return -EINVAL;
|
|
|
|
return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask);
|
|
}
|
|
|
|
/*
|
|
* sys_futex_wait - Wait on a futex
|
|
* @uaddr: Address of the futex to wait on
|
|
* @val: Value of @uaddr
|
|
* @mask: bitmask
|
|
* @flags: FUTEX2 flags
|
|
* @timeout: Optional absolute timeout
|
|
* @clockid: Clock to be used for the timeout, realtime or monotonic
|
|
*
|
|
* Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
|
|
* futex2 family of calls.
|
|
*/
|
|
|
|
SYSCALL_DEFINE6(futex_wait,
|
|
void __user *, uaddr,
|
|
unsigned long, val,
|
|
unsigned long, mask,
|
|
unsigned int, flags,
|
|
struct __kernel_timespec __user *, timeout,
|
|
clockid_t, clockid)
|
|
{
|
|
struct hrtimer_sleeper to;
|
|
int ret;
|
|
|
|
if (flags & ~FUTEX2_VALID_MASK)
|
|
return -EINVAL;
|
|
|
|
flags = futex2_to_flags(flags);
|
|
if (!futex_flags_valid(flags))
|
|
return -EINVAL;
|
|
|
|
if (!futex_validate_input(flags, val) ||
|
|
!futex_validate_input(flags, mask))
|
|
return -EINVAL;
|
|
|
|
if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
|
|
return ret;
|
|
|
|
ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask);
|
|
|
|
if (timeout)
|
|
futex2_destroy_timeout(&to);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* sys_futex_requeue - Requeue a waiter from one futex to another
|
|
* @waiters: array describing the source and destination futex
|
|
* @flags: unused, must be zero
|
|
* @nr_wake: number of futexes to wake
|
|
* @nr_requeue: number of futexes to requeue
|
|
*
|
|
* Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the
|
|
* futex2 family of calls.
|
|
*/
|
|
|
|
SYSCALL_DEFINE4(futex_requeue,
|
|
struct futex_waitv __user *, waiters,
|
|
unsigned int, flags,
|
|
int, nr_wake,
|
|
int, nr_requeue)
|
|
{
|
|
struct futex_vector futexes[2];
|
|
u32 cmpval;
|
|
int ret;
|
|
|
|
if (flags)
|
|
return -EINVAL;
|
|
|
|
if (!waiters)
|
|
return -EINVAL;
|
|
|
|
ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL);
|
|
if (ret)
|
|
return ret;
|
|
|
|
cmpval = futexes[0].w.val;
|
|
|
|
return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags,
|
|
u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags,
|
|
nr_wake, nr_requeue, &cmpval, 0);
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
COMPAT_SYSCALL_DEFINE2(set_robust_list,
|
|
struct compat_robust_list_head __user *, head,
|
|
compat_size_t, len)
|
|
{
|
|
if (unlikely(len != sizeof(*head)))
|
|
return -EINVAL;
|
|
|
|
current->compat_robust_list = head;
|
|
|
|
return 0;
|
|
}
|
|
|
|
COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
			compat_uptr_t __user *, head_ptr,
			compat_size_t __user *, len_ptr)
{
	struct compat_robust_list_head __user *head;
	long ret = -EFAULT;

	head = futex_get_robust_list_common(pid, true);
	if (IS_ERR(head))
		return PTR_ERR(head);

	/* The header size is written first; the compat pointer only if that worked. */
	if (!put_user(sizeof(*head), len_ptr))
		ret = put_user(ptr_to_compat(head), head_ptr);

	return ret;
}
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
#ifdef CONFIG_COMPAT_32BIT_TIME
|
|
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	const int cmd = op & FUTEX_CMD_MASK;
	struct timespec64 ts;
	ktime_t *tp = NULL;
	ktime_t t;

	/* utime is only parsed as a timeout for the commands that take one. */
	if (utime && futex_cmd_has_timeout(cmd)) {
		int ret;

		if (get_old_timespec32(&ts, utime))
			return -EFAULT;

		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;

		tp = &t;
	}

	/* utime is also handed over, cast to an integer, as do_futex()'s val2. */
	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
|
|
#endif /* CONFIG_COMPAT_32BIT_TIME */
|
|
|