1
0
mirror of https://github.com/openbsd/src.git synced 2025-01-10 06:47:55 -08:00
openbsd-src/sys/kern/sys_futex.c
guenther 703735be1a New flag PR_RWLOCK for pool_init(9) makes the pool use rwlocks instead
of mutexes.  Use this immediately for the pool_cache futex pools.

Mostly worked out with dlg@ during e2k17
ok mpi@ tedu@
2017-08-13 20:26:33 +00:00

294 lines
6.8 KiB
C

/* $OpenBSD: sys_futex.c,v 1.4 2017/08/13 20:26:33 guenther Exp $ */
/*
* Copyright (c) 2016-2017 Martin Pieuchot
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/time.h>
#include <sys/rwlock.h>
#include <sys/futex.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
/*
 * Atomicity is only needed on MULTIPROCESSOR kernels. Fall back on
 * copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9)
 * implementation.
 *
 * The fallback copies sizeof(uint32_t) bytes from the user address
 * ``uaddr'' into the kernel buffer ``kaddr'' and evaluates to whatever
 * copyin(9) returns.
 */
#ifndef MULTIPROCESSOR
#define copyin32(uaddr, kaddr) copyin((uaddr), (kaddr), sizeof(uint32_t))
#endif
/*
 * Kernel representation of a futex.
 *
 * A futex is looked up by the pair (ft_uaddr, ft_pid): futex_get()
 * compares both fields when it walks the global list.  The structure
 * is reference counted; futex_put() unlinks and frees it when the
 * count drops to zero.
 */
struct futex {
LIST_ENTRY(futex) ft_list; /* linkage on the global list of futexes */
TAILQ_HEAD(, proc) ft_threads; /* threads sleeping on this futex */
uint32_t *ft_uaddr; /* userspace address (lookup key) */
pid_t ft_pid; /* pid of the creating process (lookup key) */
unsigned int ft_refcnt; /* # of references, freed when it hits 0 */
};
/*
 * Syscall helpers.  Each one asserts that the global futex lock is
 * write-held, directly or through futex_requeue(); sys_futex() takes
 * the lock around every call.
 */
/* Sleep on a futex: (address, expected value, optional timeout). */
int futex_wait(uint32_t *, uint32_t, const struct timespec *);
/* Wake threads: (address, maximum number of threads to wake). */
int futex_wake(uint32_t *, uint32_t);
/* Wake and requeue: (address, max woken, target address, max requeued). */
int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t);
/* Flags for futex_get(). */
#define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */
/* Look up (and optionally create) a futex, taking a reference on it. */
struct futex *futex_get(uint32_t *, int);
/* Drop a reference; the futex is freed with its last reference. */
void futex_put(struct futex *);
/*
 * The global futex lock serializes futex(2) calls so that no wakeup
 * events are lost.  It also protects the global list of all futexes
 * and their state.
 */
struct rwlock ftlock = RWLOCK_INITIALIZER("futex");
/* Every futex that currently exists; futex_get() inserts at the head. */
static LIST_HEAD(, futex) ftlist;
/* Pool from which ``struct futex'' objects are allocated. */
struct pool ftpool;
/*
 * Initialize the pool that backs ``struct futex'' allocations.
 *
 * The pool is created with PR_RWLOCK in addition to PR_WAITOK.
 */
void
futex_init(void)
{
	const int prflags = PR_WAITOK | PR_RWLOCK;

	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, prflags,
	    "futexpl", NULL);
}
/*
 * futex(2) system call entry point.
 *
 * Dispatches on the requested operation with the global futex lock
 * write-held.  FUTEX_WAIT is additionally bracketed by the kernel
 * lock, which is taken before and released after the futex lock.
 *
 * The outcome of the operation, including any error number, is stored
 * in ``*retval''; an unknown operation stores ENOSYS there.  The
 * function itself always returns 0.
 */
int
sys_futex(struct proc *p, void *v, register_t *retval)
{
	struct sys_futex_args /* {
		syscallarg(uint32_t *) f;
		syscallarg(int) op;
		syscallarg(int) val;
		syscallarg(const struct timespec *) timeout;
		syscallarg(uint32_t *) g;
	} */ *uap = v;
	uint32_t *uaddr = SCARG(uap, f);
	int op = SCARG(uap, op);
	uint32_t val = SCARG(uap, val);
	const struct timespec *timeout = SCARG(uap, timeout);
	void *uaddr2 = SCARG(uap, g);
	int is_wait;

	/* Reject unknown operations before touching any lock. */
	if (op != FUTEX_WAIT && op != FUTEX_WAKE && op != FUTEX_REQUEUE) {
		*retval = ENOSYS;
		return 0;
	}

	/* Only the sleeping path runs under the kernel lock. */
	is_wait = (op == FUTEX_WAIT);
	if (is_wait)
		KERNEL_LOCK();
	rw_enter_write(&ftlock);

	if (is_wait) {
		*retval = futex_wait(uaddr, val, timeout);
	} else if (op == FUTEX_WAKE) {
		*retval = futex_wake(uaddr, val);
	} else {
		/* FUTEX_REQUEUE carries its second count in ``timeout''. */
		*retval = futex_requeue(uaddr, val, uaddr2,
		    (unsigned long)timeout);
	}

	rw_exit_write(&ftlock);
	if (is_wait)
		KERNEL_UNLOCK();

	return 0;
}
/*
* Return an existing futex matching userspace address ``uaddr''.
*
* If such futex does not exist and FT_CREATE is given, create it.
*/
struct futex *
futex_get(uint32_t *uaddr, int flag)
{
struct futex *f;
rw_assert_wrlock(&ftlock);
LIST_FOREACH(f, &ftlist, ft_list) {
if (f->ft_uaddr == uaddr && f->ft_pid == curproc->p_p->ps_pid) {
f->ft_refcnt++;
break;
}
}
if ((f == NULL) && (flag & FT_CREATE)) {
/*
* We rely on the rwlock to ensure that no other thread
* create the same futex.
*/
f = pool_get(&ftpool, PR_WAITOK);
TAILQ_INIT(&f->ft_threads);
f->ft_uaddr = uaddr;
f->ft_pid = curproc->p_p->ps_pid;
f->ft_refcnt = 1;
LIST_INSERT_HEAD(&ftlist, f, ft_list);
}
return f;
}
/*
 * Drop one reference on futex ``f''.
 *
 * When the last reference goes away the futex, which must no longer
 * have any sleeping thread queued, is unlinked from the global list
 * and handed back to the pool.
 *
 * The global futex lock must be write-held.
 */
void
futex_put(struct futex *f)
{
	rw_assert_wrlock(&ftlock);

	KASSERT(f->ft_refcnt > 0);
	f->ft_refcnt -= 1;
	if (f->ft_refcnt != 0)
		return;

	/* That was the last reference: tear the futex down. */
	KASSERT(TAILQ_EMPTY(&f->ft_threads));
	LIST_REMOVE(f, ft_list);
	pool_put(&ftpool, f);
}
/*
* Put the current thread on the sleep queue of the futex at address
* ``uaddr''. Let it sleep for the specified ``timeout'' time, or
* indefinitly if the argument is NULL.
*/
int
futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout)
{
struct proc *p = curproc;
struct futex *f;
uint64_t to_ticks = 0;
uint32_t cval;
int error;
/*
* After reading the value a race is still possible but
* we deal with it by serializing all futex syscalls.
*/
rw_assert_wrlock(&ftlock);
/*
* Read user space futex value
*/
if ((error = copyin32(uaddr, &cval)))
return error;
/* If the value changed, stop here. */
if (cval != val)
return EAGAIN;
if (timeout != NULL) {
struct timespec ts;
if ((error = copyin(timeout, &ts, sizeof(ts))))
return error;
#ifdef KTRACE
if (KTRPOINT(p, KTR_STRUCT))
ktrabstimespec(p, timeout);
#endif
to_ticks = (uint64_t)hz * ts.tv_sec +
(ts.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
if (to_ticks > INT_MAX)
to_ticks = INT_MAX;
}
f = futex_get(uaddr, FT_CREATE);
TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
p->p_futex = f;
error = rwsleep(p, &ftlock, PUSER|PCATCH, "fsleep", (int)to_ticks);
if (error == ERESTART)
error = EINTR;
else if (error == EWOULDBLOCK) {
/* A race occured between a wakeup and a timeout. */
if (p->p_futex == NULL)
error = 0;
else
error = ETIMEDOUT;
}
/* Remove ourself if we haven't been awaken. */
if ((f = p->p_futex) != NULL) {
p->p_futex = NULL;
TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
futex_put(f);
}
return error;
}
/*
 * Wakeup at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
 * address ``uaddr2''.
 *
 * No thread is requeued when ``uaddr2'' is NULL or equal to ``uaddr'':
 * in the first case there is no target, in the second the threads are
 * already queued where they are supposed to end up.
 *
 * The global futex lock must be write-held.
 *
 * Returns the number of threads woken up plus the number of threads
 * requeued, or 0 if no futex exists at ``uaddr''.
 */
int
futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m)
{
	struct futex *f, *g;
	struct proc *p;
	uint64_t limit;
	uint32_t count = 0;

	rw_assert_wrlock(&ftlock);

	f = futex_get(uaddr, 0);
	if (f == NULL)
		return 0;

	/*
	 * Dequeueing a thread without waking or requeueing it would
	 * strand it, and requeueing onto the list being drained would
	 * only spin under the global lock.
	 */
	if (uaddr2 == NULL || uaddr2 == uaddr)
		m = 0;

	/* Sum in 64 bits: ``n + m'' may not fit in a uint32_t. */
	limit = (uint64_t)n + m;

	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && count < limit) {
		/* Detach the thread and drop the reference it held. */
		p->p_futex = NULL;
		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
		futex_put(f);

		if (count < n) {
			wakeup_one(p);
		} else {
			/* The reference taken here belongs to ``p''. */
			g = futex_get(uaddr2, FT_CREATE);
			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
			p->p_futex = g;
		}
		count++;
	}

	/* Release the reference obtained by the lookup above. */
	futex_put(f);

	return count;
}
/*
 * Wakeup at most ``n'' sibling threads sleeping on a futex at address
 * ``uaddr''.
 *
 * This is futex_requeue() with no target futex and nothing to requeue;
 * its return value is passed through unchanged.
 */
int
futex_wake(uint32_t *uaddr, uint32_t n)
{
	uint32_t *const no_target = NULL;
	const uint32_t no_requeue = 0;

	return futex_requeue(uaddr, n, no_target, no_requeue);
}