// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "runtime.h"
#include "defs.h"
#include "signals.h"
#include "os.h"
// Linux futex.
//
// futexsleep(uint32 *addr, uint32 val)
// futexwakeup(uint32 *addr)
//
// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
// Futexwakeup wakes up one thread sleeping on addr.
// Futexsleep is allowed to wake up spuriously.
// Operation codes passed as the second argument to futex().
enum
{
	FUTEX_WAIT = 0,
	FUTEX_WAKE = 1,
};

// Errno values; futex() results are compared against
// their negations (see futexsleep).
enum
{
	EINTR = 4,
	EAGAIN = 11,
};
// Timeout handed to FUTEX_WAIT by futexsleep; long enough to
// mean "effectively forever" (1<<30 seconds is about 34 years).
//
// TODO(rsc): I tried using 1<<40 here but futex woke up (-ETIMEDOUT).
// Perhaps the timespec that reaches the kernel actually holds
// two 32-bit numbers, so that a 64-bit 1<<40 ends up being
// 0 seconds, 1<<8 nanoseconds.
static Timespec longtime = { 1<<30, 0 };
// futexsleep asks the kernel to atomically check *addr == val and,
// if so, put the calling thread to sleep on addr.
// Spurious wakeups are allowed, so callers must re-check their state.
//
// A non-negative result, -EAGAIN and -EINTR are all treated as a
// normal return.  Any other result is reported and then the thread
// deliberately faults by storing to the bogus address 0x1005.
static void
futexsleep(uint32 *addr, uint32 val)
{
	int32 err;

	err = futex(addr, FUTEX_WAIT, val, &longtime, nil, 0);
	if(err < 0 && err != -EAGAIN && err != -EINTR){
		prints("futexsleep addr=");
		runtime·printpointer(addr);
		prints(" val=");
		runtime·printint(val);
		prints(" returned ");
		runtime·printint(err);
		prints("\n");
		*(int32*)0x1005 = 0x1005;
	}
}
// futexwakeup wakes at least one thread sleeping on addr, if any.
//
// Any negative result from futex is reported and then the thread
// deliberately faults by storing to the bogus address 0x1006.
static void
futexwakeup(uint32 *addr)
{
	int64 err;

	err = futex(addr, FUTEX_WAKE, 1, nil, nil, 0);
	if(err < 0){
		// It is unclear whether a futex wakeup can ever return
		// EAGAIN or EINTR; if it can, looping and calling
		// futex again would be safe.
		prints("futexwakeup addr=");
		runtime·printpointer(addr);
		prints(" returned ");
		runtime·printint(err);
		prints("\n");
		*(int32*)0x1006 = 0x1006;
	}
}
// Lock and unlock.
//
// The lock state is a single 32-bit word that holds
// a 31-bit count of threads waiting for the lock
// and a single bit (the low bit) saying whether the lock is held.
// The uncontended case runs entirely in user space.
// When contention is detected, we defer to the kernel (futex).
//
// A reminder: compare-and-swap cas(addr, old, new) does
// if(*addr == old) { *addr = new; return 1; }
// else return 0;
// but atomically.
// futexlock acquires l.  When the held bit (low bit of l->key) is
// clear it is set with a single cas.  Otherwise the caller adds
// itself to the waiter count (key += 2), sleeps in the kernel,
// removes itself from the count on wakeup, and starts over.
static void
futexlock(Lock *l)
{
	uint32 seen;

	for(;;){
		seen = l->key;
		if((seen&1) == 0){
			// Lock looks free; try to grab it.
			if(cas(&l->key, seen, seen|1))
				return;
			// Key changed under us; reload and retry.
			continue;
		}

		// Lock is held; try to add ourselves to the waiter count.
		if(!cas(&l->key, seen, seen+2))
			continue;

		// We're accounted for, so sleep in the kernel.
		//
		// The obvious lock/unlock race is avoided because the
		// kernel won't put us to sleep if l->key has changed
		// underfoot and is no longer seen+2.
		//
		// (All we really care about is that the low bit is still
		// set, i.e. the lock is held.  There is a futex variant
		// that could accommodate that check, but let's not get
		// carried away.)
		futexsleep(&l->key, seen+2);

		// Awake again: take ourselves back out of the count.
		do{
			seen = l->key;
			if(seen < 2)
				throw("bad lock key");
		}while(!cas(&l->key, seen, seen-2));

		// Go around and try for the lock again.
	}
}
// futexunlock releases l by atomically clearing the held bit, and
// wakes one sleeper if the waiter count was non-zero at that moment.
// Unlocking a lock whose held bit is already clear is a fatal error.
static void
futexunlock(Lock *l)
{
	uint32 old;

	// Atomically fetch the key and clear the lock bit.
	do{
		old = l->key;
		if((old&1) == 0)
			throw("unlock of unlocked lock");
	}while(!cas(&l->key, old, old&~1));

	// Anything above the low bit means there were waiters; wake one.
	if(old & ~1)
		futexwakeup(&l->key);
}
// lock acquires l on behalf of m, incrementing m->locks first.
// A negative m->locks on entry is a fatal error.
void
lock(Lock *l)
{
	if(m->locks < 0){
		throw("lock count");
	}
	m->locks += 1;
	futexlock(l);
}
// unlock releases l on behalf of m, decrementing m->locks first.
// If the decrement takes m->locks below zero that is a fatal error.
void
unlock(Lock *l)
{
	if(--m->locks < 0){
		throw("lock count");
	}
	futexunlock(l);
}
// One-time notifications.
//
// Since the lock/unlock implementation already
// takes care of sleeping in the kernel, we just reuse it.
// (But it's a weird use, so it gets its own interface.)
//
// We use a lock to represent the event:
// unlocked == event has happened.
// Thus the lock starts out locked, and to wait for the
// event you try to lock the lock. To signal the event,
// you unlock the lock.
// noteclear puts n into the "event has not happened" state:
// the lock word is zeroed and the lock is then taken.
void
noteclear(Note *n)
{
	Lock *ev;

	ev = &n->lock;
	ev->key = 0;	// memset(n, 0, sizeof *n)
	futexlock(ev);
}
// notewakeup signals the event by unlocking n's lock.
void
notewakeup(Note *n)
{
	Lock *ev;

	ev = &n->lock;
	futexunlock(ev);
}
// notesleep waits for the event by acquiring n's lock, then
// releases it again straight away so other sleepers find out too.
void
notesleep(Note *n)
{
	Lock *ev;

	ev = &n->lock;
	futexlock(ev);
	futexunlock(ev);
}
// Clone, the Linux rfork.
//
// Flag bits for clone().  The one-line descriptions follow the
// Linux clone(2) manual page; only a handful are used in this file.
enum
{
	CLONE_VM             = 0x00000100,	// share address space
	CLONE_FS             = 0x00000200,	// share cwd, root, umask
	CLONE_FILES          = 0x00000400,	// share fd table
	CLONE_SIGHAND        = 0x00000800,	// share signal handler table
	CLONE_PTRACE         = 0x00002000,	// child is traced if parent is
	CLONE_VFORK          = 0x00004000,	// parent waits for child to release memory
	CLONE_PARENT         = 0x00008000,	// child gets caller's parent
	CLONE_THREAD         = 0x00010000,	// same thread group as caller
	CLONE_NEWNS          = 0x00020000,	// new mount namespace
	CLONE_SYSVSEM        = 0x00040000,	// share System V semaphore undo values
	CLONE_SETTLS         = 0x00080000,	// set up TLS for the child
	CLONE_PARENT_SETTID  = 0x00100000,	// store child tid in parent's memory
	CLONE_CHILD_CLEARTID = 0x00200000,	// clear child tid in child's memory on exit
	CLONE_UNTRACED       = 0x00800000,	// tracer cannot force CLONE_PTRACE
	CLONE_CHILD_SETTID   = 0x01000000,	// store child tid in child's memory
	CLONE_STOPPED        = 0x02000000,	// child starts out stopped
	CLONE_NEWUTS         = 0x04000000,	// new UTS namespace
	CLONE_NEWIPC         = 0x08000000,	// new IPC namespace
};
// newosproc creates a new thread with clone(), passing stk, m, g
// and fn through unchanged.  Before the call, m->id is copied into
// m->tls[0].  If clone returns a negative value the caller
// deliberately faults by storing to the bogus address 123.
void
newosproc(M *m, G *g, void *stk, void (*fn)(void))
{
	int32 flags;
	int32 ret;

	// note: strace gets confused if we use CLONE_PTRACE here.
	flags = CLONE_PARENT;	// getppid doesn't change in child
	flags |= CLONE_VM;	// share memory
	flags |= CLONE_FS;	// share cwd, etc
	flags |= CLONE_FILES;	// share fd table
	flags |= CLONE_SIGHAND;	// share sig handler table
	flags |= CLONE_THREAD;	// revisit - okay for now

	m->tls[0] = m->id;	// so 386 asm can find it

	// Debugging aid, compiled in but never executed.
	if(0){
		printf("newosproc stk=%p m=%p g=%p fn=%p clone=%p id=%d/%d ostk=%p\n",
			stk, m, g, fn, clone, m->id, m->tls[0], &m);
	}

	ret = clone(flags, stk, m, g, fn);
	if(ret < 0){
		*(int32*)123 = 123;
	}
}
// osinit does nothing in this file; the body is intentionally empty.
// (Presumably an OS-specific startup hook - confirm against callers.)
void
osinit(void)
{
}
// Called to initialize a new m (including the bootstrap m).
void
minit(void)
{
// Initialize signal handling.
m->gsignal = malg(32*1024); // OS X wants >=8K, Linux >=2K
signalstack(m->gsignal->stackguard, 32*1024);
}
|