dns: fix deadlock race
[rsc] --rw-rw-r-- M 779778 glenda sys 28763 Apr 13 14:23 sys/src/cmd/ndb/dn.c
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:448,460 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:448,464
* garbage collect. block while garbage collecting.
*/
int
- getactivity(Request *req)
+ getactivity(Request *req, int recursive)
{
int rv;
- if(traceactivity) syslog(0, "dns", "get %d by %d", dnvars.active, getpid());
+ if(traceactivity) syslog(0, "dns", "get %d by %d from %p", dnvars.active, getpid(), getcallerpc(&req));
lock(&dnvars);
- while(dnvars.mutex){
+ /*
+ * can't block here if we're already holding one
+ * of the dnvars.active (recursive). will deadlock.
+ */
+ while(!recursive && dnvars.mutex){
unlock(&dnvars);
sleep(200);
lock(&dnvars);
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:467,473 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:471,477
return rv;
}
void
- putactivity(void)
+ putactivity(int recursive)
{
static ulong lastclean;
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:478,485 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:482,491
/*
* clean out old entries and check for new db periodicly
+ * can't block here if being called to let go a "recursive" lock
+ * or we'll deadlock waiting for ourselves to give up the dnvars.active.
*/
- if(dnvars.mutex || (needrefresh == 0 && dnvars.active > 0)){
+ if(recursive || dnvars.mutex || (needrefresh == 0 && dnvars.active > 0)){
unlock(&dnvars);
return;
}
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:1234,1254 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:1240,1272
slave(Request *req)
{
static int slaveid;
+ int ppid;
if(req->isslave)
return; /* we're already a slave process */
+ /*
+ * These calls to putactivity cannot block.
+ * After getactivity(), the current process is counted
+ * twice in dnvars.active (one will pass to the child).
+ * If putactivity tries to wait for dnvars.active == 0,
+ * it will never happen.
+ */
+
/* limit parallelism */
- if(getactivity(req) > Maxactive){
- putactivity();
+ if(getactivity(req, 1) > Maxactive){
+ if(traceactivity) syslog(0, "dns", "[%d] too much activity", getpid());
+ putactivity(1);
return;
}
+ ppid = getpid();
switch(rfork(RFPROC|RFNOTEG|RFMEM|RFNOWAIT)){
case -1:
- putactivity();
+ putactivity(1);
break;
case 0:
+ if(traceactivity) syslog(0, "dns", "[%d] take activity from %d", ppid, getpid());
req->isslave = 1;
break;
default:
[rsc] --rw-rw-r-- M 779778 rsc sys 3066 Apr 13 14:23 sys/src/cmd/ndb/dnnotify.c
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnnotify.c:150,158 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnnotify.c:150,158
req.isslave = 1; /* son't fork off subprocesses */
for(;;){
- getactivity(&req);
+ getactivity(&req, 0);
notify_areas(owned, &req);
- putactivity();
+ putactivity(0);
sleep(60*1000);
}
}
[rsc] --rw-rw-r-- M 779778 glenda sys 15346 Apr 13 14:23 sys/src/cmd/ndb/dnresolve.c
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnresolve.c:94,100 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnresolve.c:94,100
char *cp;
if(debug)
- syslog(0, LOG, "dnresolve1 %s %d %d", name, type, class);
+ syslog(0, LOG, "[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
/* only class Cin implemented so far */
if(class != Cin)
[rsc] --rw-rw-r-- M 779778 glenda sys 15308 Apr 13 14:23 sys/src/cmd/ndb/dns.c
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:376,382 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:376,382
* through 'mret'.
*/
if(setjmp(req.mret))
- putactivity();
+ putactivity(0);
req.isslave = 0;
for(;;){
n = read9pmsg(mfd[0], mdata, sizeof mdata);
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:393,399 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:393,399
if(debug)
syslog(0, logfile, "%F", &job->request);
- getactivity(&req);
+ getactivity(&req, 0);
req.aborttime = now + 60; /* don't spend more than 60 seconds */
switch(job->request.type){
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:447,457 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:447,457
* slave processes die after replying
*/
if(req.isslave){
- putactivity();
+ putactivity(0);
_exits(0);
}
- putactivity();
+ putactivity(0);
}
}
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:871,878 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:871,878
{
char buf[12];
- syslog(0, LOG, "%d.%d: sending to %I/%s %s %s",
- id, subid, addr, sname, rname, rrname(type, buf, sizeof buf));
+ syslog(0, LOG, "[%d] %d.%d: sending to %I/%s %s %s",
+ getpid(), id, subid, addr, sname, rname, rrname(type, buf, sizeof buf));
}
RR*
[rsc] --rw-rw-r-- M 779778 glenda sys 8492 Apr 13 14:23 sys/src/cmd/ndb/dnsdebug.c
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnsdebug.c:414,420 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnsdebug.c:414,420
return;
}
- getactivity(&req);
+ getactivity(&req, 0);
req.isslave = 1;
req.aborttime = now + 60; /* don't spend more than 60 seconds */
rr = dnresolve(buf, Cin, type, &req, 0, 0, Recurse, rooted, 0);
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnsdebug.c:426,432 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnsdebug.c:426,432
}
rrfreelist(rr);
- putactivity();
+ putactivity(0);
}
void
[rsc] --rw-rw-r-- M 779778 glenda sys 7349 Apr 13 14:23 sys/src/cmd/ndb/dnstcp.c
[diffs elided - too long]
[diff -c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnstcp.c /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnstcp.c]
[rsc] --rw-rw-r-- M 779778 glenda sys 5337 Apr 13 18:58 sys/src/cmd/ndb/dnudpserver.c
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:97,107 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:97,107
while((fd = udpannounce(mntpt)) < 0)
sleep(5000);
if(setjmp(req.mret))
- putactivity();
+ putactivity(0);
req.isslave = 0;
/* loop on requests */
- for(;; putactivity()){
+ for(;; putactivity(0)){
memset(&repmsg, 0, sizeof(repmsg));
memset(&reqmsg, 0, sizeof(reqmsg));
alarm(60*1000);
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:111,117 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:111,117
goto restart;
uh = (OUdphdr*)buf;
len -= OUdphdrsize;
- getactivity(&req);
+ getactivity(&req, 0);
req.aborttime = now + 30; /* don't spend more than 30 seconds */
err = convM2DNS(&buf[OUdphdrsize], len, &reqmsg);
if(err){
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:174,180 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:174,180
rrfreelist(reqmsg.ar);
if(req.isslave){
- putactivity();
+ putactivity(0);
_exits(0);
}
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:190,195 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:190,196
static int
udpannounce(char *mntpt)
{
+ static int whined;
int data, ctl;
char dir[64];
char datafile[64+6];
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:198,204 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:199,206
sprint(datafile, "%s/udp!*!dns", mntpt);
ctl = announce(datafile, dir);
if(ctl < 0){
- warning("can't announce on dns udp port");
+ if(!whined++)
+ warning("can't announce on dns udp port");
return -1;
}
snprint(datafile, sizeof(datafile), "%s/data", dir);
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:210,216 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:212,219
data = open(datafile, ORDWR);
if(data < 0){
close(ctl);
- warning("can't announce on dns udp port");
+ if(!whined++)
+ warning("can't announce on dns udp port");
return -1;
}
[rsc] --rw-rw-r-- M 779778 glenda sys 10171 Apr 13 18:58 sys/src/cmd/ndb/dns.h
/n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.h:370,377 -
/n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.h:370,377
extern char* rrname(int, char*, int);
extern int tsame(int, int);
extern void dndump(char*);
- extern int getactivity(Request*);
- extern void putactivity(void);
+ extern int getactivity(Request*, int);
+ extern void putactivity(int);
extern void abort(); /* char*, ... */;
extern void warning(char*, ...);
extern void slave(Request*);
[rsc] --rw-rw-r-- M 779778 glenda sys 5337 Apr 13 18:58 sys/src/cmd/ndb/dnudpserver.c
|