Plan 9 from Bell Labs’s /usr/web/sources/contrib/quanstro/root/sys/src/boot/pc-e820/ether82598.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*
 * intel 10gbe pcie boot driver
 * copyright © 2010, coraid, inc.
 */
#include "u.h"
#include "lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "etherif.h"

/*
 * register offsets.  each value is a byte offset divided by 4,
 * i.e. an index into the u32int register window c->reg.
 * ranges such as (0-63) in a comment are the original author's
 * notes on how many instances of the register exist.
 */
enum{
	/* general */
	Ctrl		= 0x00000/4,	/* Device Control */
	Status		= 0x00008/4,	/* Device Status */
	Ctrlext		= 0x00018/4,	/* Extended Device Control */
	Esdp		= 0x00020/4,	/* extended sdp control */
	Esodp		= 0x00028/4,	/* extended od sdp control */
	Ledctl		= 0x00200/4,	/* led control */
	Tcptimer		= 0x0004c/4,	/* tcp timer */
	Ecc		= 0x110b0/4,	/* errata ecc control magic */

	/* nvm */
	Eec		= 0x10010/4,	/* eeprom/flash control */
	Eerd		= 0x10014/4,	/* eeprom read */
	Fla		= 0x1001c/4,	/* flash access */
	Flop		= 0x1013c/4,	/* flash opcode */
	Grc		= 0x10200/4,	/* general rx control */

	/* interrupt */
	Icr		= 0x00800/4,	/* interrupt cause read */
	Ics		= 0x00808/4,	/* " set */
	Ims		= 0x00880/4,	/* " mask read/set */
	Imc		= 0x00888/4,	/* " mask clear */
	Iac		= 0x00810/4,	/* " auto clear */
	Iam		= 0x00890/4,	/* " auto mask enable */
	Itr		= 0x00820/4,	/* " throttling rate (0-19) */
	Ivar		= 0x00900/4,	/* " vector allocation regs. */
	/* msi interrupt */
	Msixt		= 0x0000/4,	/* msix table (bar3) */
	Msipba		= 0x2000/4,	/* msix pending bit array (bar3) */
	Pbacl		= 0x11068/4,	/* pba clear */
	Gpie		= 0x00898/4,	/* general purpose int enable */

	/* flow control */
	Pfctop		= 0x03008/4,	/* priority flow ctl type opcode */
	Fcttv		= 0x03200/4,	/* " transmit timer value (0-3) */
	Fcrtl		= 0x03220/4,	/* " rx threshold low (0-7) +8n */
	Fcrth		= 0x03260/4,	/* " rx threshold high (0-7) +8n */
	Rcrtv		= 0x032a0/4,	/* " refresh value threshold */
	Tfcs		= 0x0ce00/4,	/* " tx status */

	/* rx dma */
	Rbal		= 0x01000/4,	/* rx desc base low (0-63) +0x40n */
	Rbah		= 0x01004/4,	/* " high */
	Rdlen		= 0x01008/4,	/* " length */
	Rdh		= 0x01010/4,	/* " head */
	Rdt		= 0x01018/4,	/* " tail */
	Rxdctl		= 0x01028/4,	/* " control */

	Srrctl		= 0x02100/4,	/* split and replication rx ctl. */
	Dcarxctl		= 0x02200/4,	/* rx dca control */
	Rdrxctl		= 0x02f00/4,	/* rx dma control */
	Rxpbsize		= 0x03c00/4,	/* rx packet buffer size */
	Rxctl		= 0x03000/4,	/* rx control */
	Dropen		= 0x03d04/4,	/* drop enable control */

	/* rx */
	Rxcsum		= 0x05000/4,	/* rx checksum control */
	Rfctl		= 0x04008/4,	/* rx filter control */
	Mta		= 0x05200/4,	/* multicast table array (0-127) */
	Ral		= 0x05400/4,	/* rx address low */
	Rah		= 0x05404/4,	/* rx address high */
	Psrtype		= 0x05480/4,	/* packet split rx type. */
	Vfta		= 0x0a000/4,	/* vlan filter table array. */
	Fctrl		= 0x05080/4,	/* filter control */
	Vlnctrl		= 0x05088/4,	/* vlan control */
	Msctctrl		= 0x05090/4,	/* multicast control */
	Mrqc		= 0x05818/4,	/* multiple rx queues cmd */
	Vmdctl		= 0x0581c/4,	/* vmdq control */
	Imir		= 0x05a80/4,	/* immediate irq rx (0-7) */
	Imirext		= 0x05aa0/4,	/* immediate irq rx ext */
	Imirvp		= 0x05ac0/4,	/* immediate irq vlan priority */
	Reta		= 0x05c00/4,	/* redirection table */
	Rssrk		= 0x05c80/4,	/* rss random key */

	/* tx */
	Tdbal		= 0x06000/4,	/* tx desc base low +0x40n */
	Tdbah		= 0x06004/4,	/* " high */
	Tdlen		= 0x06008/4,	/* " len */
	Tdh		= 0x06010/4,	/* " head */
	Tdt		= 0x06018/4,	/* " tail */
	Txdctl		= 0x06028/4,	/* " control */
	Tdwbal		= 0x06038/4,	/* " write-back address low */
	Tdwbah		= 0x0603c/4,	/* " write-back address high */

	Dtxctl		= 0x04a80/4,	/* tx dma control 82599 only */
	Tdcatxctrl	= 0x07200/4,	/* tx dca register (0-15) */
	Tipg		= 0x0cb00/4,	/* tx inter-packet gap */
	Txpbsize		= 0x0cc00/4,	/* tx packet-buffer size (0-15) */

	/* mac */
	Hlreg0		= 0x04240/4,	/* highlander control reg 0 */
	Hlreg1		= 0x04244/4,	/* highlander control reg 1 (ro) */
	Msca		= 0x0425c/4,	/* mdi signal cmd & addr */
	Msrwd		= 0x04260/4,	/* mdi single rw data */
	Mhadd		= 0x04268/4,	/* mac addr high & max frame */
	Pcss1		= 0x04288/4,	/* xgxs status 1 */
	Pcss2		= 0x0428c/4,	/* xgxs status 2 */
	Xpcss		= 0x04290/4,	/* 10gb-x pcs status */
	Serdesc		= 0x04298/4,	/* serdes control */
	Macs		= 0x0429c/4,	/* fifo control & report */
	Autoc		= 0x042a0/4,	/* autodetect control & status */
	Links		= 0x042a4/4,	/* link status */
	Autoc2		= 0x042a8/4,	/* autodetect control 2 */
};

/*
 * bit masks and field shifts for individual registers; each
 * group is headed by the name of the register it applies to.
 * Pthresh, Hthresh and Wthresh are shift counts, not masks:
 * they are used as value<<Xthresh when building Rxdctl/Txdctl.
 */
enum{
	/* Ctrl */
	Rst		= 1<<26,	/* full nic reset */

	/* Txdctl */
	Ten		= 1<<25,	/* set by txinit after the ring is programmed */

	/* Dtxctl */
	Den		= 1<<0,		/* set by txinit on the 82599 only */

	/* Fctrl */
	Bam		= 1<<10,	/* broadcast accept mode */
	Upe 		= 1<<9,	/* unicast promiscuous */
	Mpe 		= 1<<8,	/* multicast promiscuous */

	/* Rxdctl */
	Pthresh		= 0,		/* prefetch threshold shift in bits */
	Hthresh		= 8,		/* host buffer minimum threshold " */
	Wthresh		= 16,		/* writeback threshold */
	Renable		= 1<<25,

	/* Rxctl */
	Rxen		= 1<<0,
	Dmbyps		= 1<<1,

	/* Rdrxctl */
	Rdmt½		= 0,
	Rdmt¼		= 1,
	Rdmt⅛		= 2,

	/* Rxcsum */
	Ippcse		= 1<<12,	/* ip payload checksum enable */

	/* Eerd */
	EEstart		= 1<<0,	/* Start Read */
	EEdone		= 1<<1,	/* Read done */

	/* interrupts */
	Irx0		= 1<<0,	/* driver defined */
	Itx0		= 1<<1,	/* driver defined */
	Lsc		= 1<<20,	/* link status change */

	/* Links */
	Lnkup	= 1<<30,
	Lnkspd	= 1<<29,

	/* Hlreg0 */
	Jumboen	= 1<<2,

	/* Ivar */
	Ivtx	= 1|1<<7,		/* transmit interrupt */
	Ivrx	= 0|1<<7,		/* receive interrupt */
};

/*
 * controller families handled by this driver.  the values
 * index cttab and are stored in Ctlr.type by scan().
 */
enum {
	i82598,
	i82599,
	Nctlrtype,	/* number of types; sizes cttab */
};

/* per-family description; one entry per controller type in cttab */
typedef struct Ctlrtype Ctlrtype;
struct Ctlrtype {
	int	type;		/* i82598 or i82599 */
	int	mtu;		/* presumably largest supported frame, in bytes — TODO confirm */
	int	flag;		/* zero for every entry in cttab */
	char	*name;		/* printable name */
};

/*
 * controller type table, indexed by type.
 * fields are: type, mtu, flag, name.
 */
static Ctlrtype cttab[Nctlrtype] = {
	{ i82598,	12*1024,	0,	"i82598" },
	{ i82599,	12*1024,	0,	"i82599" },
};

/* status */
enum{
	Pif	= 1<<7,	/* past exact filter (sic) */
	Ipcs	= 1<<6,	/* ip checksum calcuated */
	L4cs	= 1<<5,	/* layer 2 */
	Tcpcs	= 1<<4,	/* tcp checksum calcuated */
	Vp	= 1<<3,	/* 802.1q packet matched vet */
	Ixsm	= 1<<2,	/* ignore checksum */
	Reop	= 1<<1,	/* end of packet */
	Rdd	= 1<<0,	/* descriptor done */
};

/*
 * receive descriptor, shared with the nic: an array of these
 * is handed to the hardware through Rbal/Rbah/Rdlen, so the
 * field order and sizes must not change.
 */
typedef struct{
	u32int	addr[2];	/* buffer address; replenish() fills in addr[0] only */
	ushort	length;		/* byte count; rx() advances b->wp by this */
	ushort	cksum;
	uchar	status;		/* Rdd etc.; cleared by replenish() */
	uchar	errors;
	ushort	vlan;
}Rd;

enum{
	/* Td cmd */
	Rs	= 1<<3,
	Ic	= 1<<2,
	Ifcs	= 1<<1,
	Teop	= 1<<0,

	/* Td status */
	Tdd	= 1<<0,
};

/*
 * transmit descriptor, shared with the nic: an array of these
 * is handed to the hardware through Tdbal/Tdbah/Tdlen, so the
 * field order and sizes must not change.
 */
typedef struct{
	u32int	addr[2];	/* buffer address; transmit() fills in addr[0] only */
	ushort	length;		/* byte count of the packet */
	uchar	cso;
	uchar	cmd;		/* Rs, Ic, Ifcs, Teop */
	uchar	status;		/* Tdd */
	uchar	css;
	ushort	vlan;
}Td;

/* Ctlr.flag bits */
enum{
	Factive	= 1<<0,		/* controller has been claimed by i82598pnp() */
	Fstarted	= 1<<1,	/* not referenced elsewhere in this file */
};

/* per-controller driver state; allocated by scan(), rings set up by attach() */
typedef struct{
	Pcidev	*p;		/* pci device found by scan() */
	u32int	*reg;		/* mapped register window, indexed by the register enum */
	uchar	flag;		/* Factive, Fstarted */
	int	pool;		/* index of this controller in ctlrtab */
	int	nrd, ntd, nrb, rbsz;	/* rx/tx ring sizes; nrb is not used in this file; rx buffer size */
	Lock	tlock;		/* held across transmit() */
	uint	im;		/* software copy of the interrupt mask written to Ims */
	Lock	imlock;		/* guards im and the Ims/Imc writes */
	char	*alloc;		/* raw allocation holding both rings and the rb/tb arrays */
	Rd	*rdba;		/* rx descriptor ring, 256-byte aligned within alloc */
	Block	**rb;		/* block attached to each rx descriptor, or 0 */
	uint	rdt, rdfree;	/* rx tail; count of rx descriptors holding a buffer */
	uint	rdh;		/* next rx descriptor rx() will examine */
	Td	*tdba;		/* tx descriptor ring, 256-byte aligned within alloc */
	uint	tdh, tdt;	/* last reclaimed tx descriptor; next tx descriptor to fill */
	Block	**tb;		/* block attached to each tx descriptor, or 0 */
	uchar	ra[Eaddrlen];	/* mac address read from the eeprom by eeload() */
	uchar	mta[128];	/* not used in this file */
	int	type;		/* i82598 or i82599 */
}Ctlr;

/*
 * tweakable parameters.
 * Nrd and Ntd are used as ring sizes with the Next() macro,
 * which wraps with a bitwise and, so they must be powers of two.
 */
enum{
	Nrd	= 32,		/* rx descriptors per controller */
	Ntd	= 32,		/* tx descriptors per controller */
	Nctlr	= 4,		/* maximum number of controllers scan() will accept */
};

static	Ctlr	*ctlrtab[Nctlr];	/* controllers found by scan() */
static	Lock	rblock[Nctlr];		/* not referenced elsewhere in this file */
static	Block	*rbpool[Nctlr];		/* not referenced elsewhere in this file */
static	int	nctlr;			/* number of valid entries in ctlrtab */

/*
 * enable the interrupt causes in i: merge them into the cached
 * mask c->im and write the result to the Ims register, all
 * while holding imlock.
 */
static void
im(Ctlr *c, int i)
{
	uint mask;

	ilock(&c->imlock);
	mask = c->im | i;
	c->im = mask;
	c->reg[Ims] = mask;
	iunlock(&c->imlock);
}

/* successor of ring index x; m is ring size-1, so the size must be a power of two */
#define Next(x, m)	(((x)+1) & (m))

/*
 * reclaim finished transmit descriptors.
 * tdh is the index of the last descriptor already reclaimed.
 * starting with the one after it, every descriptor whose status
 * has Tdd set has its block freed, its tb slot zeroed and its
 * status cleared.  returns the index of the last descriptor
 * reclaimed, or tdh unchanged if there was nothing to do.
 */
static int
cleanup(Ctlr *c, int tdh)
{
	Block *done;
	uint mask, nxt;

	mask = c->ntd-1;
	for(;;){
		nxt = Next(tdh, mask);
		if((c->tdba[nxt].status&Tdd) == 0)
			break;
		done = c->tb[nxt];
		c->tb[nxt] = 0;
		freeb(done);
		c->tdba[nxt].status = 0;
		tdh = nxt;
	}
	return tdh;
}

/*
 * move up to 8 packets from the ether transmit ring buffers
 * (e->tb) onto the nic's descriptor ring.
 *
 * finished descriptors are reclaimed first.  the loop stops
 * early when the next ring buffer is not owned by Interface, or
 * when the descriptor ring is full; in the latter case the Itx0
 * interrupt is enabled through im().  the tail register is only
 * written if at least one descriptor was queued.
 *
 * NOTE(review): im() takes c->imlock, and interrupt() calls this
 * function with c->imlock already held — confirm that ilock
 * tolerates that on the ring-full path.
 */
static void
transmit(Ether *e)
{
	uint n, mask, tail, head;
	Ctlr *c;
	Block *b;
	Td *d;
	RingBuf *rb;

	c = e->ctlr;
	ilock(&c->tlock);
	head = cleanup(c, c->tdh);
	c->tdh = head;
	tail = c->tdt;
	mask = c->ntd-1;
	n = 0;
	while(n < 8){
		if(Next(tail, mask) == head){
			/* ring full: ask for a transmit interrupt */
			im(c, Itx0);
			break;
		}
		rb = &e->tb[e->ti];
		if(rb->owner != Interface)
			break;
		b = fromringbuf(e);
		rb->owner = Host;
		e->ti = NEXT(e->ti, e->ntb);

		d = &c->tdba[tail];
		d->addr[0] = PCIWADDR(b->rp);
		d->length = BLEN(b);
		d->cmd = Rs|Ifcs|Teop;
		c->tb[tail] = b;	/* freed by cleanup() once Tdd is set */
		tail = Next(tail, mask);
		n++;
	}
	if(n != 0){
		c->tdt = tail;
		c->reg[Tdt] = tail;
	}
	iunlock(&c->tlock);
}

/*
 * (re)initialise the receive side: turn the receiver off, free
 * any blocks still attached to the ring, program the filter,
 * buffer-size and ring registers, then turn the receiver on.
 * the ring is left empty (head == tail == 0); replenish()
 * supplies the buffers.
 *
 * NOTE(review): Ipcs is declared with the receive-descriptor
 * status bits, while Ippcse is the constant declared under
 * Rxcsum and is otherwise unused in this file — confirm which
 * bit was meant for the Rxcsum write below.
 */
static void
rxinit(Ctlr *c)
{
	Block *old;
	int n;

	c->reg[Rxctl] &= ~Rxen;
	n = 0;
	while(n < c->nrd){
		old = c->rb[n];
		c->rb[n] = 0;
		if(old != 0)
			freeb(old);
		n++;
	}
	c->rdfree = 0;

	/* filtering, checksum, buffer and frame sizes */
	c->reg[Fctrl] |= Bam;
	c->reg[Rxcsum] |= Ipcs;
	c->reg[Srrctl] = (c->rbsz+1023)/1024;	/* rbsz rounded up to a count of 1024-byte units */
	c->reg[Mhadd] = c->rbsz<<16;
	c->reg[Hlreg0] |= Jumboen;

	/* descriptor ring: base, size, empty head/tail */
	c->reg[Rbal] = PCIWADDR(c->rdba);
	c->reg[Rbah] = 0;
	c->reg[Rdlen] = c->nrd*sizeof(Rd);
	c->reg[Rdh] = 0;
	c->rdt = 0;
	c->reg[Rdt] = 0;

	/* thresholds, then enable the ring and the receiver */
	c->reg[Rdrxctl] = Rdmt¼;
	c->reg[Rxdctl] = 8<<Wthresh|8<<Pthresh|4<<Hthresh|Renable;
	c->reg[Rxctl] |= Rxen|Dmbyps;
}

/*
 * give the nic fresh receive buffers.
 * starting at the software tail c->rdt, attach a newly allocated
 * block to each descriptor until the slot after the tail would be
 * rdh (the next descriptor rx() will look at), so one slot is
 * always left unfilled.  the Rdt register is written once at the
 * end, and only if something was added.
 *
 * buffers are c->rbsz bytes, the same value rxinit() programs
 * into Srrctl and Mhadd, so the allocation cannot drift from what
 * the hardware was told.  if allocb returns 0 the refill stops
 * short; whatever was filled so far is still handed over.
 */
static void
replenish(Ctlr *c, uint rdh)
{
	uint rdt, m, i;
	Rd *r;
	Block *b;

	m = c->nrd-1;
	i = 0;
	for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
		b = allocb(c->rbsz);
		if(b == 0)
			break;
		r = c->rdba+rdt;
		c->rb[rdt] = b;
		r->addr[0] = PCIWADDR(b->rp);
		r->status = 0;		/* forget any Rdd left from the previous use */
		c->rdfree++;
		i++;
	}
	if(i)
		c->reg[Rdt] = c->rdt = rdt;
}

/*
 * receive: top up the ring, then hand every completed descriptor
 * (Rdd set) starting at c->rdh to toringbuf() and free its block.
 *
 * the status byte is cleared as each descriptor is consumed.
 * replenish() always leaves one slot unfilled, and that slot
 * keeps whatever status it had when it was last used; without
 * the clear, a burst that fills the whole ring makes this loop
 * walk onto that slot, see a stale Rdd and dereference the 0 in
 * c->rb[].  a slot with Rdd set but no block is treated the same
 * way: the status is cleared and the loop stops.
 */
static void
rx(Ether *e)
{
	Ctlr *c;
	Block *b;
	Rd *r;
	uint m, len;

	c = e->ctlr;
	m = c->nrd-1;

	replenish(c, c->rdh);
	for(;;){
		r = c->rdba+c->rdh;
		if(!(r->status&Rdd))
			break;
		len = r->length;
		r->status = 0;
		b = c->rb[c->rdh];
		if(b == 0)
			break;		/* stale descriptor; nothing was received here */
		c->rb[c->rdh] = 0;
		b->wp += len;

		toringbuf(e, b->rp, BLEN(b));
		freeb(b);

		c->rdfree--;
		c->rdh = Next(c->rdh, m);
	}
}

/*
 * quiesce and reset the nic.
 * masks every interrupt, sets Rst in Ctrl and polls for up to
 * 100 one-millisecond delays for the bit to read back clear.
 * returns -1 if it never does.  otherwise applies the Ecc errata
 * write and clears state that, per the original author's note,
 * the reset leaves behind; returns 0.
 */
static int
detach(Ctlr *c)
{
	int i;

	c->reg[Imc] = ~0;
	c->reg[Ctrl] |= Rst;
	for(i = 0; i < 100; i++){
		delay(1);
		if((c->reg[Ctrl]&Rst) == 0)
			break;
	}
	if(i == 100)
		return -1;

	/* errata */
	delay(50);
	c->reg[Ecc] &= ~(1<<21|1<<18|1<<9|1<<6);

	/*
	 * not cleared by reset; kill it manually.
	 * receive address entries are Ral/Rah pairs, so entry i's
	 * Rah is two words beyond entry i-1's.  the loop previously
	 * ignored i and cleared bit 31 of entry 0 fifteen times,
	 * leaving entries 1-15 untouched.  entry 0 is rewritten by
	 * reset() from the eeprom address.
	 */
	for(i = 1; i<16; i++)
		c->reg[Rah+2*i] &= ~(1<<31);
	for(i = 0; i<128; i++)
		c->reg[Mta+i] = 0;
	for(i = 1; i<640; i++)
		c->reg[Vfta+i] = 0;
	return 0;
}

/* ether detach hook: reset the controller; detach()'s result is ignored */
static void
shutdown(Ether *e)
{
	Ctlr *c;

	c = e->ctlr;
	detach(c);
}

/*
 * read eeprom word i: start the read through Eerd, spin until
 * EEdone is set, then return the top 16 bits of the register.
 * the original author's note bounds a read at ≤ 20ms; nothing
 * here enforces that, and the wait has no timeout.
 */
static ushort
eeread(Ctlr *c, int i)
{
	u32int v;

	c->reg[Eerd] = EEstart|i<<2;
	do
		v = c->reg[Eerd];
	while((v&EEdone) == 0);
	return c->reg[Eerd]>>16;
}

/*
 * validate the eeprom and load the mac address into c->ra.
 *
 * word 0 must have 0x40 in the bits selected by 0xc0.  the 16-bit
 * sum of words 0-0x3f, plus the contents of each section
 * referenced by words 3-0xe (a pointer to a length word followed
 * by that many words), must come to 0xbaba.  sections that would
 * run past word 0xffff are skipped.
 *
 * the address block is located through word 10 when Status bit 3
 * is set and through word 9 otherwise; the address starts one
 * word past that pointer, low byte first.  bits 2-3 of Status
 * are then added to the last address byte.
 *
 * returns 0 on success, -1 on a bad signature or checksum.
 */
static int
eeload(Ctlr *c)
{
	ushort sum, w, ptr, len, i, j;

	if((eeread(c, 0)&0xc0) != 0x40)
		return -1;

	sum = 0;
	for(i = 0; i < 0x40; i++)
		sum += eeread(c, i);
	for(i = 3; i < 0xf; i++){
		ptr = eeread(c, i);
		len = eeread(c, ptr);
		ptr++;
		if((int)ptr+len+1 > 0xffff)
			continue;
		for(j = ptr; j < ptr+len; j++)
			sum += eeread(c, j);
	}
	if(sum != 0xbaba)
		return -1;

	if(c->reg[Status] & (1<<3))
		ptr = eeread(c, 10);
	else
		ptr = eeread(c, 9);
	ptr++;
	i = 0;
	while(i < Eaddrlen){
		w = eeread(c, ptr+i/2);
		c->ra[i] = w;
		c->ra[i+1] = w>>8;
		i += 2;
	}
	c->ra[5] += (c->reg[Status]&0xc)>>2;
	return 0;
}

/*
 * bring the controller to a known state: reset it, load and
 * check the eeprom, program the station address into receive
 * address entry 0, then set up flow control thresholds, the
 * interrupt vector mapping and interrupt throttling.
 * returns 0 on success, -1 (after printing a message) if the
 * reset times out or the eeprom is rejected.
 */
static int
reset(Ctlr *c)
{
	uchar *ea;
	int n;

	if(detach(c) != 0){
		print("82598: reset timeout\n");
		return -1;
	}
	if(eeload(c) != 0){
		print("82598: eeprom failure\n");
		return -1;
	}

	/* station address; bit 31 of Rah is the bit detach() clears */
	ea = c->ra;
	c->reg[Ral] = ea[3]<<24|ea[2]<<16|ea[1]<<8|ea[0];
	c->reg[Rah] = ea[5]<<8|ea[4]|1<<31;

	c->reg[Ctrlext] |= 1<<16;
	/* make some guesses for flow control */
	c->reg[Fcrtl] = 0x10000|1<<31;
	c->reg[Fcrth] = 0x40000|1<<31;
	c->reg[Rcrtv] = 0x6000;

	/* configure interrupt mapping (don't ask) */
	if(c->type != i82599){
		c->reg[Ivar+0] = Ivrx;
		c->reg[Ivar+64/4] = Ivtx;
//		c->reg[Ivar+97/4] = (2|1<<7)<<8*(97%4);
	}else
		c->reg[Ivar+0] = Ivtx<<8 | Ivrx;

	/* interrupt throttling goes here. */
	for(n = 0; n < 20; n++)
		c->reg[Itr+n] = 128;		/* ¼µs intervals */
	c->reg[Itr+Itx0] = 256;
	return 0;
}

/*
 * (re)initialise the transmit side: set the thresholds, free any
 * blocks still attached to the ring, zero the descriptors,
 * program the ring registers and enable transmission.
 *
 * c->tdh starts at ntd-1 because it names the last reclaimed
 * descriptor: cleanup() begins looking at the slot after it,
 * which is slot 0, where c->tdt starts.
 */
static void
txinit(Ctlr *c)
{
	Block *old;
	int n;

	c->reg[Txdctl] = 16<<Wthresh|16<<Pthresh;
	n = 0;
	while(n < c->ntd){
		old = c->tb[n];
		c->tb[n] = 0;
		if(old != 0)
			freeb(old);
		n++;
	}
	memset(c->tdba, 0, c->ntd*sizeof(Td));

	/* descriptor ring: base, size, empty head/tail */
	c->reg[Tdbal] = PCIWADDR(c->tdba);
	c->reg[Tdbah] = 0;
	c->reg[Tdlen] = c->ntd*sizeof(Td);
	c->reg[Tdh] = 0;
	c->reg[Tdt] = 0;
	c->tdh = c->ntd-1;
	c->tdt = 0;

	/* Dtxctl exists on the 82599 only */
	if(c->type == i82599)
		c->reg[Dtxctl] |= Den;
	c->reg[Txdctl] |= Ten;
}

/*
 * ether attach hook: allocate the descriptor rings and the
 * per-descriptor block arrays, initialise both directions, post
 * receive buffers and enable the receive interrupt.
 *
 * everything lives in one allocation laid out as
 *	rx ring (256-byte aligned), tx ring (256-byte aligned),
 *	rb[nrd], tb[ntd]
 * the two +255 terms leave room for the alignment round-ups.
 *
 * the allocation is checked, because a 0 return would otherwise
 * become ring pointers near address 0 that are then written and
 * handed to the nic.  it is also cleared explicitly, because
 * rxinit() and txinit() free every non-zero rb[]/tb[] entry and
 * rx() trusts the initial descriptor status.  on allocation
 * failure a message is printed and the controller is left
 * uninitialised with interrupts still masked.
 */
static void
attach(Ether *e)
{
	Ctlr *c;
	int t;

	c = e->ctlr;

	c->nrd = Nrd;
	c->ntd = Ntd;
	t = c->nrd*sizeof *c->rdba+255;
	t += c->ntd*sizeof *c->tdba+255;
	t += (c->ntd+c->nrd)*sizeof(Block*);
	c->alloc = malloc(t);
	if(c->alloc == 0){
		print("i82598: no memory for descriptor rings\n");
		return;
	}
	memset(c->alloc, 0, t);

	c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
	c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba+c->nrd), 256);
	c->rb = (Block**)(c->tdba+c->ntd);
	c->tb = (Block**)(c->rb+c->nrd);

	rxinit(c);

	txinit(c);
	c->rdh = 0;
	replenish(c, c->rdh);
	im(c, Irx0);
}

/*
 * interrupt handler; v is the Ether registered for this irq.
 *
 * with imlock held, all interrupt causes are masked, then Icr is
 * read repeatedly and rx()/transmit() are called for the causes
 * that are both pending and present in c->im, until none remain.
 * link status changes (Lsc) are recognised but ignored.  on the
 * way out the mask saved at entry is written back to both c->im
 * and Ims.
 *
 * note that the local variable im hides the function im() inside
 * this body.
 *
 * NOTE(review): transmit() calls im(c, Itx0) when the descriptor
 * ring is full, and im() takes c->imlock, which is already held
 * here — confirm that ilock tolerates that.  any bit im() adds
 * to c->im during the loop is also discarded by the final
 * restore of the saved mask.
 */
static void
interrupt(Ureg*, void *v)
{
	Ether *e;
	Ctlr *c;
	int icr, im;

	e = v;
	c = e->ctlr;
	ilock(&c->imlock);
	c->reg[Imc] = ~0;		/* mask everything while we work */
	im = c->im;			/* mask to restore on exit */
	while(icr = c->reg[Icr]&c->im){
		if(icr&Lsc){
			/* link status change: nothing to do */
		}
		if(icr&Irx0)
			rx(e);
		if(icr&Itx0)
			transmit(e);
	}
	c->reg[Ims] = c->im = im;
	iunlock(&c->imlock);
}

/*
 * some boards report device id 1 together with subsystem vendor
 * id 0x1b52; for those, take the device id from the upper half
 * of the 32-bit word read at PciSVID instead, so that scan()'s
 * switch on p->did sees it.
 */
static void
hbafixup(Pcidev *p)
{
	uint sub;

	sub = pcicfgr32(p, PciSVID);
	if(p->did != 1)
		return;
	if((sub & 0xffff) != 0x1b52)
		return;
	p->did = sub>>16;
}

/*
 * find every supported controller on the pci bus.
 * for each intel (0x8086) device with a known 82598/82599 device
 * id: map bar 0, allocate a Ctlr, reset the chip, enable bus
 * mastering and append the controller to ctlrtab.  stops once
 * ctlrtab is full.  devices that cannot be mapped or reset are
 * skipped with a message.
 *
 * the Ctlr allocation is checked and cleared explicitly: the
 * rest of the driver relies on flag, im, alloc and the ring
 * indices starting at zero.
 */
static void
scan(void)
{
	Pcidev *p;
	ulong io, type, mem;
	Ctlr *c;

	p = 0;
	while(p = pcimatch(p, 0x8086, 0)){
		hbafixup(p);
		switch(p->did){
		case 0x10c6:	/* 82598 af dual port */
		case 0x10c7:	/* 82598 af single port */
		case 0x10b6:	/* 82598 backplane */
		case 0x10dd:	/* 82598 at cx4 */
			type = i82598;
			break;
		case 0x10f7:	/* 82599 kx/kx4 */
		case 0x10fb:	/* 82599 sfi/sfp+ */
		case 0x10fc:	/* 82599 xaui */
		case 0x151c:	/* 82599 10gbt */
			type = i82599;
			break;
		default:
			continue;
		}
		if(nctlr == nelem(ctlrtab)){
			print("i82598: too many controllers\n");
			return;
		}
		io = p->mem[0].bar&~0xf;
		mem = upamalloc(io, p->mem[0].size, 0);
		if(mem == 0){
			print("i82598: cant map %#p\n", p->mem[0].bar);
			continue;
		}
		c = malloc(sizeof *c);
		if(c == 0){
			print("i82598: no memory for controller\n");
			return;
		}
		memset(c, 0, sizeof *c);
		c->p = p;
		c->reg = (u32int*)KADDR(mem);
		c->rbsz = 2048;
		c->type = type;
		if(reset(c)){
			print("i82598: cant reset\n");
			free(c);
//			vunmap(mem, p->mem[0].size);
			continue;
		}
		pcisetbme(p);
		c->pool = nctlr;
		ctlrtab[nctlr++] = c;
	}
}

/*
 * driver entry point: bind e to an unclaimed controller.
 * the pci bus is scanned on the first call only.  the first
 * controller that is not yet Factive and whose register address
 * matches e->port (or any such controller, if e->port is 0) is
 * marked active and its port, irq, tbdf, speed, mac address and
 * method pointers are filled in.
 * returns 0 on success, -1 if no controller is available.
 */
int
i82598pnp(Ether *e)
{
	Ctlr *c;
	int n;
	static int scanned;

	if(!scanned){
		scan();
		scanned = 1;
	}

	c = 0;
	for(n = 0; n < nctlr; n++){
		c = ctlrtab[n];
		if(c != 0 && (c->flag&Factive) == 0)
		if(e->port == 0 || e->port == (ulong)c->reg)
			break;
	}
	if(n >= nctlr)
		return -1;

	c->flag |= Factive;
	e->ctlr = c;
	e->port = (uintptr)c->reg;
	e->irq = c->p->intl;
	e->tbdf = c->p->tbdf;
	e->mbps = 10000;
	memmove(e->ea, c->ra, Eaddrlen);

	e->attach = attach;
	e->interrupt = interrupt;
	e->transmit = transmit;
	e->detach = shutdown;

	return 0;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].