Plan 9 from Bell Labs’s /usr/web/sources/patch/applied/tcs-html-chars/html.c.orig

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include <u.h>
#include <libc.h>
#include <bio.h>
#include "hdr.h"
#include "conv.h"

typedef struct Hchar Hchar;
struct Hchar
{
	char *s;
	Rune r;
};

/* &lt;, &gt;, &quot;, &amp; intentionally omitted */

static Hchar byname[] =
{
	{"AElig", 198},
	{"Aacute", 193},
	{"Acirc", 194},
	{"Agrave", 192},
	{"Aring", 197},
	{"Atilde", 195},
	{"Auml", 196},
	{"Ccedil", 199},
	{"ETH", 208},
	{"Eacute", 201},
	{"Ecirc", 202},
	{"Egrave", 200},
	{"Euml", 203},
	{"Iacute", 205},
	{"Icirc", 206},
	{"Igrave", 204},
	{"Iuml", 207},
	{"Ntilde", 209},
	{"Oacute", 211},
	{"Ocirc", 212},
	{"Ograve", 210},
	{"Oslash", 216},
	{"Otilde", 213},
	{"Ouml", 214},
	{"THORN", 222},
	{"Uacute", 218},
	{"Ucirc", 219},
	{"Ugrave", 217},
	{"Uuml", 220},
	{"Yacute", 221},
	{"aacute", 225},
	{"acirc", 226},
	{"acute", 180},
	{"aelig", 230},
	{"agrave", 224},
	{"alpha", 945},
	{"aring", 229},
	{"atilde", 227},
	{"auml", 228},
	{"beta", 946},
	{"brvbar", 166},
	{"ccedil", 231},
	{"cdots", 8943},
	{"cedil", 184},
	{"cent", 162},
	{"chi", 967},
	{"copy", 169},
	{"curren", 164},
	{"ddots", 8945},
	{"deg", 176},
	{"delta", 948},
	{"divide", 247},
	{"eacute", 233},
	{"ecirc", 234},
	{"egrave", 232},
	{"emdash", 8212},	/* non-standard but commonly used */
	{"emsp", 8195},
	{"endash", 8211},	/* non-standard but commonly used */
	{"ensp", 8194},
	{"epsilon", 949},
	{"eta", 951},
	{"eth", 240},
	{"euml", 235},
	{"frac12", 189},
	{"frac14", 188},
	{"frac34", 190},
	{"gamma", 947},
	{"iacute", 237},
	{"icirc", 238},
	{"iexcl", 161},
	{"igrave", 236},
	{"iota", 953},
	{"iquest", 191},
	{"iuml", 239},
	{"kappa", 954},
	{"lambda", 955},
	{"laquo", 171},
	{"ldquo", 8220},
	{"ldots", 8230},
	{"lsquo", 8216},
	{"macr", 175},
	{"mdash", 8212},
	{"micro", 181},
	{"middot", 183},
	{"mu", 956},
	{"nbsp", 160},
	{"ndash", 8211},
	{"not", 172},
	{"ntilde", 241},
	{"nu", 957},
	{"oacute", 243},
	{"ocirc", 244},
	{"ograve", 242},
	{"omega", 969},
	{"omicron", 959},
	{"ordf", 170},
	{"ordm", 186},
	{"oslash", 248},
	{"otilde", 245},
	{"ouml", 246},
	{"para", 182},
	{"phi", 966},
	{"pi", 960},
	{"plusmn", 177},
	{"pound", 163},
	{"psi", 968},
	{"quad", 8193},
	{"raquo", 187},
	{"rdquo", 8221},
	{"reg", 174},
	{"rho", 961},
	{"rsquo", 8217},
	{"sect", 167},
	{"shy", 173},
	{"sigma", 963},
	{"sp", 8194},
	{"sup1", 185},
	{"sup2", 178},
	{"sup3", 179},
	{"szlig", 223},
	{"tau", 964},
	{"theta", 952},
	{"thinsp", 8201},
	{"thorn", 254},
	{"times", 215},
	{"trade", 8482},
	{"uacute", 250},
	{"ucirc", 251},
	{"ugrave", 249},
	{"uml", 168},
	{"upsilon", 965},
	{"uuml", 252},
	{"varepsilon", 8712},
	{"varphi", 981},
	{"varpi", 982},
	{"varrho", 1009},
	{"vdots", 8942},
	{"vsigma", 962},
	{"vtheta", 977},
	{"xi", 958},
	{"yacute", 253},
	{"yen", 165},
	{"yuml", 255},
	{"zeta", 950}
};

static Hchar byrune[nelem(byname)];

static int
hnamecmp(const void *va, const void *vb)
{
	Hchar *a, *b;
	
	a = (Hchar*)va;
	b = (Hchar*)vb;
	return strcmp(a->s, b->s);
}

static int
hrunecmp(const void *va, const void *vb)
{
	Hchar *a, *b;
	
	a = (Hchar*)va;
	b = (Hchar*)vb;
	return a->r - b->r;
}

static void
html_init(void)
{
	static int init;
	
	if(init)
		return;
	init = 1;
	memmove(byrune, byname, sizeof byrune);
	qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);
	qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);
}

static Rune
findbyname(char *s)
{
	Hchar *h;
	int n, m, x;
	
	h = byname;
	n = nelem(byname);
	while(n > 0){
		m = n/2;
		x = strcmp(h[m].s, s);
		if(x == 0)
			return h[m].r;
		if(x < 0){
			h += m+1;
			n -= m+1;
		}else
			n = m;
	}
	return Runeerror;
}

static char*
findbyrune(Rune r)
{
	Hchar *h;
	int n, m;

	h = byrune;
	n = nelem(byrune);
	while(n > 0){
		m = n/2;
		if(h[m].r == r)
			return h[m].s;
		if(h[m].r < r){
			h += m+1;
			n -= m+1;
		}else
			n = m;
	}
	return nil;
}

void
html_in(int fd, long *x, struct convert *out)
{
	char buf[100], *p;
	Biobuf b;
	Rune rbuf[N];
	Rune *r, *er;
	int c, i;
	
	USED(x);
	
	html_init();
	r = rbuf;
	er = rbuf+N;
	Binit(&b, fd, OREAD);
	while((c = Bgetrune(&b)) != Beof){
		if(r >= er){
			OUT(out, rbuf, r-rbuf);
			r = rbuf;
		}
		if(c == '&'){
			buf[0] = c;
			for(i=1; i<nelem(buf)-1;){
				c = Bgetc(&b);
				if(c == Beof)
					break;
				buf[i++] = c;
				if(strchr("; \t\r\n", c))
					break;
			}
			buf[i] = 0;
			if(buf[i-1] == ';'){
				buf[i-1] = 0;
				if((c = findbyname(buf+1)) != Runeerror){
					*r++ = c;
					continue;
				}
				buf[i-1] = ';';
				if(buf[1] == '#'){
					if(buf[2] == 'x')
						c = strtol(buf+3, &p, 16);
					else
						c = strtol(buf+2, &p, 10);
					if(*p != ';' || c >= NRUNE || c < 0)
						goto bad;
					*r++ = c;
					continue;
				}
			}
		bad:
			for(p=buf; p<buf+i; ){
				p += chartorune(r++, p);
				if(r >= er){
					OUT(out, rbuf, r-rbuf);
					r = rbuf;
				}
			}
			continue;
		}
		*r++ = c;
	}
	if(r > rbuf)
		OUT(out, rbuf, r-rbuf);
}

/*
 * use biobuf because can use more than UTFmax bytes per rune
 */
void
html_out(Rune *r, int n, long *x)
{
	char *s;
	Biobuf b;
	Rune *er;
	
	USED(x);
	html_init();
	Binit(&b, 1, OWRITE);
	er = r+n;
	for(; r<er; r++){
		if(*r < Runeself)
			Bputrune(&b, *r);
		else if((s = findbyrune(*r)) != nil)
			Bprint(&b, "&%s;", s);
		else
			Bprint(&b, "&#%d;", *r);
	}
	Bflush(&b);
}


Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].