/*
 * Plan 9 from Bell Labs’s /usr/web/sources/extra/mothra/html.h
 *
 * Copyright © 2021 Plan 9 Foundation.
 * Distributed under the MIT License.
 * Download the Plan 9 distribution.
 */


/*
 * Parameters
 *
 * Compile-time sizes of the fixed arrays embedded in Hglob:
 *	NSTACK	length of Hglob.stack
 *	NHBUF	length of Hglob.hbuf
 *	NPEEKC	length of Hglob.peekc
 *	NTOKEN	length of Hglob.token
 *	NATTR	length of Hglob.attr
 */
#define	NSTACK	100	/* html grammar is not recursive, so 30 or so should do */
#define	NHBUF	8192	/* Input buffer size */
#define	NPEEKC	3	/* Maximum lookahead */
#define	NTOKEN	1024	/* Maximum token length */
#define	NATTR	512	/* Maximum number of attributes of a tag */
/*
 * Forward typedefs, so the structures below can be named without
 * the struct keyword and can refer to one another by pointer.
 * Form is only named here; its members are not given in this header.
 */
typedef struct Pair Pair;
typedef struct Tag Tag;
typedef struct Stack Stack;
typedef struct Hglob Hglob;
typedef struct Form Form;
typedef struct Entity Entity;
/*
 * A name/value pair.  Hglob.attr holds an array of these for the
 * attributes of the tag being read.
 */
struct Pair{
	char *name;
	char *value;
};
/*
 * Element type of the pl_entity[] table: a name and the Rune it maps to.
 * Presumably an html character entity -- TODO confirm against the table.
 * Rune is not declared in this header; it must be in scope before inclusion.
 */
struct Entity{
	char *name;
	Rune value;
};
/*
 * Element type of the tag[] table: a tag name and an integer action.
 * NOTE(review): action is presumably one of END, NOEND, OPTEND or ERR
 * from the enum in this header -- confirm against the table's definition.
 */
struct Tag{
	char *name;
	int action;
};
/*
 * One entry of the parse stack (Hglob.stack, NSTACK entries deep).
 * Each entry records the tag being processed together with the
 * text-formatting state that goes with it.
 * NOTE(review): font and size presumably take the ROMAN.. and SMALL..
 * constants defined in this header -- confirm against the users.
 */
struct Stack{
	int tag;		/* html tag being processed */
	int pre;		/* in preformatted text? */
	int font;		/* typeface */
	int size;		/* point size of text */
	int margin;		/* left margin position */
	int indent;		/* extra indent at paragraph start */
	int number;		/* paragraph number */
	char *image;		/* arg of <img> */
	char *link;		/* arg of <a href=...> */
	char *name;		/* arg of <a name=...> */
	int ismap;		/* flag of <img> */
	int	table;		/* depth of table nesting */
};
/*
 * Globals -- these are packed up into a struct that gets passed around
 * so that multiple parsers can run concurrently
 *
 * The members fall into groups: input buffering (hfd, hbuf, hbufp,
 * ehbuf, heof, peekc, npeekc), the current token (token, attr, nsp,
 * spacc, tag), the parse stack (stack, state), and output state
 * (lineno, linebrk, para, text, etext, tp, form, dst).
 * Www is not declared in this header; it must be in scope before inclusion.
 */
struct Hglob{
	char *tp;		/* pointer in text buffer */
	char *name;		/* input file name */
	int hfd;		/* input file descriptor */
	char hbuf[NHBUF];	/* input buffer */
	char *hbufp;		/* next character in buffer */
	char *ehbuf;		/* end of good characters in buffer */
	int heof;		/* end of file flag */
	int peekc[NPEEKC];	/* characters to re-read */
	int npeekc;		/* # of characters to re-read */
	char token[NTOKEN];	/* if token type is TEXT */
	Pair attr[NATTR];	/* tag attribute/value pairs */
	int nsp;		/* # of white-space characters before TEXT token */
	int spacc;		/* place to accumulate more spaces */
				/* if negative, won't accumulate! */
	int tag;		/* if token type is TAG or END */
	Stack stack[NSTACK];	/* parse stack */
	Stack *state;		/* parse stack pointer */
	int lineno;		/* input line number */
	int linebrk;		/* flag set if we require a line-break in output */
	int para;		/* flag set if we need an indent at the break */
	char *text;		/* text buffer */
	char *etext;		/* end of text buffer */
	Form *form;		/* data for form under construction */
	Www *dst;		/* where the text goes */
	int isutf;			/* nonzero if charset=utf-8 */
};
/*
 * Token types
 */
#define	TAG	1
#define	ENDTAG	2
#define	TEXT	3
/*
 * Magic characters corresponding to
 *	literal < followed by / ! or alpha,
 *	literal > and
 *	end of file
 *
 * STAG and ETAG are the first two values above 0xFFFF.
 * NOTE(review): presumably chosen so they cannot collide with an
 * ordinary character code -- confirm against the reader.
 * EOF is defined here as a macro; a translation unit that also pulls
 * in another definition of EOF will see a redefinition.
 */
#define	STAG	65536
#define	ETAG	65537
#define	EOF	(-1)
/*
 * fonts
 * (presumably the values of Stack.font -- TODO confirm)
 */
#define	ROMAN	0
#define	ITALIC	1
#define	BOLD	2
#define	CWIDTH	3
/*
 * font sizes
 * (presumably the values of Stack.size -- TODO confirm)
 */
#define	SMALL	0
#define	NORMAL	1
#define	LARGE	2
#define	ENORMOUS 3
/*
 * Token names for the html parser.
 * Tag_end corresponds to </end> tags.
 * Tag_text tags text not in a tag.
 * Those two must follow the others.
 *
 * The values are spelled out explicitly; NTAG equals Tag_end, which is
 * also the count of tag names that precede it (0 through 59).
 */
enum{
	Tag_comment	= 0,
	Tag_a		= 1,
	Tag_address	= 2,
	Tag_b		= 3,
	Tag_base	= 4,
	Tag_blockquot	= 5,
	Tag_body	= 6,
	Tag_br		= 7,
	Tag_center	= 8,
	Tag_cite	= 9,

	Tag_code	= 10,
	Tag_dd		= 11,
	Tag_dfn		= 12,
	Tag_dir		= 13,
	Tag_dl		= 14,
	Tag_dt		= 15,
	Tag_em		= 16,
	Tag_font	= 17,
	Tag_form	= 18,
	Tag_h1		= 19,

	Tag_h2		= 20,
	Tag_h3		= 21,
	Tag_h4		= 22,
	Tag_h5		= 23,
	Tag_h6		= 24,
	Tag_head	= 25,
	Tag_hr		= 26,
	Tag_html	= 27,
	Tag_i		= 28,
	Tag_img		= 29,

	Tag_input	= 30,
	Tag_isindex	= 31,
	Tag_kbd		= 32,
	Tag_key		= 33,
	Tag_li		= 34,
	Tag_link	= 35,
	Tag_listing	= 36,
	Tag_menu	= 37,
	Tag_meta	= 38,
	Tag_nextid	= 39,

	Tag_ol		= 40,
	Tag_option	= 41,
	Tag_p		= 42,
	Tag_plaintext	= 43,
	Tag_pre		= 44,
	Tag_samp	= 45,
	Tag_select	= 46,
	Tag_strong	= 47,
	Tag_textarea	= 48,
	Tag_title	= 49,

	Tag_tt		= 50,
	Tag_u		= 51,
	Tag_ul		= 52,
	Tag_var		= 53,
	Tag_xmp		= 54,
	Tag_frame	= 55,	/* rm 5.8.97 */
	Tag_table	= 56,	/* rm 3.8.00 */
	Tag_td		= 57,
	Tag_tr		= 58,
	Tag_script	= 59,

	Tag_end		= 60,	/* also used to indicate unrecognized start tag */
	Tag_text	= 61,

	NTAG		= Tag_end,
};
/*
 * End-tag requirements of a tag.
 * (presumably the values stored in Tag.action -- TODO confirm)
 */
enum{
	END	= 1,	/* tag must have a matching end tag */
	NOEND	= 2,	/* tag must not have a matching end tag */
	OPTEND	= 3,	/* tag may have a matching end tag */
	ERR	= 4,	/* tag must not occur */
};
/*
 * Tables and entry points shared by the html parser's source files.
 * The arrays are declared without a size; this header does not give
 * their contents.  Field is not declared in this header and must be in
 * scope before inclusion.
 * NOTE(review): the three object declarations carry no `extern', so
 * under ISO C each including file makes a tentative definition --
 * confirm this is intended for the toolchain in use before porting.
 */
Tag tag[];
Entity pl_entity[];
int pl_entities;
/* form handling; both take the parser state */
void rdform(Hglob *);
void endform(Hglob *);
/* attribute queries on a Pair array (presumably Hglob.attr) by name -- TODO confirm */
char *pl_getattr(Pair *, char *);
int pl_hasattr(Pair *, char *);
void pl_htmloutput(Hglob *, int, char *, Field *);

/*
 * Bell Labs OSI certified Powered by Plan 9
 *
 * (Return to Plan 9 Home Page)
 *
 * Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
 * Comments to [email protected].
 */