Plan 9 from Bell Labs’s /usr/web/sources/contrib/steve/root/sys/src/cmd/tex/makeindex/scanid.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*
 *
 *  This file is part of
 *	MakeIndex - A formatter and format independent index processor
 *
 *  Copyright (C) 1989 by Chen & Harrison International Systems, Inc.
 *  Copyright (C) 1988 by Olivetti Research Center
 *  Copyright (C) 1987 by Regents of the University of California
 *
 *  Author:
 *	Pehong Chen
 *	Chen & Harrison International Systems, Inc.
 *	Palo Alto, California
 *	USA
 *	([email protected] or [email protected])
 *
 *  Contributors:
 *	Please refer to the CONTRIB file that comes with this release
 *	for a list of people who have contributed to this and/or previous
 *	release(s) of MakeIndex.
 *
 *  All rights reserved by the copyright holders.  See the copyright
 *  notice distributed with this software for a complete description of
 *  the conditions under which it is made available.
 *
 */

#include    "mkind.h"
#include    "scanid.h"

/*
 * CHECK_LENGTH() is only usable inside scan_field(): it reads that
 * function's locals `i' and `len_field' and jumps to its OVERFLOW label.
 * NOTE(review): the expansion is a bare `if', so it must not be used as
 * the body of an if/else without braces.
 */
#define CHECK_LENGTH()	if (i > len_field) goto OVERFLOW

/* Counters for the index file currently being scanned (reset by scan_idx). */
int     idx_lc;			       /* line count */
int     idx_tc;			       /* total entry count */
int     idx_ec;			       /* erroneous entry count */
int     idx_dc;			       /* number of dots printed so far */

/* File-private scanner state. */
static int first_entry = TRUE;	/* TRUE until make_key() links the first node */
static int comp_len;		/* strlen(page_comp), cached by scan_idx() */
static char key[ARGUMENT_MAX];	/* raw first argument, filled by scan_arg1() */
static char no[NUMBER_MAX];	/* raw second argument, filled by scan_arg2() */
extern char *strchr ARGS((const char* s,int c));

/* Singly linked list of accepted entries; make_key() appends at tail. */
NODE_PTR head;
NODE_PTR tail;

/* Prototypes of the file-private helpers defined below. */
static	void	flush_to_eol ARGS((void));
static	int	make_key ARGS((void));
static	void	make_string ARGS((char **ppstr,int n));
static	int	scan_alpha_lower ARGS((char *no,short *npg,short *count));
static	int	scan_alpha_upper ARGS((char *no,short *npg,short *count));
static	int	scan_arabic ARGS((char *no,short *npg,short *count));
static	int	scan_arg1 ARGS((void));
static	int	scan_arg2 ARGS((void));
static	int	scan_field ARGS((int *n,char field[],int len_field,
				 int ck_level, int ck_encap,int ck_actual));
static	int	scan_key ARGS((struct KFIELD *data));
static	int	scan_no ARGS((char *no,short *npg,short *count,short *type));
static	int	scan_roman_lower ARGS((char *no,short *npg,short *count));
static	int	scan_roman_upper ARGS((char *no,short *npg,short *count));
static	void	search_quote ARGS((char sort_key[],char actual_key[]));

/* Upper-case alphabet table, compiled only for the BS2000 and MVS/XA targets. */
#if     (OS_BS2000 | OS_MVSXA)
char    UPCC[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
#endif

/*
 * scan_idx - read the index file idx_fp and queue every accepted entry.
 *
 * The input is consumed one character at a time by a small state machine
 * driven by arg_count:
 *	-1  between entries (the next non-blank starts a keyword)
 *	 0  collecting the keyword up to the opening delimiter idx_aopen
 *	 1  first argument scanned, waiting for the opener of the second
 *	 2  both arguments scanned, waiting for end of line / end of file
 * A complete entry is turned into a list node by make_key().  On return
 * the per-file counters have been added to idx_tt / idx_et and the file
 * has been closed.
 *
 * NOTE(review): IDX_SKIPLINE comes from scanid.h; the code below relies on
 * it to discard the rest of the line and to reset arg_count -- confirm.
 */
void
scan_idx(VOID_ARG)
{
    char    keyword[ARRAY_MAX];
    int     c;
    int     i = 0;		       /* next free slot in keyword[] */
    int     not_eof = TRUE;
    int     arg_count = -1;

    MESSAGE("Scanning input file %s...", idx_fn);
    idx_lc = idx_tc = idx_ec = idx_dc = 0;
    comp_len = strlen(page_comp);
    while (not_eof) {
	switch (c = GET_CHAR(idx_fp)) {
	case EOF:
	    if (arg_count == 2) {
		/* last entry was not terminated by a line feed */
		idx_lc++;
		if (make_key())
		    IDX_DOT(DOT_MAX);
		arg_count = -1;
	    } else
		not_eof = FALSE;
	    break;

	case LFD:
	    idx_lc++;
	    if (arg_count == 2) {
		if (make_key())
		    IDX_DOT(DOT_MAX);
		arg_count = -1;
	    } else if (arg_count > -1) {
		IDX_ERROR("Missing arguments -- need two (premature LFD).\n",
			  NULL);
		arg_count = -1;
	    }
	    /* FALLTHROUGH -- nothing more to do for a line feed */
	case TAB:
	case SPC:
	    break;

	default:
	    switch (arg_count) {
	    case -1:
		/* first character of a new keyword */
		i = 0;
		keyword[i++] = (char) c;
		arg_count++;
		idx_tc++;
		break;
	    case 0:
		if (c == idx_aopen) {
		    arg_count++;
		    /* i <= ARRAY_MAX - 1 here, so the terminator fits */
		    keyword[i] = NUL;
		    if (STREQ(keyword, idx_keyword)) {
			if (!scan_arg1()) {
			    arg_count = -1;
			}
		    } else {
			IDX_SKIPLINE;
			IDX_ERROR("Unknown index keyword %s.\n", keyword);
		    }
		} else {
		    /* always keep one slot free for the trailing NUL */
		    if (i < ARRAY_MAX - 1)
			keyword[i++] = (char) c;
		    else {
			/* terminate before the keyword is printed with %s */
			keyword[i] = NUL;
			IDX_SKIPLINE;
			IDX_ERROR2("Index keyword %s too long (max %d).\n",
				   keyword, ARRAY_MAX - 1);
		    }
		}
		break;
	    case 1:
		if (c == idx_aopen) {
		    arg_count++;
		    if (!scan_arg2()) {
			arg_count = -1;
		    }
		} else {
		    IDX_SKIPLINE;
IDX_ERROR(
"No opening delimiter for second argument (illegal character `%c').\n", c);
		}
		break;
	    case 2:
		/* anything but white space after the second argument */
		IDX_SKIPLINE;
IDX_ERROR(
"No closing delimiter for second argument (illegal character `%c').\n", c);
		break;
	    }
	    break;
	}
    }

    /* fixup the total counts */
    idx_tt += idx_tc;
    idx_et += idx_ec;

    DONE(idx_tc - idx_ec, "entries accepted", idx_ec, "rejected");
    CLOSE(idx_fp);
}

/*
 * flush_to_eol - discard input from idx_fp up to and including the next
 * line feed, or up to end of file if that comes first.
 */
static void
flush_to_eol(VOID_ARG)
{
    int ch;

    do {
	ch = GET_CHAR(idx_fp);
    } while ((ch != LFD) && (ch != EOF));
}

/*
 * make_key - build a list node from the two arguments just scanned.
 *
 * key[] (first argument) is split into its fields by scan_key() and no[]
 * (second argument) is decoded into page fields by scan_no().  On success
 * the node is appended to the head/tail list and TRUE is returned.  When
 * either step fails the node and every string allocated for it are
 * released and FALSE is returned; head and tail are left untouched.
 */
static int
make_key(VOID_ARG)
{
    NODE_PTR ptr;
    char   *empty = "";		/* shared marker for "field not allocated" */
    int     i;

    /* allocate and initialize a node */

#ifdef DEBUG
	totmem += sizeof(NODE);
	(void)fprintf(stderr,"make_key(): malloc(%d)\ttotmem = %ld\n",
	    (int)sizeof(NODE),totmem);
#endif /* DEBUG */

    if ((ptr = (NODE_PTR) malloc(sizeof(NODE))) == NULL)
	FATAL("Not enough core...abort.\n", "");

    /*
     * Every string field starts out pointing at the same constant empty
     * string; make_string() replaces it with heap storage on demand, so a
     * pointer different from `empty' is known to be heap-allocated.
     */
    for (i = 0; i < FIELD_MAX; i++)
    {
	ptr->data.sf[i] = empty;
	ptr->data.af[i] = empty;
    }
    ptr->data.encap = empty;
    ptr->data.lpg[0] = NUL;
    ptr->data.count = 0;
    ptr->data.type = EMPTY;

    /* process index key */
    if (!scan_key(&(ptr->data)))
	goto reject;

    /* determine group type */
    ptr->data.group = group_type(ptr->data.sf[0]);

    /* process page number */
    strcpy(ptr->data.lpg, no);
    if (!scan_no(no, ptr->data.npg, &(ptr->data.count), &(ptr->data.type)))
	goto reject;

    if (first_entry) {
	head = tail = ptr;
	first_entry = FALSE;
    } else {
	tail->next = ptr;
	tail = ptr;
    }
    ptr->data.lc = idx_lc;
    ptr->data.fn = idx_fn;
    tail->next = NULL;

    return (TRUE);

reject:
    /* the node was never linked in, so nothing else refers to it */
    for (i = 0; i < FIELD_MAX; i++)
    {
	if (ptr->data.sf[i] != empty)
	    free(ptr->data.sf[i]);
	if (ptr->data.af[i] != empty)
	    free(ptr->data.af[i]);
    }
    if (ptr->data.encap != empty)
	free(ptr->data.encap);
    free(ptr);
    return (FALSE);
}

/*
 * make_string - give *ppstr an n-byte heap buffer if it is still empty.
 *
 * When *ppstr already holds a non-empty string nothing happens.  Otherwise
 * *ppstr is replaced by freshly malloc'ed storage of n bytes whose first
 * byte is NUL; running out of memory is reported through FATAL.
 */
static void
#if STDC
make_string(char **ppstr, int n)
#else
make_string(ppstr,n)
char	**ppstr;
int	n;
#endif
{
    char   *buf;

    if ((*ppstr)[0] != NUL)		/* already populated: keep it */
	return;

    buf = (char*)malloc(n);
    (*ppstr) = buf;
    if (buf == (char*)NULL)
	FATAL("Not enough core...abort.\n", "");
    buf[0] = NUL;
}

/*
 * scan_key - split the raw first argument key[] into the fields of *data.
 *
 * Walking key[] from left to right:
 *   - idx_encap starts the encapsulator, which runs to the end of key[];
 *   - idx_actual starts the actual (printed) text of the current level;
 *   - anything else starts the sort text of the current level (from the
 *     second sort field on, the level delimiter is skipped first).
 * Each destination string gets strlen(key) + 1 bytes, which can hold any
 * field because a field is never longer than key[] itself.  With
 * german_sort set, a sort field containing '"' is passed to search_quote().
 *
 * Returns TRUE when the key parsed and no required field is empty, FALSE
 * otherwise (scan_field() and NULL_RTN do the error reporting).
 */
static int
#if STDC
scan_key(FIELD_PTR data)
#else
scan_key(data)
FIELD_PTR data;
#endif
{
    int     i = 0;		       /* current level */
    int     n = 0;		       /* index to the key[] array */
    int     second_round = FALSE;
    int     last = FIELD_MAX - 1;
    int     key_len = (int) strlen(key);
    int     ck_level;

    while (key[n] != NUL) {
	if (key[n] == idx_encap)
	{
	    n++;
	    make_string(&(data->encap), key_len + 1);
	    if (scan_field(&n, data->encap, key_len, FALSE, FALSE, FALSE))
		break;
	    else
		return (FALSE);
	}

	/* a level delimiter is only legal while deeper levels remain */
	if (key[n] == idx_actual) {
	    n++;
	    ck_level = (i == last) ? FALSE : TRUE;
	    make_string(&(data->af[i]), key_len + 1);
	    if (!scan_field(&n, data->af[i], key_len, ck_level, TRUE, FALSE))
		return (FALSE);
	} else {
	    /* Next nesting level */
	    if (second_round) {
		i++;
		n++;
	    }
	    ck_level = (i == last) ? FALSE : TRUE;
	    make_string(&(data->sf[i]), key_len + 1);
	    if (!scan_field(&n, data->sf[i], key_len, ck_level, TRUE, TRUE))
		return (FALSE);
	    second_round = TRUE;
	    if (german_sort && strchr(data->sf[i], '"'))
	    {
		/*
		 * Size af[i] for the whole key, not just for the sort
		 * field: if an idx_actual field follows, make_string()
		 * leaves this (non-empty) buffer in place and
		 * scan_field() may store up to key_len characters in it.
		 */
		make_string(&(data->af[i]), key_len + 1);
		search_quote(data->sf[i], data->af[i]);
	    }
	}
    }

    /* check for empty fields which shouldn't be empty */
    if (*data->sf[0] == NUL) {
	NULL_RTN;
    }
    for (i = 1; i < FIELD_MAX - 1; i++)
	if ((*data->sf[i] == NUL) &&
	    ((*data->af[i] != NUL) || (*data->sf[i + 1] != NUL))) {
	    NULL_RTN;
	}
    /* i == FIELD_MAX-1 */
    if ((*data->sf[i] == NUL) && (*data->af[i] != NUL)) {
	NULL_RTN;
    }
    return (TRUE);
}

/*
 * scan_field - copy one field out of key[] into field[].
 *
 *   n          in/out index into key[]; on a TRUE return it is the index
 *              of the delimiter (or NUL) that ended the field
 *   field      destination buffer
 *   len_field  capacity of field[], not counting the terminating NUL
 *   ck_level   when set, idx_level ends the field
 *   ck_encap   when set, idx_encap ends the field
 *   ck_actual  when set, idx_actual ends the field
 *
 * A delimiter whose ck_ flag is clear is an error ("Extra ...").  The
 * escape character is copied; a quote character preceded by an even
 * number of escapes is dropped and the character after it is copied
 * as is, while a quote after an odd number of escapes is copied itself.
 *
 * Returns TRUE when the field was terminated normally, FALSE after an
 * error has been reported.
 */
static int
#if STDC
scan_field(int *n, char field[], int len_field, int ck_level, int ck_encap, 
	int ck_actual)
#else
scan_field(n, field, len_field, ck_level, ck_encap, ck_actual)
int    *n;
char    field[];
int	len_field;	/* length of field[], EXCLUDING space for final NUL */
int     ck_level;
int     ck_encap;
int     ck_actual;
#endif
{
    int     i = 0;		       /* number of characters stored in field[] */
    int     nbsh;		       /* backslash count */

    /* skip a single leading blank when blanks are being compressed */
    if (compress_blanks && ((key[*n] == SPC) || (key[*n] == TAB)))
	++* n;

    while (TRUE) {
	/* copy a run of escape characters, remembering how many there were */
	nbsh = 0;
	while (key[*n] == idx_escape)
	{
	    nbsh++;
	    field[i++] = key[*n];
	    CHECK_LENGTH();
	    ++*n;
	}

	if (key[*n] == idx_quote)
	{
	    /* even escape count: drop the quote, take the next char as is */
	    if (nbsh % 2 == 0)
		field[i++] = key[++*n];
	    else
		field[i++] = key[*n];
	    CHECK_LENGTH();
	}
	else if ((ck_level && (key[*n] == idx_level)) ||
		 (ck_encap && (key[*n] == idx_encap)) ||
		 (ck_actual && (key[*n] == idx_actual)) ||
		 (key[*n] == NUL))
	{
	    /* end of field: terminate, overwriting one trailing blank if any */
	    if ((i > 0) && compress_blanks && (field[i - 1] == SPC))
		field[i - 1] = NUL;
	    else
		field[i] = NUL;
	    return (TRUE);
	} else {
	    field[i++] = key[*n];
	    CHECK_LENGTH();
	    /* a delimiter that is not accepted in this field is an error */
	    if ((!ck_level) && (key[*n] == idx_level)) {
		IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
			   idx_level, *n + 1);
		return (FALSE);
	    } else if ((!ck_encap) && (key[*n] == idx_encap)) {
		IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
			   idx_encap, *n + 1);
		return (FALSE);
	    } else if ((!ck_actual) && (key[*n] == idx_actual)) {
		IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
			   idx_actual, *n + 1);
		return (FALSE);
	    }
	}
	/* check if max field length is reached */
	if (i > len_field)
	{
	/* CHECK_LENGTH() jumps here; the ck_ flags select the message */
	OVERFLOW:
	    if (!ck_encap) {
		IDX_ERROR("Encapsulator of page number too long (max. %d).\n",
			  len_field);
	    } else if (ck_actual) {
		IDX_ERROR("Index sort key too long (max. %d).\n", len_field);
	    } else {
		IDX_ERROR("Text of key entry too long (max. %d).\n", len_field);
	    }
	    return (FALSE);
	}
	++*n;
    }
#if    IBM_PC_TURBO
    return (FALSE);		/* not reached, but keeps compiler happy */
#endif
}

/*
 * group_type - classify a sort key.
 *
 * A key made up of digits only yields its numeric value (an empty key
 * yields 0).  Any other key yields SYMBOL when ISSYMBOL accepts its first
 * character and ALPHA otherwise.
 */
int
#if STDC
group_type(char *str)
#else
group_type(str)
char   *str;
#endif
{
    int     ndigits = 0;	       /* length of the leading digit run */
    int     group;

    while ((str[ndigits] != NUL) && ISDIGIT(str[ndigits]))
	ndigits++;

    if (str[ndigits] != NUL) {
	/* something other than a digit is present */
	if (ISSYMBOL(str[0]))
	    return (SYMBOL);
	return (ALPHA);
    }

    /* digits only: the value is kept if sscanf converts nothing */
    group = ndigits;
    sscanf(str, "%d", &group);
    return (group);
}

/*
 * scan_no - decode one component of a page number.
 *
 *   no     text of the component (and of everything that follows it)
 *   npg    array receiving the numeric page fields
 *   count  number of fields stored in npg[] so far
 *   type   receives the numbering scheme of this component
 *          (ARAB, ROML, ROMU, ALPL or ALPU)
 *
 * The scheme is chosen from the first character and the matching
 * scan_xxx() helper does the conversion; those helpers call back into
 * scan_no() for further components.  Returns TRUE on success, FALSE after
 * an error has been reported.
 */
static int
#if STDC
scan_no(char no[], short npg[], short *count, short *type)
#else
scan_no(no, npg, count, type)
char    no[];
short   npg[];
short  *count;
short  *type;
#endif
{
    /*
     * NOTE(review): `i' is never named directly below.  It looks like the
     * IS_COMPOSITOR macro (scanid.h) reads no[i], i.e. tests whether the
     * compositor follows the first character -- confirm before removing.
     */
    int     i = 1;

    /* <ctype.h> functions need a value representable as unsigned char */
    if (isdigit((unsigned char) no[0])) {
	*type = ARAB;
	if (!scan_arabic(no, npg, count))
	    return (FALSE);
	/* simple heuristic to determine if a letter is Roman or Alpha */
    } else if (IS_ROMAN_LOWER(no[0]) && (!IS_COMPOSITOR)) {
	*type = ROML;
	if (!scan_roman_lower(no, npg, count))
	    return (FALSE);
	/* simple heuristic to determine if a letter is Roman or Alpha */
    } else if (IS_ROMAN_UPPER(no[0]) &&
	       ((no[0] == ROMAN_I) || (!IS_COMPOSITOR))) {
	*type = ROMU;
	if (!scan_roman_upper(no, npg, count))
	    return (FALSE);
    } else if (IS_ALPHA_LOWER(no[0])) {
	*type = ALPL;
	if (!scan_alpha_lower(no, npg, count))
	    return (FALSE);
    } else if (IS_ALPHA_UPPER(no[0])) {
	*type = ALPU;
	if (!scan_alpha_upper(no, npg, count))
	    return (FALSE);
    } else {
	IDX_ERROR("Illegal page number %s.\n", no);
	return (FALSE);
    }
    return (TRUE);
}


/*
 * scan_arabic - convert a leading run of decimal digits in no[].
 *
 * Digits are collected until the end of the string or the page
 * compositor; at most ARABIC_MAX digits are accepted.  The value plus
 * page_offset[ARAB] is stored through ENTER.  When a compositor follows,
 * the text after it is handed to scan_no().
 *
 * Returns TRUE on success, FALSE after an error has been reported.
 */
static int
#if STDC
scan_arabic(char no[], short npg[], short *count)
#else
scan_arabic(no, npg, count)
char    no[];
short   npg[];
short  *count;
#endif
{
    short   i = 0;
    char    str[ARABIC_MAX+1];		/* space for trailing NUL */

    while ((no[i] != NUL) && (i <= ARABIC_MAX) && (!IS_COMPOSITOR)) {
	/* <ctype.h> functions need a value representable as unsigned char */
	if (isdigit((unsigned char) no[i])) {
	    str[i] = no[i];
	    i++;
	} else {
IDX_ERROR2("Illegal Arabic digit: position %d in %s.\n", i + 1, no);
	    return (FALSE);
	}
    }
    /* the loop allows one digit too many so that overflow can be seen here */
    if (i > ARABIC_MAX) {
	IDX_ERROR2("Arabic page number %s too big (max %d digits).\n",
		   no, ARABIC_MAX);
	return (FALSE);
    }
    str[i] = NUL;

    ENTER(strtoint(str) + page_offset[ARAB]);

    /* the type of the following component is not needed: i is scratch */
    if (IS_COMPOSITOR)
	return (scan_no(&no[i + comp_len], npg, count, &i));
    else
	return (TRUE);
}


/*
 * scan_roman_lower - convert a leading lower-case Roman numeral in no[].
 *
 * Numerals are read until the end of the string, the page compositor or
 * ROMAN_MAX characters.  A numeral smaller than its right neighbour is
 * subtracted from it.  The result plus page_offset[ROML] is stored
 * through ENTER, and text after a compositor is handed to scan_no().
 *
 * Returns TRUE on success, FALSE after an error has been reported.
 */
static int
#if STDC
scan_roman_lower(char no[], short npg[], short *count)
#else
scan_roman_lower(no, npg, count)
char    no[];
short   npg[];
short  *count;
#endif
{
    short   i = 0;
    int     total = 0;		       /* sum of the settled numerals */
    int     pending = 0;	       /* numeral still waiting for its neighbour */
    int     value;

    for (; (no[i] != NUL) && (i < ROMAN_MAX) && (!IS_COMPOSITOR); i++) {
	value = 0;
	if (IS_ROMAN_LOWER(no[i]))
	    value = ROMAN_LOWER_VAL(no[i]);
	if (value == 0) {
IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i + 1, no);
	    return (FALSE);
	}

	if (pending == 0) {
	    pending = value;
	    continue;
	}
	if (pending < value) {
	    /* subtractive pair: both numerals are used up */
	    pending = value - pending;
	    value = 0;
	}
	total += pending;
	pending = value;
    }

    if (i == ROMAN_MAX) {
	IDX_ERROR2("Roman page number %s too big (max %d digits).\n",
		   no, ROMAN_MAX);
	return (FALSE);
    }
    total += pending;

    ENTER(total + page_offset[ROML]);

    if (IS_COMPOSITOR)
	return (scan_no(&no[i + comp_len], npg, count, &i));
    return (TRUE);
}


/*
 * scan_roman_upper - convert a leading upper-case Roman numeral in no[].
 *
 * Numerals are read until the end of the string, the page compositor or
 * ROMAN_MAX characters.  A numeral smaller than its right neighbour is
 * subtracted from it.  The result plus page_offset[ROMU] is stored
 * through ENTER, and text after a compositor is handed to scan_no().
 *
 * Returns TRUE on success, FALSE after an error has been reported.
 */
static int
#if STDC
scan_roman_upper(char no[], short npg[], short *count)
#else
scan_roman_upper(no, npg, count)
char    no[];
short   npg[];
short  *count;
#endif
{
    short   i = 0;
    int     total = 0;		       /* sum of the settled numerals */
    int     pending = 0;	       /* numeral still waiting for its neighbour */
    int     value;

    for (; (no[i] != NUL) && (i < ROMAN_MAX) && (!IS_COMPOSITOR); i++) {
	value = 0;
	if (IS_ROMAN_UPPER(no[i]))
	    value = ROMAN_UPPER_VAL(no[i]);
	if (value == 0) {
IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i + 1, no);
	    return (FALSE);
	}

	if (pending == 0) {
	    pending = value;
	    continue;
	}
	if (pending < value) {
	    /* subtractive pair: both numerals are used up */
	    pending = value - pending;
	    value = 0;
	}
	total += pending;
	pending = value;
    }

    if (i == ROMAN_MAX) {
	IDX_ERROR2("Roman page number %s too big (max %d digits).\n",
		   no, ROMAN_MAX);
	return (FALSE);
    }
    total += pending;

    ENTER(total + page_offset[ROMU]);

    if (IS_COMPOSITOR)
	return (scan_no(&no[i + comp_len], npg, count, &i));
    return (TRUE);
}


/*
 * scan_alpha_lower - convert a single lower-case letter page number.
 *
 * ALPHA_VAL of the first character plus page_offset[ALPL] is stored
 * through ENTER.  When the compositor follows the letter, the text after
 * it is handed to scan_no().  Returns what scan_no() returns in that case
 * and TRUE otherwise.
 */
static int
#if STDC
scan_alpha_lower(char no[], short npg[], short *count)
#else
scan_alpha_lower(no, npg, count)
char    no[];
short   npg[];
short  *count;
#endif
{
    short   i = 1;		       /* offset just past the letter */

    ENTER(ALPHA_VAL(no[0]) + page_offset[ALPL]);

    if (IS_COMPOSITOR)
	return (scan_no(&no[comp_len + 1], npg, count, &i));
    return (TRUE);
}


/*
 * scan_alpha_upper - convert a single upper-case letter page number.
 *
 * ALPHA_VAL of the first character plus page_offset[ALPU] is stored
 * through ENTER.  When the compositor follows the letter, the text after
 * it is handed to scan_no().  Returns what scan_no() returns in that case
 * and TRUE otherwise.
 */
static int
#if STDC
scan_alpha_upper(char no[], short npg[], short *count)
#else
scan_alpha_upper(no, npg, count)
char    no[];
short   npg[];
short  *count;
#endif
{
    short   i = 1;		       /* offset just past the letter */

    ENTER(ALPHA_VAL(no[0]) + page_offset[ALPU]);

    if (IS_COMPOSITOR)
	return (scan_no(&no[comp_len + 1], npg, count, &i));
    return (TRUE);
}


/*
 * scan_arg1 - read the first argument of an index entry into key[].
 *
 * Called right after the opening delimiter has been consumed.  Characters
 * are copied up to the matching idx_aclose; nested idx_aopen/idx_aclose
 * pairs are kept.  A quote or escape character is stored together with
 * the character after it, which is thereby exempt from delimiter
 * handling.  With compress_blanks set, leading blanks are skipped, runs
 * of blanks collapse to one SPC and a trailing SPC is removed.
 *
 * Returns TRUE with key[] NUL-terminated, or FALSE after reporting an
 * error (line feed inside the argument, or argument too long / cut short
 * by end of file).  key[] is never written beyond ARGUMENT_MAX bytes.
 */
static int
scan_arg1(VOID_ARG)
{
    int     i = 0;
    int     n = 0;		       /* delimiter count */
    int     a;

    if (compress_blanks) {
	do {
	    a = GET_CHAR(idx_fp);
	} while ((a == SPC) || (a == TAB));
    } else
	a = GET_CHAR(idx_fp);

    while ((i < ARGUMENT_MAX) && (a != EOF))
    {
	if ((a == idx_quote) || (a == idx_escape))
	{				/* take next character literally */
	    if (i + 1 >= ARGUMENT_MAX)
		break;			/* no room for the pair: too long */
	    key[i++] = (char) a;	/* but preserve quote or escape */
	    a = GET_CHAR(idx_fp);
	    if (a == EOF)
		break;			/* nothing left to take literally */
	    key[i++] = (char) a;	/* save literal character */
	}
	else if (a == idx_aopen)
	{		/* opening delimiters within the argument list */
	    key[i++] = (char) a;
	    n++;
	}
	else if (a == idx_aclose)
	{
	    if (n == 0)			/* end of argument */
	    {
		/* i may be 0 for an empty argument: do not look at key[-1] */
		if (compress_blanks && (i > 0) && (key[i - 1] == SPC))
		    key[i - 1] = NUL;
		else
		    key[i] = NUL;
		return (TRUE);
	    }
	    else			/* nested delimiters */
	    {
		key[i++] = (char) a;
		n--;
	    }
	}
	else
	{
	    switch (a)
	    {
	    case LFD:
		idx_lc++;
		IDX_ERROR("Incomplete first argument (premature LFD).\n", "");
		return (FALSE);
	    case TAB:
	    case SPC:
		/* compress successive SPC's to one SPC */
		if (compress_blanks)
		{
		    if ((i > 0) && (key[i - 1] != SPC) && (key[i - 1] != TAB))
			key[i++] = SPC;
		    break;
		}
		/* FALLTHROUGH -- blanks are ordinary characters otherwise */
	    default:
		key[i++] = (char) a;
		break;
	    }
	}
	a = GET_CHAR(idx_fp);
    }

    /*
     * `a' is the last character read and not yet handled.  If it already
     * is the line feed (or EOF) the line is finished; flushing again
     * would throw away the following line.
     */
    if ((a != LFD) && (a != EOF))
	flush_to_eol();			/* Skip to end of line */
    idx_lc++;
    IDX_ERROR("First argument too long (max %d).\n", ARGUMENT_MAX);
    return (FALSE);
}


/*
 * scan_arg2 - read the second argument (the page number) into no[].
 *
 * Called right after the opening delimiter has been consumed.  Leading
 * blanks are skipped and trailing blanks before idx_aclose are ignored;
 * a blank followed by further text is an error.
 *
 * Returns TRUE with no[] NUL-terminated, or FALSE after reporting an
 * error (line feed inside the argument, embedded blank, or argument too
 * long / cut short by end of file).
 */
static int
scan_arg2(VOID_ARG)
{
    int     i = 0;
    int     a;
    int     hit_blank = FALSE;

    do {
	a = GET_CHAR(idx_fp);
    } while ((a == SPC) || (a == TAB));

    /* stop at EOF instead of storing it in no[] as if it were text */
    while ((i < NUMBER_MAX) && (a != EOF)) {
	if (a == idx_aclose) {
	    no[i] = NUL;
	    return (TRUE);
	} else
	    switch (a) {
	    case LFD:
		idx_lc++;
IDX_ERROR("Incomplete second argument (premature LFD).\n", "");
		return (FALSE);
	    case TAB:
	    case SPC:
		hit_blank = TRUE;
		break;
	    default:
		if (hit_blank) {
		    flush_to_eol();	/* Skip to end of line */
		    idx_lc++;
IDX_ERROR("Illegal space within numerals in second argument.\n", "");
		    return (FALSE);
		}
		no[i++] = (char) a;
		break;
	    }
	a = GET_CHAR(idx_fp);
    }

    /*
     * `a' is the last character read and not yet handled.  If it already
     * is the line feed (or EOF) the line is finished; flushing again
     * would throw away the following line.
     */
    if ((a != LFD) && (a != EOF))
	flush_to_eol();	/* Skip to end of line */
    idx_lc++;
    IDX_ERROR("Second argument too long (max %d).\n", NUMBER_MAX);
    return (FALSE);
}


/*
 * search_quote - expand TeX-style umlaut notation in a sort key.
 *
 * sort_key is first copied to actual_key.  Then each of "a "o "u (either
 * case) and "s in sort_key is overwritten in place by ae/oe/ue (keeping
 * the case of the first letter) or ss.  If nothing was replaced,
 * actual_key is reset to the empty string.
 */
static void
#if STDC
search_quote(char sort_key[], char actual_key[])
#else
search_quote(sort_key, actual_key)
char    sort_key[];
char    actual_key[];
#endif
{
    char   *quote;		       /* current double quote in sort_key */
    char   *repl;		       /* two-letter replacement, if any */
    int     replaced = FALSE;

    strcpy(actual_key, sort_key);

    for (quote = strchr(sort_key, '"');
	 quote != (char*)NULL;
	 quote = strchr(quote + 1, '"'))
    {
	switch (quote[1])
	{				/* umlaut or sharp S after the quote? */
	case 'a':
	    repl = "ae";
	    break;
	case 'A':
	    repl = "Ae";
	    break;
	case 'o':
	    repl = "oe";
	    break;
	case 'O':
	    repl = "Oe";
	    break;
	case 'u':
	    repl = "ue";
	    break;
	case 'U':
	    repl = "Ue";
	    break;
	case 's':
	    repl = "ss";
	    break;
	default:
	    repl = (char*)NULL;
	    break;
	}
	if (repl == (char*)NULL)
	    continue;

	replaced = TRUE;
	quote[0] = repl[0];	       /* write to sort_key */
	quote[1] = repl[1];
    }

    if (!replaced)		       /* nothing expanded: no actual key */
	actual_key[0] = NUL;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].