/* $Id: reader.c,v 1.33 2011/09/06 22:56:53 tom Exp $ */

#include "defs.h"

/*  The line size must be a positive integer.  One hundred was chosen	*/
/*  because few lines in Yacc input grammars exceed 100 characters.	*/
/*  Note that if a line exceeds LINESIZE characters, the line buffer	*/
/*  will be expanded to accomodate it.					*/

#define LINESIZE 100

#define L_CURL '{'
#define R_CURL '}'

static void start_rule(bucket *bp, int s_lineno);

static char *cache;
static int cinc, cache_size;

int ntags;
static int tagmax;
static char **tag_table;

static char saw_eof;
char unionized;
char *cptr, *line;
static int linesize;

static bucket *goal;
static Value_t prec;
static int gensym;
static char last_was_action;

static int maxitems;
static bucket **pitem;

static int maxrules;
static bucket **plhs;

static size_t name_pool_size;
static char *name_pool;

char line_format[] = "#line %d \"%s\"\n";

param *lex_param;
param *parse_param;

static void
cachec(int c)
{
    assert(cinc >= 0);
    if (cinc >= cache_size)
    {
	cache_size += 256;
	cache = REALLOC(cache, cache_size);
	NO_SPACE(cache);
    }
    cache[cinc] = (char)c;
    ++cinc;
}

static void
get_line(void)
{
    FILE *f = input_file;
    int c;
    int i;

    if (saw_eof || (c = getc(f)) == EOF)
    {
	if (line)
	{
	    FREE(line);
	    line = 0;
	}
	cptr = 0;
	saw_eof = 1;
	return;
    }

    if (line == 0 || linesize != (LINESIZE + 1))
    {
	if (line)
	    FREE(line);
	linesize = LINESIZE + 1;
	line = MALLOC(linesize);
	NO_SPACE(line);
    }

    i = 0;
    ++lineno;
    for (;;)
    {
	line[i] = (char)c;
	if (c == '\n')
	{
	    cptr = line;
	    return;
	}
	if (++i >= linesize)
	{
	    linesize += LINESIZE;
	    line = REALLOC(line, linesize);
	    NO_SPACE(line);
	}
	c = getc(f);
	if (c == EOF)
	{
	    line[i] = '\n';
	    saw_eof = 1;
	    cptr = line;
	    return;
	}
    }
}

static char *
dup_line(void)
{
    char *p, *s, *t;

    if (line == 0)
	return (0);
    s = line;
    while (*s != '\n')
	++s;
    p = MALLOC(s - line + 1);
    NO_SPACE(p);

    s = line;
    t = p;
    while ((*t++ = *s++) != '\n')
	continue;
    return (p);
}

static void
skip_comment(void)
{
    char *s;

    int st_lineno = lineno;
    char *st_line = dup_line();
    char *st_cptr = st_line + (cptr - line);

    s = cptr + 2;
    for (;;)
    {
	if (*s == '*' && s[1] == '/')
	{
	    cptr = s + 2;
	    FREE(st_line);
	    return;
	}
	if (*s == '\n')
	{
	    get_line();
	    if (line == 0)
		unterminated_comment(st_lineno, st_line, st_cptr);
	    s = cptr;
	}
	else
	    ++s;
    }
}

static int
nextc(void)
{
    char *s;

    if (line == 0)
    {
	get_line();
	if (line == 0)
	    return (EOF);
    }

    s = cptr;
    for (;;)
    {
	switch (*s)
	{
	case '\n':
	    get_line();
	    if (line == 0)
		return (EOF);
	    s = cptr;
	    break;

	case ' ':
	case '\t':
	case '\f':
	case '\r':
	case '\v':
	case ',':
	case ';':
	    ++s;
	    break;

	case '\\':
	    cptr = s;
	    return ('%');

	case '/':
	    if (s[1] == '*')
	    {
		cptr = s;
		skip_comment();
		s = cptr;
		break;
	    }
	    else if (s[1] == '/')
	    {
		get_line();
		if (line == 0)
		    return (EOF);
		s = cptr;
		break;
	    }
	    /* FALLTHRU */

	default:
	    cptr = s;
	    return (*s);
	}
    }
}

/*
 * Compare keyword to cached token, treating '_' and '-' the same.  Some
 * grammars rely upon this misfeature.
 */
static int
matchec(const char *name)
{
    const char *p = cache;
    const char *q = name;
    int code = 0;	/* assume mismatch */

    while (*p != '\0' && *q != '\0')
    {
	char a = *p++;
	char b = *q++;
	if (a == '_')
	    a = '-';
	if (b == '_')
	    b = '-';
	if (a != b)
	    break;
	if (*p == '\0' && *q == '\0')
	{
	    code = 1;
	    break;
	}
    }
    return code;
}

static int
keyword(void)
{
    int c;
    char *t_cptr = cptr;

    c = *++cptr;
    if (isalpha(c))
    {
	cinc = 0;
	for (;;)
	{
	    if (isalpha(c))
	    {
		if (isupper(c))
		    c = tolower(c);
		cachec(c);
	    }
	    else if (isdigit(c)
		     || c == '-'
		     || c == '_'
		     || c == '.'
		     || c == '$')
	    {
		cachec(c);
	    }
	    else
	    {
		break;
	    }
	    c = *++cptr;
	}
	cachec(NUL);

	if (matchec("token") || matchec("term"))
	    return (TOKEN);
	if (matchec("type"))
	    return (TYPE);
	if (matchec("left"))
	    return (LEFT);
	if (matchec("right"))
	    return (RIGHT);
	if (matchec("nonassoc") || matchec("binary"))
	    return (NONASSOC);
	if (matchec("start"))
	    return (START);
	if (matchec("union"))
	    return (UNION);
	if (matchec("ident"))
	    return (IDENT);
	if (matchec("expect"))
	    return (EXPECT);
	if (matchec("expect-rr"))
	    return (EXPECT_RR);
	if (matchec("pure-parser"))
	    return (PURE_PARSER);
	if (matchec("parse-param"))
	    return (PARSE_PARAM);
	if (matchec("lex-param"))
	    return (LEX_PARAM);
	if (matchec("yacc"))
	    return (POSIX_YACC);
    }
    else
    {
	++cptr;
	if (c == L_CURL)
	    return (TEXT);
	if (c == '%' || c == '\\')
	    return (MARK);
	if (c == '<')
	    return (LEFT);
	if (c == '>')
	    return (RIGHT);
	if (c == '0')
	    return (TOKEN);
	if (c == '2')
	    return (NONASSOC);
    }
    syntax_error(lineno, line, t_cptr);
    /*NOTREACHED */
}

static void
copy_ident(void)
{
    int c;
    FILE *f = output_file;

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c != '"')
	syntax_error(lineno, line, cptr);
    ++outline;
    fprintf(f, "#ident \"");
    for (;;)
    {
	c = *++cptr;
	if (c == '\n')
	{
	    fprintf(f, "\"\n");
	    return;
	}
	putc(c, f);
	if (c == '"')
	{
	    putc('\n', f);
	    ++cptr;
	    return;
	}
    }
}

static void
copy_text(void)
{
    int c;
    int quote;
    FILE *f = text_file;
    int need_newline = 0;
    int t_lineno = lineno;
    char *t_line = dup_line();
    char *t_cptr = t_line + (cptr - line - 2);

    if (*cptr == '\n')
    {
	get_line();
	if (line == 0)
	    unterminated_text(t_lineno, t_line, t_cptr);
    }
    if (!lflag)
	fprintf(f, line_format, lineno, input_file_name);

  loop:
    c = *cptr++;
    switch (c)
    {
    case '\n':
      next_line:
	putc('\n', f);
	need_newline = 0;
	get_line();
	if (line)
	    goto loop;
	unterminated_text(t_lineno, t_line, t_cptr);

    case '\'':
    case '"':
	{
	    int s_lineno = lineno;
	    char *s_line = dup_line();
	    char *s_cptr = s_line + (cptr - line - 1);

	    quote = c;
	    putc(c, f);
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == quote)
		{
		    need_newline = 1;
		    FREE(s_line);
		    goto loop;
		}
		if (c == '\n')
		    unterminated_string(s_lineno, s_line, s_cptr);
		if (c == '\\')
		{
		    c = *cptr++;
		    putc(c, f);
		    if (c == '\n')
		    {
			get_line();
			if (line == 0)
			    unterminated_string(s_lineno, s_line, s_cptr);
		    }
		}
	    }
	}

    case '/':
	putc(c, f);
	need_newline = 1;
	c = *cptr;
	if (c == '/')
	{
	    putc('*', f);
	    while ((c = *++cptr) != '\n')
	    {
		if (c == '*' && cptr[1] == '/')
		    fprintf(f, "* ");
		else
		    putc(c, f);
	    }
	    fprintf(f, "*/");
	    goto next_line;
	}
	if (c == '*')
	{
	    int c_lineno = lineno;
	    char *c_line = dup_line();
	    char *c_cptr = c_line + (cptr - line - 1);

	    putc('*', f);
	    ++cptr;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == '*' && *cptr == '/')
		{
		    putc('/', f);
		    ++cptr;
		    FREE(c_line);
		    goto loop;
		}
		if (c == '\n')
		{
		    get_line();
		    if (line == 0)
			unterminated_comment(c_lineno, c_line, c_cptr);
		}
	    }
	}
	need_newline = 1;
	goto loop;

    case '%':
    case '\\':
	if (*cptr == R_CURL)
	{
	    if (need_newline)
		putc('\n', f);
	    ++cptr;
	    FREE(t_line);
	    return;
	}
	/* FALLTHRU */

    default:
	putc(c, f);
	need_newline = 1;
	goto loop;
    }
}

static void
puts_both(const char *s)
{
    fputs(s, text_file);
    if (dflag)
	fputs(s, union_file);
}

static void
putc_both(int c)
{
    putc(c, text_file);
    if (dflag)
	putc(c, union_file);
}

static void
copy_union(void)
{
    int c;
    int quote;
    int depth;
    int u_lineno = lineno;
    char *u_line = dup_line();
    char *u_cptr = u_line + (cptr - line - 6);

    if (unionized)
	over_unionized(cptr - 6);
    unionized = 1;

    if (!lflag)
	fprintf(text_file, line_format, lineno, input_file_name);

    puts_both("#ifdef YYSTYPE\n");
    puts_both("#undef  YYSTYPE_IS_DECLARED\n");
    puts_both("#define YYSTYPE_IS_DECLARED 1\n");
    puts_both("#endif\n");
    puts_both("#ifndef YYSTYPE_IS_DECLARED\n");
    puts_both("#define YYSTYPE_IS_DECLARED 1\n");
    puts_both("typedef union");

    depth = 0;
  loop:
    c = *cptr++;
    putc_both(c);
    switch (c)
    {
    case '\n':
      next_line:
	get_line();
	if (line == 0)
	    unterminated_union(u_lineno, u_line, u_cptr);
	goto loop;

    case L_CURL:
	++depth;
	goto loop;

    case R_CURL:
	if (--depth == 0)
	{
	    puts_both(" YYSTYPE;\n");
	    puts_both("#endif /* !YYSTYPE_IS_DECLARED */\n");
	    FREE(u_line);
	    return;
	}
	goto loop;

    case '\'':
    case '"':
	{
	    int s_lineno = lineno;
	    char *s_line = dup_line();
	    char *s_cptr = s_line + (cptr - line - 1);

	    quote = c;
	    for (;;)
	    {
		c = *cptr++;
		putc_both(c);
		if (c == quote)
		{
		    FREE(s_line);
		    goto loop;
		}
		if (c == '\n')
		    unterminated_string(s_lineno, s_line, s_cptr);
		if (c == '\\')
		{
		    c = *cptr++;
		    putc_both(c);
		    if (c == '\n')
		    {
			get_line();
			if (line == 0)
			    unterminated_string(s_lineno, s_line, s_cptr);
		    }
		}
	    }
	}

    case '/':
	c = *cptr;
	if (c == '/')
	{
	    putc_both('*');
	    while ((c = *++cptr) != '\n')
	    {
		if (c == '*' && cptr[1] == '/')
		{
		    puts_both("* ");
		}
		else
		{
		    putc_both(c);
		}
	    }
	    puts_both("*/\n");
	    goto next_line;
	}
	if (c == '*')
	{
	    int c_lineno = lineno;
	    char *c_line = dup_line();
	    char *c_cptr = c_line + (cptr - line - 1);

	    putc_both('*');
	    ++cptr;
	    for (;;)
	    {
		c = *cptr++;
		putc_both(c);
		if (c == '*' && *cptr == '/')
		{
		    putc_both('/');
		    ++cptr;
		    FREE(c_line);
		    goto loop;
		}
		if (c == '\n')
		{
		    get_line();
		    if (line == 0)
			unterminated_comment(c_lineno, c_line, c_cptr);
		}
	    }
	}
	goto loop;

    default:
	goto loop;
    }
}

/*
 * Keep a linked list of parameters
 */
static void
copy_param(int k)
{
    char *buf;
    int c;
    param *head, *p;
    int i;
    int name, type2;

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c != '{')
	goto out;
    cptr++;

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c == '}')
	goto out;

    buf = MALLOC(linesize);
    NO_SPACE(buf);

    for (i = 0; (c = *cptr++) != '}'; i++)
    {
	if (c == '\0')
	    missing_brace();
	if (c == EOF)
	    unexpected_EOF();
	buf[i] = (char)c;
    }

    if (i == 0)
	goto out;

    buf[i--] = '\0';
    while (i >= 0 && isspace(UCH(buf[i])))
	buf[i--] = '\0';

    if (buf[i] == ']')
    {
	int level = 1;
	while (i >= 0 && level > 0 && buf[i] != '[')
	{
	    if (buf[i] == ']')
		++level;
	    else if (buf[i] == '[')
		--level;
	    i--;
	}
	if (i <= 0)
	    unexpected_EOF();
	type2 = i--;
    }
    else
    {
	type2 = i + 1;
    }

    while (i >= 0 && (isalnum(UCH(buf[i])) ||
		      UCH(buf[i]) == '_'))
	i--;

    if (!isspace(UCH(buf[i])) && buf[i] != '*')
	goto out;

    name = i + 1;

    p = MALLOC(sizeof(*p));
    NO_SPACE(p);

    p->type2 = strdup(buf + type2);
    NO_SPACE(p->type2);

    buf[type2] = '\0';

    p->name = strdup(buf + name);
    NO_SPACE(p->name);

    buf[name] = '\0';
    p->type = buf;

    if (k == LEX_PARAM)
	head = lex_param;
    else
	head = parse_param;

    if (head != NULL)
    {
	while (head->next)
	    head = head->next;
	head->next = p;
    }
    else
    {
	if (k == LEX_PARAM)
	    lex_param = p;
	else
	    parse_param = p;
    }
    p->next = NULL;
    return;

  out:
    syntax_error(lineno, line, cptr);
}

static int
hexval(int c)
{
    if (c >= '0' && c <= '9')
	return (c - '0');
    if (c >= 'A' && c <= 'F')
	return (c - 'A' + 10);
    if (c >= 'a' && c <= 'f')
	return (c - 'a' + 10);
    return (-1);
}

static bucket *
get_literal(void)
{
    int c, quote;
    int i;
    int n;
    char *s;
    bucket *bp;
    int s_lineno = lineno;
    char *s_line = dup_line();
    char *s_cptr = s_line + (cptr - line);

    quote = *cptr++;
    cinc = 0;
    for (;;)
    {
	c = *cptr++;
	if (c == quote)
	    break;
	if (c == '\n')
	    unterminated_string(s_lineno, s_line, s_cptr);
	if (c == '\\')
	{
	    char *c_cptr = cptr - 1;

	    c = *cptr++;
	    switch (c)
	    {
	    case '\n':
		get_line();
		if (line == 0)
		    unterminated_string(s_lineno, s_line, s_cptr);
		continue;

	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
		n = c - '0';
		c = *cptr;
		if (IS_OCTAL(c))
		{
		    n = (n << 3) + (c - '0');
		    c = *++cptr;
		    if (IS_OCTAL(c))
		    {
			n = (n << 3) + (c - '0');
			++cptr;
		    }
		}
		if (n > MAXCHAR)
		    illegal_character(c_cptr);
		c = n;
		break;

	    case 'x':
		c = *cptr++;
		n = hexval(c);
		if (n < 0 || n >= 16)
		    illegal_character(c_cptr);
		for (;;)
		{
		    c = *cptr;
		    i = hexval(c);
		    if (i < 0 || i >= 16)
			break;
		    ++cptr;
		    n = (n << 4) + i;
		    if (n > MAXCHAR)
			illegal_character(c_cptr);
		}
		c = n;
		break;

	    case 'a':
		c = 7;
		break;
	    case 'b':
		c = '\b';
		break;
	    case 'f':
		c = '\f';
		break;
	    case 'n':
		c = '\n';
		break;
	    case 'r':
		c = '\r';
		break;
	    case 't':
		c = '\t';
		break;
	    case 'v':
		c = '\v';
		break;
	    }
	}
	cachec(c);
    }
    FREE(s_line);

    n = cinc;
    s = MALLOC(n);
    NO_SPACE(s);

    for (i = 0; i < n; ++i)
	s[i] = cache[i];

    cinc = 0;
    if (n == 1)
	cachec('\'');
    else
	cachec('"');

    for (i = 0; i < n; ++i)
    {
	c = UCH(s[i]);
	if (c == '\\' || c == cache[0])
	{
	    cachec('\\');
	    cachec(c);
	}
	else if (isprint(c))
	    cachec(c);
	else
	{
	    cachec('\\');
	    switch (c)
	    {
	    case 7:
		cachec('a');
		break;
	    case '\b':
		cachec('b');
		break;
	    case '\f':
		cachec('f');
		break;
	    case '\n':
		cachec('n');
		break;
	    case '\r':
		cachec('r');
		break;
	    case '\t':
		cachec('t');
		break;
	    case '\v':
		cachec('v');
		break;
	    default:
		cachec(((c >> 6) & 7) + '0');
		cachec(((c >> 3) & 7) + '0');
		cachec((c & 7) + '0');
		break;
	    }
	}
    }

    if (n == 1)
	cachec('\'');
    else
	cachec('"');

    cachec(NUL);
    bp = lookup(cache);
    bp->class = TERM;
    if (n == 1 && bp->value == UNDEFINED)
	bp->value = UCH(*s);
    FREE(s);

    return (bp);
}

static int
is_reserved(char *name)
{
    char *s;

    if (strcmp(name, ".") == 0 ||
	strcmp(name, "$accept") == 0 ||
	strcmp(name, "$end") == 0)
	return (1);

    if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
    {
	s = name + 3;
	while (isdigit(UCH(*s)))
	    ++s;
	if (*s == NUL)
	    return (1);
    }

    return (0);
}

static bucket *
get_name(void)
{
    int c;

    cinc = 0;
    for (c = *cptr; IS_IDENT(c); c = *++cptr)
	cachec(c);
    cachec(NUL);

    if (is_reserved(cache))
	used_reserved(cache);

    return (lookup(cache));
}

static Value_t
get_number(void)
{
    int c;
    Value_t n;

    n = 0;
    for (c = *cptr; isdigit(c); c = *++cptr)
	n = (Value_t) (10 * n + (c - '0'));

    return (n);
}

static char *
get_tag(void)
{
    int c;
    int i;
    char *s;
    int t_lineno = lineno;
    char *t_line = dup_line();
    char *t_cptr = t_line + (cptr - line);

    ++cptr;
    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (!isalpha(c) && c != '_' && c != '$')
	illegal_tag(t_lineno, t_line, t_cptr);

    cinc = 0;
    do
    {
	cachec(c);
	c = *++cptr;
    }
    while (IS_IDENT(c));
    cachec(NUL);

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c != '>')
	illegal_tag(t_lineno, t_line, t_cptr);
    ++cptr;

    for (i = 0; i < ntags; ++i)
    {
	if (strcmp(cache, tag_table[i]) == 0)
	{
	    FREE(t_line);
	    return (tag_table[i]);
	}
    }

    if (ntags >= tagmax)
    {
	tagmax += 16;
	tag_table = (char **)
	    (tag_table
	     ? REALLOC(tag_table, (unsigned)tagmax * sizeof(char *))
	     : MALLOC((unsigned)tagmax * sizeof(char *)));
	NO_SPACE(tag_table);
    }

    s = MALLOC(cinc);
    NO_SPACE(s);

    strcpy(s, cache);
    tag_table[ntags] = s;
    ++ntags;
    FREE(t_line);
    return (s);
}

static void
declare_tokens(int assoc)
{
    int c;
    bucket *bp;
    Value_t value;
    char *tag = 0;

    if (assoc != TOKEN)
	++prec;

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c == '<')
    {
	tag = get_tag();
	c = nextc();
	if (c == EOF)
	    unexpected_EOF();
    }

    for (;;)
    {
	if (isalpha(c) || c == '_' || c == '.' || c == '$')
	    bp = get_name();
	else if (c == '\'' || c == '"')
	    bp = get_literal();
	else
	    return;

	if (bp == goal)
	    tokenized_start(bp->name);
	bp->class = TERM;

	if (tag)
	{
	    if (bp->tag && tag != bp->tag)
		retyped_warning(bp->name);
	    bp->tag = tag;
	}

	if (assoc != TOKEN)
	{
	    if (bp->prec && prec != bp->prec)
		reprec_warning(bp->name);
	    bp->assoc = (Assoc_t) assoc;
	    bp->prec = prec;
	}

	c = nextc();
	if (c == EOF)
	    unexpected_EOF();

	if (isdigit(c))
	{
	    value = get_number();
	    if (bp->value != UNDEFINED && value != bp->value)
		revalued_warning(bp->name);
	    bp->value = value;
	    c = nextc();
	    if (c == EOF)
		unexpected_EOF();
	}
    }
}

/*
 * %expect requires special handling
 * as it really isn't part of the yacc
 * grammar only a flag for yacc proper.
 */
static void
declare_expect(int assoc)
{
    int c;

    if (assoc != EXPECT && assoc != EXPECT_RR)
	++prec;

    /*
     * Stay away from nextc - doesn't
     * detect EOL and will read to EOF.
     */
    c = *++cptr;
    if (c == EOF)
	unexpected_EOF();

    for (;;)
    {
	if (isdigit(c))
	{
	    if (assoc == EXPECT)
		SRexpect = get_number();
	    else
		RRexpect = get_number();
	    break;
	}
	/*
	 * Looking for number before EOL.
	 * Spaces, tabs, and numbers are ok,
	 * words, punc., etc. are syntax errors.
	 */
	else if (c == '\n' || isalpha(c) || !isspace(c))
	{
	    syntax_error(lineno, line, cptr);
	}
	else
	{
	    c = *++cptr;
	    if (c == EOF)
		unexpected_EOF();
	}
    }
}

static void
declare_types(void)
{
    int c;
    bucket *bp;
    char *tag;

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c != '<')
	syntax_error(lineno, line, cptr);
    tag = get_tag();

    for (;;)
    {
	c = nextc();
	if (isalpha(c) || c == '_' || c == '.' || c == '$')
	    bp = get_name();
	else if (c == '\'' || c == '"')
	    bp = get_literal();
	else
	    return;

	if (bp->tag && tag != bp->tag)
	    retyped_warning(bp->name);
	bp->tag = tag;
    }
}

static void
declare_start(void)
{
    int c;
    bucket *bp;

    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (!isalpha(c) && c != '_' && c != '.' && c != '$')
	syntax_error(lineno, line, cptr);
    bp = get_name();
    if (bp->class == TERM)
	terminal_start(bp->name);
    if (goal && goal != bp)
	restarted_warning();
    goal = bp;
}

static void
read_declarations(void)
{
    int c, k;

    cache_size = 256;
    cache = MALLOC(cache_size);
    NO_SPACE(cache);

    for (;;)
    {
	c = nextc();
	if (c == EOF)
	    unexpected_EOF();
	if (c != '%')
	    syntax_error(lineno, line, cptr);
	switch (k = keyword())
	{
	case MARK:
	    return;

	case IDENT:
	    copy_ident();
	    break;

	case TEXT:
	    copy_text();
	    break;

	case UNION:
	    copy_union();
	    break;

	case TOKEN:
	case LEFT:
	case RIGHT:
	case NONASSOC:
	    declare_tokens(k);
	    break;

	case EXPECT:
	case EXPECT_RR:
	    declare_expect(k);
	    break;

	case TYPE:
	    declare_types();
	    break;

	case START:
	    declare_start();
	    break;

	case PURE_PARSER:
	    pure_parser = 1;
	    break;

	case PARSE_PARAM:
	case LEX_PARAM:
	    copy_param(k);
	    break;

	case POSIX_YACC:
	    /* noop for bison compatibility. byacc is already designed to be posix
	     * yacc compatible. */
	    break;
	}
    }
}

static void
initialize_grammar(void)
{
    nitems = 4;
    maxitems = 300;

    pitem = (bucket **)MALLOC((unsigned)maxitems * sizeof(bucket *));
    NO_SPACE(pitem);

    pitem[0] = 0;
    pitem[1] = 0;
    pitem[2] = 0;
    pitem[3] = 0;

    nrules = 3;
    maxrules = 100;

    plhs = (bucket **)MALLOC((unsigned)maxrules * sizeof(bucket *));
    NO_SPACE(plhs);

    plhs[0] = 0;
    plhs[1] = 0;
    plhs[2] = 0;

    rprec = (short *)MALLOC((unsigned)maxrules * sizeof(short));
    NO_SPACE(rprec);

    rprec[0] = 0;
    rprec[1] = 0;
    rprec[2] = 0;

    rassoc = (char *)MALLOC((unsigned)maxrules * sizeof(char));
    NO_SPACE(rassoc);

    rassoc[0] = TOKEN;
    rassoc[1] = TOKEN;
    rassoc[2] = TOKEN;
}

static void
expand_items(void)
{
    maxitems += 300;
    pitem = (bucket **)REALLOC(pitem, (unsigned)maxitems * sizeof(bucket *));
    NO_SPACE(pitem);
}

static void
expand_rules(void)
{
    maxrules += 100;

    plhs = (bucket **)REALLOC(plhs, (unsigned)maxrules * sizeof(bucket *));
    NO_SPACE(plhs);

    rprec = (short *)REALLOC(rprec, (unsigned)maxrules * sizeof(short));
    NO_SPACE(rprec);

    rassoc = (char *)REALLOC(rassoc, (unsigned)maxrules * sizeof(char));
    NO_SPACE(rassoc);
}

static void
advance_to_start(void)
{
    int c;
    bucket *bp;
    char *s_cptr;
    int s_lineno;

    for (;;)
    {
	c = nextc();
	if (c != '%')
	    break;
	s_cptr = cptr;
	switch (keyword())
	{
	case MARK:
	    no_grammar();

	case TEXT:
	    copy_text();
	    break;

	case START:
	    declare_start();
	    break;

	default:
	    syntax_error(lineno, line, s_cptr);
	}
    }

    c = nextc();
    if (!isalpha(c) && c != '_' && c != '.' && c != '_')
	syntax_error(lineno, line, cptr);
    bp = get_name();
    if (goal == 0)
    {
	if (bp->class == TERM)
	    terminal_start(bp->name);
	goal = bp;
    }

    s_lineno = lineno;
    c = nextc();
    if (c == EOF)
	unexpected_EOF();
    if (c != ':')
	syntax_error(lineno, line, cptr);
    start_rule(bp, s_lineno);
    ++cptr;
}

static void
start_rule(bucket *bp, int s_lineno)
{
    if (bp->class == TERM)
	terminal_lhs(s_lineno);
    bp->class = NONTERM;
    if (nrules >= maxrules)
	expand_rules();
    plhs[nrules] = bp;
    rprec[nrules] = UNDEFINED;
    rassoc[nrules] = TOKEN;
}

static void
end_rule(void)
{
    int i;

    if (!last_was_action && plhs[nrules]->tag)
    {
	if (pitem[nitems - 1])
	{
	    for (i = nitems - 1; (i > 0) && pitem[i]; --i)
		continue;
	    if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
		default_action_warning();
	}
	else
	{
	    default_action_warning();
	}
    }

    last_was_action = 0;
    if (nitems >= maxitems)
	expand_items();
    pitem[nitems] = 0;
    ++nitems;
    ++nrules;
}

static void
insert_empty_rule(void)
{
    bucket *bp, **bpp;

    assert(cache);
    sprintf(cache, "$$%d", ++gensym);
    bp = make_bucket(cache);
    last_symbol->next = bp;
    last_symbol = bp;
    bp->tag = plhs[nrules]->tag;
    bp->class = NONTERM;

    if ((nitems += 2) > maxitems)
	expand_items();
    bpp = pitem + nitems - 1;
    *bpp-- = bp;
    while ((bpp[0] = bpp[-1]) != 0)
	--bpp;

    if (++nrules >= maxrules)
	expand_rules();
    plhs[nrules] = plhs[nrules - 1];
    plhs[nrules - 1] = bp;
    rprec[nrules] = rprec[nrules - 1];
    rprec[nrules - 1] = 0;
    rassoc[nrules] = rassoc[nrules - 1];
    rassoc[nrules - 1] = TOKEN;
}

static void
add_symbol(void)
{
    int c;
    bucket *bp;
    int s_lineno = lineno;

    c = *cptr;
    if (c == '\'' || c == '"')
	bp = get_literal();
    else
	bp = get_name();

    c = nextc();
    if (c == ':')
    {
	end_rule();
	start_rule(bp, s_lineno);
	++cptr;
	return;
    }

    if (last_was_action)
	insert_empty_rule();
    last_was_action = 0;

    if (++nitems > maxitems)
	expand_items();
    pitem[nitems - 1] = bp;
}

static char *
after_blanks(char *s)
{
    while (*s != '\0' && isspace(UCH(*s)))
	++s;
    return s;
}

static void
copy_action(void)
{
    int c;
    int i, n;
    int depth;
    int quote;
    char *tag;
    FILE *f = action_file;
    int a_lineno = lineno;
    char *a_line = dup_line();
    char *a_cptr = a_line + (cptr - line);

    if (last_was_action)
	insert_empty_rule();
    last_was_action = 1;

    fprintf(f, "case %d:\n", nrules - 2);
    if (!lflag)
	fprintf(f, line_format, lineno, input_file_name);
    if (*cptr == '=')
	++cptr;

    /* avoid putting curly-braces in first column, to ease editing */
    if (*after_blanks(cptr) == L_CURL)
    {
	putc('\t', f);
	cptr = after_blanks(cptr);
    }

    n = 0;
    for (i = nitems - 1; pitem[i]; --i)
	++n;

    depth = 0;
  loop:
    c = *cptr;
    if (c == '$')
    {
	if (cptr[1] == '<')
	{
	    int d_lineno = lineno;
	    char *d_line = dup_line();
	    char *d_cptr = d_line + (cptr - line);

	    ++cptr;
	    tag = get_tag();
	    c = *cptr;
	    if (c == '$')
	    {
		fprintf(f, "yyval.%s", tag);
		++cptr;
		FREE(d_line);
		goto loop;
	    }
	    else if (isdigit(c))
	    {
		i = get_number();
		if (i > n)
		    dollar_warning(d_lineno, i);
		fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
		FREE(d_line);
		goto loop;
	    }
	    else if (c == '-' && isdigit(UCH(cptr[1])))
	    {
		++cptr;
		i = -get_number() - n;
		fprintf(f, "yystack.l_mark[%d].%s", i, tag);
		FREE(d_line);
		goto loop;
	    }
	    else
		dollar_error(d_lineno, d_line, d_cptr);
	}
	else if (cptr[1] == '$')
	{
	    if (ntags)
	    {
		tag = plhs[nrules]->tag;
		if (tag == 0)
		    untyped_lhs();
		fprintf(f, "yyval.%s", tag);
	    }
	    else
		fprintf(f, "yyval");
	    cptr += 2;
	    goto loop;
	}
	else if (isdigit(UCH(cptr[1])))
	{
	    ++cptr;
	    i = get_number();
	    if (ntags)
	    {
		if (i <= 0 || i > n)
		    unknown_rhs(i);
		tag = pitem[nitems + i - n - 1]->tag;
		if (tag == 0)
		    untyped_rhs(i, pitem[nitems + i - n - 1]->name);
		fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
	    }
	    else
	    {
		if (i > n)
		    dollar_warning(lineno, i);
		fprintf(f, "yystack.l_mark[%d]", i - n);
	    }
	    goto loop;
	}
	else if (cptr[1] == '-')
	{
	    cptr += 2;
	    i = get_number();
	    if (ntags)
		unknown_rhs(-i);
	    fprintf(f, "yystack.l_mark[%d]", -i - n);
	    goto loop;
	}
    }
    if (isalpha(c) || c == '_' || c == '$')
    {
	do
	{
	    putc(c, f);
	    c = *++cptr;
	}
	while (isalnum(c) || c == '_' || c == '$');
	goto loop;
    }
    putc(c, f);
    ++cptr;
    switch (c)
    {
    case '\n':
      next_line:
	get_line();
	if (line)
	    goto loop;
	unterminated_action(a_lineno, a_line, a_cptr);

    case ';':
	if (depth > 0)
	    goto loop;
	fprintf(f, "\nbreak;\n");
	free(a_line);
	return;

    case L_CURL:
	++depth;
	goto loop;

    case R_CURL:
	if (--depth > 0)
	    goto loop;
	fprintf(f, "\nbreak;\n");
	free(a_line);
	return;

    case '\'':
    case '"':
	{
	    int s_lineno = lineno;
	    char *s_line = dup_line();
	    char *s_cptr = s_line + (cptr - line - 1);

	    quote = c;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == quote)
		{
		    FREE(s_line);
		    goto loop;
		}
		if (c == '\n')
		    unterminated_string(s_lineno, s_line, s_cptr);
		if (c == '\\')
		{
		    c = *cptr++;
		    putc(c, f);
		    if (c == '\n')
		    {
			get_line();
			if (line == 0)
			    unterminated_string(s_lineno, s_line, s_cptr);
		    }
		}
	    }
	}

    case '/':
	c = *cptr;
	if (c == '/')
	{
	    putc('*', f);
	    while ((c = *++cptr) != '\n')
	    {
		if (c == '*' && cptr[1] == '/')
		    fprintf(f, "* ");
		else
		    putc(c, f);
	    }
	    fprintf(f, "*/\n");
	    goto next_line;
	}
	if (c == '*')
	{
	    int c_lineno = lineno;
	    char *c_line = dup_line();
	    char *c_cptr = c_line + (cptr - line - 1);

	    putc('*', f);
	    ++cptr;
	    for (;;)
	    {
		c = *cptr++;
		putc(c, f);
		if (c == '*' && *cptr == '/')
		{
		    putc('/', f);
		    ++cptr;
		    FREE(c_line);
		    goto loop;
		}
		if (c == '\n')
		{
		    get_line();
		    if (line == 0)
			unterminated_comment(c_lineno, c_line, c_cptr);
		}
	    }
	}
	goto loop;

    default:
	goto loop;
    }
}

static int
mark_symbol(void)
{
    int c;
    bucket *bp;

    c = cptr[1];
    if (c == '%' || c == '\\')
    {
	cptr += 2;
	return (1);
    }

    if (c == '=')
	cptr += 2;
    else if ((c == 'p' || c == 'P') &&
	     ((c = cptr[2]) == 'r' || c == 'R') &&
	     ((c = cptr[3]) == 'e' || c == 'E') &&
	     ((c = cptr[4]) == 'c' || c == 'C') &&
	     ((c = cptr[5], !IS_IDENT(c))))
	cptr += 5;
    else
	syntax_error(lineno, line, cptr);

    c = nextc();
    if (isalpha(c) || c == '_' || c == '.' || c == '$')
	bp = get_name();
    else if (c == '\'' || c == '"')
	bp = get_literal();
    else
    {
	syntax_error(lineno, line, cptr);
	/*NOTREACHED */
    }

    if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
	prec_redeclared();

    rprec[nrules] = bp->prec;
    rassoc[nrules] = bp->assoc;
    return (0);
}

static void
read_grammar(void)
{
    int c;

    initialize_grammar();
    advance_to_start();

    for (;;)
    {
	c = nextc();
	if (c == EOF)
	    break;
	if (isalpha(c)
	    || c == '_'
	    || c == '.'
	    || c == '$'
	    || c == '\''
	    || c == '"')
	    add_symbol();
	else if (c == L_CURL || c == '=')
	    copy_action();
	else if (c == '|')
	{
	    end_rule();
	    start_rule(plhs[nrules - 1], 0);
	    ++cptr;
	}
	else if (c == '%')
	{
	    if (mark_symbol())
		break;
	}
	else
	    syntax_error(lineno, line, cptr);
    }
    end_rule();
}

static void
free_tags(void)
{
    int i;

    if (tag_table == 0)
	return;

    for (i = 0; i < ntags; ++i)
    {
	assert(tag_table[i]);
	FREE(tag_table[i]);
    }
    FREE(tag_table);
}

static void
pack_names(void)
{
    bucket *bp;
    char *p, *s, *t;

    name_pool_size = 13;	/* 13 == sizeof("$end") + sizeof("$accept") */
    for (bp = first_symbol; bp; bp = bp->next)
	name_pool_size += strlen(bp->name) + 1;

    name_pool = MALLOC(name_pool_size);
    NO_SPACE(name_pool);

    strcpy(name_pool, "$accept");
    strcpy(name_pool + 8, "$end");
    t = name_pool + 13;
    for (bp = first_symbol; bp; bp = bp->next)
    {
	p = t;
	s = bp->name;
	while ((*t++ = *s++) != 0)
	    continue;
	FREE(bp->name);
	bp->name = p;
    }
}

static void
check_symbols(void)
{
    bucket *bp;

    if (goal->class == UNKNOWN)
	undefined_goal(goal->name);

    for (bp = first_symbol; bp; bp = bp->next)
    {
	if (bp->class == UNKNOWN)
	{
	    undefined_symbol_warning(bp->name);
	    bp->class = TERM;
	}
    }
}

static void
protect_string(char *src, char **des)
{
    unsigned len;
    char *s;
    char *d;

    *des = src;
    if (src)
    {
	len = 1;
	s = src;
	while (*s)
	{
	    if ('\\' == *s || '"' == *s)
		len++;
	    s++;
	    len++;
	}

	*des = d = (char *)MALLOC(len);
	NO_SPACE(d);

	s = src;
	while (*s)
	{
	    if ('\\' == *s || '"' == *s)
		*d++ = '\\';
	    *d++ = *s++;
	}
	*d = '\0';
    }
}

static void
pack_symbols(void)
{
    bucket *bp;
    bucket **v;
    Value_t i, j, k, n;

    nsyms = 2;
    ntokens = 1;
    for (bp = first_symbol; bp; bp = bp->next)
    {
	++nsyms;
	if (bp->class == TERM)
	    ++ntokens;
    }
    start_symbol = (Value_t) ntokens;
    nvars = nsyms - ntokens;

    symbol_name = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
    NO_SPACE(symbol_name);

    symbol_value = (short *)MALLOC((unsigned)nsyms * sizeof(short));
    NO_SPACE(symbol_value);

    symbol_prec = (short *)MALLOC((unsigned)nsyms * sizeof(short));
    NO_SPACE(symbol_prec);

    symbol_assoc = MALLOC(nsyms);
    NO_SPACE(symbol_assoc);

    v = (bucket **)MALLOC((unsigned)nsyms * sizeof(bucket *));
    NO_SPACE(v);

    v[0] = 0;
    v[start_symbol] = 0;

    i = 1;
    j = (Value_t) (start_symbol + 1);
    for (bp = first_symbol; bp; bp = bp->next)
    {
	if (bp->class == TERM)
	    v[i++] = bp;
	else
	    v[j++] = bp;
    }
    assert(i == ntokens && j == nsyms);

    for (i = 1; i < ntokens; ++i)
	v[i]->index = i;

    goal->index = (Index_t) (start_symbol + 1);
    k = (Value_t) (start_symbol + 2);
    while (++i < nsyms)
	if (v[i] != goal)
	{
	    v[i]->index = k;
	    ++k;
	}

    goal->value = 0;
    k = 1;
    for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
    {
	if (v[i] != goal)
	{
	    v[i]->value = k;
	    ++k;
	}
    }

    k = 0;
    for (i = 1; i < ntokens; ++i)
    {
	n = v[i]->value;
	if (n > 256)
	{
	    for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
		symbol_value[j] = symbol_value[j - 1];
	    symbol_value[j] = n;
	}
    }

    assert(v[1] != 0);

    if (v[1]->value == UNDEFINED)
	v[1]->value = 256;

    j = 0;
    n = 257;
    for (i = 2; i < ntokens; ++i)
    {
	if (v[i]->value == UNDEFINED)
	{
	    while (j < k && n == symbol_value[j])
	    {
		while (++j < k && n == symbol_value[j])
		    continue;
		++n;
	    }
	    v[i]->value = n;
	    ++n;
	}
    }

    symbol_name[0] = name_pool + 8;
    symbol_value[0] = 0;
    symbol_prec[0] = 0;
    symbol_assoc[0] = TOKEN;
    for (i = 1; i < ntokens; ++i)
    {
	symbol_name[i] = v[i]->name;
	symbol_value[i] = v[i]->value;
	symbol_prec[i] = v[i]->prec;
	symbol_assoc[i] = v[i]->assoc;
    }
    symbol_name[start_symbol] = name_pool;
    symbol_value[start_symbol] = -1;
    symbol_prec[start_symbol] = 0;
    symbol_assoc[start_symbol] = TOKEN;
    for (++i; i < nsyms; ++i)
    {
	k = v[i]->index;
	symbol_name[k] = v[i]->name;
	symbol_value[k] = v[i]->value;
	symbol_prec[k] = v[i]->prec;
	symbol_assoc[k] = v[i]->assoc;
    }

    if (gflag)
    {
	symbol_pname = (char **)MALLOC((unsigned)nsyms * sizeof(char *));
	NO_SPACE(symbol_pname);

	for (i = 0; i < nsyms; ++i)
	    protect_string(symbol_name[i], &(symbol_pname[i]));
    }

    FREE(v);
}

static void
pack_grammar(void)
{
    int i;
    Value_t j;
    Assoc_t assoc;
    Value_t prec2;

    ritem = (short *)MALLOC((unsigned)nitems * sizeof(short));
    NO_SPACE(ritem);

    rlhs = (short *)MALLOC((unsigned)nrules * sizeof(short));
    NO_SPACE(rlhs);

    rrhs = (short *)MALLOC((unsigned)(nrules + 1) * sizeof(short));
    NO_SPACE(rrhs);

    rprec = (short *)REALLOC(rprec, (unsigned)nrules * sizeof(short));
    NO_SPACE(rprec);

    rassoc = REALLOC(rassoc, nrules);
    NO_SPACE(rassoc);

    ritem[0] = -1;
    ritem[1] = goal->index;
    ritem[2] = 0;
    ritem[3] = -2;
    rlhs[0] = 0;
    rlhs[1] = 0;
    rlhs[2] = start_symbol;
    rrhs[0] = 0;
    rrhs[1] = 0;
    rrhs[2] = 1;

    j = 4;
    for (i = 3; i < nrules; ++i)
    {
	rlhs[i] = plhs[i]->index;
	rrhs[i] = j;
	assoc = TOKEN;
	prec2 = 0;
	while (pitem[j])
	{
	    ritem[j] = pitem[j]->index;
	    if (pitem[j]->class == TERM)
	    {
		prec2 = pitem[j]->prec;
		assoc = pitem[j]->assoc;
	    }
	    ++j;
	}
	ritem[j] = (Value_t) - i;
	++j;
	if (rprec[i] == UNDEFINED)
	{
	    rprec[i] = prec2;
	    rassoc[i] = assoc;
	}
    }
    rrhs[i] = j;

    FREE(plhs);
    FREE(pitem);
}

static void
print_grammar(void)
{
    int i, k;
    size_t j, spacing = 0;
    FILE *f = verbose_file;

    if (!vflag)
	return;

    k = 1;
    for (i = 2; i < nrules; ++i)
    {
	if (rlhs[i] != rlhs[i - 1])
	{
	    if (i != 2)
		fprintf(f, "\n");
	    fprintf(f, "%4d  %s :", i - 2, symbol_name[rlhs[i]]);
	    spacing = strlen(symbol_name[rlhs[i]]) + 1;
	}
	else
	{
	    fprintf(f, "%4d  ", i - 2);
	    j = spacing;
	    while (j-- != 0)
		putc(' ', f);
	    putc('|', f);
	}

	while (ritem[k] >= 0)
	{
	    fprintf(f, " %s", symbol_name[ritem[k]]);
	    ++k;
	}
	++k;
	putc('\n', f);
    }
}

void
reader(void)
{
    write_section(code_file, banner);
    create_symbol_table();
    read_declarations();
    read_grammar();
    free_symbol_table();
    free_tags();
    pack_names();
    check_symbols();
    pack_symbols();
    pack_grammar();
    free_symbols();
    print_grammar();
}

#ifdef NO_LEAKS
static param *
free_declarations(param * list)
{
    while (list != 0)
    {
	param *next = list->next;
	free(list->type);
	free(list->name);
	free(list->type2);
	free(list);
	list = next;
    }
    return list;
}

void
reader_leaks(void)
{
    lex_param = free_declarations(lex_param);
    parse_param = free_declarations(parse_param);

    DO_FREE(line);
    DO_FREE(rrhs);
    DO_FREE(rlhs);
    DO_FREE(rprec);
    DO_FREE(ritem);
    DO_FREE(rassoc);
    DO_FREE(cache);
    DO_FREE(name_pool);
    DO_FREE(symbol_name);
    DO_FREE(symbol_prec);
    DO_FREE(symbol_assoc);
    DO_FREE(symbol_value);
}
#endif
