/*
 * Copyright (c) 2003-2011
 * Distributed Systems Software.  All rights reserved.
 * See the file LICENSE for redistribution information.
 */

/*****************************************************************************
 * COPYRIGHT AND PERMISSION NOTICE
 * 
 * Copyright (c) 2001-2003 The Queen in Right of Canada
 * 
 * All rights reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, and/or sell
 * copies of the Software, and to permit persons to whom the Software is 
 * furnished to do so, provided that the above copyright notice(s) and this
 * permission notice appear in all copies of the Software and that both the
 * above copyright notice(s) and this permission notice appear in supporting
 * documentation.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE 
 * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 
 * SOFTWARE.
 * 
 * Except as contained in this notice, the name of a copyright holder shall not
 * be used in advertising or otherwise to promote the sale, use or other
 * dealings in this Software without prior written authorization of the
 * copyright holder.
 ***************************************************************************/

/*
 * Kwv - a library for handling keyword=value text strings
 * It can be especially useful when dealing with environment variables and
 * configuration files.
 *
 * The data structure is intended to be built up incrementally and eventually
 * freed all at once.
 * The data structure looks something like this:
 *
 * ptr_0 --> keywordA/keyvalue --> keywordA/keyvalue --> ...
 * ptr_1 --> keywordB/keyvalue --> ...
 * ptr_2 --> ...
 * NULL
 * ...
 *
 * The vector of pointers is allocated using malloc() and expanded as
 * necessary using realloc().
 * Lookup is a linear search, which is sufficient so far.
 *
 * We generally assume that malloc() etc. are being called indirectly and
 * cannot return a failure.
 *
 * Additions?
 * kwv_split(kwv, string, sep) -- a=b&c=d&e=f
 */

#ifndef lint
static const char copyright[] =
"Copyright (c) 2003-2011\n\
Distributed Systems Software.  All rights reserved.";
static const char revid[] =
  "$Id: kwv.c 2521 2011-09-23 18:44:00Z brachman $";
#endif

#include "kwv.h"
#include "str.h"
#include "misc.h"

/*
 * Consistency-check hook.
 * When DEBUG_KWV is defined, VALIDATE() calls kwv_validate() and aborts the
 * process if it returns -1; OP is passed through to kwv_validate() (callers
 * in this file pass a short description of the call site).
 * Otherwise VALIDATE() expands to nothing.
 */
#ifdef DEBUG_KWV
static void
VALIDATE(Kwv *kwv, char *op)
{

  if (kwv_validate(kwv, op, 1) == -1)
	abort();
}
#else
#define VALIDATE(K, OP)
#endif

/*
 * Selects how make_pair() stores the name and value strings it is given.
 */
typedef enum {
  DO_COPY = 0,		/* Duplicate the strings with strdup() */
  NO_COPY = 1		/* Store the caller's pointers as-is */
} copy_t;

/*
 * Copy at most N characters from B to A, stopping early once a terminating
 * NUL has been copied.  If N characters are copied without seeing a NUL,
 * a NUL is appended, so A must have room for N + 1 characters.
 */
static void
cpyn(char *a, char *b, int n)
{
  char *dst;
  const char *src;
  int remaining;

  dst = a;
  src = b;
  for (remaining = n; remaining > 0; remaining--) {
	char ch;

	ch = *src++;
	*dst++ = ch;
	if (ch == '\0')
	  return;
  }

  *dst = '\0';
}

Kwv *
kwv_init(unsigned int init_nalloc)
{
  Kwv *kwv;

  if ((kwv = ALLOC(Kwv)) == NULL)
	return(NULL);

  kwv->npairs = 0;
  kwv->nused = 0;
  kwv->clear_flag = 0;
  kwv->dup_mode = KWV_ALLOW_DUPS;
  kwv->icase = 0;
  kwv->regex_flag = 0;
  kwv->prepend_flag = 0;
  kwv->error_msg = NULL;

  if ((kwv->pairs = ALLOC_N(Kwv_pair *, init_nalloc)) != NULL) {
	unsigned int i;

	for (i = 0; i < init_nalloc; i++)
	  kwv->pairs[i] = NULL;
	kwv->nalloc = init_nalloc;
	VALIDATE(kwv, "after init");
	return(kwv);
  }

  free(kwv);
  return(NULL);
}

/*
 * Grow the vector of pair-list pointers, doubling its capacity and setting
 * the new slots to NULL.
 * A vector with zero capacity (e.g., after kwv_init(0)) is grown to a small
 * fixed size, because doubling zero would leave no room for the slot that
 * the caller is about to fill.
 * Return 0 on success; return -1 (and set error_msg when KWV is non-NULL)
 * on failure, in which case the existing vector is left untouched.
 */
static int
kwv_expand(Kwv *kwv)
{
  unsigned int i, n, old_n;
  Kwv_pair **new_pairs;

  if (kwv == NULL)
	return(-1);

  VALIDATE(kwv, "before expand");

  old_n = kwv->nalloc;
  n = (old_n == 0) ? 8 : old_n * 2;

  /* Reject a wrapped-around count or a byte size that cannot be expressed. */
  if (n <= old_n || n > ((size_t) -1) / sizeof(Kwv_pair *)) {
	kwv->error_msg = "pair vector is too large to expand";
	return(-1);
  }

  /* Assign only on success so the old vector survives a failed realloc(). */
  new_pairs = (Kwv_pair **) realloc(kwv->pairs, sizeof(Kwv_pair *) * n);
  if (new_pairs == NULL) {
	kwv->error_msg = "realloc failed";
	return(-1);
  }
  kwv->pairs = new_pairs;

  for (i = old_n; i < n; i++)
	kwv->pairs[i] = NULL;
  kwv->nalloc = n;

  VALIDATE(kwv, "after expand");
  return(0);
}

/*
 * Starting at vector index START, find the first in-use slot whose key name
 * matches the compiled regular expression REGEX.
 * Return 1 and (if SLOT is non-NULL) store the index in SLOT when a match is
 * found, 0 when there is no match (or KWV is NULL or empty), and -1 if
 * regexec() reports an error other than REG_NOMATCH, in which case error_msg
 * is set to a copy of the regerror() text.
 */
static int
lookup_regex_from_slot(Kwv *kwv, regex_t *regex, unsigned int start,
					   unsigned int *slot)
{
  unsigned int idx;

  if (kwv == NULL || kwv->pairs == NULL || kwv->npairs == 0)
	return(0);

  idx = start;
  while (idx < kwv->nused) {
	Kwv_pair *cand;

	cand = kwv->pairs[idx];
	if (cand != NULL) {
	  int rc;

	  rc = regexec(regex, cand->name, 0, NULL, 0);
	  if (rc == 0) {
		if (slot != NULL)
		  *slot = idx;
		return(1);
	  }

	  if (rc != REG_NOMATCH) {
		char msg[100];

		msg[0] = '\0';
		regerror(rc, regex, msg, sizeof(msg));
		kwv->error_msg = strdup(msg);
		return(-1);
	  }
	}
	idx++;
  }

  return(0);
}

/*
 * Starting at vector index START, find the first in-use slot whose key name
 * equals KEY; a case-insensitive comparison is tried first when the icase
 * flag is set.
 * Return 1 and (if SLOT is non-NULL) store the index in SLOT when found;
 * return 0 if KEY is NULL or empty, KWV is NULL or empty, or nothing matches.
 */
static int
lookup_key_from_slot(Kwv *kwv, char *key, unsigned int start,
					 unsigned int *slot)
{
  unsigned int idx;

  VALIDATE(kwv, "lookup key from slot");

  if (key == NULL || key[0] == '\0')
	return(0);

  if (kwv == NULL || kwv->pairs == NULL || kwv->npairs == 0)
	return(0);

  idx = start;
  while (idx < kwv->nused) {
	Kwv_pair *cand;

	cand = kwv->pairs[idx];
	if (cand != NULL) {
	  int matched;

	  if (kwv->icase && strcaseeq(key, cand->name))
		matched = 1;
	  else if (streq(key, cand->name))
		matched = 1;
	  else
		matched = 0;

	  if (matched) {
		if (slot != NULL)
		  *slot = idx;
		return(1);
	  }
	}
	idx++;
  }

  return(0);
}

/*
 * Search the whole vector for KEY; see lookup_key_from_slot().
 */
static int
lookup_key(Kwv *kwv, char *key, unsigned int *slot)
{
  int found;

  found = lookup_key_from_slot(kwv, key, 0, slot);

  return(found);
}

/*
 * Return the current settings of KWV as a set of ORed KWV_*_MODE bits:
 * one bit for each flag that is enabled plus the bit that corresponds to the
 * current duplicate-handling mode.
 */
int
kwv_get_mode(Kwv *kwv)
{
  int bits;

  bits = 0;

  if (kwv->clear_flag)
	bits |= KWV_CLEAR_MODE;
  if (kwv->regex_flag)
	bits |= KWV_REGEX_MODE;
  if (kwv->prepend_flag)
	bits |= KWV_PREPEND_MODE;
  if (kwv->icase)
	bits |= KWV_ICASE_MODE;

  if (kwv->dup_mode == KWV_NO_DUPS)
	bits |= KWV_NODUPS_MODE;
  if (kwv->dup_mode == KWV_ALLOW_DUPS)
	bits |= KWV_ALLOWDUPS_MODE;
  if (kwv->dup_mode == KWV_REPLACE_DUPS)
	bits |= KWV_REPLACEDUPS_MODE;

  return(bits);
}

/*
 * Syntax {{d[anr]} | {{+|-}flag}}*
 *
 * + --> enable flag
 * - --> disable flag
 * toggle flags (each must be preceded by + or -):
 * c: clear mode
 * d: duplicate mode (+d allows duplicates, -d disallows them)
 * i: ignore case
 * r: regex mode
 * p: prepend mode
 *
 * multi-mode flags (not preceded by + or -):
 * d: duplicate handling mode
 *  da=allow,dn=not allow,dr=replace
 *
 * Return the mode bits after the changes, or -1 (an invalid mode).
 *
 * XXX This API is a bit wonky and so is subject to change
 */
int
kwv_set_mode(Kwv *kwv, char *modestr)
{
  char *cp;

  cp = modestr;
  while (*cp != '\0') {
	int enable;

	if (*cp == 'd') {
	  /* Multi-mode form: 'd' followed by a selector character. */
	  cp++;
	  if (*cp == 'a')
		kwv->dup_mode = KWV_ALLOW_DUPS;
	  else if (*cp == 'n')
		kwv->dup_mode = KWV_NO_DUPS;
	  else if (*cp == 'r')
		kwv->dup_mode = KWV_REPLACE_DUPS;
	  else
		return(-1);
	  cp++;
	  continue;
	}

	/* Toggle form: '+' or '-' followed by a flag character. */
	if (*cp == '+')
	  enable = 1;
	else if (*cp == '-')
	  enable = 0;
	else
	  return(-1);
	cp++;

	/* A string that ends right after '+' or '-' is rejected here. */
	if (*cp == 'c')
	  kwv->clear_flag = enable ? 1 : 0;
	else if (*cp == 'd')
	  kwv->dup_mode = enable ? KWV_ALLOW_DUPS : KWV_NO_DUPS;
	else if (*cp == 'i')
	  kwv->icase = enable ? 1 : 0;
	else if (*cp == 'r')
	  kwv->regex_flag = enable ? 1 : 0;
	else if (*cp == 'p')
	  kwv->prepend_flag = enable ? 1 : 0;
	else
	  return(-1);
	cp++;
  }

  return(kwv_get_mode(kwv));
}

/*
 * Return the head of the pair list stored for KEY, or NULL if KWV is NULL
 * or KEY is not found.  Clears error_msg.
 */
Kwv_pair *
kwv_lookup(Kwv *kwv, char *key)
{
  unsigned int idx;

  if (kwv != NULL) {
	kwv->error_msg = NULL;
	if (lookup_key(kwv, key, &idx) != 0)
	  return(kwv->pairs[idx]);
  }

  return(NULL);
}

/*
 * Lookup a keyword name and returns its value.
 * If the name or value is missing, return NULL.
 */
char *
kwv_lookup_value(Kwv *kwv, char *key)
{
  unsigned int idx;
  Kwv_pair *entry;

  if (kwv == NULL)
	return(NULL);

  kwv->error_msg = NULL;
  if (lookup_key(kwv, key, &idx) == 0)
	return(NULL);

  entry = kwv->pairs[idx];

  /* A missing or empty value is reported in the same way as a missing key. */
  if (entry != NULL && entry->val != NULL && entry->val[0] != '\0')
	return(entry->val);

  return(NULL);
}

/*
 * Look up the value of KEY and convert it using strnum() with the given
 * TYPE, storing the result through VALUE.
 * Return -1 if kwv_lookup_value() finds no value, otherwise whatever
 * strnum() returns.
 */
int
kwv_lookup_strnum(Kwv *kwv, char *key, Strnum type, void *value)
{
  char *text;

  text = kwv_lookup_value(kwv, key);
  if (text == NULL)
	return(-1);

  return(strnum(text, type, value));
}

/*
 * Like kwv_lookup_value() except that if the name has not been assigned a
 * value or if the value is the empty string, return the empty string.
 * If the name is missing, return NULL.
 * Otherwise, return the value.
 */
char *
kwv_lookup_value_null(Kwv *kwv, char *key)
{
  unsigned int idx;
  Kwv_pair *entry;

  if (kwv == NULL)
	return(NULL);

  kwv->error_msg = NULL;
  if (lookup_key(kwv, key, &idx) == 0)
	return(NULL);

  if ((entry = kwv->pairs[idx]) == NULL)
	return(NULL);

  /* A key with no value, or with an empty value, yields "". */
  return((entry->val != NULL && *entry->val != '\0') ? entry->val : "");
}

/*
 * Delete each value of the given key.
 * Note that the number of pairs (npairs) is updated, but not the
 * number of vector slots in use (nused), so the caller must do this if it's
 * required.
 * The data structure may therefore be left in an inconsistent state
 * (e.g., with slot kwv->pairs[x] pointing to a freed pair).
 */
static void
delete_pair(Kwv *kwv, Kwv_pair *head)
{
  Kwv_pair *cur, *following;

  cur = head;
  while (cur != NULL) {
	/* Remember the successor before this node is freed. */
	following = cur->next;

	if (cur->name != NULL) {
	  /* In clear mode the string is passed to strzap() before being freed. */
	  if (kwv->clear_flag)
		strzap(cur->name);
	  free(cur->name);
	  cur->name = NULL;
	}

	if (cur->val != NULL) {
	  if (kwv->clear_flag)
		strzap(cur->val);
	  free(cur->val);
	  cur->val = NULL;
	}

	free(cur);
	kwv->npairs--;

	cur = following;
  }
}

/*
 * Fill in a pair from KEY, VAL, and XVAL.
 * If PAIR is non-NULL it is (re)initialized in place (its previous contents
 * are overwritten, not freed); otherwise a new pair is allocated.
 * With DO_COPY the non-NULL strings are duplicated with strdup(); otherwise
 * the caller's pointers are stored directly.  XVAL is never copied.
 * Return the pair, or NULL if a new pair could not be allocated.
 */
static Kwv_pair *
make_pair(Kwv_pair *pair, char *key, char *val, void *xval, copy_t copy)
{
  Kwv_pair *target;

  target = pair;
  if (target == NULL) {
	target = ALLOC(Kwv_pair);
	if (target == NULL)
	  return(NULL);
  }

  if (copy != DO_COPY) {
	target->name = key;
	target->val = val;
  }
  else {
	target->name = NULL;
	if (key != NULL)
	  target->name = strdup(key);

	target->val = NULL;
	if (val != NULL)
	  target->val = strdup(val);
  }

  target->next = NULL;
  target->xval = xval;		/* Stored as-is, never duplicated. */

  return(target);
}

/*
 * Build a new list with one node for each node of the list OHEAD, in the
 * same order, using make_pair() with the given COPY mode.
 * LIST_LEN is set to the number of nodes in the new list.
 * Return the head of the new list (NULL if OHEAD is NULL).
 * If a node cannot be created, everything built so far is released, LIST_LEN
 * is set to 0, and NULL is returned; strings are released only if they were
 * duplicated here (DO_COPY), so in NO_COPY mode the caller still owns them.
 */
static Kwv_pair *
copy_pair_list(Kwv_pair *ohead, copy_t copy, int *list_len)
{
  Kwv_pair *nhead, *np, *op;
  Kwv_pair **tailp;

  nhead = NULL;
  tailp = &nhead;
  *list_len = 0;

  for (op = ohead; op != NULL; op = op->next) {
	np = make_pair(NULL, op->name, op->val, op->xval, copy);
	if (np == NULL) {
	  /* Undo the partial copy rather than leaking it. */
	  while (nhead != NULL) {
		np = nhead->next;
		if (copy == DO_COPY) {
		  free(nhead->name);
		  free(nhead->val);
		}
		free(nhead);
		nhead = np;
	  }
	  *list_len = 0;
	  return(NULL);
	}

	*tailp = np;
	tailp = &np->next;
	(*list_len)++;
  }

  return(nhead);
}

/*
 * Attach the list NP to the end of the list whose head pointer is *HEADP.
 * Return the number of nodes in NP, or -1 if HEADP or NP is NULL.
 */
static int
append_to_pair_list(Kwv_pair **headp, Kwv_pair *np)
{
  int added;
  Kwv_pair *tail, *walk;

  if (headp == NULL || np == NULL)
    return(-1);

  if (*headp == NULL)
	*headp = np;
  else {
	tail = *headp;
	while (tail->next != NULL)
	  tail = tail->next;
	tail->next = np;
  }

  /* Count the nodes that were just attached. */
  added = 0;
  for (walk = np; walk != NULL; walk = walk->next)
	added++;

  return(added);
}

/*
 * Insert the list NP in front of the list whose head pointer is *HEADP, so
 * that *HEADP becomes NP and the old list follows NP's last node.
 * Return the number of nodes in NP, or -1 if HEADP or NP is NULL.
 */
static int
prepend_to_pair_list(Kwv_pair **headp, Kwv_pair *np)
{
  int count;
  Kwv_pair *last, *walk;

  if (headp == NULL || np == NULL)
    return(-1);

  /* Walk NP to count its nodes and locate its final node. */
  count = 0;
  last = np;
  for (walk = np; walk != NULL; walk = walk->next) {
	last = walk;
	count++;
  }

  last->next = *headp;
  *headp = np;

  return(count);
}

/*
 * Add the pair list OP to K (or to a new kwv if K is NULL) and return the
 * kwv, or NULL on failure.
 *
 * The key is the name of the first pair of OP, which must be non-empty.
 * If the key already exists, the duplicate mode decides what happens:
 *   KWV_NO_DUPS: fail with error_msg set
 *   KWV_REPLACE_DUPS: the existing list for the key is deleted and replaced
 *   otherwise: the new list is appended to (or, in prepend mode, inserted
 *     before) the existing list
 * If the key does not exist, the list goes in the next unused vector slot.
 *
 * New list nodes are always allocated; COPY determines whether the name and
 * value strings are duplicated (DO_COPY) or adopted from OP (NO_COPY), and
 * this applies to every path, including replacement.
 * When NULL is returned nothing has been adopted from OP, and a kwv that was
 * created by this call has been freed.
 */
static Kwv *
add_pair_list(Kwv *k, Kwv_pair *op, copy_t copy)
{
  int exists, len;
  unsigned int slot;
  Kwv *kwv;
  Kwv_pair *np;

  if (op == NULL || op->name == NULL || op->name[0] == '\0')
	return(NULL);

  if (k == NULL) {
	/* XXX the initial allocation should be divined somehow. */
	if ((kwv = kwv_init(8)) == NULL)
	  return(NULL);
  }
  else
	kwv = k;

  VALIDATE(kwv, "before add pair list");
  kwv->error_msg = NULL;
  exists = lookup_key(kwv, op->name, &slot);

  if (exists && kwv->dup_mode == KWV_NO_DUPS) {
	kwv->error_msg = ds_xprintf("attempt to add a duplicate key: \"%s\"",
								op->name);
	goto fail;
  }

  /* A new vector slot is only needed when the key is not already present. */
  if (!exists && kwv->nused == kwv->nalloc) {
	if (kwv_expand(kwv) == -1)
	  goto fail;
  }

  /*
   * Make the new list before touching the kwv, so that a failure leaves the
   * kwv unchanged and so that OP may safely refer to the list being replaced
   * when the strings are being duplicated.
   */
  if ((np = copy_pair_list(op, copy, &len)) == NULL) {
	kwv->error_msg = "could not copy pair list";
	goto fail;
  }

  if (exists && kwv->dup_mode == KWV_REPLACE_DUPS) {
	/* Nuke all values of this key, then reuse the slot in the vector. */
	delete_pair(kwv, kwv->pairs[slot]);
	kwv->pairs[slot] = np;
	kwv->npairs += len;
	VALIDATE(kwv, "after add/replace pair list");
	return(kwv);
  }

  if (exists) {
	if (kwv->prepend_flag)
	  prepend_to_pair_list(&kwv->pairs[slot], np);
	else
	  append_to_pair_list(&kwv->pairs[slot], np);
  }
  else {
	kwv->pairs[kwv->nused] = np;
	kwv->nused++;
  }

  kwv->npairs += len;

  VALIDATE(kwv, "after add pair list");
  return(kwv);

 fail:
  /* Do not leak a kwv that the caller never got to see. */
  if (k == NULL)
	kwv_free(kwv);

  return(NULL);
}

/*
 * Two alternative definitions of add(); the #else variant is the one that
 * is compiled unless NOTDEF is defined.
 * NOTE(review): the NOTDEF variant refers to np->key (other code in this
 * file uses the "name" member of a pair) and to key, val, xval, and copy,
 * none of which are declared in that function, so it does not look
 * compilable as written -- presumably it is retained for reference only.
 */
#ifdef NOTDEF
/*
 * This is the central function for adding/replacing a key/value pair.
 */
static Kwv *
add(Kwv *k, Kwv_pair *np)
{
  int exists;
  unsigned int slot;
  Kwv *kwv;

  if (np->key == NULL || np->key[0] == '\0')
	return(NULL);

  if (k == NULL) {
	/* XXX the initial allocation should be divined somehow. */
	if ((kwv = kwv_init(8)) == NULL)
	  return(NULL);
  }
  else
	kwv = k;

  VALIDATE(kwv, "before add");
  kwv->error_msg = NULL;
  exists = lookup_key(kwv, np->key, &slot);

  if (exists) {
	if (k->dup_mode == KWV_NO_DUPS) {
	  kwv->error_msg = ds_xprintf("attempt to add a duplicate key: \"%s\"",
								  np->key);
	  return(NULL);
	}

	if (k->dup_mode == KWV_REPLACE_DUPS) {
	  /* Nuke all values of this key, then reuse the slot in the vector. */
	  delete_pair(kwv, kwv->pairs[slot]);
	  /* XXX is a copy of NP needed?? */
	  kwv->pairs[slot] = np;
	  kwv->npairs++;
	  VALIDATE(kwv, "after add/replace");
	  return(kwv);
	}
  }
  
  if (kwv->nused == kwv->nalloc) {
	if (kwv_expand(kwv) == -1)
	  return(NULL);		/* XXX */
  }

  if ((np = make_pair(NULL, key, val, xval, copy)) == NULL) {
	kwv->error_msg = "make_pair failed";
	return(NULL);
  }

  if (exists) {
	if (kwv->prepend_flag)
	  prepend_to_pair_list(&kwv->pairs[slot], np);
	else
	  append_to_pair_list(&kwv->pairs[slot], np);
  }
  else {
	kwv->pairs[kwv->nused] = np;
	kwv->nused++;
  }
  kwv->npairs++;

  VALIDATE(kwv, "after add");
  return(kwv);
}
#else
/*
 * Wrap KEY, VAL, and XVAL in a temporary single-element pair list and pass
 * it to add_pair_list() with the given COPY mode, returning its result.
 */
static Kwv *
add(Kwv *k, char *key, char *val, void *xval, copy_t copy)
{
  Kwv_pair pair;

  pair.name = key;
  pair.val = val;
  pair.xval = xval;
  pair.next = NULL;

  return(add_pair_list(k, &pair, copy));
}
#endif

/*
 * Parse STR into a newly created Kwv; equivalent to calling kwv_make_sep()
 * with no existing Kwv.
 */
Kwv *
kwv_make_new(char *str, Kwv_conf *conf)
{
  Kwv *created;

  created = kwv_make_sep(NULL, str, conf);

  return(created);
}

/*
 * Parse STR and add its pairs to KWV; equivalent to calling kwv_make_sep()
 * with the same arguments.
 */
Kwv *
kwv_make_add(Kwv *kwv, char *str, Kwv_conf *conf)
{
  Kwv *result;

  result = kwv_make_sep(kwv, str, conf);

  return(result);
}

/*
 * Return a pointer to the first character at or after P that is not one of
 * the configured padding characters (conf->ws_pad_chars).
 * If no padding characters are configured, P is returned unchanged.
 * The scan also stops at any non-printable character; this is what stops it
 * at the end of the string, because strchr() matches the terminating NUL.
 */
static inline char *
ws_skip(char *p, const Kwv_conf *conf)
{
  char *cur;

  if (conf == NULL || conf->ws_pad_chars == NULL)
	return(p);

  cur = p;
  while (strchr(conf->ws_pad_chars, (int) *cur) != NULL
		 && isprint((int) *cur))
	cur++;

  return(cur);
}

/*
 * Advance *P past a run of end-of-line characters ('\n' and '\r') and, when
 * QUOTE is NULL and conf->multi_mode is configured, past any of the
 * multi_mode characters as well.
 * Return 1 if *P was advanced, 0 if it was left unchanged.
 */
static inline int
eol_skip(char **p, char *quote, const Kwv_conf *conf)
{
  char *cur;

  for (cur = *p; *cur != '\0'; cur++) {
	if (*cur == '\n' || *cur == '\r')
	  continue;
	if (quote == NULL && conf->multi_mode != NULL
		&& strchr(conf->multi_mode, (int) *cur) != NULL)
	  continue;
	break;
  }

  if (cur == *p)
	return(0);

  *p = cur;
  return(1);
}

/*
 * Given a string with one keyword<sep>value per line, add each pair to K
 * (or to a newly created Kwv if K is NULL) and return the Kwv.
 * Ignore leading whitespace and lines whose first non-whitespace character
 * is a '#'.
 * The separator can be any printable character that cannot appear in a
 * keyword.
 * If USER_CONF is NULL, a built-in default configuration is used.
 * If the configuration has an eval_func, it is called for each pair; a
 * return of -1 aborts the parse, 0 skips the pair, and any other value adds
 * the pair with the value that the function stored.
 *
 * Return NULL if an error occurs.  In that case a Kwv created by this call
 * is freed, but a caller-supplied K is not and keeps whatever pairs were
 * added before the error was detected.
 * All working storage, including the folded copy of STR, is released before
 * returning, whether or not the parse succeeds.
 */
Kwv *
kwv_make_sep(Kwv *k, char *str, Kwv_conf *user_conf)
{
  int in_quote;
  char *fstr, *key, *p, *quote, *sep, *sp, *space, *val;
  char *startp, *endp;
  const char *quote_chars;
  Ds *ds;
  Kwv *kwv;
  const Kwv_conf *conf;
  static const char *default_quote_chars = "'\"";
  static const Kwv_conf default_conf = {
	"=", "'\"", NULL, KWV_CONF_DEFAULT, NULL, 10, NULL, NULL
  };

  kwv = NULL;
  ds = NULL;
  space = NULL;

  if (user_conf == NULL)
	conf = &default_conf;
  else
	conf = user_conf;

  if (k == NULL) {
	unsigned int nalloc;

	if (conf->init_nalloc == 0)
	  nalloc = 10;		/* One guess is as good as another... */
	else
	  nalloc = conf->init_nalloc;
	if ((kwv = kwv_init(nalloc)) == NULL)
	  goto fail;
  }
  else
	kwv = k;

  kwv->error_msg = NULL;

  /* Default is for whitespace to separate the key from the value. */
  if ((sep = conf->sep) == NULL)
	sep = " \t";
  for (p = sep; *p != '\0'; p++) {
	if (!isprint((int) *p) && *p != '\t') {
	  kwv->error_msg = "invalid separator character";
	  goto fail;
	}

	/* XXX padding must also be printable */
	if (conf->ws_pad_chars != NULL
		&& strchr(conf->ws_pad_chars, *p) != NULL) {
	  kwv->error_msg = "separator cannot be a padding character";
	  goto fail;
	}
  }

  if (conf->quote_chars != NULL)
	quote_chars = conf->quote_chars;
  else
	quote_chars = default_quote_chars;

  ds = strfold(str);
  fstr = ds_buf(ds);

  /*
   * Scratch space for one key and one value at a time.  Both come from the
   * same line of FSTR, with at least a separator between them, so the whole
   * string's length plus one is always enough for the two terminated strings.
   */
  space = (char *) malloc((unsigned) (strlen(fstr) + 1));
  if (space == NULL) {
	kwv->error_msg = "malloc failed";
	goto fail;
  }

  p = fstr;
  in_quote = 0;
  quote = NULL;

  while (*p != '\0') {
	/* Skip leading whitespace */
	while (*p != '\0' && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r'))
	  p++;
	if (*p == '\0')
	  break;

	/* Ignore a comment */
	if (*p == '#') {
	  while (*p != '\n' && *p != '\0')
		p++;
	  continue;
	}

	/*
	 * Find the end of the keyword, which is marked by a non-printable
	 * character (an error) or a designated separator character
	 * which may be optionally surrounded by padding characters from
	 * ws_pad_chars.
	 * Note that the separator might be a space character.
	 */
	startp = p;
	while (isprint((int) *p) && strchr(sep, *p) == NULL
		   && ws_skip(p, conf) == p)
	  p++;
	endp = p;

	/*
	 * We've found the end of the keyword.
	 * If we're not looking at a separator, we must see some padding
	 * followed by a separator.
	 */
	if (*p == '\0')
	  goto fail;

	if (strchr(sep, *p) == NULL) {
	  p = ws_skip(p, conf);
	  if (strchr(sep, *p) == NULL)
		goto fail;
	}

	p++;
	if (conf->mode & KWV_CONF_MULTISEP) {
	  while (isprint((int) *p) && strchr(sep, (int) *p) != NULL)
		p++;
	}

	/* Skip any padding that follows the separator. */
	p = ws_skip(p, conf);

	/* The key goes at the start of the scratch space, the value after it. */
	sp = space;
	cpyn(sp, startp, endp - startp);

	key = sp;
	sp += (endp - startp) + 1;
	val = sp;

	if (*p != '\0' && (quote = strchr(quote_chars, *p)) != NULL) {
	  if (conf->mode & KWV_CONF_KEEPQ)
		*sp++ = *p;
	  p++;
	  in_quote = 1;
	}
	else {
	  /*
	   * If quote characters have been configured and they're required,
	   * but none is found, it's an error.
	   */
	  if (quote_chars != NULL && *quote_chars != '\0'
		  && (conf->mode & KWV_CONF_QUOTED))
		goto fail;
	  in_quote = 0;
	}

	while (*p != '\0' ) {
	  /*
	   * If it's the end of the line or, if not inside quotes, any of the
	   * multi_mode characters, then advance until we see something else
	   * and then continue with the next line.
	   */
	  if (eol_skip(&p, quote, conf))
		break;

	  if (*p == '\\' && *(p + 1) == '\0')
		goto fail;
	  /*
	   * The only thing that can be quoted (using a backslash) is the
	   * quote character.  Except for those cases, the backslash has no
	   * special interpretation.
	   *
	   * We used to recognize quoting of the backslash, but that led to
	   * problems in upper layers because they also handle backslashes.
	   * XXX this newer behaviour may need to be configurable but it seems
	   * ok so far
	   */
	  if (*p == '\\' && quote != NULL && *(p + 1) == *quote)
		p++;
#ifdef NOTDEF
	  else if (*p == '\\' && *(p + 1) == '\\')
		p++;
#else
	  else if (*p == '\\' && *(p + 1) == '\\')
		*sp++ = *p++;
#endif
	  else if (quote != NULL && *quote == *p) {
		if (conf->mode & KWV_CONF_KEEPQ)
		  *sp++ = *p;
		p++;
		if (!in_quote)
		  goto fail;
		in_quote = 0;
		if (eol_skip(&p, NULL, conf))
		  break;

		break;
	  }
	  *sp++ = *p++;
	}
	*sp++ = '\0';

	if (conf->eval_func != NULL) {
	  int st;
	  char *new_val;

	  st = (conf->eval_func)(key, val, conf, conf->eval_arg, &new_val);
	  if (st == -1)
		goto fail;
	  if (st != 0) {
		Kwv_pair pair;

		pair.name = key;
		pair.val = new_val;
		pair.xval = NULL;
		pair.next = NULL;

		if (add_pair_list(kwv, &pair, DO_COPY) == NULL)
		  goto fail;
	  }
	}
	else {
	  Kwv_pair pair;

	  pair.name = key;
	  pair.val = val;
	  pair.xval = NULL;
	  pair.next = NULL;

	  /* They are copied so that they can be individually freed and zeroed. */
	  if (add_pair_list(kwv, &pair, DO_COPY) == NULL)
		goto fail;
	}
  }

  /* The input ended inside a quoted value. */
  if (in_quote)
	goto fail;

  /*
   * Every key and value was copied into the kwv, so neither the scratch
   * space nor the folded input is needed any longer.
   */
  free(space);
  ds_free(ds);
  return(kwv);

 fail:
  if (ds != NULL)
	ds_free(ds);
  if (space != NULL)
	free(space);
  if (k == NULL && kwv != NULL)
	kwv_free(kwv);

  return(NULL);
}

/*
 * Allocate a new pair holding copies of KEY and VAL, and XVAL as given.
 * Return NULL if KEY is NULL or empty, or if the pair cannot be allocated.
 */
Kwv_pair *
kwv_new_pair(char *key, char *val, void *xval)
{

  if (key != NULL && key[0] != '\0')
	return(make_pair(NULL, key, val, xval, DO_COPY));

  return(NULL);
}

/*
 * Initialize PAIR with copies of KEY and VAL, and XVAL as given; if PAIR is
 * NULL a new pair is allocated instead.  Any previous contents of PAIR are
 * overwritten without being freed.
 * Return the pair, or NULL (leaving PAIR untouched) if KEY is NULL or empty.
 */
Kwv_pair *
kwv_set_pair(Kwv_pair *pair, char *key, char *val, void *xval)
{

  if (key != NULL && key[0] != '\0')
	return(make_pair(pair, key, val, xval, DO_COPY));

  return(NULL);
}

/*
 * Create a new pair from KEY and VALUE and add it to an existing Kwv (or
 * create a new Kwv, if K == NULL).
 * If duplicates are allowed they are appended.
 */
Kwv *
kwv_add(Kwv *k, char *key, char *val)
{
  Kwv *result;
  Kwv_pair tmp;

  /* A one-element list; add_pair_list() duplicates the strings. */
  tmp.next = NULL;
  tmp.xval = NULL;
  tmp.name = key;
  tmp.val = val;

  result = add_pair_list(k, &tmp, DO_COPY);

  return(result);
}

/*
 * Add the pair list PAIR to K (or to a new Kwv if K is NULL), duplicating
 * its name and value strings.  Return the Kwv, or NULL on failure.
 */
Kwv *
kwv_add_pair(Kwv *k, Kwv_pair *pair)
{
  Kwv *result;

  result = add_pair_list(k, pair, DO_COPY);

  return(result);
}

/*
 * Like kwv_add_pair() except that the name and value strings of PAIR are
 * passed to add_pair_list() with NO_COPY rather than being duplicated.
 */
Kwv *
kwv_add_pair_nocopy(Kwv *k, Kwv_pair *pair)
{
  Kwv *result;

  result = add_pair_list(k, pair, NO_COPY);

  return(result);
}

/*
 * Like kwv_add() except that KEY and VAL are passed to add_pair_list() with
 * NO_COPY rather than being duplicated.
 */
Kwv *
kwv_add_nocopy(Kwv *k, char *key, char *val)
{
  Kwv *result;
  Kwv_pair tmp;

  tmp.next = NULL;
  tmp.xval = NULL;
  tmp.name = key;
  tmp.val = val;

  result = add_pair_list(k, &tmp, NO_COPY);

  return(result);
}

/*
 * Add the pair list PAIR to K so that it replaces any existing values of
 * its key, regardless of K's current duplicate mode (which is restored
 * before returning).  If K is NULL, a new Kwv is created instead.
 * Return the Kwv, or NULL if add_pair_list() fails.
 */
static Kwv *
replace(Kwv *k, Kwv_pair *pair, copy_t copy)
{
  Kwv *kwv, *result;
  Kwv_dup_mode dm;

  if (k == NULL) {
	/* Nothing to replace; this is simply an add to a new Kwv. */
	if ((kwv = add_pair_list(NULL, pair, copy)) == NULL)
	  return(NULL);
  }
  else {
	kwv = k;
	/* Validate only once KWV refers to a real structure. */
	VALIDATE(kwv, "before replace");
	kwv->error_msg = NULL;

	/* Temporarily force replace mode for this one operation. */
	dm = (Kwv_dup_mode) kwv->dup_mode;
	kwv->dup_mode = KWV_REPLACE_DUPS;
	result = add_pair_list(kwv, pair, copy);
	kwv->dup_mode = dm;

	if (result == NULL)
	  return(NULL);
  }

  VALIDATE(kwv, "after replace");
  return(kwv);
}

/*
 * Set KEY to VAL in KWV, replacing any existing values of KEY; if KWV is
 * NULL a new Kwv is created.  KEY and VAL are duplicated.
 * Return the Kwv, or NULL on failure (including a NULL or empty KEY).
 */
Kwv *
kwv_replace(Kwv *kwv, char *key, char *val)
{
  Kwv_pair pair;

  /*
   * Describe the pair without duplicating the strings here: the add/replace
   * code makes its own copies, so copies made at this point would be leaked.
   * Filling in every field also means that an invalid KEY is rejected
   * downstream instead of an uninitialized pair being examined.
   */
  pair.name = key;
  pair.val = val;
  pair.xval = NULL;
  pair.next = NULL;

  if (kwv == NULL)
	return(kwv_add_pair(NULL, &pair));

  return(replace(kwv, &pair, DO_COPY));
}

/*
 * Add the pair list PAIR to KWV, replacing any existing values of its key
 * and duplicating its strings.  Return the Kwv, or NULL on failure.
 */
Kwv *
kwv_replace_pair(Kwv *kwv, Kwv_pair *pair)
{
  Kwv *result;

  result = replace(kwv, pair, DO_COPY);

  return(result);
}

/*
 * Very simple kwv parsing.
 * If STR looks like <name>=<value>, where <name> is everything to the left
 * of the '=' and <value> is everything to the right, then return 0 and
 * optionally set NAME and VALUE; otherwise, return -1.
 * NAME or VALUE or both can be empty strings.
 */
/*
 * STR is split at its first '=' in a single duplicate of the string:
 * NAME (if requested) is set to the start of the duplicate and VALUE (if
 * requested) points just past the '=' within that same allocation.
 * The caller therefore releases both by freeing NAME only; VALUE must never
 * be passed to free().
 * If neither NAME nor VALUE is requested, the duplicate is freed here.
 * Return -1, with nothing allocated, if STR is NULL, cannot be duplicated,
 * or contains no '='.
 */
int
kwv_parse_str(char *str, char **name, char **value)
{
  char *eq, *s;

  if (str == NULL)
	return(-1);

  if ((s = strdup(str)) == NULL)
	return(-1);

  if ((eq = strchr(s, '=')) == NULL) {
	/* Not in name=value form; do not leak the duplicate. */
	free(s);
	return(-1);
  }

  *eq++ = '\0';

  if (name != NULL)
	*name = s;
  if (value != NULL)
	*value = eq;

  /* A syntax check only: nothing refers to the duplicate. */
  if (name == NULL && value == NULL)
	free(s);

  return(0);
}

/*
 * Initialize a parse configuration for kwv_parse_qstr() with the given
 * FLAGS, separator character, and end character, and clear its error
 * message.
 * If C is NULL a new configuration is allocated; otherwise C is initialized
 * in place.
 * Return the initialized configuration, or NULL if one had to be allocated
 * and the allocation failed.
 */
Kwv_parse_conf *
kwv_parse_init(Kwv_parse_conf *c, int flags, int sep_char, int end_char)
{
  Kwv_parse_conf *conf;

  if (c == NULL) {
	if ((conf = ALLOC(Kwv_parse_conf)) == NULL)
	  return(NULL);
  }
  else
	conf = c;

  /* Always go through CONF: C is NULL when the structure was allocated here. */
  conf->flags = flags;
  conf->sep_char = sep_char;
  conf->end_char = end_char;
  conf->errmsg = NULL;

  return(conf);
}

/*
 * Parse a <name> '=' <value> string.
 * (A possible replacement for kwv_make_sep())
 * Returns 0 if the parse succeeds, -1 otherwise.
 * NAME, VALUE, and ENDP are all optional (useful when only a syntax scan
 * is required), and are set only if the parse succeeds.
 * If requested, ENDP is set to the first character not part of <value> (other
 * than a quote); this facilitates a series of calls to process multiple
 * name/value pairs in STR.
 * Note: whitespace consists of any number of spaces and tabs.
 *
 * The <name>, which must be present, may be preceded by whitespace and is
 * terminated by whitespace or the separator character (default is '=').
 * The <value> may be absent, in which case it is set to the null string.
 * The <value> may be surrounded by matching single or double quotes,
 * which are by default stripped from the value; if the first non-whitespace
 * character following the separator is a quote, then the leading whitespace
 * is ignored and the value is terminated by a matching quote; if the first
 * non-whitespace character is not a quote, then all characters following the
 * separator are part of the <value> and any subsequent quotes are not
 * special.
 * The <value> is terminated by a matching quote, a newline, an optional
 * specified character, or the end of STR.
 *
 * Some default behaviour can be overridden by providing CONF, initialized
 * using kwv_parse_init().
 * Its FLAGS field (ORed bit flags) may be:
 *   KWV_PARSE_DEFAULT: explicitly requests the defaults.
 *   KWV_PARSE_NEED_VALUE: a null value (e.g., "a=") is not
 *     allowed; this does not apply to quoted values (e.g., "a=''").
 *   KWV_PARSE_NEED_QUOTES: the value must be within quotes.
 *   KWV_PARSE_KEEP_QUOTES: if the value is quoted, the quotes will
 *     be returned as part of the value.
 *   KWV_PARSE_INTERNAL_QUOTES: quotes embedded within the value
 *     are not treated specially (only the two surrounding quotes, if present,
 *     are special).
 *
 * XXX incorporate a Kwv_parse_flags argument
 */
int
kwv_parse_qstr(char *str, char **name, char **value, char **endp,
			   Kwv_parse_conf *conf)
{
  int end_char, quote_char, sep_char;
  char *name_end, *name_start, *p, *value_end, *value_start;
  char *end_ptr, *errmsg;
  static int default_sep_char = '=';

  errmsg = NULL;
  end_char = '\0';
  sep_char = default_sep_char;

  if (conf != NULL) {
	if (conf->sep_char != '\0')
	  sep_char = conf->sep_char;
	if (conf->end_char != '\0')
	  end_char = conf->end_char;
  }

  /* Always skip initial whitespace. */
  for (p = str; *p != '\0'; p++) {
	if (*p != ' ' && *p != '\t')
	  break;
  }
  name_start = p;

  /*
   * Advance just past the end of the name, which should be whitespace
   * or the separator character.
   */
  for (p = name_start; *p != sep_char && *p != '\0'; p++) {
	if (*p == ' ' || *p == '\t')
	  break;
  }
  name_end = p;

  if (name_end == name_start) {
	/* A name is required. */
	errmsg = "No name was found";
	goto fail;
  }

  /* Skip optional whitespace following the name and before the separator. */
  while (*p == ' ' || *p == '\t')
	p++;

  /* We need to be looking at the separator now. */
  if (*p++ != sep_char) {
	errmsg = "Separator character was not found";
	goto fail;
  }

  /*
   * If the value is quoted, there may be optional whitespace to skip
   * before the leading quote.  If the value is not quoted, it may include
   * initial whitespace.
   */
  value_start = p;
  while (*p == ' ' || *p == '\t')
	p++;

  /*
   * Is the value quoted?
   * If there are optional quotes surrounding the value, a) discard any text
   * between the separator and the leading quote, b) the quotes are not part of
   * the value by default, and c) the quotes must match.
   */
  if (*p == '"' || *p == '\'') {
	quote_char = *p;
	if (conf == NULL || (conf->flags & KWV_PARSE_KEEP_QUOTES) == 0)
	  value_start = ++p;
	else
	  value_start = p++;
  }
  else {
	if (conf != NULL && (conf->flags & KWV_PARSE_NEED_QUOTES)) {
	  errmsg = "Value must be quoted";
	  goto fail;
	}
	quote_char = '\0';
  }

  /*
   * Advance to the end of the value, which may be a) a matching quote,
   * b) a newline, c) a specified character, or d) the end of string.
   * It is an error if a matching quote is not present.
   */
  while (*p != '\0' && *p != '\n' && *p != end_char) {
	if (*p == quote_char) {
	  if (conf == NULL || (conf->flags & KWV_PARSE_INTERNAL_QUOTES) == 0
		  || *(p + 1) == '\n' || *(p + 1) == '\0' || *(p + 1) == end_char)
		break;
	}
	p++;
  }

  /*
   * Work out where the value and the parse end, but do not store anything
   * through the caller's pointers yet: there is still a check that can fail.
   */
  if (quote_char != '\0') {
	/* Check that the matching quote is there. */
	if (*p != quote_char) {
	  errmsg = "No matching quote was found";
	  goto fail;
	}
	if (conf == NULL || (conf->flags & KWV_PARSE_KEEP_QUOTES) == 0) {
	  value_end = p;
	  end_ptr = p + 1;
	}
	else {
	  value_end = ++p;
	  end_ptr = value_end;
	}
  }
  else {
	value_end = p;
	end_ptr = p;
  }

  if (quote_char == '\0'
	  && conf != NULL && (conf->flags & KWV_PARSE_NEED_VALUE)
	  && value_end == value_start) {
	errmsg = "Required value was not found";
	goto fail;
  }

  /*
   * The parse has succeeded; only now are NAME, VALUE, and ENDP set, so a
   * failed parse neither allocates nor modifies any of them.
   */
  if (name != NULL)
	*name = strndup(name_start, name_end - name_start);

  if (value != NULL) {
	if (value_end == value_start) {
	  /* A null value... */
	  *value = strdup("");
	}
	else
	  *value = strndup(value_start, value_end - value_start);
  }

  if (endp != NULL)
	*endp = end_ptr;

  if (conf != NULL)
	conf->errmsg = NULL;

  return(0);

 fail:
  if (conf != NULL)
	conf->errmsg = errmsg;

  return(-1);
}

/*
 * Parse STR, in <name> '=' <value> format, and add the pair to K
 * (or create a new Kwv).
 * Set ENDP to the character that terminates the parse.
 */
Kwv *
kwv_add_qstr(Kwv *k, char *str, char **endp, Kwv_parse_conf *conf)
{
  Kwv *kwv;
  Kwv_pair pair;

  pair.name = NULL;
  pair.val = NULL;
  pair.xval = NULL;
  pair.next = NULL;

  if (k != NULL)
	k->error_msg = NULL;

  if (kwv_parse_qstr(str, &pair.name, &pair.val, endp, conf) == -1) {
	if (k != NULL)
	  k->error_msg = "string parse failed";
	return(NULL);
  }

  /*
   * The parsed strings are freshly allocated, so they are handed over
   * without being copied.  If the add fails they were not adopted and must
   * be released here.
   */
  kwv = add_pair_list(k, &pair, NO_COPY);
  if (kwv == NULL) {
	free(pair.name);
	free(pair.val);
  }

  return(kwv);
}

/*
 * STR is a string of the form 'KEYWORD=VALUE'.
 * Create a new pair and add it to an existing Kwv (or create a new
 * Kwv, if K == NULL)
 * XXX this should take a Kwv_conf argument (quotes are not stripped off of
 * VALUE).
 */
Kwv *
kwv_add_str(Kwv *k, char *str)
{
  Kwv *kwv;
  Kwv_pair pair;

  pair.name = NULL;
  pair.val = NULL;
  pair.xval = NULL;
  pair.next = NULL;

  if (k != NULL)
	k->error_msg = NULL;

  if (kwv_parse_str(str, &pair.name, &pair.val) == -1) {
	if (k != NULL)
	  k->error_msg = "string parse failed";
	return(NULL);
  }

  /*
   * kwv_parse_str() returns the name and the value within one allocation
   * (the value points just past the '=' in the name's buffer), so the value
   * cannot be stored as-is: it would later be passed to free() on its own.
   * Have the strings duplicated instead, then release the parse buffer,
   * whether or not the add succeeded.
   */
  kwv = add_pair_list(k, &pair, DO_COPY);
  free(pair.name);

  return(kwv);
}

/*
 * Return 1 if CH is a space, tab, or newline, and 0 otherwise.
 */
static int
parse_xml_whitespace(int ch)
{

  switch (ch) {
  case ' ':
  case '\t':
  case '\n':
	return(1);
  default:
	return(0);
  }
}

/*
 * Try to extract the next attrname=attrvalue pair from STR, where
 * the attrvalue is surrounded by matching single or double quotes.
 * If successful, return 1 and set NAME and VALUE to newly allocated copies of
 * the attrname and attrvalue, respectively, and ENDPTR to the character that
 * immediately follows the closing quote of VALUE (which must be whitespace,
 * '/', or '>').
 * If there is no such pair, return 0 and set ENDPTR to the first non-whitespace
 * character.  If an error occurs, simply return -1.
 */
static int
parse_xml_attrpair(char *str, char **name, char **value, char **endptr)
{
  int quote_ch;
  char *name_end, *name_start, *p, *value_end, *value_start;

  p = str;
  while (parse_xml_whitespace((int) *p))
	p++;

  if (*p == '/' || *p == '>' || *p == '\0') {
	*endptr = p;
	return(0);
  }
  name_start = p;

  while (!parse_xml_whitespace((int) *p) && *p != '=' && *p != '\0')
	p++;
  if (!parse_xml_whitespace((int) *p) && *p != '=')
	return(-1);
  name_end = p;

  while (parse_xml_whitespace((int) *p))
	p++;
  if (*p != '=')
	return(-1);

  p++;
  while (parse_xml_whitespace((int) *p))
	p++;
  if (*p != '\'' && *p != '"')
	return(-1);
  quote_ch = *p++;
  value_start = p;

  while (*p != quote_ch && *p != '\0')
	p++;
  if (*p == '\0')
	return(-1);
  value_end = p++;

  if (!parse_xml_whitespace((int) *p) && *p != '/' && *p != '>')
	return(-1);

  *name = strndup(name_start, name_end - name_start);
  *value = strndup(value_start, value_end - value_start);
  *endptr = p;

  return(1);
}

/*
 * Put XML into its initial state: unknown tag type, no element name, and
 * no attribute Kwv.  If XML is NULL, a Kwv_xml obtained from ALLOC() is
 * initialized instead.
 * Return the initialized structure.
 */
Kwv_xml *
kwv_xml_init(Kwv_xml *xml)
{
  Kwv_xml *kx;

  kx = xml;
  if (kx == NULL)
	kx = ALLOC(Kwv_xml);

  kx->kwv_attrs = NULL;
  kx->el_name = NULL;
  kx->type = KWV_XML_UNKNOWN_TAG;

  return(kx);
}

/*
 * Simplified XML element parsing.
 * Parse a single tag in STR into the element name and a (possibly empty)
 * set of attribute/value pairs.  Add the pairs to XML if it is non-NULL,
 * otherwise return a new Kwv_xml.  Each attribute value must be surrounded by
 * matching single or double quotes and may not contain any matching
 * embedded quotes (they must be XML escaped as &quot; or &apos;), but any
 * other character may appear.  The value is not decoded.
 *
 * These (empty) elements may be nested - it is up to the caller to validate
 * nesting; e.g.,
 *   <foo a="1"> <bar b="2"/> <baz c="3"></baz> </foo>
 *
 * This function accepts a slightly more general (empty) element syntax than
 * does XML.
 *
 * STR looks like:
 *  [whitespace]* '<' [el_name] {[whitespace]+ [attr]}* [whitespace]* ['/']? '>'
 * where an attr looks like:
 *   [attr_name] [whitespace]* '=' [whitespace]* [quote] [attr_val] [quote]
 * Add the [attr_name]/[attr_val] pair to X->KWV_ATTRS, set X->EL_NAME to
 * [el_name], and optionally set ENDPTR to point to the first character
 * after the '>'.
 *
 * Two other forms are recognized:
 *   "<!--" ... "-->"    the type is set to KWV_XML_COMMENT, no name is set
 *   "</" [el_name] '>'  the type is set to KWV_XML_END_TAG
 * Otherwise the type is KWV_XML_EMPTY_TAG if the tag is closed by "/>" and
 * KWV_XML_START_TAG if it is closed by '>'.
 *
 * Return the Kwv_xml on success.  If STR is malformed, return NULL and
 * (if ENDPTR is non-NULL) set ENDPTR to where parsing stopped.
 * If XML was supplied by the caller, attributes accumulate in its existing
 * KWV_ATTRS; only its type and element name are reset here.
 *
 * Why is this needed?  Good question.  It is sometimes useful to be able
 * to parse (and generate) valid XML elements that have no child elements
 * without having to bring in a full XML parser.  It lets us work with
 * simple XML elements now and, provided we stay within XML syntax, gives
 * us a nice path to full XML if it is eventually needed.
 * It's a better approach than creating records with implicit fields (e.g., the
 * Unix password file) and in many applications the format is good enough.
 */
Kwv_xml *
kwv_xml_parse(Kwv_xml *xml, char *str, char **endptr)
{
  char *el_name_end, *el_name_start, *p;
  Kwv_xml *x;

  if ((x = xml) == NULL)
	x = kwv_xml_init(NULL);
  else {
	x->type = KWV_XML_UNKNOWN_TAG;
	x->el_name = NULL;
  }

  p = str;

  /* Skip initial whitespace. */
  while (parse_xml_whitespace((int) *p))
	p++;

  /* Expect the start of a tag. */
  if (*p != '<')
	goto fail;
  p++;

  /* A comment? */
  if (*p == '!' && *(p + 1) == '-' && *(p + 2) == '-') {
	p += 3;
	/*
	 * Scan for the complete "-->" terminator.  The tests short-circuit, so
	 * no character beyond the terminating null is ever examined.
	 */
	while (*p != '\0'
		   && !(*p == '-' && *(p + 1) == '-' && *(p + 2) == '>'))
	  p++;
	if (*p == '\0')
	  goto fail;
	p += 3;

	x->type = KWV_XML_COMMENT;
	goto done;
  }

  /* An end tag? */
  if (*p == '/') {
	el_name_start = ++p;
	while (*p != '>' && *p != '\0')
	  p++;
	if (*p == '\0')
	  goto fail;
	el_name_end = p;
	p++;

	x->type = KWV_XML_END_TAG;
	x->el_name = strndup(el_name_start, el_name_end - el_name_start);
	goto done;
  }

  /* A start tag; the element name ends at whitespace, '/', or '>'. */
  el_name_start = p;
  while (!parse_xml_whitespace((int) *p)
		 && *p != '/' && *p != '>' && *p != '\0')
	p++;
  el_name_end = p;

  if (x->kwv_attrs == NULL)
	x->kwv_attrs = kwv_init(8);

  /* Look for zero or more attributes. */
  while (*p != '\0') {
	int st;
	char *attr_name, *attr_value, *e;

	if ((st = parse_xml_attrpair(p, &attr_name, &attr_value, &e)) == -1)
	  goto fail;

	p = e;
	if (st == 0)
	  break;

	/*
	 * NOTE(review): whether kwv_add() copies or takes ownership of these
	 * strndup()'d strings is not visible here - confirm to rule out a leak.
	 */
	kwv_add(x->kwv_attrs, attr_name, attr_value);
  }

  /*
   * Expect the end of the tag.  How it is closed, not what follows the
   * element name, decides whether this is an empty element.
   */
  if (*p == '/') {
	if (*(p + 1) != '>')
	  goto fail;
	p += 2;
	x->type = KWV_XML_EMPTY_TAG;
  }
  else if (*p == '>') {
	p++;
	x->type = KWV_XML_START_TAG;
  }
  else
	goto fail;

  x->el_name = strndup(el_name_start, el_name_end - el_name_start);

 done:
  if (endptr != NULL)
	*endptr = p;

  return(x);

 fail:
  if (endptr != NULL)
	*endptr = p;

  return(NULL);
}

/*
 * Format KWV as an empty XML element named EL_NAME in which every pair
 * becomes an attribute.  Attribute values are wrapped in double quotes
 * after being passed through escape_xml_attribute() with '"' as the quote
 * character.
 * Return the dynamic string holding the formatted element.
 */
Ds *
kwv_xml_format(Kwv *kwv, char *el_name)
{
  Ds *out;
  Kwv_iter *it;
  Kwv_pair *attr;

  out = ds_init(NULL);
  ds_asprintf(out, "<%s", el_name);

  it = kwv_iter_begin(kwv, NULL);
  attr = kwv_iter_first(it);
  while (attr != NULL) {
	ds_asprintf(out, " %s=\"%s\"",
				attr->name, escape_xml_attribute(attr->val, (int) '"'));
	attr = kwv_iter_next(it);
  }
  kwv_iter_end(it);

  ds_asprintf(out, "/>");

  return(out);
}

/*
 * Count pairs in KWV.
 * With a NULL KEY the answer is the total number of pairs; otherwise it is
 * the number of values stored under KEY (zero if KEY is not found).
 * A NULL KWV yields zero.
 */
int
kwv_count(Kwv *kwv, char *key)
{
  int count;
  unsigned int where;
  Kwv_pair *p;

  if (kwv == NULL)
	return(0);

  kwv->error_msg = NULL;

  if (kwv->npairs == 0)
	return(0);
  if (key == NULL)
	return(kwv->npairs);
  if (lookup_key(kwv, key, &where) == 0)
	return(0);

  /* The head of the list counts as one; walk the rest of the chain. */
  count = 1;
  p = kwv->pairs[where]->next;
  while (p != NULL) {
	count++;
	p = p->next;
  }

  return(count);
}

/*
 * Return 1 if KEY has more than one value in KWV, and 0 otherwise
 * (including when KWV or KEY is NULL, or KEY is not found).
 */
int
kwv_dups(Kwv *kwv, char *key)
{
  unsigned int where;

  if (kwv == NULL || key == NULL)
	return(0);

  kwv->error_msg = NULL;

  if (kwv->npairs == 0)
	return(0);
  if (lookup_key(kwv, key, &where) == 0)
	return(0);

  /* A second element on the chain means the key is duplicated. */
  return((kwv->pairs[where]->next != NULL) ? 1 : 0);
}

/*
 * Make a copy of KWV.
 * Every non-empty slot's pair list is added to a freshly initialized Kwv
 * with DO_COPY, after which the configuration flags and a duplicate of any
 * error message are carried over.
 * Return the new Kwv, or NULL if KWV is NULL.
 */
Kwv *
kwv_copy(Kwv *kwv)
{
  int slot;
  Kwv *copy;

  if (kwv == NULL)
	return(NULL);

  /* Size the copy like the original, falling back to 16 if it is empty. */
  if (kwv->nused)
	copy = kwv_init(kwv->nused);
  else
	copy = kwv_init(16);

  slot = 0;
  while (slot < kwv->nused) {
	Kwv_pair *list;

	list = kwv->pairs[slot];
	if (list != NULL)
	  add_pair_list(copy, list, DO_COPY);
	slot++;
  }

  /* The settings are applied only after the pairs have been added. */
  copy->dup_mode = kwv->dup_mode;
  copy->clear_flag = kwv->clear_flag;
  copy->icase = kwv->icase;
  copy->regex_flag = kwv->regex_flag;
  copy->prepend_flag = kwv->prepend_flag;

  if (kwv->error_msg == NULL)
	copy->error_msg = NULL;
  else
	copy->error_msg = strdup(kwv->error_msg);

  return(copy);
}

/*
 * Delete all occurrences of KEY from KWV.
 * The vector slot that held KEY is reclaimed by moving every following
 * slot up by one position.
 * Return 0 on success, or -1 if KEY is not found.
 */
int
kwv_delete(Kwv *kwv, char *key)
{
  unsigned int pos, victim;

  if (lookup_key(kwv, key, &victim) == 0)
	return(-1);

  delete_pair(kwv, kwv->pairs[victim]);

  /* Close the gap left in the vector. */
  if (victim < kwv->nused) {
	pos = victim;
	while (pos < kwv->nused - 1) {
	  kwv->pairs[pos] = kwv->pairs[pos + 1];
	  pos++;
	}
	kwv->pairs[pos] = NULL;
  }

  /* One fewer vector slot is now in use. */
  kwv->nused--;

  VALIDATE(kwv, "after delete");
  return(0);
}

/*
 * Initialize Kwv_vartab values from a Kwv.
 * VARTAB is a table of keyword names terminated by an entry whose name is
 * NULL.  For each name found in KWV, a copy of its pair list is stored in
 * the table entry.  For a name that is not found, if the entry is flagged
 * KWV_DERIVABLE, has no pair yet, and DERIVE is non-NULL, DERIVE is called
 * with the entry and DERIVE_ARG to produce the pair.
 * Return the number of table entries that were given a value (0 if KWV is
 * NULL).
 */
int
kwv_vartab_init(Kwv *kwv, Kwv_vartab *vartab,
				Kwv_pair *(*derive)(Kwv_vartab *, void *), void *derive_arg)
{
  int copied_len, ninit;
  unsigned int where;
  Kwv_vartab *vt;

  if (kwv == NULL)
	return(0);

  kwv->error_msg = NULL;

  ninit = 0;
  vt = vartab;
  while (vt->name != NULL) {
	if (lookup_key(kwv, vt->name, &where) != 0) {
	  /* The list length reported by the copy is not needed here. */
	  vt->pair = copy_pair_list(kwv->pairs[where], DO_COPY, &copied_len);
	  ninit++;
	}
	else if (derive != NULL && vt->pair == NULL
			 && (vt->type & KWV_DERIVABLE)) {
	  vt->pair = derive(vt, derive_arg);
	  if (vt->pair != NULL)
		ninit++;
	}
	vt++;
  }

  return(ninit);
}

/*
 * Find the entry named NAME in VARTAB, which is terminated by an entry
 * whose name is NULL.  Return the entry, or NULL if there is none.
 * There is no kwv_vartab_lookup_value() because a name may have multiple
 * values.
 */
Kwv_vartab *
kwv_vartab_lookup(Kwv_vartab *vartab, char *name)
{
  Kwv_vartab *entry;

  entry = vartab;
  while (entry->name != NULL) {
	if (streq(entry->name, name))
	  return(entry);
	entry++;
  }

  return(NULL);
}

/*
 * Create a new table with the same ids, names (duplicated), and types as
 * VARTAB, but with no pairs attached to any entry.  The new table ends
 * with a terminating entry whose id is KWV_VARTAB_END_ID and whose name is
 * NULL.
 * Return the new table, or NULL if VARTAB is NULL.
 */
Kwv_vartab *
kwv_vartab_new(Kwv_vartab *vartab)
{
  int count, i;
  Kwv_vartab *copy;

  if (vartab == NULL)
	return(NULL);

  /* Count the entries that precede the terminator. */
  count = 0;
  while (vartab[count].name != NULL)
	count++;

  copy = ALLOC_N(Kwv_vartab, count + 1);
  for (i = 0; i < count; i++) {
	copy[i].id = vartab[i].id;
	copy[i].name = strdup(vartab[i].name);
	copy[i].type = vartab[i].type;
	copy[i].pair = NULL;
  }

  /* Terminate the new table. */
  copy[count].id = KWV_VARTAB_END_ID;
  copy[count].name = NULL;
  copy[count].type = KWV_NULL;
  copy[count].pair = NULL;

  return(copy);
}

/*
 * Verify that VARTAB satisfies the constraints on the presence and number
 * of occurrences of variables, and that no unknown variable names appear in
 * KWV.
 * If a table entry violates its constraint, set ERRMSG, set VARTAB_ERR to
 * the offending entry, and return -1.
 * If KWV holds a name that is not in VARTAB, set ERRMSG, set KW_ERR to the
 * offending pair, and return -1.
 * Otherwise return 0 with all three outputs set to NULL.
 * A NULL KWV, and empty slots within KWV, are skipped by the unknown-name
 * check.
 */
int
kwv_vartab_check(Kwv *kwv, Kwv_vartab *vartab, 
				 Kwv_pair **kw_err, Kwv_vartab **vartab_err, char **errmsg)
{
  int i, j;
  unsigned int ui;

  *vartab_err = NULL;
  *kw_err = NULL;
  *errmsg = NULL;

  /* First pass: each table entry against its own occurrence constraint. */
  for (i = 0; vartab[i].name != NULL; i++) {
	if ((vartab[i].type & KWV_TYPE_MASK) == KWV_REQUIRED
		&& vartab[i].pair == NULL) {
	  *errmsg = "Variable is required at least once";
	  *vartab_err = &vartab[i];
	  return(-1);
	}
	else if ((vartab[i].type & KWV_TYPE_MASK) == KWV_REQUIRED1
			 && (vartab[i].pair == NULL || vartab[i].pair->next != NULL)) {
	  *errmsg = "Variable is required exactly once";
	  *vartab_err = &vartab[i];
	  return(-1);
	}
	else if ((vartab[i].type & KWV_TYPE_MASK) == KWV_OPTIONAL1
			 && vartab[i].pair != NULL && vartab[i].pair->next != NULL) {
	  *errmsg = "Duplicate optional-variable is forbidden";
	  *vartab_err = &vartab[i];
	  return(-1);
	}
	else if ((vartab[i].type & KWV_TYPE_MASK) == KWV_UNUSED
			 && vartab[i].pair != NULL) {
	  *errmsg = "Obsolete variable is defined";
	  *vartab_err = &vartab[i];
	  return(-1);
	}
	else if ((vartab[i].type & KWV_TYPE_MASK) == KWV_RESERVED
			 && vartab[i].pair != NULL) {
	  *errmsg = "Reserved variable is defined";
	  *vartab_err = &vartab[i];
	  return(-1);
	}
  }

  /* Without a Kwv there are no names that could be unknown. */
  if (kwv == NULL)
	return(0);

  /* Second pass: every name in KWV must appear in the table. */
  for (ui = 0; ui < kwv->nused; ui++) {
	/* Other functions in this file tolerate empty slots; do the same. */
	if (kwv->pairs[ui] == NULL)
	  continue;

	for (j = 0; vartab[j].name != NULL; j++) {
	  if (streq(kwv->pairs[ui]->name, vartab[j].name))
		break;
	}

	if (vartab[j].name == NULL) {
	  *errmsg = "Unrecognized keyword, variable, or directive found";
	  *kw_err = kwv->pairs[ui];
	  return(-1);
	}
  }

  return(0);
}

/*
 * Initializer for iterating through the elements of a Kwv data structure.
 * The iterator state is maintained so that an element returned by
 * kwv_iter_first() or kwv_iter_next() can be deleted (by kwv_delete()) without
 * breaking the state necessary for kwv_iter_next().
 *
 * If KEY is NULL the iterator covers every pair.  Otherwise it is
 * positioned at KEY; when KWV's regex_flag is set, KEY is compiled as an
 * extended regular expression (case-insensitive if KWV's icase is set) and
 * the iterator is positioned at the first slot found by
 * lookup_regex_from_slot().
 *
 * Return the new iterator, or NULL if KWV is NULL or has no pairs, if KEY
 * is not found, or if KEY is not a valid regular expression (in which case
 * KWV's error message is set to a copy of the regerror() text).
 */
Kwv_iter *
kwv_iter_begin(Kwv *kwv, char *key)
{
  unsigned int slot;
  Kwv_iter *iter;

  if (kwv == NULL)
	return(NULL);

  kwv->error_msg = NULL;
  if (kwv->npairs == 0)
	return(NULL);

  iter = ALLOC(Kwv_iter);
  if (key != NULL) {
	if (kwv->regex_flag) {
	  int flags, st;
	  char errbuf[100];

	  flags = REG_EXTENDED | REG_NOSUB | (kwv->icase ? REG_ICASE : 0);
	  if ((st = regcomp(&iter->regex, key, flags)) != 0) {
		errbuf[0] = '\0';
		regerror(st, &iter->regex, errbuf, sizeof(errbuf));
		kwv->error_msg = strdup(errbuf);
		/*
		 * The regex was never compiled, so it must not be passed to
		 * regfree(); only the iterator itself needs to be released.
		 */
		free(iter);
		return(NULL);
	  }

	  if (lookup_regex_from_slot(kwv, &iter->regex, 0, &slot) == 0) {
		regfree(&iter->regex);
		free(iter);
		return(NULL);
	  }
	}
	else {
	  if (lookup_key(kwv, key, &slot) == 0) {
		free(iter);
		return(NULL);
	  }
	}

	iter->current_index = slot;
	/* -1 marks a keyed iterator that has not yet returned its first pair. */
	iter->key_only = -1;
  }
  else {
	/* -1 marks an iterator that has not yet been started. */
	iter->current_index = -1;
	iter->key_only = 0;
  }

  iter->kwv = kwv;
  iter->next_linked_pair = NULL;

  return(iter);
}

/*
 * Release an iterator obtained from kwv_iter_begin().  ITER may be NULL.
 * NOTE(review): only the iterator structure is released; a regular
 * expression compiled into it by kwv_iter_begin() is not passed to
 * regfree() here - confirm whether that is released elsewhere.
 */
void
kwv_iter_end(Kwv_iter *iter)
{

  /* free() accepts NULL, so no separate check is required. */
  free(iter);
}

/*
 * Start (or restart) the iteration and return its first pair.
 * A keyed iterator yields the head of the list at its current slot; an
 * unkeyed iterator yields the head of the list in slot zero.  In both cases
 * the pair that follows on the same list is remembered for
 * kwv_iter_next().
 * Return NULL if ITER is NULL or its Kwv is NULL or has no pairs.
 */
Kwv_pair *
kwv_iter_first(Kwv_iter *iter)
{
  Kwv_pair *head;

  if (iter == NULL || iter->kwv == NULL || iter->kwv->npairs == 0)
	return(NULL);

  if (iter->key_only)
	iter->key_only = 1;			/* The keyed iteration has now started. */
  else
	iter->current_index = 0;

  head = iter->kwv->pairs[iter->current_index];
  iter->next_linked_pair = head->next;

  return(head);
}

/*
 * Return the next pair of the iteration, or NULL when it is exhausted (or
 * if ITER is NULL or its Kwv is NULL or has no pairs).
 * If the iteration has not been started yet, this behaves like
 * kwv_iter_first().
 *
 * For a keyed iterator on a Kwv whose regex_flag is set, the remaining
 * values of the current key are returned first, followed by those of the
 * next slot found by lookup_regex_from_slot().  For any other keyed
 * iterator, only the values of that key are returned.  An unkeyed iterator
 * walks every list in every non-empty slot.
 */
Kwv_pair *
kwv_iter_next(Kwv_iter *iter)
{
  Kwv_pair *pair;

  if (iter == NULL || iter->kwv == NULL || iter->kwv->npairs == 0)
	return(NULL);

  if (iter->current_index == -1 || iter->key_only == -1)
	return(kwv_iter_first(iter));

  pair = NULL;

  /*
   * The regex is compiled by kwv_iter_begin() only when a key was given,
   * so it may be consulted only for a keyed iterator; an unkeyed iterator
   * must use the plain slot walk below even if regex_flag is set.
   */
  if (iter->key_only && iter->kwv->regex_flag) {
	unsigned int slot;

	if (iter->next_linked_pair != NULL)
	  pair = iter->next_linked_pair;
	else {
	  /*
	   * There's no other value for the current key; advance to the next.
	   */
	  if ((unsigned int) ++iter->current_index >= iter->kwv->nused)
		return(NULL);
	  if (lookup_regex_from_slot(iter->kwv, &iter->regex,
								 iter->current_index, &slot) == 0)
		return(NULL);
	  iter->current_index = slot;
	  iter->next_linked_pair = iter->kwv->pairs[slot]->next;
	  pair = iter->kwv->pairs[slot];
	}
  }
  else if (iter->key_only || iter->next_linked_pair != NULL)
	pair = iter->next_linked_pair;
  else {
	/* Move on to the next slot that is not empty. */
	while ((unsigned int) ++iter->current_index < iter->kwv->nused
		   && (pair = iter->kwv->pairs[iter->current_index]) == NULL)
	  ;
	if ((unsigned int) iter->current_index >= iter->kwv->nused)
	  return(NULL);
  }

  /* Remember the successor now so that PAIR may be deleted by the caller. */
  if (pair != NULL)
	iter->next_linked_pair = pair->next;

  return(pair);
}

/*
 * Convert a KWV into a single string.
 * Each pair is written as NAME="VALUE" followed by a newline, with every
 * double quote inside VALUE preceded by a backslash.  A pair with no value
 * is written with an empty value.
 * Return NULL if KWV is NULL and the empty string literal (which must not
 * be freed or modified) if KWV has no pairs; otherwise return the buffer
 * of the dynamic string that was built.
 */
char *
kwv_str(Kwv *kwv)
{
  unsigned int ui;
  char *p;
  Ds ds;
  Kwv_pair *pair;

  if (kwv == NULL)
	return(NULL);

  kwv->error_msg = NULL;
  if (kwv->npairs == 0)
	return("");

  ds_init(&ds);
  for (ui = 0; ui < kwv->nused; ui++) {
	pair = kwv->pairs[ui];
	while (pair != NULL) {
	  ds_asprintf(&ds, "%s=\"", pair->name);
	  /* Other functions in this file allow for a NULL value; so must we. */
	  if (pair->val != NULL) {
		for (p = pair->val; *p != '\0'; p++) {
		  if (*p == '"') {
			ds_concatc(&ds, '\\');
			ds_concatc(&ds, '"');
		  }
		  else
			ds_concatc(&ds, (int) *p);
		}
	  }
	  ds_concatc(&ds, '"');
	  ds_concatc(&ds, '\n');
	  pair = pair->next;
	}
  }

  return(ds_buf(&ds));
}

/*
 * Merge the contents of SECONDARY into PRIMARY, returning PRIMARY and leaving
 * SECONDARY as is.
 * If the same name appears in both, the action depends on MODE:
 *   if KWV_ALLOW_DUPS, add names in SECONDARY to PRIMARY if they already exist
 *   in PRIMARY;
 *   if KWV_REPLACE_DUPS, replace names in PRIMARY with duplicates in
 *   SECONDARY;
 *   if KWV_NO_DUPS, do not add names in SECONDARY to PRIMARY if they already
 *   exist in PRIMARY - continue processing but return an error indication
 *   (which the caller may choose to ignore)
 *
 * If multiple values are allowed, the SECONDARYs will be added *after* the
 * PRIMARYs, otherwise the SECONDARYs will be discarded.
 *
 * A replacement is done by deleting the name from PRIMARY and then adding a
 * copy of SECONDARY's values, so PRIMARY never shares pairs with SECONDARY.
 *
 * Return PRIMARY on success (also when SECONDARY is NULL, in which case
 * there is nothing to merge).  Return NULL if PRIMARY is NULL, if MODE is
 * invalid, or if a duplicate was rejected or could not be replaced; in the
 * last three cases PRIMARY's error message is set.
 */
Kwv *
kwv_merge(Kwv *primary, Kwv *secondary, Kwv_dup_mode mode)
{
  unsigned int i, slot;
  Kwv *rval;
  Kwv_pair *pair;

  if (primary == NULL)
	return(NULL);

  primary->error_msg = NULL;
  if (secondary == NULL)
	return(primary);

  secondary->error_msg = NULL;
  rval = primary;

  for (i = 0; i < secondary->nused; i++) {
	if ((pair = secondary->pairs[i]) != NULL) {
	  if (lookup_key(primary, pair->name, &slot) == 0) {
		/* Doesn't exist in primary, so add it. */
		add_pair_list(primary, pair, DO_COPY);
	  }
	  else {
		/* Exists in primary - action depends on MODE. */
		if (mode == KWV_NO_DUPS) {
		  if (primary->error_msg == NULL)
			primary->error_msg = "Duplicate key found";
		  rval = NULL;
		  continue;
		}
		else if (mode == KWV_ALLOW_DUPS)
		  add_pair_list(primary, pair, DO_COPY);
		else if (mode == KWV_REPLACE_DUPS) {
		  /* Replacing a list with itself would free what is being copied. */
		  if (primary->pairs[slot] == pair)
			continue;

		  /*
		   * SLOT indexes PRIMARY whereas I indexes SECONDARY, so the two
		   * must not be confused.  Drop PRIMARY's values for this name and
		   * add copies of SECONDARY's in their place.
		   */
		  if (kwv_delete(primary, pair->name) == -1) {
			primary->error_msg = "Duplicate key could not be replaced";
			rval = NULL;
			continue;
		  }
		  add_pair_list(primary, pair, DO_COPY);
		}
		else {
		  primary->error_msg = "Invalid mode argument";
		  return(NULL);
		}
	  }
	}
  }

  return(rval);
}

/*
 * Release every pair held by KWV, along with each pair's name and value.
 * If KWV's clear_flag is set, each name and value is passed to strzap()
 * before it is freed.  Afterwards every slot of the vector is NULL and the
 * pair, used-slot, and allocated-slot counts are all zero.
 * NOTE(review): the vector itself is not freed here although nalloc is
 * zeroed, and xval is not touched - confirm both are handled elsewhere.
 */
static void
free_all_pairs(Kwv *kwv)
{
  int slot;
  Kwv_iter *it;
  Kwv_pair *cur;

  if (kwv == NULL)
	return;

  kwv->error_msg = NULL;

  /*
   * The iterator records a pair's successor before returning the pair, so
   * the pair can be freed before asking for the next one.
   */
  it = kwv_iter_begin(kwv, NULL);
  for (cur = kwv_iter_next(it); cur != NULL; cur = kwv_iter_next(it)) {
	if (cur->name != NULL) {
	  if (kwv->clear_flag)
		strzap(cur->name);
	  free(cur->name);
	  cur->name = NULL;
	}

	if (cur->val != NULL) {
	  if (kwv->clear_flag)
		strzap(cur->val);
	  free(cur->val);
	  cur->val = NULL;
	}

	free(cur);
  }
  kwv_iter_end(it);

  slot = 0;
  while (slot < kwv->nalloc) {
	kwv->pairs[slot] = NULL;
	slot++;
  }

  kwv->nalloc = 0;
  kwv->nused = 0;
  kwv->npairs = 0;
}

/*
 * Free KWV: first all of its pairs (via free_all_pairs()), then the Kwv
 * structure itself.  A NULL KWV is ignored.
 */
void
kwv_free(Kwv *kwv)
{

  if (kwv != NULL) {
	free_all_pairs(kwv);
	free(kwv);
  }
}

/*
 * Keep the data structure and its configuration but free all of its contents.
 * Return KWV, with its error message cleared, or NULL if KWV is NULL.
 */
Kwv *
kwv_reset(Kwv *kwv)
{

  if (kwv == NULL)
	return(NULL);

  free_all_pairs(kwv);
  kwv->error_msg = NULL;

  return(kwv);
}

/*
 * Return 1 if STR is non-NULL and consists only of characters for which
 * isprint() is true (an empty string qualifies), and 0 otherwise.
 */
static int
looks_valid(char *str)
{
  char *p;

  if (str == NULL)
	return(0);
  for (p = str; *p != '\0'; p++) {
	/*
	 * The <ctype.h> functions require a value representable as an
	 * unsigned char (or EOF); a plain char may be negative.
	 */
	if (!isprint((unsigned char) *p))
	  return(0);
  }

  return(1);
}

/*
 * Run sanity checks on KWV.  OP is a caller-supplied label that is
 * included in the verbose heading.
 * The checks are: the number of used slots does not exceed the number
 * allocated; no error message is pending; and every pair has a name, and
 * a value if one is present, that consists only of printable characters
 * (see looks_valid()).
 * If VERBOSE is non-zero, a description of KWV and of any problem found is
 * written to stderr.
 * Return 0 if KWV is NULL or passes the checks, and -1 otherwise.
 */
int
kwv_validate(Kwv *kwv, char *op, int verbose)
{
  unsigned int ui, pn;
  Kwv_pair *pair;

  if (kwv == NULL)
	return(0);

  if (verbose)
	fprintf(stderr, "[Kwv %p, %s, npairs=%d, nused=%d, nalloc=%d]\n",
			(void *) kwv, op, kwv->npairs, kwv->nused, kwv->nalloc);

  if (kwv->nused > kwv->nalloc) {
	if (verbose)
	  fprintf(stderr, "Bug: nused > nalloc?\n");
	return(-1);
  }

  if (kwv->error_msg != NULL) {
	if (verbose)
	  fprintf(stderr, "%s\n", kwv->error_msg);
	return(-1);
  }

  for (ui = 0; ui < kwv->nused; ui++) {
	if ((pair = kwv->pairs[ui]) == NULL) {
	  /* UI is unsigned, so it must be printed with %u. */
	  if (verbose)
		fprintf(stderr, "%u: empty\n", ui);
	  continue;
	}

	/* PN numbers the pairs on this slot's list. */
	pn = 0;
	while (pair != NULL) {
	  if (verbose)
		fprintf(stderr, "[slot %u/%u] ", ui, pn);

	  if (!looks_valid(pair->name)) {
		if (verbose)
		  fprintf(stderr, "[bug: invalid name!]\n");
		return(-1);
	  }
	  else if (verbose)
		fprintf(stderr, "%s -> ", pair->name);

	  if (pair->val != NULL) {
		if (!looks_valid(pair->val)) {
		  if (verbose)
			fprintf(stderr, "[bug: invalid value]\n");
		  return(-1);
		}
		else if (verbose)
		  fprintf(stderr, "\"%s\"", pair->val);
	  }
	  else if (verbose)
		fprintf(stderr, "[no val]");

	  if (pair->xval != NULL) {
		if (verbose)
		  fprintf(stderr, " [+xval]");
	  }

	  if (verbose)
		fprintf(stderr, "\n");

	  pair = pair->next;
	  pn++;
	}
  }

  return(0);
}

/*
 * Write a listing of VARTAB to FP (or to stderr if FP is NULL).
 * Only entries that have at least one pair are shown: a heading with the
 * entry's name and id, followed by one numbered line per pair.
 * Nothing is written if VARTAB is NULL.
 */
void
kwv_vartab_text(FILE *fp, Kwv_vartab *vartab)
{
  FILE *out;
  Kwv_vartab *entry;

  if (vartab == NULL)
	return;

  out = (fp != NULL) ? fp : stderr;

  for (entry = vartab; entry->name != NULL; entry++) {
	int seq;
	Kwv_pair *p;

	if (entry->pair == NULL)
	  continue;

	fprintf(out, "name=\"%s\" (id=%d):\n", entry->name, entry->id);

	seq = 0;
	for (p = entry->pair; p != NULL; p = p->next) {
	  seq++;
	  fprintf(out, "  %2d. %s=%s\n", seq, p->name, p->val);
	}
  }
}

/*
 * Write KWV to FP (or to stderr if FP is NULL) in the format produced by
 * kwv_buf() with '=' as the separator and no quoting.
 * Nothing is written if KWV is NULL, has no slots in use, or if kwv_buf()
 * returns NULL.
 */
void
kwv_text(FILE *fp, Kwv *kwv)
{
  char *buf;

  if (kwv == NULL)
	return;
  kwv->error_msg = NULL;
  if (kwv->nused == 0)
	return;

  if (fp == NULL)
	fp = stderr;

  /* kwv_buf() can return NULL, which must not be passed to %s. */
  if ((buf = kwv_buf(kwv, '=', 0)) != NULL)
	fprintf(fp, "%s", buf);
}

/*
 * Format KWV as text, one pair per line, as
 *   NAME SEP QUOTE VALUE QUOTE
 * (without the spaces).  SEP_CH is the separator character ('=' is used if
 * it is zero) and QUOTE_CH, if non-zero, is placed on both sides of the
 * value.  Pairs that have no value are omitted.
 * Return the buffer of the dynamic string that was built, or NULL if KWV
 * is NULL or has no slots in use.
 */
char *
kwv_buf(Kwv *kwv, int sep_ch, int quote_ch)
{
  char *quote, *sep;
  Ds *out;
  Kwv_iter *it;
  Kwv_pair *p;

  if (kwv == NULL)
	return(NULL);
  kwv->error_msg = NULL;
  if (kwv->nused == 0)
	return(NULL);

  sep = "=";
  if (sep_ch)
	sep = ds_xprintf("%c", sep_ch);

  quote = "";
  if (quote_ch)
	quote = ds_xprintf("%c", quote_ch);

  out = ds_init(NULL);

  it = kwv_iter_begin(kwv, NULL);
  for (p = kwv_iter_next(it); p != NULL; p = kwv_iter_next(it)) {
	if (p->val == NULL)
	  continue;
	ds_asprintf(out, "%s%s%s%s%s\n", p->name, sep, quote, p->val, quote);
  }
  kwv_iter_end(it);

  return(ds_buf(out));
}

/*
 * Return a vector of the keys (names) in KWV, one per non-empty slot.
 * The vector holds the name pointers themselves, not copies.
 * Return NULL if KWV is NULL or has no slots in use.
 */
Dsvec *
kwv_keys(Kwv *kwv)
{
  unsigned int slot;
  Dsvec *names;

  if (kwv == NULL)
	return(NULL);
  kwv->error_msg = NULL;
  if (kwv->nused == 0)
	return(NULL);

  names = dsvec_init(NULL, sizeof(char *));

  slot = 0;
  while (slot < kwv->nused) {
	if (kwv->pairs[slot] != NULL)
	  dsvec_add_ptr(names, kwv->pairs[slot]->name);
	slot++;
  }

  return(names);
}

/*
 * Convert KWV into a vector of pointers to kw=val strings, NULL terminated.
 * This is the format used by environ(7), for instance.
 * Each string is newly formatted with ds_xprintf().
 * Return NULL if KWV is NULL or has no slots in use.
 */
char **
kwv_env(Kwv *kwv)
{
  char *entry;
  Dsvec *vec;
  Kwv_iter *it;
  Kwv_pair *p;

  if (kwv == NULL)
	return(NULL);

  kwv->error_msg = NULL;
  if (kwv->nused == 0)
	return(NULL);

  /* XXX This should be done so that freeing is easier. */
  vec = dsvec_init(NULL, sizeof(char *));

  it = kwv_iter_begin(kwv, NULL);
  for (p = kwv_iter_next(it); p != NULL; p = kwv_iter_next(it)) {
	entry = ds_xprintf("%s=%s", p->name, p->val);
	dsvec_add_ptr(vec, entry);
  }
  kwv_iter_end(it);

  /* The vector is terminated by a NULL pointer. */
  dsvec_add_ptr(vec, NULL);

  return((char **) dsvec_base(vec));
}

/*
 * Write the attribute N="V" to FP as HTML, preceded by a space, with the
 * name N wrapped in a blue <font> element.  Neither string is escaped.
 */
static void
attribute_hl(FILE *fp, char *n, char *v)
{

  /* The highlighted name first, then the quoted value. */
  fprintf(fp, " <font color=\"Blue\">%s</font>", n);
  fprintf(fp, "=\"%s\"", v);
}

/*
 * Write the pairs of KWV to FP (or to stderr if FP is NULL) as HTML:
 * a line break, each pair formatted by attribute_hl(), and a final line
 * break.  Nothing is written if KWV is NULL.
 */
void
kwv_html(FILE *fp, Kwv *kwv)
{
  FILE *out;
  Kwv_iter *it;
  Kwv_pair *p;

  if (kwv == NULL)
	return;

  kwv->error_msg = NULL;

  out = fp;
  if (out == NULL)
	out = stderr;

  fprintf(out, "<br/>\n");

  it = kwv_iter_begin(kwv, NULL);
  for (p = kwv_iter_next(it); p != NULL; p = kwv_iter_next(it))
	attribute_hl(out, p->name, p->val);

  fprintf(out, "<br/>");
  kwv_iter_end(it);
}

#ifdef NOTDEF
I now think these are unnecessary...
#ifdef HAVE_DS
/*
 * Add a pair to K (or to a new Kwv if K is NULL) whose name is KEY and
 * whose value is produced by formatting FMT and the arguments after it.
 * Return the Kwv, or NULL on failure.
 * NOTE(review): this code is disabled (NOTDEF) and indexes pairs[] as an
 * array of structures, whereas the live code in this file treats its
 * elements as pointers - it needs updating before it could be re-enabled.
 */
Kwv *
kwv_add_sprintf(Kwv *k, char *key, char *fmt, ...)
{
  Kwv *kwv;
  char *s;
  va_list ap;

  if (k == NULL) {
	/* XXX the initial allocation should be divined somehow. */
	if ((kwv = kwv_init(8)) == NULL)
	  return(NULL);
  }
  else
	kwv = k;

  if (kwv->npairs == kwv->nalloc) {
	if (kwv_expand(kwv) == -1)
	  return(NULL);		/* XXX */
  }

  /* va_start() takes the va_list first, then the last named parameter. */
  va_start(ap, fmt);
  s = dstr_vxprintf(fmt, ap);
  va_end(ap);
  if (s == NULL)
	return(NULL);		/* XXX */

  /* The name is duplicated only once the value is known to exist. */
  kwv->pairs[kwv->npairs].name = strdup(key);
  kwv->pairs[kwv->npairs].val = s;
  kwv->npairs++;

  return(kwv);
}

/*
 * Format FMT and the arguments after it into a 'KEYWORD=VALUE' string,
 * split it at the first '=', and add the pair to K with kwv_add().
 * Return the result of kwv_add(), or NULL if formatting fails or the
 * formatted string contains no '='.
 * NOTE(review): this code is disabled (NOTDEF).
 */
Kwv *
kwv_add_str_sprintf(Kwv *k, char *fmt, ...)
{
  char *key, *p, *s, *val;
  Kwv *kwv;
  va_list ap;

  /* va_start() takes the va_list first, then the last named parameter. */
  va_start(ap, fmt);
  s = dstr_vxprintf(fmt, ap);
  va_end(ap);

  if (s == NULL)
	return(NULL);

  key = s;
  if ((p = strchr(s, '=')) == NULL) {
	free(s);
	return(NULL);		/* XXX */
  }

  /* Split the string in place: the key before the '=', the value after. */
  *p = '\0';
  val = p + 1;

  kwv = kwv_add(k, key, val);
  free(s);

  return(kwv);
}
#endif
#endif

#ifdef PROG

/*
 * Test driver: parse the tag in STR with kwv_xml_parse(), then write to
 * stderr the element as reformatted by kwv_xml_format(), followed by the
 * element name and a numbered list of its attributes.
 * Exit with status 1 if STR cannot be parsed; otherwise return the number
 * of attributes.
 */
static int
kwv_parse_test(char *str)
{
  int i, n;
  char *endptr;
  Ds *ds;
  Kwv_iter *iter;
  Kwv_pair *v;
  Kwv_xml *x;

  if ((x = kwv_xml_parse(NULL, str, &endptr)) == NULL) {
	fprintf(stderr, "kwv_xml_parse error\n");
	exit(1);
  }

  ds = kwv_xml_format(x->kwv_attrs, x->el_name);
  fprintf(stderr, "%s\n", ds_buf(ds));

  n = kwv_count(x->kwv_attrs, NULL);
  fprintf(stderr, "element \"%s\": %d %s%s",
		  x->el_name, n, (n == 1) ? "attribute" : "attributes",
		  (n == 0) ? "\n" : ":\n");

  iter = kwv_iter_begin(x->kwv_attrs, NULL);
  i = 0;
  for (v = kwv_iter_first(iter); v != NULL; v = kwv_iter_next(iter))
	fprintf(stderr, "  %2d: %s = \"%s\"\n", ++i, v->name, v->val);
  kwv_iter_end(iter);

  return(n);
}

/*
 * Test program: run kwv_parse_test() on a fixed set of tags that vary in
 * quoting, spacing, and how the tag is closed.  The command line arguments
 * are not used.
 */
int
main(int argc, char **argv)
{

  kwv_parse_test("<hello one='1' two=\"2\" three='3'  >");
  kwv_parse_test("<hello one='1' two=\"2\" three='3'>");
  kwv_parse_test("<hello one='1' two=\"2\" three='3' />");
  kwv_parse_test("<hello   foo='1'   bar=\"2\"  >");
  kwv_parse_test("<hello foo =  '1' bar  = \"2\">");
  kwv_parse_test("<hello foo='1' bar=\"2\">");
  kwv_parse_test("<hello foo='1'>");
  kwv_parse_test("<hello  />");
  kwv_parse_test("<hello/>");
  kwv_parse_test("<hello  >");
  kwv_parse_test("<hello>");

  exit(0);
}
#endif
