/***************************************************************************
                  xmlparser.cpp  -  XmlParser Implementation
                             -------------------
    begin                : Sat Sep 21 2002
    copyright            : (C) 2002 by Ken Schenke
    email                : kschenke at users dot sourceforge dot net
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful, but   *
 *   WITHOUT ANY WARRANTY; without even the implied warranty of            *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      *
 *   General Public License for more details.                              *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA         *
 *   02110-1301, USA                                                       *
 *                                                                         *
 ***************************************************************************/

#include "xmlparser.h"
#include <libxml/HTMLparser.h>
#include <new>
#include <stack>
#include <stdarg.h>

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <QFile>
#include <QDir>

/***************************************************************************
 *                                                                         *
 *   Constants                                                             *
 *                                                                         *
 ***************************************************************************/

// minimum number of xmlChars by which xmlSAXcharacters() grows its buffer
#define CHARS_GROWBY			100
// set to non-zero to trace state stack pushes and pops on stderr
#define STATE_TRACE				0

/***************************************************************************
 *                                                                         *
 *   Structure Definitions                                                 *
 *                                                                         *
 ***************************************************************************/

// Per-parse bookkeeping handed to libxml as the SAX user data pointer.
// Every xmlSAX*() callback in this file casts its first argument back to
// this structure.
typedef struct
{
	xmlChar		*pChars;		// pointer to character string
	long		nChars;			// number of characters in string
	long		dimChars;		// size of buffer containing characters
	std::stack<short>	*state;	// parser state stack; top is the innermost open element
	void		*ctx;			// pointer to caller's context data
	ELEMHANDLER	*elems;			// array of caller's element handlers
								// (the callbacks stop at an all-zero entry)
	BEGIN_DOC_HANDLER	funcBegin;	// optional; called by xmlSAXstartDocument()
	END_DOC_HANDLER	funcEnd;		// optional; called by xmlSAXendDocument()
	CHAR_HANDLER	charHandler;	// optional; called by xmlSAXcharacters()
} SAXDATA;

/***************************************************************************
 *                                                                         *
 *   Function Prototypes                                                   *
 *                                                                         *
 ***************************************************************************/

// string helper used by EncodeString()
static	void	GlobalReplace(QString &str, const char *s1, const char *subs);
// libxml SAX callbacks installed by ParseHtmlDocument() and
// ParseXmlDocument(); the first argument is always the SAXDATA pointer
// (the two error callbacks ignore it)
static	void	xmlSAXstartDocument(void *);
static	void	xmlSAXendDocument(void *);
static	void	xmlSAXerror(void *, const char *, ...);
static	void	xmlSAXfatalError(void *, const char *, ...);
static	void	xmlSAXstartElement(void *, const xmlChar *, const xmlChar **);
static	void	xmlSAXendElement(void *, const xmlChar *);
static	void	xmlSAXcharacters(void *, const xmlChar *, int);

/***************************************************************************
 *                                                                         *
 *   EncodeString()                                                        *
 *                                                                         *
 *   Parameters:                                                           *
 *      const QString &str                                                 *
 *      QString &outstr                                                    *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function scans the string given in the first parameter for    *
 *      special characters and substitutes them with XML entities.  The    *
 *      results are written to the string given in the second parameter.   *
 *                                                                         *
 ***************************************************************************/

void EncodeString(const QString &str, QString &outstr)
{
	// Each row is { special character, XML entity }.  The ampersand row
	// must stay first: the entities written by the later rows contain
	// ampersands themselves and must not be escaped a second time.
	static const char *const entities[][2] =
	{
		{ "&",  "&amp;"  },
		{ "<",  "&lt;"   },
		{ ">",  "&gt;"   },
		{ "\"", "&quot;" }
	};
	const unsigned int count = sizeof(entities) / sizeof(entities[0]);

	outstr = str;

	for(unsigned int row = 0; row < count; row++)
		GlobalReplace(outstr, entities[row][0], entities[row][1]);
}

/***************************************************************************
 *                                                                         *
 *   GlobalReplace()                                                       *
 *                                                                         *
 *   Parameters:                                                           *
 *      QString &str                                                       *
 *      const char *s1                                                     *
 *      const char *subs                                                   *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function searches str for s1 and replaces each instance with  *
 *      subs.                                                              *
 *                                                                         *
 ***************************************************************************/

static void GlobalReplace(QString &str, const char *s1, const char *subs)
{
	// QString::replace() substitutes every occurrence in one left-to-right
	// pass and never rescans text it has just inserted, which is what the
	// previous hand-written indexOf()/replace() loop did.  Unlike that
	// loop it also terminates when s1 is an empty string.
	str.replace(QString(s1), QString(subs));
}

/***************************************************************************
 *                                                                         *
 *   ParseHtmlDocument()                                                   *
 *                                                                         *
 *   Parameters:                                                           *
 *      const QString &filename                                            *
 *      void *ctx                                                          *
 *      ELEMHANDLER *elems                                                 *
 *      BEGIN_DOC_HANDLER funcBegin                                        *
 *      END_DOC_HANDLER  funcEnd                                           *
 *      CHAR_HANDLER charHandler                                           *
 *   Return:                                                               *
 *      None.  BkException thrown if error occurs                          *
 *   Description:                                                          *
 *      This function attempts to parse an HTML document using libxml's    *
 *      SAX2 parsing interface.  The first parameter is the filename to    *
 *      parse.  The second, a pointer to a context handler passed to every *
 *      callback function.  The third parameter is an array of element     *
 *      handlers.  Each element handler defines the state of the parsing   *
 *      engine when it encounters that element and, optionally, pointers   *
 *      to callback functions to handle the start or end of that element.  *
 *      The fourth and fifth parameters are pointers to callback functions *
 *      to call at the beginning and end of the document.  The sixth is an *
 *      optional callback invoked for each run of character data.          *
 *                                                                         *
 ***************************************************************************/

void ParseHtmlDocument(
	const QString &filename,
	void *ctx,
	ELEMHANDLER *elems,
	BEGIN_DOC_HANDLER funcBegin,
	END_DOC_HANDLER funcEnd,
	CHAR_HANDLER charHandler) throw(BkException)
{
	SAXDATA		data;
	std::stack<short>	state;
	xmlSAXHandler	saxhandler;

	memset(&data, 0, sizeof(data));
	data.state = &state;

	xmlSubstituteEntitiesDefault(1);

	data.ctx = ctx;
	data.funcBegin = funcBegin;
	data.funcEnd = funcEnd;
	data.charHandler = charHandler;
	data.elems = elems;

	// set up the SAX handler structure

	memset(&saxhandler, 0, sizeof(saxhandler));
	saxhandler.startDocument = xmlSAXstartDocument;
	saxhandler.endDocument = xmlSAXendDocument;
	saxhandler.startElement = xmlSAXstartElement;
	saxhandler.endElement = xmlSAXendElement;
	saxhandler.characters = xmlSAXcharacters;
	saxhandler.error = xmlSAXerror;
	saxhandler.fatalError = xmlSAXfatalError;

	// make sure the file exists before starting

	if(!QFile::exists(filename))
		BKEXCEPT("HTML File Not Found");

	// Read the file into memory

	QFileInfo fi(filename);
	uint file_size = fi.size();
	xmlChar *buffer = new xmlChar[file_size+1];
	if(buffer == 0)
		BKEXCEPT("Unable to Allocate Memory");
	memset(buffer, 0, file_size+1);
	QFile file(filename);
	if(file.open(QIODevice::ReadOnly) == false)
		BKEXCEPT("Unable to Read HTML File");
	QByteArray bytes = file.readAll();
	memcpy(buffer, bytes.data(), file_size);

	// parse the file

	if(htmlSAXParseDoc(
		buffer,
		NULL,		// encoding
		&saxhandler,
		&data))
		BKEXCEPT("Failed to Parse HTML File");

	// delete the memory buffer

	delete [] buffer;

	// verify the results of the parse

	if(	data.state->size() != 1
	 ||	data.state->top() != SAXSTATE_STARTEND)
		BKEXCEPT("HTML Parser Not in Consistent State");

#if STATE_TRACE
	fprintf(stderr, "pop SAXSTATE_STARTEND\n");
	data.state->pop();
#endif
}

/***************************************************************************
 *                                                                         *
 *   ParseXmlDocument()                                                    *
 *                                                                         *
 *   Parameters:                                                           *
 *      const QString &filename                                            *
 *      void *ctx                                                          *
 *      ELEMHANDLER *elems                                                 *
 *      BEGIN_DOC_HANDLER funcBegin                                        *
 *      END_DOC_HANDLER  funcEnd                                           *
 *      CHAR_HANDLER charHandler                                           *
 *   Return:                                                               *
 *      None.  BkException thrown if error occurs                          *
 *   Description:                                                          *
 *      This function attempts to parse an xml document using libxml's     *
 *      SAX2 parsing interface.  The first parameter is the filename to    *
 *      parse.  The second, a pointer to a context handler passed to every *
 *      callback function.  The third parameter is an array of element     *
 *      handlers.  Each element handler defines the state of the parsing   *
 *      engine when it encounters that element and, optionally, pointers   *
 *      to callback functions to handle the start or end of that element.  *
 *      The fourth and fifth parameters are pointers to callback functions *
 *      to call at the beginning and end of the document.  The sixth is an *
 *      optional callback invoked for each run of character data.          *
 *                                                                         *
 ***************************************************************************/

void ParseXmlDocument(
	const QString &filename,
	void *ctx,
	ELEMHANDLER *elems,
	BEGIN_DOC_HANDLER funcBegin,
	END_DOC_HANDLER funcEnd,
	CHAR_HANDLER charHandler)
	throw(BkException)
{
	SAXDATA		data;
	std::stack<short>	state;
	xmlSAXHandler	saxhandler;

	// the SAX callbacks receive &data as their context and record the
	// element nesting on the local state stack

	memset(&data, 0, sizeof(data));
	data.state = &state;

	xmlSubstituteEntitiesDefault(1);

	data.ctx = ctx;
	data.funcBegin = funcBegin;
	data.funcEnd = funcEnd;
	data.charHandler = charHandler;
	data.elems = elems;

	// set up the SAX handler structure

	memset(&saxhandler, 0, sizeof(saxhandler));
	saxhandler.startDocument = xmlSAXstartDocument;
	saxhandler.endDocument = xmlSAXendDocument;
	saxhandler.startElement = xmlSAXstartElement;
	saxhandler.endElement = xmlSAXendElement;
	saxhandler.characters = xmlSAXcharacters;
	saxhandler.error = xmlSAXerror;
	saxhandler.fatalError = xmlSAXfatalError;

	// make sure the file exists before starting

	if(!QFile::exists(filename))
		BKEXCEPT("Xml Document Does Not Exist");

	// Read the file into memory.  The QByteArray owns the bytes, so they
	// are released on every exit path (BKEXCEPT is expected to throw),
	// and the length handed to the parser is the number of bytes that
	// were really read rather than a separately queried file size.

	QFile file(filename);
	if(!file.open(QIODevice::ReadOnly))
		BKEXCEPT("Unable to Read Xml Document");

	QByteArray bytes = file.readAll();

	// parse the file; xmlSAXUserParseMemory() returns 0 on success

	if(xmlSAXUserParseMemory(
		&saxhandler,
		&data,
		bytes.constData(),
		static_cast<int>(bytes.size())))
	{
		QString	msg;

		msg = "XML Parser Failed on file \"" + filename + "\"";
		BKEXCEPT(msg);
	}

	// verify the results of the parse: only the entry pushed by
	// xmlSAXstartDocument() may remain on the stack

	if(	data.state->size() != 1
	 ||	data.state->top() != SAXSTATE_STARTEND)
	{
		BKEXCEPT("XML Parser has Inconsistent Internal State");
	}

#if STATE_TRACE
	fprintf(stderr, "pop SAXSTATE_STARTEND\n");
	data.state->pop();
#endif
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXerror()                                                         *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *       (not used)                                            *
 *      const char *msg, ...                                               *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function is called by the xml parsing engine if it encounters *
 *      a non-fatal error.  It accepts a format string and a variable      *
 *      argument list like printf() and prints the message to stderr.      *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXerror(void * /* ctx */, const char *msg, ...)
{
	// forward the printf-style message and its arguments to stderr
	va_list ap;

	va_start(ap, msg);
	(void)vfprintf(stderr, msg, ap);
	va_end(ap);
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXfatalError()                                                    *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *            (not used)                                       *
 *      const char *msg                                                    *
 *      ...               (variable arg list)                              *
 *   Return:                                                               *
 *      None                                                               *
 *   Description:                                                          *
 *      This function is called by the XML parsing engine when it          *
 *      encounters a fatal error.  All it really does is print the error   *
 *      message to stderr.                                                 *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXfatalError(void * /* ctx */, const char *msg, ...)
{
	// the message is only reported on stderr; nothing is recorded in
	// the parse state here
	va_list ap;

	va_start(ap, msg);
	(void)vfprintf(stderr, msg, ap);
	va_end(ap);
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXstartDocument()                                                 *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *ctx                                                          *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function is called by the SAX2 parsing engine before any      *
 *      others.  It uses the opportunity to initialize state data.  It     *
 *      also pushes the current state on to a state stack.  This stack is  *
 *      pushed when a new XML element is encountered and popped at the end *
 *      of the element.  This allows the callback functions to always      *
 *      know where they are in the XML document.                           *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXstartDocument(void *ctx)
{
	SAXDATA *data = (SAXDATA *)ctx;

#if STATE_TRACE
	fprintf(stderr, "push SAXSTATE_STARTEND\n");
#endif
	data->state->push(SAXSTATE_STARTEND);
	data->pChars = NULL;
	data->nChars = 0;
	data->dimChars = 0;

	if(data->funcBegin)
		data->funcBegin(data->ctx);
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXendDocument()                                                   *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *ctx                                                          *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function is called by the SAX2 parsing engine after all XML   *
 *      elements have been processed.  It cleans up any state data.        *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXendDocument(void *ctx)
{
	SAXDATA *data = (SAXDATA *)ctx;

	if(data->funcEnd)
		data->funcEnd(data->ctx);

	delete [] data->pChars;
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXstartElement()                                                  *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *ctx                                                          *
 *      const xmlChar *name                                                *
 *      const xmlChar **atts                                               *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function is called by the SAX2 parsing engine when a new XML  *
 *      element is encountered.  The first parameter is, of course, the    *
 *      state information.  The second parameter contains the name of the  *
 *      element.  The third parameter contains an array of the attributes  *
 *      specified with the element.  Please refer to the libxml            *
 *      documentation for more information.                                *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXstartElement(
	void *ctx,
	const xmlChar *name,
	const xmlChar **atts)
{
	SAXDATA	*data = (SAXDATA *)ctx;

	if(data->state->top() == SAXSTATE_UNKNOWN)
	{
		data->state->push(SAXSTATE_UNKNOWN);
#if STATE_TRACE
		for(int i=0; i<data->state->size(); i++)
			putc('\t', stderr);
		fprintf(stderr, "push SAXSTATE_UNKNOWN\n");
#endif
		return;
	}

//	fprintf(stderr, "<%s", (const char *)name);

	for(int i=0; ; i++)
	{
		if(	data->elems[i].state == 0
		  &&	data->elems[i].elem == NULL
		  &&	data->elems[i].startElem == NULL
		  &&	data->elems[i].endElem == NULL)
		{
			data->state->push(SAXSTATE_UNKNOWN);
#if STATE_TRACE
			for(int i=0; i<data->state->size(); i++)
				putc('\t', stderr);
			fprintf(stderr, "pop/push SAXSTATE_UNKNOWN\n");
#endif
			break;
		}

		if(!xmlStrcasecmp(name, (const xmlChar *)data->elems[i].elem))
		{
			short prevState = data->state->top();
			data->state->push(data->elems[i].state);
#if STATE_TRACE
			for(int i=0; i<data->state->size(); i++)
				putc('\t', stderr);
			fprintf(stderr, "push %d\n", data->elems[i].state);
#endif
			if(	data->elems[i].startElem
			  &&	data->elems[i].startElem(
			  		data->ctx, name, atts, prevState))
			{
				// an error occured, ignore this element
				// along with any child elements
				(void)data->state->pop();
				data->state->push(SAXSTATE_UNKNOWN);
#if STATE_TRACE
			for(int i=0; i<data->state->size(); i++)
				putc('\t', stderr);
			fprintf(stderr, "pop/push SAXSTATE_UNKNOWN\n");
#endif
			}

			break;
		}
	}
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXendElement()                                                    *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *ctx                                                          *
 *      const xmlChar *name                                                *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function is called by the SAX2 parsing engine at the end of   *
 *      an XML element. The first parameter is, of course, the state       *
 *      information.  The second parameter contains the name of the        *
 *      the element.                                                       *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXendElement(void *ctx, const xmlChar *name)
{
	SAXDATA *data = (SAXDATA *)ctx;
	xmlChar *pChars = NULL;

	// remove leading and trailing whitespace from characters
	// accumulated by xmlSAXcharacters(); the length is measured once
	// and counted down instead of being re-measured for every trailing
	// whitespace character

	if(data->pChars)
	{
		int len = xmlStrlen(data->pChars);
		while(len > 0 && isspace(data->pChars[len-1]))
			data->pChars[--len] = 0;

		pChars = data->pChars;
		while(*pChars && isspace(*pChars))
			pChars++;
	}

	// xmlSAXstartDocument() pushes SAXSTATE_STARTEND and every start tag
	// pushes exactly one more entry, so a balanced end tag always finds
	// at least two entries.  Anything less is an unbalanced end tag;
	// skip it rather than call top()/pop() on an exhausted stack.

	if(data->state->size() >= 2)
	{
		short state = data->state->top();
#if STATE_TRACE
		for(int i=0; i<data->state->size(); i++)
			putc('\t', stderr);
		fprintf(stderr, "pop %d\n", state);
#endif
		data->state->pop();

		// elements that were being ignored have no end handler to call

		if(state != SAXSTATE_UNKNOWN)
		{
			for(int i=0; ; i++)
			{
				// an all-zero entry terminates the handler table
				if(	data->elems[i].state == 0
				&&	data->elems[i].elem == NULL
				&&	data->elems[i].startElem == NULL
				&&	data->elems[i].endElem == NULL)
					break;

				if(	!xmlStrcasecmp(name, (const xmlChar *)data->elems[i].elem)
				&&	data->elems[i].endElem)
				{
					// hand over the trimmed text, the state just left
					// and the state of the enclosing element
					data->elems[i].endElem(
						data->ctx,
						name,
						pChars,
						state,
						data->state->top());
					break;
				}
			}
		}
	}

	// delete any characters that might have accumulated in this element
	// (pChars above points into this buffer and is dead from here on)

	delete [] data->pChars;
	data->pChars = NULL;
	data->nChars = 0;
	data->dimChars = 0;
}

/***************************************************************************
 *                                                                         *
 *   xmlSAXcharacters()                                                    *
 *                                                                         *
 *   Parameters:                                                           *
 *      void *ctx                                                          *
 *      const xmlChar *ch                                                  *
 *      int len                                                            *
 *   Return:                                                               *
 *      void                                                               *
 *   Description:                                                          *
 *      This function is called by the SAX2 parsing engine when characters *
 *      are encountered between an open element tag and a close element    *
 *      tag.  The first parameter is, of course, the state information.    *
 *      The second parameter contains the character string.  The third     *
 *      parameter is the length of the string.  Since the libxml           *
 *      documentation doesn't specify, I assumed it's possible for libxml  *
 *      to call this function multiple times for a given string of         *
 *      characters.  For this reason, the function simply continues        *
 *      appending characters to the buffer, allocating more memory as      *
 *      necessary.  At the end of each element the buffer accumulated by   *
 *      this function is deleted.                                          *
 *                                                                         *
 ***************************************************************************/

static void xmlSAXcharacters(void *ctx, const xmlChar *ch, int len)
{
	SAXDATA *data = (SAXDATA *)ctx;

	// nothing sensible can be done with a missing or negative-length chunk

	if(ch == NULL || len < 0)
		return;

	// check to see if memory needs to be allocated; one element is always
	// kept spare so the zero-filled buffer stays NUL-terminated for
	// xmlSAXendElement()

	if(data->nChars+len >= data->dimChars-1)
	{
		// long, not short: a single chunk can be longer than a short can
		// hold, which would wrap the growth amount and under-allocate
		long growby = CHARS_GROWBY;
		if(growby <= len)
			growby = (long)len + CHARS_GROWBY + 1;

		// nothrow: this function is called from libxml's C code, so an
		// allocation failure must not unwind through it as an exception;
		// the chunk is dropped and the existing buffer is left untouched
		xmlChar *temp = new (std::nothrow) xmlChar[data->dimChars+growby];
		if(temp == NULL)
			return;
		memset(temp, 0, (data->dimChars+growby)*sizeof(xmlChar));

		if(data->pChars)
		{
			memcpy(temp, data->pChars, data->nChars*sizeof(xmlChar));
			delete [] data->pChars;
		}
		data->pChars = temp;
		data->dimChars += growby;
	}

	// store the string of characters in the buffer

	memcpy(data->pChars+data->nChars, ch, len*sizeof(xmlChar));
	data->nChars += len;

	// see if the caller supplied a character handler of their own; it is
	// given this chunk only, not the text accumulated so far

	if(data->charHandler)
		data->charHandler(data->ctx, data->state->top(), ch, len);
}
