/*
 * Copyright (c) 2009, 2010 Nhat Minh Lê
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Note: Due to the way Expat handles XML_StopParser(), once
 * regxml_xml_push() fails, it is unreasonable to resume further
 * parsing. This comes from the fact that Expat does not guarantee
 * immediate interruption after a call to XML_StopParser(), so there
 * may be intervening handler invocations after a failure. In the best
 * case, they will just fail altogether. In the worst case, they will
 * insert corrupt data into the tree.
 */

/* LINTLIBRARY */

#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <expat.h>

#ifndef __NetBSD__
#include <regxml/compat.h>
#endif
#include <regxml/buffer.h>
#include <regxml/regxml.h>
#include <regxml/xml.h>

/*
 * State of one XML front end: an Expat parser feeding a regxml
 * instance on the input side, and the settings used by the
 * regxml_xml_print*() functions on the output side.
 */
struct regxml_xml {
	/* regxml instance that receives the regxml_push*() calls */
	struct regxml *rxx_reg;
	/* error recorded by a failing Expat handler; 0 when none */
	int rxx_error;
	/* Expat parser owned by this object */
	XML_Parser rxx_parser;
	/* input buffer last handed out by regxml_xml_getbuf(), or NULL */
	char *rxx_xmlbuf;
	/* character data accumulated between two pieces of markup */
	struct regxml_buffer rxx_buffer;
	/* output callback; a nonzero return is treated as failure */
	int (*rxx_print)(void *, const char *, ...);
	/* opaque first argument passed to rxx_print */
	void *rxx_arg;
	/* private copy of the file name used in error messages, or NULL */
	char *rxx_filename;
	/* indent characters printed per nesting level; 0 disables */
	unsigned int rxx_baseindent;
	/* current element nesting depth on the printing side */
	regxml_xid_t rxx_depth;
	/* REGXML_XML_TRIM: drop whitespace-only text, allow re-indenting */
	unsigned rxx_trim: 1;
	/* REGXML_XML_EXPAND: let Expat expand entities; escape on output */
	unsigned rxx_expand: 1;
	/* REGXML_XML_TABINDENT: indent with tabs instead of spaces */
	unsigned rxx_tabindent: 1;
	/* REGXML_XML_ATTRLINE: print each attribute on its own line */
	unsigned rxx_attrline: 1;
	/* set after markup is printed, cleared after text is printed */
	unsigned rxx_canindent: 1;
	/* set when the last failure was reported by Expat itself */
	unsigned rxx_expaterror: 1;
};

/* Feeding input to Expat. */
static enum XML_Status parse(struct regxml_xml *, const char *, size_t);
static int pushde(struct regxml_xml *, const char *, size_t);

/* Buffered character data. */
static int whitespace(const char *);
static int flushbuffer(struct regxml_xml *);

/* Expat handlers; the first argument is the struct regxml_xml. */
static void start(void *, const char *, const char *[]);
static void end(void *, const char *);
static void text(void *, const XML_Char *, int);
static void comment(void *, const XML_Char *);
static void pi(void *, const XML_Char *, const XML_Char *);

/* Indentation helpers for the printing functions. */
static int indent(struct regxml_xml *);
static int tryindent(struct regxml_xml *);
static int attrindent(struct regxml_xml *);

/* Low-level printing helpers. */
static int printq(struct regxml_xml *, const char *);
static int printopeningorempty(struct regxml_xml *, int,
    const char *, char * const *);

int
regxml_xml_create(struct regxml_xml **xmlptr, struct regxml *reg, int flags)
{
	struct regxml_xml *xml;
	int r;

	_DIAGASSERT(reg != NULL);
	_DIAGASSERT(xmlptr != NULL);

	xml = malloc(sizeof *xml);
	if (xml == NULL)
		return REGXML_ESYSTEM;

	xml->rxx_reg = reg;
	xml->rxx_error = 0;
	xml->rxx_expaterror = 0;

	xml->rxx_parser = XML_ParserCreate(NULL);
	if (xml->rxx_parser == NULL) {
		free(xml);
		return REGXML_ESYSTEM;
	}
	xml->rxx_xmlbuf = NULL;

	r = regxml_buffer_init(&xml->rxx_buffer, 0);
	if (r != 0) {
		XML_ParserFree(xml->rxx_parser);
		free(xml);
		return r;
	}

	XML_SetUserData(xml->rxx_parser, xml);
	XML_SetElementHandler(xml->rxx_parser, start, end);
	XML_SetCharacterDataHandler(xml->rxx_parser, text);
	XML_SetCommentHandler(xml->rxx_parser, comment);
	XML_SetProcessingInstructionHandler(xml->rxx_parser, pi);

	xml->rxx_print = regxml_xml_printf;
	xml->rxx_arg = NULL;

	xml->rxx_trim = !!(flags & REGXML_XML_TRIM);
	xml->rxx_expand = !!(flags & REGXML_XML_EXPAND);
	xml->rxx_tabindent = !!(flags & REGXML_XML_TABINDENT);
	xml->rxx_attrline = !!(flags & REGXML_XML_ATTRLINE);

	xml->rxx_filename = NULL;
	xml->rxx_baseindent = 0;
	xml->rxx_depth = 0;
	xml->rxx_canindent = 0;

	*xmlptr = xml;
	return 0;
}

/*
 * Release everything owned by `xml', including the Expat parser and
 * the object itself.
 */
void
regxml_xml_destroy(struct regxml_xml *xml)
{
	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(xml->rxx_parser != NULL);

	/*
	 * In expand mode rxx_xmlbuf was obtained from XML_GetBuffer(),
	 * so it is not ours to free.
	 */
	if (xml->rxx_expand == 0)
		free(xml->rxx_xmlbuf);
	regxml_buffer_free(&xml->rxx_buffer);
	free(xml->rxx_filename);
	XML_ParserFree(xml->rxx_parser);
	free(xml);
}

/*
 * Format a message for error `code' into the buffer `s' of size `n'.
 *
 * When `code' is REGXML_EXML and the last failure was reported by
 * Expat, the message is Expat's error string prefixed with the
 * current line number (and the file name, if one was set).  Any other
 * code is delegated to regxml_strerror().
 *
 * Returns the value of regxml_strerror() in the delegated case, and
 * otherwise the length snprintf() reports for the full message.  If
 * snprintf() itself fails, `s' is set to the empty string (when
 * n > 0) and 0 is returned, rather than letting the negative result
 * wrap around to a huge size_t.
 */
size_t
regxml_xml_strerror(struct regxml_xml *xml, int code, char *s, size_t n)
{
	XML_Size lineno;
	const XML_LChar *errstr;
	int len;

	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(s != NULL);

	if (code != REGXML_EXML || !xml->rxx_expaterror)
		return regxml_strerror(xml->rxx_reg, code, s, n);

	lineno = XML_GetCurrentLineNumber(xml->rxx_parser);
	errstr = XML_ErrorString(XML_GetErrorCode(xml->rxx_parser));
	if (xml->rxx_filename != NULL) {
		len = snprintf(s, n, "%s:%lu: %s", xml->rxx_filename,
		    (unsigned long)lineno, errstr);
	} else {
		len = snprintf(s, n, "%lu: %s",
		    (unsigned long)lineno, errstr);
	}
	if (len < 0) {
		/* Formatting failed: leave a well-defined empty message. */
		if (n > 0)
			s[0] = '\0';
		return 0;
	}
	return (size_t)len;
}

/*
 * Hand out an input buffer of at least `n' bytes in `*bufptr'.  The
 * caller fills it and then passes a NULL string to
 * regxml_xml_pushraw().
 *
 * In expand mode the buffer comes from Expat (XML_GetBuffer()).
 * Otherwise it is a private heap buffer that is kept across calls and
 * resized to the requested size each time, so a later request that is
 * larger than an earlier one never gets a buffer that is too small.
 * The pointer may therefore change between calls; always use the one
 * stored in `*bufptr' by the most recent call.
 *
 * Returns 0 on success, REGXML_ESYSTEM if no buffer could be
 * obtained.  On failure a previously returned private buffer stays
 * valid and owned by `xml'.
 */
int
regxml_xml_getbuf(struct regxml_xml *xml, char **bufptr, size_t n)
{
	char *buf;

	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(bufptr != NULL);
	_DIAGASSERT(n <= INT_MAX);

	if (xml->rxx_expand)
		buf = XML_GetBuffer(xml->rxx_parser, (int)n);
	else {
		/*
		 * realloc(NULL, ...) behaves like malloc().  Never ask
		 * for 0 bytes: that could free the old buffer and
		 * return NULL, leaving rxx_xmlbuf dangling.
		 */
		buf = realloc(xml->rxx_xmlbuf, n != 0 ? n : 1);
	}
	if (buf == NULL) {
		/*
		 * Out of memory condition, but strerror() may not
		 * return something useful, since the allocation may
		 * have been wrapped in an Expat library call.
		 */
		return REGXML_ESYSTEM;
	}

	xml->rxx_xmlbuf = buf;
	*bufptr = buf;
	return 0;
}

/*
 * Feed `n' bytes to Expat as a non-final chunk.  A NULL `s' means the
 * data already sits in the buffer obtained through XML_GetBuffer().
 */
static enum XML_Status
parse(struct regxml_xml *xml, const char *s, size_t n)
{
	int len = (int)n;

	if (s != NULL)
		return XML_Parse(xml->rxx_parser, s, len, 0);
	return XML_ParseBuffer(xml->rxx_parser, len, 0);
}

/*
 * Feed `n' bytes to Expat with every '&' replaced by "&amp;", so that
 * Expat hands entity references back unexpanded.  A NULL `s' selects
 * the buffer stored in rxx_xmlbuf.
 *
 * Returns 0 on success.  On failure, returns the error recorded by a
 * handler if there is one; otherwise flags the failure as Expat's own
 * and returns REGXML_EXML.
 */
static int
pushde(struct regxml_xml *xml, const char *s, size_t n)
{
	const char *amp;
	size_t chunk;

	if (s == NULL)
		s = xml->rxx_xmlbuf;

	while (n > 0 && (amp = memchr(s, '&', n)) != NULL) {
		/* Text before the ampersand, then its escaped form. */
		chunk = amp - s;
		if (XML_Parse(xml->rxx_parser, s, (int)chunk, 0) !=
		    XML_STATUS_OK ||
		    XML_Parse(xml->rxx_parser, "&amp;", 5, 0) !=
		    XML_STATUS_OK)
			goto fail;
		n -= chunk + 1;
		s = amp + 1;
	}

	/* Whatever follows the last ampersand. */
	if (n != 0 &&
	    XML_Parse(xml->rxx_parser, s, (int)n, 0) != XML_STATUS_OK)
		goto fail;
	return 0;

fail:
	if (xml->rxx_error == 0) {
		xml->rxx_expaterror = 1;
		return REGXML_EXML;
	}
	return xml->rxx_error;
}

/*
 * Push `n' bytes of XML input.  A NULL `s' means the data was written
 * into the buffer returned by regxml_xml_getbuf().
 *
 * Returns 0 on success, the error recorded by a handler if one
 * failed, or REGXML_EXML when Expat itself rejected the input.
 */
int
regxml_xml_pushraw(struct regxml_xml *xml, const char *s, size_t n)
{
	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(n <= INT_MAX);

	/* Start each push with a clean error state. */
	xml->rxx_expaterror = 0;
	xml->rxx_error = 0;

	if (xml->rxx_expand) {
		if (parse(xml, s, n) == XML_STATUS_OK)
			return 0;
		if (xml->rxx_error == 0) {
			xml->rxx_expaterror = 1;
			return REGXML_EXML;
		}
		return xml->rxx_error;
	}

	/* Not expanding entities: feed the input double-escaped. */
	return pushde(xml, s, n);
}

/*
 * Push a NUL-terminated string of XML input; see regxml_xml_pushraw()
 * for the return values.
 */
int
regxml_xml_push(struct regxml_xml *xml, const char *s)
{
	size_t len;

	_DIAGASSERT(s != NULL);

	len = strlen(s);
	return regxml_xml_pushraw(xml, s, len);
}

/*
 * Signal the end of the input: finish the Expat parse with an empty
 * final chunk, then call regxml_end() on the underlying instance.
 *
 * Returns 0 on success, the error recorded by a handler, REGXML_EXML
 * when Expat reported the failure, or the error from regxml_end().
 */
int
regxml_xml_end(struct regxml_xml *xml)
{
	_DIAGASSERT(xml != NULL);

	xml->rxx_expaterror = 0;
	xml->rxx_error = 0;

	if (XML_Parse(xml->rxx_parser, "", 0, 1) == XML_STATUS_OK)
		return regxml_end(xml->rxx_reg);

	if (xml->rxx_error == 0) {
		xml->rxx_expaterror = 1;
		return REGXML_EXML;
	}
	return xml->rxx_error;
}

/*
 * Return 1 if the NUL-terminated string `s' consists solely of
 * isspace() characters (the empty string qualifies), 0 otherwise.
 */
static int
whitespace(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;

	/* Skip the leading run of whitespace; see where it stops. */
	while (*p != '\0' && isspace(*p))
		p++;
	return *p == '\0';
}

static int
flushbuffer(struct regxml_xml *xml)
{
	int r;

	if (xml->rxx_buffer.rxb_index == 0)
		return 0;
	if (!xml->rxx_trim || !whitespace(xml->rxx_buffer.rxb_base)) {
		xml->rxx_reg->rx_lineno =
		    XML_GetCurrentLineNumber(xml->rxx_parser);
		r = regxml_pushtext(xml->rxx_reg, xml->rxx_buffer.rxb_base);
		if (r != 0)
			return r;
	}
	xml->rxx_buffer.rxb_index = 0;
	return 0;
}

/*
 * Expat start-element handler: flush pending text, then push the
 * opening tag `name' with its attributes `attrv' to regxml.
 *
 * On failure the error is stored in rxx_error and the parser is
 * stopped.  Expat may still invoke handlers after XML_StopParser()
 * (see the note at the top of this file), so once an error has been
 * recorded the handler does nothing; this keeps late callbacks from
 * touching the tree or replacing the original error code.
 */
static void
start(void *arg, const char *name, const char *attrv[])
{
	struct regxml_xml *xml;
	int r;

	xml = arg;
	if (xml->rxx_error != 0)
		return;
	r = flushbuffer(xml);
	if (r != 0)
		goto fail;
	xml->rxx_reg->rx_lineno = XML_GetCurrentLineNumber(xml->rxx_parser);
	r = regxml_pushopening(xml->rxx_reg, name, __UNCONST(attrv));
	if (r != 0)
		goto fail;
	return;

fail:
	xml->rxx_error = r;
	XML_StopParser(xml->rxx_parser, XML_FALSE);
}

/*
 * Expat end-element handler: flush pending text, then push a closing
 * tag to regxml.  `name' is not used.
 *
 * On failure the error is stored in rxx_error and the parser is
 * stopped.  Expat may still invoke handlers after XML_StopParser()
 * (see the note at the top of this file), so once an error has been
 * recorded the handler does nothing; this keeps late callbacks from
 * touching the tree or replacing the original error code.
 */
/* ARGSUSED */
static void
end(void *arg, const char *name)
{
	struct regxml_xml *xml;
	int r;

	xml = arg;
	if (xml->rxx_error != 0)
		return;
	r = flushbuffer(xml);
	if (r != 0)
		goto fail;
	xml->rxx_reg->rx_lineno = XML_GetCurrentLineNumber(xml->rxx_parser);
	r = regxml_pushclosing(xml->rxx_reg);
	if (r != 0)
		goto fail;
	return;

fail:
	xml->rxx_error = r;
	XML_StopParser(xml->rxx_parser, XML_FALSE);
}

/*
 * Expat character-data handler: append the `len' bytes at `s' to the
 * pending text buffer.  The text is pushed to regxml later, when the
 * next piece of markup flushes the buffer.
 *
 * On failure the error is stored in rxx_error and the parser is
 * stopped.  Expat may still invoke handlers after XML_StopParser()
 * (see the note at the top of this file), so once an error has been
 * recorded the handler does nothing and the original error code is
 * preserved.
 */
static void
text(void *arg, const XML_Char *s, int len)
{
	struct regxml_xml *xml;
	int r;

	xml = arg;
	if (xml->rxx_error != 0)
		return;
	r = regxml_buffer_appendraw(&xml->rxx_buffer, s, (size_t)len);
	if (r != 0) {
		xml->rxx_error = r;
		XML_StopParser(xml->rxx_parser, XML_FALSE);
	}
}

/*
 * Expat comment handler: flush pending text, then push the comment
 * body `s' to regxml.
 *
 * On failure the error is stored in rxx_error and the parser is
 * stopped.  Expat may still invoke handlers after XML_StopParser()
 * (see the note at the top of this file), so once an error has been
 * recorded the handler does nothing; this keeps late callbacks from
 * touching the tree or replacing the original error code.
 */
static void
comment(void *arg, const XML_Char *s)
{
	struct regxml_xml *xml;
	int r;

	xml = arg;
	if (xml->rxx_error != 0)
		return;
	r = flushbuffer(xml);
	if (r != 0)
		goto fail;
	xml->rxx_reg->rx_lineno = XML_GetCurrentLineNumber(xml->rxx_parser);
	r = regxml_pushcomment(xml->rxx_reg, s);
	if (r != 0)
		goto fail;
	return;

fail:
	xml->rxx_error = r;
	XML_StopParser(xml->rxx_parser, XML_FALSE);
}

/*
 * Expat processing-instruction handler: flush pending text, then push
 * the instruction (target `name', data `s') to regxml.
 *
 * On failure the error is stored in rxx_error and the parser is
 * stopped.  Expat may still invoke handlers after XML_StopParser()
 * (see the note at the top of this file), so once an error has been
 * recorded the handler does nothing; this keeps late callbacks from
 * touching the tree or replacing the original error code.
 */
static void
pi(void *arg, const XML_Char *name, const XML_Char *s)
{
	struct regxml_xml *xml;
	int r;

	xml = arg;
	if (xml->rxx_error != 0)
		return;
	r = flushbuffer(xml);
	if (r != 0)
		goto fail;
	xml->rxx_reg->rx_lineno = XML_GetCurrentLineNumber(xml->rxx_parser);
	r = regxml_pushpi(xml->rxx_reg, name, s);
	if (r != 0)
		goto fail;
	return;

fail:
	xml->rxx_error = r;
	XML_StopParser(xml->rxx_parser, XML_FALSE);
}

/*
 * Set the file name used as a prefix in regxml_xml_strerror()
 * messages.  The string is copied; the caller keeps ownership of
 * `filename'.
 *
 * Returns 0 on success or REGXML_ESYSTEM if the copy could not be
 * allocated.  A previously set name is released on success and left
 * in place on failure, so repeated calls neither leak nor lose it.
 */
int
regxml_xml_setfilename(struct regxml_xml *xml, const char *filename)
{
	char *copy;

	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(filename != NULL);

	/* Duplicate first so a failure leaves the old name intact. */
	copy = strdup(filename);
	if (copy == NULL)
		return REGXML_ESYSTEM;

	free(xml->rxx_filename);
	xml->rxx_filename = copy;
	return 0;
}

/*
 * Set how many indent characters are printed per nesting level.
 * A value of 0 turns indentation off.
 */
void
regxml_xml_setindent(struct regxml_xml *xml, unsigned int baseindent)
{
	_DIAGASSERT(xml != NULL);

	xml->rxx_baseindent = baseindent;
}

/*
 * Install the output callback used by the regxml_xml_print*()
 * functions.  `print' is called printf-style with `arg' as its first
 * argument and must return 0 on success.
 */
void
regxml_xml_setprint(struct regxml_xml *xml,
    int (*print)(void *, const char *, ...), void *arg)
{
	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(print != NULL);

	xml->rxx_arg = arg;
	xml->rxx_print = print;
}

/*
 * Print the indentation for the current depth: rxx_baseindent fill
 * characters (tab or space, per rxx_tabindent) for each level.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
static int
indent(struct regxml_xml *xml)
{
	regxml_xid_t level;
	unsigned int col;
	char fill;

	if (xml->rxx_baseindent == 0)
		return 0;

	fill = xml->rxx_tabindent ? '\t' : ' ';
	level = 0;
	while (level < xml->rxx_depth) {
		col = 0;
		while (col < xml->rxx_baseindent) {
			if (xml->rxx_print(xml->rxx_arg, "%c", fill) != 0)
				return REGXML_EUSER;
			++col;
		}
		++level;
	}
	return 0;
}

/*
 * Start a new, indented line if that is allowed here: trimming must
 * be enabled and the last thing printed must have been markup.
 * Otherwise print nothing.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
static int
tryindent(struct regxml_xml *xml)
{
	if (xml->rxx_trim && xml->rxx_canindent) {
		if (xml->rxx_print(xml->rxx_arg, "\n") != 0)
			return REGXML_EUSER;
		return indent(xml);
	}
	return 0;
}

/*
 * Print the separator that goes in front of an attribute: a single
 * space normally, or a newline (followed by indentation when
 * indenting is allowed) in attribute-per-line mode.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
static int
attrindent(struct regxml_xml *xml)
{
	if (xml->rxx_attrline) {
		if (xml->rxx_print(xml->rxx_arg, "\n") != 0)
			return REGXML_EUSER;
		if (xml->rxx_trim && xml->rxx_canindent)
			return indent(xml);
		return 0;
	}

	if (xml->rxx_print(xml->rxx_arg, " ") != 0)
		return REGXML_EUSER;
	return 0;
}

/*
 * Print the string `s'.  In expand mode the five XML special
 * characters (& ' " < >) are replaced by their predefined entities;
 * otherwise the string is printed verbatim.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
static int
printq(struct regxml_xml *xml, const char *s)
{
	const char *p;
	int rc;

	if (!xml->rxx_expand) {
		rc = xml->rxx_print(xml->rxx_arg, "%s", s);
		return rc != 0 ? REGXML_EUSER : 0;
	}

	for (p = s; *p != '\0'; ++p) {
		if (*p == '&')
			rc = xml->rxx_print(xml->rxx_arg, "&amp;");
		else if (*p == '\'')
			rc = xml->rxx_print(xml->rxx_arg, "&apos;");
		else if (*p == '"')
			rc = xml->rxx_print(xml->rxx_arg, "&quot;");
		else if (*p == '<')
			rc = xml->rxx_print(xml->rxx_arg, "&lt;");
		else if (*p == '>')
			rc = xml->rxx_print(xml->rxx_arg, "&gt;");
		else
			rc = xml->rxx_print(xml->rxx_arg, "%c", *p);
		if (rc != 0)
			return REGXML_EUSER;
	}
	return 0;
}

/*
 * Print an opening tag (`empty' == 0) or an empty-element tag
 * (`empty' != 0) named `name'.  `attrv', if not NULL, is a
 * NULL-terminated list of alternating attribute names and values;
 * values are printed single-quoted through printq().
 *
 * An opening tag increases the nesting depth.  Either way the output
 * is marked as ending in markup afterwards.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
static int
printopeningorempty(struct regxml_xml *xml, int empty,
    const char *name, char * const *attrv)
{
	char * const *pair;
	int rc;

	rc = tryindent(xml);
	if (rc != 0)
		return rc;
	if (xml->rxx_print(xml->rxx_arg, "<%s", name) != 0)
		return REGXML_EUSER;

	if (attrv != NULL) {
		pair = attrv;
		while (pair[0] != NULL && pair[1] != NULL) {
			rc = attrindent(xml);
			if (rc != 0)
				return rc;
			if (xml->rxx_print(xml->rxx_arg, "%s='", pair[0]) != 0)
				return REGXML_EUSER;
			rc = printq(xml, pair[1]);
			if (rc != 0)
				return rc;
			if (xml->rxx_print(xml->rxx_arg, "'") != 0)
				return REGXML_EUSER;
			pair += 2;
		}
		/* In attribute-per-line mode the tag end gets its own line. */
		if (xml->rxx_attrline) {
			rc = attrindent(xml);
			if (rc != 0)
				return rc;
		}
	}

	if (!empty) {
		if (xml->rxx_print(xml->rxx_arg, ">") != 0)
			return REGXML_EUSER;
		++xml->rxx_depth;
	} else {
		if (xml->rxx_print(xml->rxx_arg, "/>") != 0)
			return REGXML_EUSER;
	}

	xml->rxx_canindent = 1;
	return 0;
}

/*
 * Print a string of literal XML, tracking tags well enough to keep
 * the nesting depth and indentation state up to date.
 *
 * The scanner is deliberately simple: '<' starts a piece of markup
 * and the next '>' ends it.  The character after '<' selects the
 * kind: '/' is a closing tag (depth decreases), '!' and '?' are
 * declarations, comments and processing instructions (depth is
 * unchanged), anything else is an opening tag (depth increases at
 * '>' unless it is preceded by '/').  Every piece of markup must be
 * complete within `s'.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails, and
 * REGXML_EXML if the string is malformed: a closing tag at depth 0, a
 * '>' outside markup, markup left open at the end of the string, or a
 * '<' as the very last character.
 */
int
regxml_xml_print(struct regxml_xml *xml, const char *s)
{
	char mode;
	int r;

	/* 'c': character data, '<': opening, '/': closing, '!': other */
	mode = 'c';
	for (; *s != '\0'; ++s) {
		switch (*s) {
		case '<':
			switch (*++s) {
			case '\0':
				/*
				 * Lone '<' at the end of the string.  Bail
				 * out here: the loop increment would step
				 * past the terminating NUL otherwise.
				 */
				return REGXML_EXML;
			case '/':
				if (xml->rxx_depth == 0)
					return REGXML_EXML;
				--xml->rxx_depth;
				mode = '/';
				break;
			case '!':
			case '?':
				/* Neither opens nor closes an element. */
				mode = '!';
				break;
			default:
				mode = '<';
			}
			r = tryindent(xml);
			if (r != 0)
				return r;
			if (xml->rxx_print(xml->rxx_arg, "<") != 0)
				return REGXML_EUSER;
			if (xml->rxx_print(xml->rxx_arg, "%c", *s) != 0)
				return REGXML_EUSER;
			break;

		case '>':
			if (xml->rxx_print(xml->rxx_arg, ">") != 0)
				return REGXML_EUSER;
			switch (mode) {
			case '<':
				/* "/>" is an empty element: no new level. */
				if (*(s-1) != '/')
					++xml->rxx_depth;
				break;
			case '/':
			case '!':
				break;
			default:
				return REGXML_EXML;
			}
			xml->rxx_canindent = 1;
			mode = 'c';
			break;

		default:
			if (xml->rxx_print(xml->rxx_arg, "%c", *s) != 0)
				return REGXML_EUSER;
			xml->rxx_canindent = 0;
		}
	}
	/* Markup still open at the end of the string. */
	if (mode != 'c')
		return REGXML_EXML;
	return 0;
}

/*
 * Print the opening tag `name' with the attributes in `attrv' (a
 * NULL-terminated name/value list, or NULL) and go one level deeper.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
int
regxml_xml_printopening(struct regxml_xml *xml,
    const char *name, char * const *attrv)
{
	int rc;

	_DIAGASSERT(xml != NULL);

	rc = printopeningorempty(xml, 0, name, attrv);
	return rc;
}

/*
 * Print the closing tag for `name' and go one level up.
 *
 * Returns 0 on success, REGXML_EXML if no element is open, and
 * REGXML_EUSER if the print callback fails.
 */
int
regxml_xml_printclosing(struct regxml_xml *xml, const char *name)
{
	int rc;

	_DIAGASSERT(xml != NULL);

	if (xml->rxx_depth == 0)
		return REGXML_EXML;

	/* Leave the level first so the tag lines up with its opener. */
	xml->rxx_depth -= 1;
	if ((rc = tryindent(xml)) != 0)
		return rc;
	if (xml->rxx_print(xml->rxx_arg, "</%s>", name) != 0)
		return REGXML_EUSER;

	xml->rxx_canindent = 1;
	return 0;
}

/*
 * Print the empty-element tag `name' with the attributes in `attrv'
 * (a NULL-terminated name/value list, or NULL).  The depth does not
 * change.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
int
regxml_xml_printempty(struct regxml_xml *xml,
    const char *name, char * const *attrv)
{
	int rc;

	_DIAGASSERT(xml != NULL);

	rc = printopeningorempty(xml, 1, name, attrv);
	return rc;
}

/*
 * Print `value' as character data (escaped in expand mode).  Since
 * the output now ends in text, the next piece of markup will not be
 * moved onto a new indented line.
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
int
regxml_xml_printtext(struct regxml_xml *xml, const char *value)
{
	int rc;

	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(value != NULL);

	if ((rc = printq(xml, value)) != 0)
		return rc;

	xml->rxx_canindent = 0;
	return 0;
}

/*
 * Print a comment with body `value', on a fresh indented line when
 * indenting is allowed.  The body goes through printq().
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
int
regxml_xml_printcomment(struct regxml_xml *xml, const char *value)
{
	int rc;

	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(value != NULL);

	if ((rc = tryindent(xml)) != 0)
		return rc;

	if (xml->rxx_print(xml->rxx_arg, "<!--") != 0)
		return REGXML_EUSER;
	if ((rc = printq(xml, value)) != 0)
		return rc;
	if (xml->rxx_print(xml->rxx_arg, "-->") != 0)
		return REGXML_EUSER;

	xml->rxx_canindent = 1;
	return 0;
}

/*
 * Print a processing instruction with target `name' and data
 * `value', on a fresh indented line when indenting is allowed.  The
 * data goes through printq().
 *
 * Returns 0 on success, REGXML_EUSER if the print callback fails.
 */
int
regxml_xml_printpi(struct regxml_xml *xml,
    const char *name, const char *value)
{
	int rc;

	_DIAGASSERT(xml != NULL);
	_DIAGASSERT(name != NULL);
	_DIAGASSERT(value != NULL);

	if ((rc = tryindent(xml)) != 0)
		return rc;

	if (xml->rxx_print(xml->rxx_arg, "<?%s ", name) != 0)
		return REGXML_EUSER;
	if ((rc = printq(xml, value)) != 0)
		return rc;
	if (xml->rxx_print(xml->rxx_arg, "?>") != 0)
		return REGXML_EUSER;

	xml->rxx_canindent = 1;
	return 0;
}

/*
 * Print callback that writes to standard output; `arg' is ignored.
 * Returns 0 on success and -1 if vprintf() reports an error.
 */
/* ARGSUSED */
int
regxml_xml_printf(void *arg, const char *fmt, ...)
{
	va_list args;
	int written;

	_DIAGASSERT(fmt != NULL);

	va_start(args, fmt);
	written = vprintf(fmt, args);
	va_end(args);

	if (written < 0)
		return -1;
	return 0;
}

/*
 * Print callback that writes to the stdio stream passed as `arg'
 * (a FILE *).  Returns 0 on success and -1 if vfprintf() reports an
 * error.
 */
int
regxml_xml_fprintf(void *arg, const char *fmt, ...)
{
	FILE *stream = arg;
	va_list args;
	int written;

	_DIAGASSERT(fmt != NULL);

	va_start(args, fmt);
	written = vfprintf(stream, fmt, args);
	va_end(args);

	if (written < 0)
		return -1;
	return 0;
}
