/*
 * Copyright (c) 2009, 2010 Nhat Minh Lê
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifndef __NetBSD__
#include <regxml/compat.h>
#endif
#include <regxml/regxml.h>
#include <regxml/xml.h>

#define DEFAULTINDENT 2
#define INBUFSIZE 8192
#define ERRBUFSIZE 128

static struct regxml *reg;
static struct regxml_elem **match;
static struct regxml_xml *xml;
static char *buf;

static char *pattern;
static char *replacement[10];
static char *scope, *scopeprefix;
static int noexpandentities, preservews;
static unsigned int baseindent = DEFAULTINDENT;
static int tabindent, attrline;
static int caseinsensitive;

static const char *filename;
static int fd;

static int done;
static int found;

static void die(int);
static void printopening(struct regxml_elem *);
static void printclosing(struct regxml_elem *);
static void printscope(void);
static void printsubmatch(regxml_id_t);
static void printreplacement(void);
static void process(void);
static void fetchdata(void);
static void dofile(int);
static void setformat(char *);
static void usage(void);

/*
 * Format the library error `code' into a message and terminate the
 * program with exit status 2.  The message comes from the XML object
 * when one exists, and from the pattern object otherwise.
 */
static void
die(int code)
{
	char msg[ERRBUFSIZE];

	if (xml == NULL)
		(void)regxml_strerror(reg, code, msg, sizeof msg);
	else
		(void)regxml_xml_strerror(xml, code, msg, sizeof msg);

	errx(2, "%s", msg);
}

/*
 * Print the opening form of element `el' through the XML printer.
 *
 * Nodes are printed as an empty element when rxe_highid - rxe_lowid
 * is 1 and as an opening tag otherwise; text, comments and processing
 * instructions are printed whole.  Any other element type aborts.
 */
static void
printopening(struct regxml_elem *el)
{
	if (el->rxe_type == REGXML_NODE) {
		if (el->rxe_highid - el->rxe_lowid != 1) {
			(void)regxml_xml_printopening(xml,
			    el->rxe_name, el->rxe_attrv);
		} else {
			(void)regxml_xml_printempty(xml,
			    el->rxe_name, el->rxe_attrv);
		}
		return;
	}

	if (el->rxe_type == REGXML_TEXT) {
		(void)regxml_xml_printtext(xml, el->rxe_value);
		return;
	}

	if (el->rxe_type == REGXML_COMMENT) {
		(void)regxml_xml_printcomment(xml, el->rxe_value);
		return;
	}

	if (el->rxe_type == REGXML_PI) {
		(void)regxml_xml_printpi(xml, el->rxe_name, el->rxe_value);
		return;
	}

	/* NOTREACHED */
	abort();
}

/*
 * Print the closing tag of element `el'.  Only nodes have one, and a
 * node whose rxe_highid - rxe_lowid is 1 was already printed as an
 * empty element by printopening(), so nothing is emitted for it.
 */
static void
printclosing(struct regxml_elem *el)
{
	if (el->rxe_type != REGXML_NODE)
		return;
	if (el->rxe_highid - el->rxe_lowid == 1)
		return;

	(void)regxml_xml_printclosing(xml, el->rxe_name);
}

/*
 * Emit a "regxml-scope" processing instruction whose content is the
 * scope prefix followed by a dot and the value of reg->rx_maxmatch.
 * Exits with status 2 if the string cannot be allocated.
 */
static void
printscope(void)
{
	char *label = NULL;
	int len;

	len = asprintf(&label, "%s.%u", scopeprefix,
	    (unsigned int)reg->rx_maxmatch);
	if (len == -1)
		err(2, "asprintf failed");

	(void)regxml_xml_printpi(xml, "regxml-scope", label);
	free(label);
}

/*
 * Print the elements of submatch `mid', i.e. everything an iterator
 * over match[2*mid] .. match[2*mid+1] yields.  More input is fetched
 * whenever the iterator reports REGXML_EAGAIN; any status other than
 * opening, closing, EAGAIN or EOF is fatal.
 */
static void
printsubmatch(regxml_id_t mid)
{
	struct regxml_iter it;
	int status;

	regxml_inititer(&it, match[2*mid], match[2*mid+1]);

	while ((status = regxml_fetchiter(&it)) != REGXML_EOF) {
		if (status == REGXML_OPENING)
			printopening(it.rxi_ptr);
		else if (status == REGXML_CLOSING)
			printclosing(it.rxi_ptr);
		else if (status == REGXML_EAGAIN)
			fetchdata();
		else
			die(status);
	}
}

/*
 * Print the replacement string registered for the current match,
 * replacement[reg->rx_maxmatch], which the caller must have checked
 * to be non-NULL.
 *
 * Literal text is handed to regxml_xml_print(); each "$N", where N is
 * a single decimal digit, is replaced by the content of submatch N.
 * A '$' that is not followed by a digit, or a reference to a submatch
 * that does not exist, terminates the program with status 2.
 */
static void
printreplacement(void)
{
	char *s, *start;
	regxml_id_t mid;
	int r;

	start = replacement[reg->rx_maxmatch];
	for (s = start; *s != '\0'; ++s) {
		if (*s != '$')
			continue;

		/*
		 * Temporarily terminate the string at the '$' so the
		 * literal text before it can be printed in place.
		 */
		*s = '\0';
		r = regxml_xml_print(xml, start);
		*s = '$';
		if (r != 0)
			die(r);

		/*
		 * Refuse anything but a digit: a trailing '$' would
		 * otherwise consume the terminating NUL and let the
		 * loop run past the end of the string.
		 */
		if (!isdigit((unsigned char)s[1]))
			errx(2, "'$' must be followed by a digit");
		++s;
		mid = *s - '0';

		/*
		 * The match array is allocated with rx_msize entries,
		 * so both slots of the pair must lie below that bound.
		 */
		if (2*mid+1 >= reg->rx_msize || match[2*mid+1] == NULL)
			errx(2, "nonexistent submatch %d", (int)mid);
		printsubmatch(mid);

		start = s + 1;
	}

	/* Remaining literal text after the last reference. */
	r = regxml_xml_print(xml, start);
	if (r != 0)
		die(r);
}

/*
 * Main loop for one input: walk the document, copying it to the
 * output and substituting replacements for matches.
 *
 * Two states are distinguished by `end':
 *  - end == NULL: not inside a match; regxml_match() is tried at the
 *    current position.
 *  - end != NULL: inside a match whose last element is `end';
 *    regxml_fetch() advances through it.
 *
 * `end' and `doprint' are static, so their values persist between
 * calls.  The function returns when the library reports REGXML_EOF;
 * REGXML_EAGAIN triggers fetchdata() and a retry, and any other
 * unexpected status is fatal through die().
 */
static void
process(void)
{
	static struct regxml_elem *end;
	static int doprint;
	int r;

	for (;;) {
		if (end == NULL) {
			r = regxml_match(reg, match);
			switch (r) {
			case 0:
				/* Match: remember where it ends. */
				end = match[1];
				if (scopeprefix != NULL)
					printscope();
				if (replacement[reg->rx_maxmatch] != NULL) {
					/*
					 * Print the replacement and stay
					 * silent until `end' is passed.
					 */
					doprint = 0;
					printreplacement();
					found = 1;
				} else {
					/* No replacement: echo the match. */
					doprint = 1;
					printopening(reg->rx_start);
				}
				break;
			case REGXML_NOMATCH:
				printopening(reg->rx_start);
				break;
			case REGXML_CLOSING:
				printclosing(reg->rx_start);
				break;
			default:
				goto eret;
			}
		} else {
			r = regxml_fetch(reg);
			switch (r) {
			case REGXML_OPENING:
				if (doprint)
					printopening(reg->rx_start);
				break;
			case REGXML_CLOSING:
				if (doprint)
					printclosing(reg->rx_start);
				if (reg->rx_start == end) {
					/* Left the match: back to matching. */
					if (scopeprefix != NULL)
						printscope();
					end = NULL;
				}
				break;
			default:
				goto eret;
			}
		}
		continue;

		/* Shared handling of statuses neither state consumed. */
	eret:
		switch (r) {
		case REGXML_EAGAIN:
			fetchdata();
			break;
		case REGXML_EOF:
			return;
		default:
			die(r);
		}
	}
}

/*
 * Feed the XML object with up to INBUFSIZE more bytes read from `fd'.
 *
 * The data is read straight into the buffer obtained from
 * regxml_xml_getbuf() and then committed with regxml_xml_pushraw().
 * When read() returns 0 the XML object is told the input has ended
 * and `done' is set; asking for more data after that is fatal
 * (REGXML_EAGAIN), as is any read or library error.
 */
static void
fetchdata(void)
{
	ssize_t nread;
	int rc;

	if (done)
		die(REGXML_EAGAIN);

	if ((rc = regxml_xml_getbuf(xml, &buf, INBUFSIZE)) != 0)
		die(rc);

	nread = read(fd, buf, INBUFSIZE);
	if (nread == -1)
		err(2, "read failed");

	if (nread != 0) {
		/* NULL data pointer: the bytes are already in `buf'. */
		if ((rc = regxml_xml_pushraw(xml, NULL, (size_t)nread)) != 0)
			die(rc);
		return;
	}

	/* End of input. */
	if ((rc = regxml_xml_end(xml)) != 0)
		die(rc);
	done = 1;
}

static void
dofile(int flags)
{
	int r;

	if (strcmp(filename, "-") == 0)
		fd = 0;
	else {
		fd = open(filename, O_RDONLY);
		if (fd == -1)
			err(2, "open failed");
	}

	xml = NULL;		/* For die(). */
	r = regxml_xml_create(&xml, reg, flags);
	if (r != 0)
		die(r);

	regxml_xml_setfilename(xml, filename);
	regxml_xml_setindent(xml, baseindent);

	fetchdata();
	process();

	regxml_xml_destroy(xml);
	regxml_reset(reg);

	(void)printf("\n");

	if (fd != 0) {
		if (close(fd) == -1)
			err(2, "close failed");
	}
}

/*
 * Parse the argument of -I: a decimal indentation width followed by
 * optional flag letters.  'a' sets attrline, 't' sets tabindent;
 * any other trailing character is ignored.
 */
static void
setformat(char *arg)
{
	char *p;

	baseindent = strtoul(arg, &p, 10);

	while (*p != '\0') {
		if (*p == 'a')
			attrline = 1;
		else if (*p == 't')
			tabindent = 1;
		++p;
	}
}

/*
 * Print the command synopsis to standard error and exit with
 * status 3.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage:\t%s [-EiP] [-d sprefix] [-I indent] [-s scope] "
	    "pattern [-1 replacement ...] [file ...]\n", prog);
	exit(3);
}

/*
 * Entry point.
 *
 * Parses the options, then the pattern, then any number of
 * "-N replacement" pairs (N a single digit), and treats what is left
 * as input files; with no file, standard input is processed.
 *
 * Exit status is 0 if at least one replacement was printed and 1
 * otherwise; errors exit with 2 and usage errors with 3.
 */
int
main(int argc, char *argv[])
{
	int ch, rc, rxflags, xmlflags;

	setprogname(argv[0]);

	while ((ch = getopt(argc, argv, "d:EI:iPs:")) != -1) {
		switch (ch) {
		case 'E':
			noexpandentities = 1;
			break;
		case 'i':
			caseinsensitive = 1;
			break;
		case 'P':
			preservews = 1;
			break;
		case 'I':
			/* Explicit formatting overrides an earlier -P. */
			preservews = 0;
			setformat(optarg);
			break;
		case 'd':
			scopeprefix = optarg;
			break;
		case 's':
			scope = optarg;
			break;
		default:
			usage();
		}
	}
	argv += optind;
	argc -= optind;

	if (argc == 0)
		usage();
	pattern = argv[0];
	++argv;
	--argc;

	/* Consume "-N replacement" pairs that follow the pattern. */
	for (;;) {
		const char *opt = argv[0];

		if (opt == NULL || opt[0] != '-' ||
		    !isdigit((unsigned char)opt[1]) || opt[2] != '\0' ||
		    argv[1] == NULL)
			break;
		replacement[opt[1] - '0'] = argv[1];
		argv += 2;
		argc -= 2;
	}

	/* Compile the pattern. */
	rxflags = REGXML_LONGEST | REGXML_MATCHSTEP;
#if 0
	if (noexpandentities)
		rxflags |= REGXML_ENTITIZE;
#endif
	if (caseinsensitive)
		rxflags |= REGXML_ICASE;
	rc = regxml_create(&reg, pattern, rxflags);
	if (rc != 0)
		die(rc);

	match = malloc(reg->rx_msize * sizeof *match);
	if (match == NULL)
		err(2, "malloc failed");

	if (scope != NULL) {
		rc = regxml_setscope(reg, scope);
		if (rc != 0)
			die(rc);
	}

	/* Flags for the per-file XML objects. */
	xmlflags = 0;
	if (!preservews)
		xmlflags |= REGXML_XML_TRIM;
	if (!noexpandentities)
		xmlflags |= REGXML_XML_EXPAND;
	if (tabindent)
		xmlflags |= REGXML_XML_TABINDENT;
	if (attrline)
		xmlflags |= REGXML_XML_ATTRLINE;

	if (argc == 0) {
		filename = "-";
		dofile(xmlflags);
	} else {
		for (; argc > 0; --argc, ++argv) {
			filename = *argv;
			dofile(xmlflags);
		}
	}

	free(match);
	regxml_destroy(reg);

	if (found)
		return 0;
	return 1;
}
