//
// Parse a set of RCS files and print a summary of the commits.
// This is not a full RCS file parser; rather, it knows just
// enough of the RCS file syntax to extract the information we
// need.  Also, it only parses the initial metadata part of the
// RCS file, not the deltas (this is good for performance).
//
// The list of files to parse is read from standard
// input, null terminated (not newline terminated), as generated
// by "find -print0".
//
// The output is a set of (date, author, filename, revision)
// tuples.  The fields are separated by spaces, and each tuple
// is terminated by a newline.
//
// Dates are output in the RCS internal YYYY.MM.DD.hh.mm.ss format.
// The year is always in 4-digit form, regardless of whether the RCS
// file used 2 or 4 digits.
//
// Copyright (c) 2008-2020 Andreas Gustafsson.  All rights reserved.
// Please refer to the file COPYRIGHT for detailed copyright information.
//

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

#include "fatal.h"

// True if c is whitespace according to isspace(), or a backspace.
// This is a macro, so the argument is evaluated twice; pass only
// side-effect-free expressions.
#define iswhite(c) (isspace(c) || (c) == '\b')

// True if c is one of the punctuation characters '$', ',', ':' or ';'.
// The lexer treats these as word terminators.
#define isspecial(c) ((c) == '$' || (c) == ',' || (c) == ':' || (c) == ';')

using std::string;
using std::vector;

// Sentinel token buffers for the ':' and ';' punctuation tokens.
// The lexer passes these arrays themselves to token(), which
// recognizes them by pointer identity (e.g. "p == semicolon")
// rather than by comparing string contents.
char colon[] = ":";
char semicolon[] = ";";

// Parser states.  The state records what token() expects next:
//
//   P_ADMIN         in the admin section, looking for a keyword
//   P_DELTAS        in the delta section, looking for a keyword
//                   or a revision number
//   P_REV           just saw a revision number
//   P_DATE          next token is the value of "date"
//   P_AUTHOR        next token is the value of "author"
//   P_NEXT          next token is the value of "next"
//   P_HEAD          next token is the value of "head"
//   P_BRANCH        next token is the value of "branch"
//   P_SYMBOLS       in the "symbols" list, expecting a name or ';'
//   P_SYMBOL_COLON  expecting the ':' after a symbol name
//   P_SYMBOL_VALUE  next token is the revision for the symbol

enum pstate_t {
    P_ADMIN, P_DELTAS, P_REV, P_DATE, P_AUTHOR, P_NEXT, P_HEAD,
    P_BRANCH, P_SYMBOLS, P_SYMBOL_COLON, P_SYMBOL_VALUE
};

// The metadata collected for one revision (delta) of an RCS file.
struct commit {
    // Revision number, copied from the parser's current revision
    // when the "date" field is parsed
    string rev;
    // Date in YYYY.MM.DD.hh.mm.ss form; the parser prepends "19"
    // when the file used a 2-digit year
    string date;
    // Value of the "author" field
    string author;
    // Value of the "next" field when it is a revision number;
    // left empty otherwise, which ends the walk in process_file()
    string next;
};

struct file_state {
    file_state(): pstate(P_ADMIN) { }
    char buf[256];
    string rev;
    string head;
    string branch;
    string symname;
    struct commit *cur_commit;
    std::unordered_map<string, commit *> by_rev;
    std::unordered_map<string, commit *> prev;
    std::unordered_map<string, string> symbols;
    pstate_t pstate;
};

// split/join from scope/ws

// Split "s" into fields at each occurrence of "sep" and return the
// fields in order.  Separators are not included in the fields, and
// empty fields are preserved, so the result always contains at
// least one element (a single empty string for empty input).
//
// At most "max_fields" fields are returned; once that limit is
// reached, the last field holds the unsplit remainder of the
// string, separators included.  In particular, max_fields == 1
// returns the whole string as a single field.  A max_fields of 0
// imposes no limit.
inline std::vector<std::string>
split(const std::string &s, char sep, unsigned int max_fields = UINT_MAX)
{
    std::vector<std::string> v;
    std::string::const_iterator b = s.begin();
    const std::string::const_iterator e = s.end();
    for (;;) {
        // When only one more field is allowed, it takes the rest of
        // the string.  Checking before splitting (rather than after)
        // makes the limit effective for max_fields == 1 as well.
        if (v.size() + 1 == max_fields) {
            v.push_back(std::string(b, e));
            break;
        }
        const std::string::const_iterator i = std::find(b, e, sep);
        v.push_back(std::string(b, i));
        if (i == e)
            break;
        b = i + 1; // skip separator
    }
    return v;
}

// Concatenate the elements of the range [b, e), placing "sep"
// between adjacent elements.  An empty range yields an empty
// string; no separator is added before the first or after the
// last element.
template<class I>
inline std::string join(I b, I e, char sep) {
    std::string result;
    bool need_sep = false;
    for (; b != e; ++b) {
        if (need_sep)
            result += sep;
        result += *b;
        need_sep = true;
    }
    return result;
}

// Process a token "p" generated by the lexer, advancing the parser
// state machine in "f" and recording any information of interest.
//
// "p" is either a NUL-terminated word, or one of the sentinel
// arrays "colon" / "semicolon", which are recognized by pointer
// identity.  "fn" is the name of the file being parsed (currently
// unused here).
//
// Return true when done, i.e., when the "desc" keyword is reached
// and the rest of the file need not be read; otherwise return false.

bool token(file_state &f, char *p, const char *fn) {
    switch (f.pstate) {
    case P_ADMIN:
        if (isdigit((unsigned char) p[0])) {
            // A revision number ends the admin section
            goto delta;
        } else if (strcmp(p, "head") == 0) {
            f.pstate = P_HEAD;
        } else if (strcmp(p, "branch") == 0) {
            f.pstate = P_BRANCH;
        } else if (strcmp(p, "symbols") == 0) {
            f.pstate = P_SYMBOLS;
        } else if (strcmp(p, "desc") == 0) {
            goto desc;
        }
        break;
    case P_DELTAS:
        if (isdigit((unsigned char) p[0])) {
        delta:
            f.pstate = P_REV;
            f.rev = p;
        } else if (strcmp(p, "author") == 0) {
            f.pstate = P_AUTHOR;
        } else if (strcmp(p, "next") == 0) {
            f.pstate = P_NEXT;
        } else if (strcmp(p, "desc") == 0) {
        desc:
            return true;
        }
        break;
    case P_REV:
        // A revision number followed by "date" starts a new delta;
        // one followed by ';' is not a delta header, so go back to
        // scanning for keywords.
        if (strcmp(p, "date") == 0) {
            f.pstate = P_DATE;
        } else if (p == semicolon) {
            f.pstate = P_DELTAS;
        }
        break;
    case P_DATE: {
        f.cur_commit = new commit;
        size_t len = strlen(p);
        // Y2K: a 17-character date has a 2-digit year; it is
        // expanded to the 19-character 4-digit form by prepending
        // the century.
        bool shortform = (len == 17);
        if (shortform)
            len += 2;
        if (len != 19)
            fatal("bad date %s", p);
        if (shortform) {
            f.cur_commit->date = "19";
        }
        f.cur_commit->date += p;
        f.cur_commit->rev = f.rev;
        f.rev.clear();
        f.pstate = P_DELTAS;
        break;
    }
    case P_NEXT:
        // The value of "next" is either a revision number or,
        // when there is no next revision, some other token, in
        // which case the commit's "next" is left empty.
        if (isdigit((unsigned char) p[0])) {
            f.cur_commit->next = p;
            f.prev[f.cur_commit->next] = f.cur_commit;
        }
        // Either way, the commit becomes findable by revision
        f.by_rev[f.cur_commit->rev] = f.cur_commit;
        f.pstate = P_DELTAS;
        break;
    case P_AUTHOR:
        f.cur_commit->author = p;
        f.pstate = P_DELTAS;
        break;
    case P_HEAD:
        f.head = p;
        f.pstate = P_ADMIN;
        break;
    case P_BRANCH:
        f.branch = p;
        f.pstate = P_ADMIN;
        break;
    case P_SYMBOLS:
        // The symbols list is a sequence of name:revision pairs
        // terminated by a semicolon
        if (p == semicolon) {
            f.pstate = P_ADMIN;
        } else {
            f.symname = p;
            f.pstate = P_SYMBOL_COLON;
        }
        break;
    case P_SYMBOL_COLON:
        if (p != colon) {
            fatal("expected colon");
        }
        f.pstate = P_SYMBOL_VALUE;
        break;
    case P_SYMBOL_VALUE:
        f.symbols[f.symname] = p;
        f.pstate = P_SYMBOLS;
        break;
    default:
        abort();
    }
    return false;
}

// Lexer

// Lexer states:
//   S_INIT    between tokens
//   S_STRING  inside an @-delimited string
//   S_AT      just saw an '@' inside a string; it is either the
//             end of the string or the first of a doubled '@'
//   S_WORD    accumulating a word
enum state_t { S_INIT, S_STRING, S_AT, S_WORD };

// Read the RCS file "fn" one character at a time, split it into
// tokens, and feed each token to token() along with the state "f".
//
// Words are accumulated in f.buf and passed as NUL-terminated
// strings; ':' and ';' are passed as the "colon" and "semicolon"
// sentinels.  The other special characters ('$' and ',') and the
// contents of @-delimited strings are skipped without producing
// a token.
//
// Reading stops when token() returns true or at end of file.
// If the file cannot be opened, a warning is printed to stderr
// and "f" is left unmodified.
void parse_file(file_state &f, const char *fn) {
    FILE *fp = fopen(fn, "r");
    if (! fp) {
        // This can happen during incremental updates if a file has
        // been administratively removed from the repository, such
        // as src/sys/dev/microcode/radeon/bonaire_ce.bin,v in
        // September 2019.
        fprintf(stderr, "warning: open: %s: %s\n", fn, strerror(errno));
        return;
    }

    state_t state = S_INIT;
    // Write position within f.buf while in S_WORD
    char *p = 0;
    for (;;) {
        int c = getc(fp);
        if (c == EOF)
            break;

        switch (state) {
        case S_INIT:
        s_init:
            // Also entered via goto from S_AT and S_WORD to process
            // the character that terminated the previous token
            if (iswhite(c))
                state = S_INIT;
            else if (c == '@')
                state = S_STRING;
            else if (isspecial(c)) {
                // Only ':' and ';' are passed on to the parser
                if (c == ':') {
                    if (token(f, colon, fn))
                        goto done;
                } else if (c == ';') {
                    if (token(f, semicolon, fn))
                        goto done;
                }
                state = S_INIT;
            } else {
                // First character of a word
                p = f.buf;
                goto s_word;
            }
            break;
        case S_STRING:
            if (c == '@')
                // @ inside string; could be end of string
                // or the first of a doubled @
                state = S_AT;
            else
                state = S_STRING;
            break;
        case S_AT:
            if (c == '@')
                state = S_STRING; // doubled @
            else
                goto s_init; // end of string
            break;
        case S_WORD:
            if (iswhite(c) || isspecial(c) || c == '@') {
                // End of word
                *p++ = '\0';
                if (token(f, f.buf, fn))
                    goto done;
                goto s_init;
            } else {
            s_word:
                // Store the character if there is room, leaving
                // space for the terminating NUL; excess characters
                // of an overlong word are dropped
                if (p < f.buf + sizeof(f.buf) - 1)
                    *p++ = c;
                state = S_WORD;
            }
        }
    }
 done:
    fclose(fp);
}

void process_file(const char *fn, const string &tag) {
    file_state f;
    parse_file(f, fn);
    if (f.head.empty()) {
        fprintf(stderr, "warning: %s has no head\n", fn);
        return;
    }

    string rev;

    if (tag == "HEAD") {
        if (! f.branch.empty()) {
            // There is a default branch, such as a vendor branch
            vector<string> parts = split(f.branch, '.');
            size_t n = parts.size();
            if (n % 2 == 0)
                fatal("expected odd number of components in default branch %s",
                      f.branch);
            parts.push_back("1");
            rev = join(parts.begin(), parts.end(), '.');
        } else {
            rev = f.head;
        }
    } else {
        // Branch
        auto it = f.symbols.find(tag);
        if (it == f.symbols.end()) {
            rev = "";
        } else {
            string head = (*it).second;
            // Turn magic branch number into first revision on branch
            vector<string> magic = split(head, '.');
            size_t n = magic.size();
            if (n < 4)
                fatal("%s:%s does not look like a branch", tag, head);
            if (n % 2)
                fatal("%s:%s has odd number of components", tag, head);
            if (magic[n - 2] != "0")
                fatal("expected zero in %s:%s", tag, head);
            magic[n - 2] = magic[n - 1];
            magic[n - 1] = "1";
            rev = join(magic.begin(), magic.end(), '.');
            // Handle the case of a branch with no commits
            auto it1 = f.by_rev.find(rev);
            if (it1 == f.by_rev.end()) {
                rev = "";
            }
        }
    }

    for (;;) {
        if (rev.empty())
            break;
        auto it = f.by_rev.find(rev);
        if (it == f.by_rev.end()) {
            fprintf(stderr, "unknown revision %s\n", rev.c_str());
            break;
        }
        commit *c = (*it).second;
        printf("%s %s %s %s\n", c->date.c_str(), c->author.c_str(),
               fn, c->rev.c_str());
        rev = c->next;
    }
}

// Entry point.  The optional first command line argument is the
// tag (branch name) to report on; it defaults to "HEAD".
//
// File names are read from standard input, each terminated by a
// null character, and passed to process_file() one at a time.
// Trailing input that lacks a null terminator is not processed;
// a warning is printed instead.
int main(int argc, char **argv) {
    string tag = "HEAD";
    // Check argc rather than argv[1] so that argv is never read
    // out of bounds, even if the program is started with argc == 0
    if (argc > 1)
        tag = argv[1];

    char fn[PATH_MAX + 1];
    size_t i = 0;
    for (;;) {
        int c = getchar();
        if (c == EOF) {
            if (i != 0) {
                fprintf(stderr, "warning: last line lacks null termination\n");
            }
            break;
        } else if (c == 0) {
            // End of file name
            fn[i] = '\0';
            process_file(fn, tag);
            i = 0;
        } else {
            // Leave room for the terminating null character
            if (i == PATH_MAX)
                fatal("file name too long, or input not null separated");
            fn[i++] = static_cast<char>(c);
        }
    }

    return 0;
}
