/*
 * Copyright (c) 2022 Jim Seymour (jseymour+sshguard@LinxNet.com)
 *
 * atre-parser
 *
 * Parse and display individual attacks from standard input using a regular
 * expression engine.
 *
 * Usage:
 *
 *   atre-parser [-b] [-c <cfig file>] [-d <level>] [-h [subject]]
 *       [-l <facility>] [-r] [-t <tag>] [-v <level>] [file]
 *
 *     -b - bail on mismatch (with -r only)
 *
 *     -c <config file> - specify a non-default regexp config file
 *
 *         Default is /etc/sshguard/attack_parser_re.conf for POSIX regexp
 *         builds, /etc/sshguard/attack_parser_re.pcre for PCRE builds.
 *
 *     -d <level> - emit debugging statements to stderr
 *
 *     -h [subject] - help
 *
 *        Besides command-line usage, help is available for:
 *
 *            services - list the known service names
 *
 *     -l <facility> - log to syslog facility (-r disables)
 *
 *         Valid facility names are system-specific, but examples are
 *         (Linux and most Unixen?): auth, authpriv, cron, daemon, ftp,
 *         lpr, mail, news, syslog, user, uucp, and local0 to local7
 *
 *         If -l is not specified, messages that would otherwise have
 *         been logged via syslog are sent to stderr instead, preceded
 *         by the log level (e.g.: "log_info:")
 *
 *         Specifying an input file disables syslogging, regardless of
 *         this option.
 *
 *     -r - regression-testing mode
 *
 *     -t <tag> - syslog logging tag (with -l only, default is "atre-parser")
 *
 *         (Aka: syslog "ident")
 *
 *     -v <level> - increase logging verbosity
 *
 *     [file] - input log file (otherwise stdin)
 *
 * Notes:
 *
 *    SIGUSR1 sent to the running process will cause it to reload its
 *    regexp configuration file if it has a modification time newer than
 *    when it was last read.  E.g.:
 *
 *        $ pkill -USR1 atre-parser
 *
 *    One or more "reload" messages will be logged, depending upon logging
 *    verbosity level.
 *
 *    SIGHUP, SIGTERM, and SIGINT terminate execution
 *
 *    It's okay to specify POSIX regexp config files with PCRE builds,
 *    but not vice-versa.
 *
 *    Fatal errors result in immediate program termination, with error
 *    messages emitted to stderr (and possibly syslog), and a non-zero
 *    exit value.
 *
 *    "-t <tag>" is Linux "logger" utility semantics.
 *
 * Examples:
 *
 *    Used in an sshguard PARSER pipeline:
 *
 *        atre-parser -l auth -t sshguard
 *
 *    Using an alternate regexp config file and increased logging verbosity:
 *
 *        atre-parser -c path_to_config_file -l auth -t sshguard -v1
 *
 *    Input is expected to be logfile lines.  Output, to stdout, is in
 *    the form:
 *
 *        <service code> <IP address> <IP address type> <dangerousness>
 *
 *    Specifying debug mode changes the output to stdout to be more
 *    verbose. (Incompatible with using in an sshguard stream.)
 *
 *    Checking a log file with an experimental regexp in a file named
 *    "test.conf", with debugging enabled:
 *
 *        atre-parser -d1 -v1 -c test.conf log_file
 *
 *    Regression-testing:
 *
 *        atre-parser -b -r -c examples/attack_parser_re.conf <test/mytests.txt
 *
 *    The input file must be in a particular format.  See the file,
 *    noted above.  For lines that fail, specify debug mode to get
 *    result/expected output.
 *
 * Bugs:
 *
 *     None known at this time.
 *
 */
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>
#include <syslog.h>
#include <errno.h>
#include <stdarg.h>
#include <signal.h>

/*
 * Keyword shim, borrowed from simclist.h
 *
 * Compilers that do not announce C99 (or later) support get "inline"
 * and "restrict" defined away to nothing.  For C99 and later compilers
 * both words are real keywords and nothing is defined here.
 */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#   define inline           /* expands to nothing */
#   define restrict         /* expands to nothing */
#endif

/*
 * These paths will require tweaking, based upon build directory structure
 */
#if defined(SSHG_1_7_0)
#include "parser/attack.h"	/* from sshguard source tree */
#else
#include "common/attack.h"	/* from sshguard source tree */
#endif

#include "attack_parser_re.h"


/* Number of the last reload signal caught; 0 while no reload is pending */
static volatile sig_atomic_t atre_reload_signal = 0;

/* Logging verbosity (set with -v) */
static unsigned logging_verbosity = 0;

/* Non-zero while messages are being sent to syslog */
static int syslogging = 0;

/*
 * Signals we catch to terminate program execution.
 *
 * The list is walked until the -1 sentinel is met, so any new entry
 * has to be placed in front of it.
 */
static int termination_sigs[] = { SIGTERM, SIGHUP, SIGINT, -1 };

/***
 *** Syslog'ing setup and signal handlers
 ***/

#ifndef STAND_ALONE_DEVEL_ATRE

#define MAX_FACILITY_NAME_LEN 20        /* More than twice the length of the longest-known facility name */

/* One row of the syslog facility name -> facility code lookup table */
typedef struct syslog_fac_name_to_code_t {
    char name[MAX_FACILITY_NAME_LEN];   /* facility name as given to -l, e.g. "auth" */
    int code;                           /* the matching LOG_* facility value */
} syslog_fac_name_to_code_t;

/*
 * Syslog facility name <-> code translation table
 *
 * authpriv, ftp and syslog are not among the facilities POSIX requires
 * <syslog.h> to provide, so each of them is listed only when the
 * system's header defines the corresponding macro.  The remaining
 * entries are listed unconditionally.
 */
static const syslog_fac_name_to_code_t syslog_fac_name_to_code_l[] = {
    {"auth",       LOG_AUTH},
#ifdef LOG_AUTHPRIV
    {"authpriv",   LOG_AUTHPRIV},
#endif
    {"cron",       LOG_CRON},
    {"daemon",     LOG_DAEMON},
#ifdef LOG_FTP
    {"ftp",        LOG_FTP},
#endif
    {"lpr",        LOG_LPR},
    {"mail",       LOG_MAIL},
    {"news",       LOG_NEWS},
#ifdef LOG_SYSLOG
    {"syslog",     LOG_SYSLOG},
#endif
    {"user",       LOG_USER},
    {"uucp",       LOG_UUCP},
    {"local0",     LOG_LOCAL0},
    {"local1",     LOG_LOCAL1},
    {"local2",     LOG_LOCAL2},
    {"local3",     LOG_LOCAL3},
    {"local4",     LOG_LOCAL4},
    {"local5",     LOG_LOCAL5},
    {"local6",     LOG_LOCAL6},
    {"local7",     LOG_LOCAL7},
};

/*
 * Translate a syslog facility name to a facility code
 *
 * facility_name - name to look up; the comparison is exact and
 *                 case-sensitive
 *
 * Returns the LOG_* facility code, or -1 on failure (unknown name or
 * NULL pointer)
 */
int
syslog_fac_name_to_code(const char *facility_name)
{
    size_t indx;
    const size_t n_facilities =
        sizeof(syslog_fac_name_to_code_l) / sizeof(syslog_fac_name_to_code_l[0]);

    if(facility_name == NULL)
        return(-1);

    for(indx = 0; indx < n_facilities; ++indx) {
        if(strcmp(facility_name, syslog_fac_name_to_code_l[indx].name) == 0)
            return(syslog_fac_name_to_code_l[indx].code);
    }

    return(-1);
}

/*
 * Set up syslogging
 *
 * syslog_facility - LOG_* facility code handed to openlog()
 * syslog_ident    - tag ("ident") for the log messages; when NULL the
 *                   documented default, "atre-parser", is used
 * debug           - non-zero: messages are copied to stderr as well
 *                   (LOG_PERROR) and the log mask is left untouched;
 *                   zero: the log mask is set to LOG_UPTO(LOG_INFO)
 */
static void
atre_log_init(const int syslog_facility, const char *syslog_ident, const int debug)
{
    /* static storage: stays valid should openlog() keep the pointer rather than copy the string */
    static const char default_ident[] = "atre-parser";
    int flags = LOG_NDELAY | LOG_PID;

    if(debug)
        flags |= LOG_PERROR;
    else
        setlogmask(LOG_UPTO(LOG_INFO));

    /* Set local time zone and open log */
    tzset();
    openlog(syslog_ident != NULL ? syslog_ident : default_ident, flags, syslog_facility);
}

/*
 * Close-down syslogging
 *
 * Takes no arguments and returns nothing; it simply calls closelog().
 */
static void
atre_log_fin(void)
{
    closelog();
}
#endif

/*
 * exit trap
 *
 * Logs a notice when syslogging is active, calls
 * attack_parser_re_cleaner(NULL) and, unless this is a stand-alone
 * development build, closes the syslog connection.
 */
static void
atre_exit_catch(void)
{
    if(syslogging) {
        atre_log(LOG_NOTICE, "atre_parser exiting");
    }

    attack_parser_re_cleaner(NULL);

#if !defined(STAND_ALONE_DEVEL_ATRE)
    atre_log_fin();
#endif
}

/*
 * Reload signal handler
 *
 * Does nothing except record the signal number in atre_reload_signal
 * (a volatile sig_atomic_t).  The reload itself is left to whoever
 * later finds that variable non-zero.
 */
static void
atre_reload__sig_handler(int signo) {
    atre_reload_signal = signo;
}

/*
 * Handler for the normal termination signals
 *
 * Turns an expected signal into an ordinary exit(), so that the
 * function registered with atexit() - atre_exit_catch() - still runs.
 *
 * NOTE(review): exit() is not on POSIX's list of async-signal-safe
 * functions - confirm that calling it from a handler is acceptable here.
 */
static void
atre_fin_sig_handler(int sig)
{
    (void) sig;         /* which signal it was makes no difference */
    exit(0);
}

/***
 *** End: Syslog'ing setup and signal handlers
 ***/

/***
 *** Regression-testing code
 *** Copied and adapted from sshguard-2.4.2 .../src/parser/parser.c
 ***/

/*
 * Local boolean type for the regression-testing code: "bool" is just
 * another spelling of "unsigned" from this line on.
 *
 * NOTE(review): this would clash with <stdbool.h> (or a compiler where
 * bool is a keyword) should either ever come into play - confirm before
 * changing includes or the language standard used for the build.
 */
#define bool unsigned

/*
 * Run one log line through parse_line_re() and render the outcome as text
 *
 * buf - NUL-terminated log line to examine
 * dst - result buffer of BUFSIZ bytes; receives
 *       "<service> <address> <address kind> <dangerousness>" when
 *       parse_line_re() returns 0, and "*" for any other return value
 */
static void
parse_to_buf(char buf[static 1], char dst[static BUFSIZ]) {
    attack_t found;

    /* any non-zero return: the line did not yield an attack */
    if(parse_line_re(buf, &found) != 0) {
        strncpy(dst, "*", BUFSIZ);
        return;
    }

    snprintf(dst, BUFSIZ, "%d %s %d %d",
        found.service,
        found.address.value,
        found.address.kind,
        found.dangerousness);
}

/*
 * Feed one line of a regression-test file to the test state machine
 *
 * Blank lines and lines starting with '#' are skipped.  Each test is
 * made up of three consecutive non-skipped lines:
 *
 *   1. the log line to hand to parse_to_buf()
 *   2. the text parse_to_buf() is expected to produce for it
 *   3. the type of test: 'M' (expected match) or 'X' (expected fail)
 *
 * Results go to stdout as "ok N" / "not ok N" lines, followed by the
 * tested log line as a "# ..." comment.
 *
 * buf            - the line, already stripped of its newline
 * stop_on_not_ok - non-zero: abort() at the first "not ok"
 *
 * Returns the number of tests completed so far.  Exits with status 99
 * when the third line of a test starts with neither 'M' nor 'X'.
 */
static unsigned
test_next_line(char buf[static BUFSIZ], unsigned stop_on_not_ok) {
    static unsigned char state = 0;
    static char input_line[BUFSIZ], result[BUFSIZ];
    static bool match;
    static unsigned test_counter = 0;   /* number of tested log lines */
    static unsigned line_number = 0;    /* input file line number (for debugging) */

    ++line_number;

    // skip blank lines and comments
    if(buf[0] == '\0' || buf[0] == '\n' || buf[0] == '#')
        return(test_counter);

    switch (state) {
        case 0: // line with input
            strncpy(input_line, buf, sizeof(input_line));
            input_line[sizeof(input_line) - 1] = '\0';
            parse_to_buf(buf, result);
            state++;
            break;
        case 1: // line with expected output
            match = strcmp(buf, result) == 0;
            if(! match)
                printf("expected: \"%s\", results: \"%s\"\n", buf, result);
            state++;
            break;
        case 2: // line with type of test
            ++test_counter;
            if (match) {
                printf("ok %u", test_counter);
            } else {
                printf("not ok %u, line nbr: %u\n", test_counter, line_number);
                if(stop_on_not_ok) {
                    /* abort() need not flush stdio, so push the report out first */
                    fflush(stdout);
                    abort();
                }
            }
            switch (buf[0]) {
                case 'M': // expected match
                    if (match)
                        putchar('\n');
                    else
                        printf(" # actual: %s\n", result);
                    break;
                case 'X': // expected fail
                    printf(" # TODO\n");
                    break;
                default:
                    if (match)
                        putchar('\n');  /* finish the pending "ok N" line first */
                    printf("Bail out! Malformed expected test result: \"%c\" at line nbr: %u\n", buf[0], line_number);
                    exit(99);
            }
            printf("# %s\n", input_line);
            state = 0;
            break;
        default:
            abort();
    }

    return(test_counter);
}

/***
 *** End: Regression-testing code
 *** Copied and adapted from sshguard-2.4.2 .../src/parser/parser.c
 ***/

/*
 * Extended help
 *
 * Names of the help subjects; a NULL pointer marks the end of the list.
 */
static char *help_subjects_subjects[] = { "services", NULL };

/*
 * Print help on one subject
 *
 *   "services"    - each name/code pair handed out by
 *                   svc_name_code_iterator(), to stdout
 *   "?" or "help" - the list of available subjects, to stdout
 *   anything else - a "No help for ..." message, to stderr
 *
 * subj - the subject asked for (must not be NULL)
 */
static void
help_subjects(const char *subj) {

    if(!strcmp(subj, "services")) {
        int indx, svc_code;
        const char *svc_name;

        puts("\nKnown services:");

        indx = 0;
        while((svc_name = svc_name_code_iterator(&indx, &svc_code)) != NULL)
            printf("    %11s  %d\n", svc_name, svc_code);
        putchar('\n');

    } else if(!strcmp(subj, "?") || !strcmp(subj, "help")) {
        char **subject;

        puts("help available for:");
        for(subject = help_subjects_subjects; *subject != NULL; ++subject)
            printf("    %s\n", *subject);

    } else {
        fprintf(stderr, "No help for \"%s\"\n", subj);
    }
}

/* Program name used in error messages; NULL until it has been set */
static char *progname = NULL;

/* Command-line summary; printed after "usage: <progname> " */
static char usage[] =
    "[-b] [-c <cfig file>] [-d <level>] [-h [subject]] [-l <facility>] [-r] [-t <tag>] [-v <level>] [file] \n"
    "    -b - bail on mismatch (with -r only) \n"
    "    -c <config file> - specify non-default regexp expression config file \n"
    "    -d <level> - emit debugging statements to stderr \n"
    "    -h [subject] - help (\"-h help\" lists the subjects) \n"
    "    -l <facility> - log to syslog facility (-r over-rides) \n"
    "    -r - regression-testing mode \n"
    "    -t <tag> - syslog logging tag (with -l only, default is \"atre-parser\") \n"
    "    -v <level> - increase logging verbosity \n"
    "    [file] - input log file (otherwise stdin) ";

/*
 * NOTE(review): getopt() and getpid() are declared in <unistd.h>, which
 * this file does not include itself - presumably one of the project
 * headers brings it in; confirm.
 */
extern int opterr;      /* option error reporting flag (getopt) */

/*
 * Print the usage message to stderr and exit with status 1
 */
static void
usage_exit(void)
{
    fprintf(stderr, "usage: %s %s\n", progname, usage);
    fflush(stderr);
    exit(1);
}

/*
 * Discard the remainder of an over-long input line
 *
 * Reads up to and including the next newline.  Also stops at
 * end-of-file or on a read error, so that input ending in the middle of
 * a line cannot make the caller spin forever.  A read that was merely
 * interrupted by a signal is retried.
 */
static void
skip_rest_of_line(FILE *fp)
{
    int ch;

    for(;;) {
        ch = fgetc(fp);
        if(ch == '\n')
            break;
        if(ch == EOF) {
            if(ferror(fp) && errno == EINTR) {
                clearerr(fp);
                continue;
            }
            break;
        }
    }
}

/*
 * Act on a pending reload signal: clear the flag, optionally log the
 * request, then call reload_attack_parser_re_conf()
 */
static void
service_reload_request(void)
{
    atre_reload_signal = 0;
    if(logging_verbosity)
        atre_log(LOG_INFO, "attack_parser_re signatures reload requested");
    reload_attack_parser_re_conf();
}

/*
 * The main() show.  Documented atop the file.
 *
 * In order: parse the command line, pick the input (named file or
 * stdin), set up syslogging when asked for, install the signal handlers
 * and the exit trap, initialise the regexp parser, then process the
 * input one line at a time until end-of-file or a read error.
 *
 * Exit status: 0 on normal completion (also after -h <subject>), 1 for
 * command-line or input-file errors, EXIT_FAILURE when a signal handler
 * cannot be installed.
 */
int
main(int argc, char **argv)
{
    int c;
    extern char *optarg;                /* option arg pointer (getopt) */
    extern int optind;                  /* option indexer (getopt) */

    int debugging = 0;                  /* command-line args */
    unsigned level = 0;                 /* scratch for the -d argument */
    unsigned regression_testing = 0;
    unsigned stop_on_not_ok = 0;        /* bail on first "not ok" in regression? */
    unsigned test_counter = 0;

    char *cfig_file = NULL;
    char *syslog_facility_name = NULL;
    char *syslog_tag = NULL;
    char *infile = NULL;
    FILE *fp = NULL;
    char buf[BUFSIZ], *sp1;
    attack_t attack;
    struct sigaction reload_sa;         /* for the reload signal handler */
    struct sigaction terminate_sa;      /* for the termination signal handler */
    int *sig_num_p;

    progname = argv[0];         /* this is used by error reporting */

    opterr = 0;                 /* we'll do the reporting */

    /*
     * get options
     */
    while((c = getopt(argc, argv, "bc:d:h:l:rt:v:")) != -1) {
        switch(c) {
            case 'b':
                ++stop_on_not_ok;
                break;
            case 'c':
                if(optarg[0] == '-')
                    usage_exit();
                cfig_file = optarg;
                break;
            case 'd':
                /* scan into an unsigned, as "%u" requires, then convert */
                if(sscanf(optarg, "%u", &level) != 1) {
                    fprintf(stderr, "%s: \"%s\" not a numeric value\n", progname, argv[optind -1]);
                    usage_exit();
                }
                debugging = (int) level;
                break;
            case 'h':
                help_subjects(optarg);
                exit(0);
            case 'l':
                if(optarg[0] == '-')
                    usage_exit();
                syslog_facility_name = optarg;
                break;
            case 'r':
                ++regression_testing;
                break;
            case 't':
                if(optarg[0] == '-')
                    usage_exit();
                syslog_tag = optarg;
                break;
            case 'v':
                if(sscanf(optarg, "%u", &logging_verbosity) != 1) {
                    fprintf(stderr, "%s: \"%s\" not a numeric value\n", progname, argv[optind -1]);
                    usage_exit();
                }
                break;
            case '?':
                /*
                 * Normally, an option requiring an argument, and lacking one, is an error,
                 * but we allow "-h" all on its own
                 */
                if(strcmp(argv[optind - 1], "-h"))
                    fprintf(stderr, "%s: bad option \"%s\"\n", progname, argv[optind - 1]);
                usage_exit();
                break;
            default:
                break;
        }
    }

    /* set "using syslogging" status */
    syslogging = ((syslog_facility_name != NULL) && !regression_testing);

    /* named input file or piped from stdin? */
    switch(argc - optind) {
        case 1:
            infile = argv[optind];
            if((fp = fopen(infile, "r")) == NULL) {
                fprintf(stderr, "%s: Could not open input file \"%s\" for read: %s\n", progname, infile, strerror(errno));
                exit(1);
            }
            if(debugging > 1)
                fprintf(stderr, "input file: %s\n", infile);
            syslogging = 0;
            break;
        case 0:
            fp = stdin;
            if(debugging > 1)
                fputs("input file: stdin\n", stderr);
            break;
        default:
            usage_exit();
            break;
    }

#ifndef STAND_ALONE_DEVEL_ATRE
    if(syslogging) {
        int syslog_facility;

        if((syslog_facility = syslog_fac_name_to_code(syslog_facility_name)) == -1) {
            fprintf(stderr, "Error: Unknown syslog facility: \"%s\" - bailing...\n", syslog_facility_name);
            exit(1);
        }

        atre_log_init(syslog_facility, syslog_tag, 0);  /* Syslog messages only to a logfile */
    }
#endif

    /*
     * set up reload sig handler
     *
     * sa_flags stays 0 (no SA_RESTART), which is what lets a blocked
     * fgets() below return early when the signal arrives
     */
    memset(&reload_sa, 0, sizeof(struct sigaction));
    reload_sa.sa_handler = &atre_reload__sig_handler;
    if (sigaction(SIGUSR1, &reload_sa, NULL) == -1) {
        perror("sigaction");
        exit(EXIT_FAILURE);
    }

    /* set up program termination sig handler */
    memset(&terminate_sa, 0, sizeof(struct sigaction));
    terminate_sa.sa_handler = &atre_fin_sig_handler;
    for(sig_num_p = termination_sigs; *sig_num_p != -1; ++sig_num_p) {
        if (sigaction(*sig_num_p, &terminate_sa, NULL) == -1) {
            perror("sigaction");
            exit(EXIT_FAILURE);
        }
    }

    atexit(atre_exit_catch);

    attack_parser_re_init(cfig_file, NULL, debugging, logging_verbosity, syslogging);

    if(syslogging)
        atre_log(LOG_INFO, "atre-parser running (pid: %lu)", (unsigned long) getpid());

    while(!feof(fp)) {
        /* a reload signal that arrived while the previous line was being processed */
        if(atre_reload_signal)
            service_reload_request();

        if(fgets(buf, sizeof(buf), fp) == NULL) {
            int read_errno = errno;     /* keep it before other calls can change it */

            if(atre_reload_signal) {    /* early return from fgets() due to signal? */
                service_reload_request();
                /* an interrupted read is no real error: don't let the flag linger */
                if(!feof(fp))
                    clearerr(fp);
            } else if(ferror(fp)) {
                atre_log(LOG_ERR, "Error reading log file input (%s, perhaps?) - terminating", strerror(read_errno));
                break;
            }
            continue;
        }

        sp1 = strchr(buf, '\n');
        if(sp1 != NULL) {
            *sp1 = '\0';        /* chomp */
        } else if(!feof(fp)) {
            fputs("line too long from log file\n", stderr);
            skip_rest_of_line(fp);      /* eat the rest of the too-long line */
            continue;
        }
        /* no newline, but at end-of-file: a final, unterminated line - use it as it is */

        if(regression_testing) {
            test_counter = test_next_line(buf, stop_on_not_ok);
            continue;
        }

        if(debugging > 2)
            fprintf(stderr, "\nChecking \"%s\" for attack pattern\n", buf);

        int ret_val = parse_line_re(buf, &attack);
        switch(ret_val) {
            case 0:
                if(debugging) {
                    printf("Denied: service name: \"%s\", service: %d, ip addr: \"%s\", ip_type: %d, dang: %d\n",
                        atre_service_to_name(attack.service),
                        attack.service,
                        attack.address.value,
                        attack.address.kind,
                        attack.dangerousness);
                } else {
                    printf("%d %s %d %d\n",
                        attack.service, attack.address.value, attack.address.kind, attack.dangerousness);
                }
                fflush(stdout);
                break;
            case REG_NOMATCH:
                if(debugging > 2)
                    fputs("Allowed!\n", stderr);
                break;
            case -1:
                fputs("Error: parse_line_re() returned error\n", stderr);
                break;
            default:
                fprintf(stderr, "Say what? parse_line_re(): Unexpected return: %d\n", ret_val);
                break;
        }
    }

    /*
     * NOTE(review): atre_exit_catch(), registered with atexit() above,
     * calls attack_parser_re_cleaner(NULL) as well, so on this path it
     * runs twice - presumably harmless; confirm.
     */
    attack_parser_re_cleaner(NULL);

    if(infile)
        fclose(fp);

    if(regression_testing)
        printf("### %u log line samples tested\n", test_counter);

    exit(0);
}
