/*
 * Copyright (c) 2022 Jim Seymour (jseymour+sshguard@LinxNet.com)
 *
 * attack_parser_re.c
 *
 * Check a logfile line for a regular expression attack match
 *
 * Note: "ATRE" and "atre", throughout the code, are abbreviations
 *       for "attack regular expression".
 */
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>
#include <syslog.h>
#include <errno.h>
#include <sys/stat.h>

/*
 * Stolen from simclist.h
 *
 * Be friend of both C90 and C99 compilers
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
    /* "inline" and "restrict" are keywords */
#else
#   define inline           /* inline */
#   define restrict         /* restrict */
#endif

/*
 * These paths will require tweaking, based upon build directory structure
 */
#if defined(SSHG_1_7_0)
#include "parser/attack.h"	/* from sshguard source tree */
#else
#include "common/attack.h"	/* from sshguard source tree */
#endif

#include "attack_parser_re.h"

#define DEFAULT_ATTACKS_DANGEROUSNESS 10

#define ATREP_ARGS 2	/* Attack RE _pattern_ args: There's only one */
#define ATREP_RE 1	/* Index to captured regexp sub-field value in regmatch_t */
#define ATREP_DANG 2	/* Index to captured dangerousness sub-field value in regmatch_t */
#define COMMENT_ARGS 1

#define ATRE_ARGS 2	/* Attack RE args (two sub-strings are captured) */
#define ATRE_CAP_SVC 1	/* Indexes to captured sub-field values in regmatch_t */
#define ATRE_CAP_IP  2

/*
 * Default config file path
 */
static const char *atre_config_file_path = ATRE_CONFIG_FILE_PATH;

/*
 * IP address types compiled regexps
 *
 * These are here, because, while they're used only during log line parsing,
 * we compile them at startup on the off chance the compiles fail, they fail
 * at startup, rather than when the first log line arrives--which would be
 * fugly.
 */
static atre_regex_t ipv4_preg;
static atre_regex_t ipv6_preg;
static atre_regex_t ipv4_mapped6_preg;

/* The top (head) of the attack parser regex linked list */
static atre_list_t *atre_list = (atre_list_t *) NULL;

/* service name to number lookup table struct */
typedef struct svc_name_to_code_t {
    char name[BUFSIZ];	/* service name; compared with strcmp() by svc_name_to_code() */
    int code;		/* service code paired with that name */
} svc_name_to_code_t;

/*
 * Service enums from sshguard-2.x that weren't in 1.7.0
 *
 * Yes: This be fugly, but, short of #defines for every last sshguard version...
 */
#ifndef SERVICES_CLF_LOGIN_URL
#define SERVICES_CLF_LOGIN_URL  370
#endif
#ifndef SERVICES_CLF_PROBES     
#define SERVICES_CLF_PROBES     360
#endif
#ifndef SERVICES_CLF_UNAUTH     
#define SERVICES_CLF_UNAUTH     350
#endif
#ifndef SERVICES_COCKPIT
#define SERVICES_COCKPIT        340
#endif
#ifndef SERVICES_COURIER
#define SERVICES_COURIER        280
#endif
#ifndef SERVICES_GITEA
#define SERVICES_GITEA          500
#endif
#ifndef SERVICES_OPENSMTPD      
#define SERVICES_OPENSMTPD      270
#endif
#ifndef SERVICES_OPENVPN
#define SERVICES_OPENVPN        400
#endif
#ifndef SERVICES_SSHGUARD
#define SERVICES_SSHGUARD       110
#endif

/*
 * Service name <-> code translation table
 *
 * N.B.: THIS MUST BE KEPT IN-SYNC with enum_services
 * in .../src/parser/attack.h,
 * 
 * BUT sorted in ascending order on "name" field!
 * (This allows us to short-circuit the linear search.)
 */
/* svc_name_to_code() stops its linear search at the first entry that sorts after the key */
static svc_name_to_code_t svc_name_to_code_l[] = {
    {"all",          SERVICES_ALL},		/* all/any/unknown */
    {"cockpit",      SERVICES_COCKPIT},		/* Cockpit management dashboard */
    {"courier",      SERVICES_COURIER},		/* Courier imap/pop */
    {"cucipop",      SERVICES_CUCIPOP},		/* cucipop */
    {"cyrusimap",    SERVICES_CYRUSIMAP},	/* Cyrus-imap */
    {"dovecot",      SERVICES_DOVECOT},		/* dovecot */
    {"exim",         SERVICES_EXIM},		/* Exim */
    {"freebsdftpd",  SERVICES_FREEBSDFTPD},	/* ftpd shipped with freebsd */
    {"gitea",        SERVICES_GITEA},		/* Gitea */
    {"openvpn",      SERVICES_OPENVPN},		/* OpenVPN */
    {"postfix",      SERVICES_POSTFIX},		/* Postfix */
    {"proftpd",      SERVICES_PROFTPD},		/* Proftpd */
    {"pureftpd",     SERVICES_PUREFTPD},	/* Pure-ftpd */
    {"sendmail",     SERVICES_SENDMAIL},	/* sendmail */
    {"ssh",          SERVICES_SSH},		/* ssh */
    {"uwimap",       SERVICES_UWIMAP},		/* UWimap for imap and pop daemon */
    {"vsftpd",       SERVICES_VSFTPD},		/* vsftpd */
}; 

static int atre_use_syslog = 0;		/* non-zero: atre_log() sends to syslog instead of stderr */
static int atre_debugging = 0;		/* debugging level; higher values enable more stderr diagnostics */
static int atre_logging_verbosity = 0;	/* logging verbosity level; set by attack_parser_re_init() */

/***
 *** Local worker functions, defined below
 ***/
static void fatal_n(const char *fmt,...);
static char *expand_ipaddr_macro(const char *cfig_line, int line_nbr);


#ifdef USE_NATIVE_PCRE
/*
 * Cadged from regex.h
 *
 * These are the only two flags we use, here, and, if the defs
 * change it really doesn't matter as they're only here for syntactical
 * compatibility.
 */
#define REG_EXTENDED 1
#define REG_ICASE (REG_EXTENDED << 1)

typedef struct regmatch_t {
    int rm_so;
    int rm_eo;
} regmatch_t;

static int regcomp(atre_regex_t *preg, const char *regex, int cflags);
static int regexec(const atre_regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
static void regfree(atre_regex_t *preg);
#endif


/*
 * Process an attack regex config file
 *
 * Reads the config file line by line, compiles every valid attack
 * regexp found and appends it to an attack regex list.  On the first
 * call the regexps used to parse the config file itself, and the
 * file-global IP address type regexps, are compiled as well.
 *
 * Calling Args:
 *
 *     char *cfig_path - Optional config file path
 *
 *         If NULL, the default config file is read.  The path is
 *         latched on the first call; later calls re-use the saved
 *         path and ignore this argument.  Only the pointer is saved,
 *         so the string must remain valid afterwards.
 *
 *     atre_list_t **ret_atre_list_pp - Optional pointer to an
 *        atre_list_t pointer
 *
 *         If NULL, the global atre_list_t * is used.  If non-NULL
 *         and the pointed-to list is empty, the location to which
 *         it points is set to the new list's first element.  New
 *         entries are appended to the tail of a non-empty list.
 *
 *     int debugging - Debugging level (saved)
 *
 *     int logging_verbosity - Logging verbosity (saved)
 *
 *     int use_syslog - Use syslog (saved)
 *
 * Returns:
 *
 *     ATRE_CONF_SUCCESS - successful (re-)config
 *     ATRE_CONF_NOCONF  - no (re-)config performed
 *     ATRE_CONF_ERR     - fatal (re-)configuration error
 *
 *     "No reconfig performed" will happen if the specified
 *     config file modification date is not newer than that of
 *     the last successful (re-)config
 *
 *     On ATRE_CONF_ERR the caller's list may hold a partial set of
 *     entries; the caller is expected to clean it up.
 *
 * N.B.: Does not return on certain fatal errors, such as
 *       inability to read config file, on initial
 *       invocation (ret_atre_list_pp == NULL).  Instead bails
 *       directly out via fatal_n().
 */
int
attack_parser_re_init(const char *cfig_path, atre_list_t **ret_atre_list_pp, const int debugging, const int logging_verbosity, const int use_syslog)
{
    char cfig_line[BUFSIZ];
    char *cfig_line_p;
    atre_list_t **atre_list_pp = (atre_list_t **) NULL;
    atre_list_t *curr_atre_p = (atre_list_t *) NULL;
    static const char *my_cfig_path = NULL;
    FILE *cfigfp;

    /* Config file-parsing regex vars */
    static int pregs_are_compiled;			/* flag saying pregs have been compiled */
    static atre_regex_t atre_preg;			/* compiled regex pointer - remember it */
    regmatch_t atre_fields[ATREP_ARGS + 1];		/* The 0th is the whole string */
    static atre_regex_t comment_preg;			/* compiled regex pointer - remember it */
    regmatch_t comment_fields[COMMENT_ARGS + 1];
    static atre_regex_t whspace_preg;			/* compiled regex pointer - remember it */

    int line_nbr = 0;					/* regex config file line number */
    struct stat statbuf;
    static time_t last_cfig_time = 0;			/* mtime of the last successfully read config */
    time_t new_cfig_time;				/* mtime of the file we're about to read */

    atre_debugging = debugging;				/* remember debugging (level) for all */
    atre_logging_verbosity = logging_verbosity;		/* remember logging verbosity level for all */
    atre_use_syslog = use_syslog;			/* logging to syslog or emitting to stderr? */

    /*
     * Get the config file path (first call only)
     */
    if(! my_cfig_path) {
	my_cfig_path = (cfig_path == (char *) NULL)? atre_config_file_path : cfig_path;
    }

    if(atre_debugging > 1)
	fprintf(stderr, "attack_parser_re_init(): atre config file path: \"%s\"\n", my_cfig_path);

    if(stat(my_cfig_path, &statbuf) != 0) {
	/*
	 * If this is the initial config file read that failed: Bail out.  Otherwise, just
	 * log the error and return the error condition.
	 */
	if(ret_atre_list_pp == (atre_list_t **) NULL)
	    fatal_n("Problem getting status of file \"%s\": %s", my_cfig_path, strerror(errno));	/* no return */

	atre_log(LOG_ERR, "Problem getting status of file \"%s\": %s", my_cfig_path, strerror(errno));
	return(ATRE_CONF_ERR);
    }

    /* On a re-read: Does it appear file was changed since last we read it? */
    if(statbuf.st_mtime <= last_cfig_time)
	return(ATRE_CONF_NOCONF);				/* nothing to do */

    if(last_cfig_time && atre_logging_verbosity)
	atre_log(LOG_INFO, "attack_parser_re_init(): Reloading attack_parser_re signatures");

    /*
     * Only committed to last_cfig_time once the read succeeds, so a failed
     * reload is retried on the next call rather than reported as up-to-date.
     */
    new_cfig_time = statbuf.st_mtime;

    /* Generating a running config, or a custom one? */
    atre_list_pp = (ret_atre_list_pp == (atre_list_t **) NULL? &atre_list : ret_atre_list_pp);

    /* New entries go after the current tail, so no existing element gets orphaned */
    curr_atre_p = *atre_list_pp;
    while(curr_atre_p != (atre_list_t *) NULL && curr_atre_p->next != (atre_list_t *) NULL)
	curr_atre_p = curr_atre_p->next;

    /* For parsing the config file.  We only compile these once */
    if(pregs_are_compiled == 0) {
	char my_regex_str[BUFSIZ];	/* need to add anchors for our use */

	if(regcomp(&atre_preg,
		   "^/(.+)/[ 	]+<([[:digit:]]+)>[         ]*$",
		   REG_EXTENDED | REG_ICASE) != 0)
	{
	    fatal_n("Can't compile attack parsing regex");
	}

	/* Find comments - incl. leading whitespace */
	if(regcomp(&comment_preg, "^([ 	]*#).*$", REG_EXTENDED) != 0)
	{
	    fatal_n("Can't compile comment-finding regex");
	}

	if(regcomp(&whspace_preg, "^[ 	]*$", REG_EXTENDED) != 0) {
	    fatal_n("Can't compile blank-line-finding regex");
	}

	/* Build the IP address type list of regexps */

	snprintf(my_regex_str, sizeof(my_regex_str), "%s%s%s", "^", REGEXLIB_IPV4, "$");
	if(regcomp(&ipv4_preg, my_regex_str, REG_EXTENDED) != 0)
	    fatal_n("Invalid IPv4 regexp");

	snprintf(my_regex_str, sizeof(my_regex_str), "%s%s%s", "^", REGEXLIB_IPV6, "$");
	if(regcomp(&ipv6_preg, my_regex_str, REG_EXTENDED) != 0)
	    fatal_n("Invalid IPv6 regexp");

#if defined(USE_PCRE) || defined(USE_NATIVE_PCRE)
	if(regcomp(&ipv4_mapped6_preg, "^[0:]+[Ff]{4}:((?:\\d{1,3}\\.){3}\\d{1,3})$", REG_EXTENDED) != 0)
	    fatal_n("Invalid IPv4_mapped6 regexp");
#else
	if(regcomp(&ipv4_mapped6_preg, "^[0:]+[Ff]{4}:([[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3})$", REG_EXTENDED) != 0)
	    fatal_n("Invalid IPv4_mapped6 regexp");
#endif
	++pregs_are_compiled;
    }

    /* Open the config file */
    if((cfigfp = fopen(my_cfig_path, "r")) == NULL) {
	/*
	 * If this is the initial config file read that failed: Bail out.  Otherwise, just
	 * log the error and return the error condition.
	 */
	if(ret_atre_list_pp == (atre_list_t **) NULL)
	    fatal_n("Could not open file \"%s\" for read: %s", my_cfig_path, strerror(errno));	/* no return */

	atre_log(LOG_ERR, "Could not open file \"%s\" for read: %s", my_cfig_path, strerror(errno));
	return(ATRE_CONF_ERR);
    }

    /* Parse the config file */
    while(fgets(cfig_line, sizeof(cfig_line), cfigfp) != NULL) {
	char *p;
	atre_list_t *new_atre_p = (atre_list_t *) NULL;	/* Pointer for new attack parser regex */

	++line_nbr;
	if((p = strchr(cfig_line, '\n')) != NULL) {
	    *p = '\0';	/* chomp */
	} else if(!feof(cfigfp)) {
	    int ch;

	    atre_log(LOG_WARNING, "line too long in file, config line nbr: %d -- skipping", line_nbr);
	    /* eat the rest of the too-long line; stop at EOF so we can't spin forever */
	    while((ch = fgetc(cfigfp)) != '\n' && ch != EOF) {}
	    continue;
	}
	/* else: final line lacking a trailing newline - process it as-is */

	/* Whack comments - incl. leading whitespace */
	if(regexec(&comment_preg, cfig_line, (size_t) COMMENT_ARGS + 1, comment_fields, 0) == 0) {
	    /* Beginning of line? */
	    if(comment_fields[1].rm_so == 0)
		continue;
	    cfig_line[comment_fields[1].rm_so] = '\0';	/* make it go away */
	}

	/* Nothing *but* whitespace, or a 0-length line? */
	if(regexec(&whspace_preg, cfig_line, (size_t) 0, NULL, 0) == 0)
	    continue;

	/* IP address macro expansion? */
	if((cfig_line_p = expand_ipaddr_macro(cfig_line, line_nbr)) == NULL)
	    cfig_line_p = cfig_line;	/* Nope */

	/*
	 * Theoretically, the latter test can fail only if the config file parsing regexp
	 * is b0rk3d, which should have been discovered up above, but better safe than sorry
	 */
	if((regexec(&atre_preg, cfig_line_p, (size_t) ATREP_ARGS + 1, atre_fields, 0) != 0) ||
	   (atre_fields[ATREP_RE].rm_so == -1))
	{
	    atre_log(LOG_WARNING, "Invalid attack regexp, config line nbr: %d - skipping", line_nbr);
	    continue;
	}

	if((new_atre_p = calloc(1, sizeof(*new_atre_p))) == NULL) {
	    if(ret_atre_list_pp == (atre_list_t **) NULL)
		fatal_n("Unable to allocate memory for new attack regex list element");	/* no return */

	    atre_log(LOG_ERR, "Unable to allocate memory for new attack regex list element");
	    (void) fclose(cfigfp);
	    return(ATRE_CONF_ERR);
	}

	/* It's okay to alter the config line, now, cuz we'll be done w/it */

	/* Was a specific dangerousness specified?  Zero or absent means "use the default" */
	new_atre_p->dangerousness = DEFAULT_ATTACKS_DANGEROUSNESS;
	if(atre_fields[ATREP_DANG].rm_so != -1) {
	    int dang;

	    cfig_line_p[atre_fields[ATREP_DANG].rm_eo] = '\0';	/* terminate dangerousness sub-string */

	    if((dang = atoi(&cfig_line_p[atre_fields[ATREP_DANG].rm_so])) != 0)
		new_atre_p->dangerousness = dang;
	}

	cfig_line_p[atre_fields[ATREP_RE].rm_eo] = '\0';	/* terminate regexp sub-string */

	if(atre_debugging > 2)
	    fprintf(stderr, "attack_parser_re_init(): atre_fields[ATREP_RE]: \"%s\", dangerousness: %d\n",
		&cfig_line_p[atre_fields[ATREP_RE].rm_so], new_atre_p->dangerousness);

	if(regcomp(&new_atre_p->atre_pat, &cfig_line_p[atre_fields[ATREP_RE].rm_so], REG_EXTENDED | REG_ICASE) != 0) {
	    atre_log(LOG_WARNING, "Could not complile attack regexp, config line nbr: %d - skipping", line_nbr);
	    free(new_atre_p);	/* Not a valid atre */
	    continue;
	}

	new_atre_p->line_nbr = line_nbr;

#ifdef USE_NATIVE_PCRE
	if(new_atre_p->atre_pat.cap_cnt - ATRE_ARGS < 0) {
	    atre_log(LOG_ERR, "Too few captured attack values specified (%d), config line nbr: %d - skipping",
		new_atre_p->atre_pat.cap_cnt, line_nbr);
	    regfree(&new_atre_p->atre_pat);	/* don't leak the rejected entry */
	    free(new_atre_p);
	    continue;
	} else if(new_atre_p->atre_pat.cap_cnt - ATRE_ARGS > 0) {
	    atre_log(LOG_ERR, "Too many captured attack values specified (%d), config line nbr: %d - skipping",
		new_atre_p->atre_pat.cap_cnt, line_nbr);
	    regfree(&new_atre_p->atre_pat);	/* don't leak the rejected entry */
	    free(new_atre_p);
	    continue;
	}
#endif

	/* Starting the attack regex list from scratch, or adding an additional entry? */
	if(curr_atre_p == (atre_list_t *) NULL) {
	    *atre_list_pp = curr_atre_p = new_atre_p;	/* new list */
	    new_atre_p->prev = (atre_list_t *) NULL;	/* there is no previous entry */

	    if(atre_debugging > 2)
		fputs("attack_parser_re_init(): adding first entry to attack list\n", stderr);

	} else {
	    curr_atre_p->next = new_atre_p;		/* link */
	    new_atre_p->prev = curr_atre_p;		/* back-link */
	    curr_atre_p = new_atre_p;

	    if(atre_debugging > 2)
		fprintf(stderr, "attack_parser_re_init(): adding a new attack entry (cfig: %d)\n", line_nbr);

	}

	curr_atre_p->next = NULL;		/* end of the attack regex linked-list, for now */
    }

    (void) fclose(cfigfp);

    last_cfig_time = new_cfig_time;	/* this version of the file has now been read */

    return(ATRE_CONF_SUCCESS);
}

/*
 * Re-read the attack parser regexp configuration
 *
 * Meant to facilitate run-time re-configuration without having to kill and
 * restart the program.
 *
 * Process: Calls attack_parser_re_init() with a local pointer to an atre_list_t
 *          pointer.  If the call succeeds: Calls attack_parser_re_cleaner() to
 *          release all allocated memory in the existing file-global list, replaces
 *          the file-global pointer with the pointer to the new list, and returns
 *          success.
 *
 *          If the call to attack_parser_re_init() returns an error: Calls
 *          attack_parser_re_cleaner() to clear anything out of the list pointed
 *          to by the local atre_list_t pointer and returns a failure status.
 */
void
reload_attack_parser_re_conf(void)
{
    atre_list_t *my_atre_list_p = (atre_list_t *) NULL;;	/* Our own list pointer */

    /* re-read the regexp conf list */
    switch(attack_parser_re_init((char *) NULL, &my_atre_list_p, atre_debugging, atre_logging_verbosity, atre_use_syslog)) {
	case ATRE_CONF_SUCCESS:
	    attack_parser_re_cleaner(&atre_list);	/* clear out the old list */
	    atre_list = my_atre_list_p;			/* reset the regex list head */
	    atre_log(LOG_INFO, "reload_attack_parser_re_conf(): Attack parser signatures reloaded");
	    break;
	case ATRE_CONF_NOCONF:
	    if(atre_logging_verbosity)
		atre_log(LOG_INFO, "reload_attack_parser_re_conf(): Attack parser signatures already up-to-date");
	    break;
	case ATRE_CONF_ERR:
	    atre_log(LOG_ERR, "Re-reading regexp config file failed--using previous config.  See prior warnings");
	    /* Depending upon where the error occurred: There may be a (partial) list */
	    attack_parser_re_cleaner(&my_atre_list_p);
	    break;
	default:
	    atre_log(LOG_ERR, "reload_attack_parser_re_conf(): Unexpected return from attack_parser_re_init()\n");
	    break;
    }

    return;
}

/*
 * Clear an attack regex list and all its memory allocations
 *
 * Calling args:
 *
 *     atre_list_t **atre_list_pp - Pointer to the head pointer of the
 *         list to clear.  If NULL, the file-global list is cleared.
 *
 * Every element's compiled regexp is released with regfree() and the
 * element itself with free().  On return the head pointer is NULL.
 */
void
attack_parser_re_cleaner(atre_list_t **atre_list_pp)
{
    atre_list_t *curr_atre_p;

    /* Cleaning default list? */
    if(atre_list_pp == (atre_list_t **) NULL)
	atre_list_pp = &atre_list;

    curr_atre_p = *atre_list_pp;

    while(curr_atre_p != (atre_list_t *) NULL) {
	atre_list_t *next_atre_p = curr_atre_p->next;	/* grab the link before the element goes away */

	/*
	 * atre_pat is embedded in the list element, so its address can never
	 * be NULL here; no NULL test is needed before releasing it.
	 */
	regfree(&curr_atre_p->atre_pat);
	free(curr_atre_p);
	curr_atre_p = next_atre_p;
    }

    *atre_list_pp = (atre_list_t *) NULL;	/* Attack regex list is empty now */
}

/*
 * Check a log line against the attack regexps
 *
 * Calling args:
 *
 *     char *log_line - The log line to check
 *
 *     attack_t *attack - Pointer to attack_t struct to fill
 *
 * Returns: 0 - attack detected
 *          1 - no attack detected
 *
 * Design Note
 *
 *     It was either use a couple flags for conditional execution or
 *     end-up with torturous nesting.  I chose the former.
 */
int
parse_line_re(char *log_line, attack_t *attack)
{
    atre_list_t *atre_list_p = atre_list;	/* init attack regex list traversal pointer */
    int         ret_val = REG_NOMATCH;		/* Default: No attack detected */
    regmatch_t atre_captures[ATRE_ARGS + 2];	/* Captured values - the 0th is the whole string */
						/* We allocate for one more than expected so */
						/*  we can check for too many returns */
    char svc_name[BUFSIZ];

    /*
     * FIXME: Not certain if this is right...
     */
#if defined(SSHG_1_7_0)
    attack->source = 0;
#endif

    /* These should never happen, but just in case ...*/
    if(log_line == NULL || strlen(log_line) == 0 || attack == (attack_t *) NULL)
	return(ret_val);

    while(atre_list_p != (atre_list_t *) NULL) {
#ifndef USE_NATIVE_PCRE
	int prune_faulty_regexp = 0;	/* flag: remove faulty regexp */
#endif

	/*
	 * We specify one more capture value than we should get returned so we can check for too many
	 */
	if((ret_val = regexec(&atre_list_p->atre_pat, log_line, (size_t) ATRE_ARGS + 2, atre_captures, 0)) == 0) {

	    if(atre_debugging > 2)
		fprintf(stderr, "parse_line_re(): Got a log line match (cfig: %d)\n", atre_list_p->line_nbr);

	    /* Test for number of captures we *should* get */
	    int indx;
	    for(indx = 1; indx <= ATRE_ARGS; ++indx) {
		if(atre_captures[indx].rm_so == -1) {

		    /*
		     * Avoid flooding logfiles with this one
		     *
		     * We can't just prune it, because it might be due to wonky logfile lines,
		     * rather than a bad regexp.
		     *
		     * Usually, this will happen only with mis-configured regexps when the code
		     * is not compiled with USE_NATIVE_PCRE
		     */
		    if(atre_list_p->faulty_re_warnings++ < MAX_FAULTY_RE_WARNINGS)
			atre_log(LOG_WARNING, "Less than expected number of captured attack values returned, config line nbr: %d", atre_list_p->line_nbr);

		    ret_val = REG_NOMATCH;	/* Maybe another will succeed? */
		    break;
		}
	    }

	    if(ret_val == 0) {
		/* More captures than we should have? */
		if(atre_captures[indx].rm_so != -1) {
		    atre_log(LOG_WARNING, "More than expected number of captured attack values returned, config line nbr: %d", atre_list_p->line_nbr);
#ifndef USE_NATIVE_PCRE
		    ++prune_faulty_regexp;	/* schedule faulty regexp list entry for removal */
#endif
		    ret_val = REG_NOMATCH;	/* Maybe another will succeed? */
		} else {
		    /* This is kinda ugly, and we hates ugly code, but what ya gonna do? */
		    size_t dst_len = sizeof(svc_name);
		    size_t src_len = atre_captures[ATRE_CAP_SVC].rm_eo - atre_captures[ATRE_CAP_SVC].rm_so;

		    if(src_len < dst_len) {
			strncpy(svc_name, &log_line[atre_captures[ATRE_CAP_SVC].rm_so], src_len);
			svc_name[src_len] = '\0';
			attack->service = svc_name_to_code(svc_name);

			dst_len = sizeof(attack->address.value);
			src_len = atre_captures[ATRE_CAP_IP].rm_eo - atre_captures[ATRE_CAP_IP].rm_so;

			if(src_len < dst_len) {
			    strncpy(attack->address.value, &log_line[atre_captures[ATRE_CAP_IP].rm_so], src_len);
			    attack->address.value[src_len] = '\0';

			    if(atre_debugging > 3)
				fprintf(stderr, "attack_parser_re(): Got an IP addr: \"%s\"\n", attack->address.value);

			    /* Determine address type (IPv4, IPv6) */
			    if(regexec(&ipv4_preg, attack->address.value, (size_t) 0, (regmatch_t *) NULL, 0) == 0) {
				attack->address.kind = ADDRKIND_IPv4;
			    } else if(regexec(&ipv6_preg, attack->address.value, (size_t) 0, (regmatch_t *) NULL, 0) == 0) {
				attack->address.kind = ADDRKIND_IPv6;
			    } else {
				/*
				 * IPv4 address mapped to IPv6?  If so: We have a bit more work to do
				 */
				regmatch_t ipv4_captures[2];

			        if(regexec(&ipv4_mapped6_preg, attack->address.value, (size_t) 2, ipv4_captures, 0) == 0) {
				    size_t src_len = ipv4_captures[1].rm_eo - ipv4_captures[1].rm_so;

				    strncpy(attack->address.value, &attack->address.value[ipv4_captures[1].rm_so], src_len);
				    attack->address.value[src_len] = '\0';
				    attack->address.kind = ADDRKIND_IPv4;
				} else {
				    attack->address.kind = 0;	/* FIXME: Magic number! */
				    atre_log(LOG_WARNING, "Could not determine IP address type for IP addr \"%s\"", attack->address.value);
				}
			    }

			    /* Set initial dangerousness? */
			    attack->dangerousness = atre_list_p->dangerousness;

			    if(atre_logging_verbosity > 1) {
				atre_log(LOG_INFO, "parse_line_re(): detected: svc name: \"%s\", svc: %d, dang: %d, ip addr: \"%s\", ip_type: %d",
				    atre_service_to_name(attack->service), attack->service, attack->dangerousness, attack->address.value, attack->address.kind);
			    }

			    break;	/* Successful match */

			} else {
			    atre_log(LOG_WARNING, "Captured IP address string length exceeds expected max length - skipping, config line nbr: %d",
				atre_list_p->line_nbr);
			    ret_val = REG_NOMATCH;	/* error */
			}
		    } else {
			atre_log(LOG_WARNING, "Captured service name string length exceeds expected max length - skipping, config line nbr: %d",
			    atre_list_p->line_nbr);
			ret_val = REG_NOMATCH;
		    }
		}
	    }
	}

#ifndef USE_NATIVE_PCRE
	/*
	 * Prune regexps detected to be faulty at runtime
	 *
	 * This may seem to be an odd way to handle it, but we had to delay this until now
	 * because we want to free() the faulty regexp's list member's memory allocations,
	 * and we needed it's ->next link until this point.
	 */
	if(prune_faulty_regexp) {
	    atre_list_t *rem_atre_list_p = atre_list_p;		/* save pointer to list element to elide */

	    /*
	     * Remove the offending attack test so the log file isn't bombarded with repeated warning messages
	     */
	    atre_log(LOG_NOTICE, "Removing defective regexp test, config line nbr: %d", rem_atre_list_p->line_nbr);

	    if(atre_list_p->prev == (atre_list_t *) NULL) {		/* are we at the top of the list? */
		atre_list = atre_list_p->next;				/* new head of list is next element */
		atre_list->prev = (atre_list_t *) NULL;			/* and now it has no "previous" */
	    } else {							/* otherwise ... */
		(atre_list_p->prev)->next = atre_list_p->next;		/* link prev element to our next */
		if(atre_list_p->next != (atre_list_t *) NULL)		/* if there was a next element ... */
		    (atre_list_p->next)->prev = atre_list_p->prev;	/* ... back-link its prev our prev */
	    }
	    atre_list_p = atre_list_p->next;

	    regfree(&rem_atre_list_p->atre_pat);	/* free mem allocs for removed list element */
	    free(rem_atre_list_p);
	} else {
	    atre_list_p = atre_list_p->next;
	}
#else
	atre_list_p = atre_list_p->next;
#endif
    }

    return(ret_val);
}

/*
 * Translate a service name to a service number
 *
 */
enum service
svc_name_to_code(const char *svc_name)
{
    int indx, sc_rslt;
    int max_indx = sizeof(svc_name_to_code_l) / sizeof(svc_name_to_code_t);
    enum service ret_val = SERVICES_ALL;	/* dflt: all/unknown */

    for(indx = 0; indx < max_indx; ++indx) {
	if((sc_rslt = strcmp(svc_name, svc_name_to_code_l[indx].name)) == 0) {
	    ret_val = svc_name_to_code_l[indx].code;
	    break;
	} else if(sc_rslt < 0) {
	    break;	/* short-circuit linear search */
	}
    }

    return(ret_val);
}

/*
 * Translate a service number to a service name
 *
 * Returns the name of the first table entry whose code equals the
 * one given, or "unknown" when there is none.
 *
 * N.B.: Kind of replicates a similar function in a later release of
 *       sshguard, except returns a service name as it would appear
 *       in a log file ident field.
 */
const char *
atre_service_to_name(enum service code)
{
    const int entry_cnt = sizeof(svc_name_to_code_l) / sizeof(svc_name_to_code_t);
    int i = 0;

    while(i < entry_cnt) {
	if(svc_name_to_code_l[i].code == code)
	    return(svc_name_to_code_l[i].name);
	++i;
    }

    return("unknown");
}

/*
 * Iterate over the svc_name_to_code list
 *
 * Calling args:
 *
 *    int *indx - index into list
 *    int *svc_code - pointer to int into which to return service code
 *
 * Returns:
 *
 *    Pointer to the service name at *indx, with the matching service
 *    code stored in *svc_code and *indx advanced by one.
 *
 *    If *indx is outside the list bounds: NULL is returned, *svc_code
 *    is set to -1 and *indx is left alone.
 *
 *    Starting with *indx at 0 and calling until NULL comes back walks
 *    the entire list.
 */
const char *
svc_name_code_iterator(int * const indx, int * const svc_code)
{
    static int max_indx = sizeof(svc_name_to_code_l) / sizeof(svc_name_to_code_t);
    const char *name;

    /* Out of bounds: report end of list */
    if(*indx < 0 || *indx >= max_indx) {
	*svc_code = -1;
	return((char *) NULL);
    }

    *svc_code = svc_name_to_code_l[*indx].code;
    name = svc_name_to_code_l[*indx].name;
    ++*indx;

    return(name);
}

/*
 * logging abstraction function
 *
 * Formats the message printf-style (truncated to BUFSIZ - 1 chars),
 * then logs it to syslog if the file-global atre_use_syslog is set,
 * otherwise emits it to stderr prefixed with a "log_<level>: " tag.
 *
 * Calling args:
 *
 *     int priority - syslog priority (LOG_EMERG ... LOG_DEBUG)
 *
 *     const char *fmt, ... - printf-style format and arguments
 */
void
atre_log(int priority, const char *fmt,...)
{
    va_list ap;
    char    buf[BUFSIZ];
    const char *msg_level;

    va_start(ap, fmt);
    (void) vsnprintf(buf, sizeof(buf), fmt, ap);
    va_end(ap);

    if(atre_use_syslog) {
	syslog(priority, "%s", buf);
    } else {
	/*
	 * These never actually appear in real log lines
	 * They're just here for test/development
	 */
	switch(priority) {
	    case LOG_EMERG:
		msg_level = "emergency";
		break;
	    case LOG_ALERT:
		msg_level = "alert";
		break;
	    case LOG_CRIT:
		msg_level = "crit";
		break;
	    case LOG_ERR:
		msg_level = "error";
		break;
	    case LOG_WARNING:
		msg_level = "warning";
		break;
	    case LOG_NOTICE:
		msg_level = "notice";
		break;
	    case LOG_INFO:
		msg_level = "info";
		break;
	    case LOG_DEBUG:
		msg_level = "debug";
		break;
	    default:
		/* e.g. a priority with facility bits OR-ed in; never print an unset pointer */
		msg_level = "unknown";
		break;
	}

	(void) fprintf(stderr, "log_%s: %s\n", msg_level, buf);
    }
}

/*****************************************
 * Various & sundry local worker functions
 *****************************************/

/* The bits of a config line */
#define CL_PARSE_CNT    4
#define CL_PRE_MACRO    1
#define CL_MACRO        2
#define CL_POST_MACRO   3

/* IP macro addr patterns */

#if defined(USE_PCRE) || defined(USE_NATIVE_PCRE)
#define IPV4_EXP "(?:\\d{1,3}\\.){3}\\d{1,3}"
#define IPV6_EXP "[\\dA-F:]{1,5}:[\\dA-F:]+(?!.*\\d{1,3}\\.)"
#define IPV4_MAP6_EXP "[0:]+:[Ff]{4}:(?:\\d{1,3}\\.){3}\\d{1,3}"
#define IPV_ANY_EXP "(?:[0:]+[Ff]{4}:)?(?:(?:\\d{1,3}\\.){3}\\d{1,3})|[\\dA-F:]{1,5}:[\\dA-F:]+"
#else
#define IPV4_EXP "[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}"
#define IPV6_EXP "[[:digit:]A-F:]{1,5}:[[:digit:]A-F:\\.]+"
#define IPV4_MAP6_EXP "[0:]+:[Ff]{4}:[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}"
#define IPV_ANY_EXP "[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}|[[:digit:]A-F:]{1,5}[[:digit:]A-F:\\.]+"
#endif

/*
 * macro -> pattern lookup table
 */
typedef struct ipaddr_macro_to_pat_t {
    char macro[BUFSIZ];	/* macro name, as written between '<' and '>' in a config line */
    char pat[BUFSIZ];	/* regexp text substituted for the macro */
} ipaddr_macro_to_pat_t;

/* Searched linearly, with strcmp(), by expand_ipaddr_macro() */
static ipaddr_macro_to_pat_t ipaddr_macro_to_pat[] = {
    {"IPV4_ADDR",      IPV4_EXP},
    {"IPV6_ADDR",      IPV6_EXP},
    {"IPV4_MAP6_ADDR", IPV4_MAP6_EXP},
    {"IPV_ANY_ADDR",   IPV_ANY_EXP},
};

/*
 * Expand an IP address macro into a regexp pattern and
 * return a pointer to the expanded config line
 *
 * Calling args:
 *
 *     const char *cfig_line - config line to examine (not modified)
 *
 *     int line_nbr - config file line number, for diagnostics
 *
 * Returns NULL if no valid IP addr macro expansion found, or if the
 * expanded line would not fit in the local storage (a warning is
 * logged in that case).
 *
 * Warning: Over-writes the same local expanded regex storage on
 *          each invocation.
 */
static char *
expand_ipaddr_macro(const char *cfig_line, int line_nbr)
{
    static char expanded_expr[BUFSIZ];
    static int preg_is_compiled;			/* flag saying preg has been compiled */
    static atre_regex_t ip_macro_preg;			/* compiled regex pointer - remember it */
    regmatch_t cfig_line_parse[CL_PARSE_CNT];
    char macro[BUFSIZ];
    size_t macro_len;
    int max_indx = sizeof(ipaddr_macro_to_pat) / sizeof(ipaddr_macro_to_pat_t);
    int indx, x;
    int out_len;

    /*
     * compile our parsing regexp only once
     *
     * If this fails something is seriously broken, so it exits the program entirely
     */
    if(preg_is_compiled == 0) {
	if(regcomp(&ip_macro_preg, "^(/.+)<([^[:space:]]+)>(.*?/[ 	]+<[[:digit:]]+>)[         ]*$", REG_EXTENDED) != 0) {
	    fatal_n("Can't compile ip-macro-finding regex");
	}
	++preg_is_compiled;
    }

    /* No (possible) ip address macro on this line? */
    if(regexec(&ip_macro_preg, cfig_line, (size_t) CL_PARSE_CNT, cfig_line_parse, 0) != 0)
	return((char *) NULL);

    if(atre_debugging > 3)
	fprintf(stderr, "expand_ipaddr_macro(): Got a macro!\n");

    /* check results for sanity: we need all our bits */
    for(x = CL_PRE_MACRO; x < CL_PARSE_CNT; ++x) {
	if(cfig_line_parse[x].rm_so == -1) {
	    atre_log(LOG_WARNING, "expand_ipaddr_macro(): Missing result: %d, cfig line nbr: %d!\n", x, line_nbr);
	    return((char *) NULL);
	}
    }

    /* isolate the macro name */
    macro_len = (size_t) (cfig_line_parse[CL_MACRO].rm_eo - cfig_line_parse[CL_MACRO].rm_so);
    if(macro_len >= sizeof(macro)) {
	atre_log(LOG_WARNING, "IP address macro name too long, config line nbr: %d", line_nbr);
	return((char *) NULL);
    }
    memcpy(macro, &cfig_line[cfig_line_parse[CL_MACRO].rm_so], macro_len);
    macro[macro_len] = '\0';

    if(atre_debugging > 3)
	fprintf(stderr, "expand_ipaddr_macro(): And the macro is: \"%s\"\n", macro);

    /* find the macro's replacement pattern */
    for(indx = 0; indx < max_indx; ++indx) {
	if(strcmp(macro, ipaddr_macro_to_pat[indx].macro) == 0)
	    break;
    }

    if(indx >= max_indx) {
	atre_log(LOG_WARNING, "No matching IP address macro found for \"%s\", config line nbr: %d", macro, line_nbr);
	return((char *) NULL);
    }

    if(atre_debugging > 3)
	fprintf(stderr, "expand_ipaddr_macro(): And the expansion is: \"%s\"\n", ipaddr_macro_to_pat[indx].pat);

    /*
     * Do the expansion: <text before macro><pattern><text after macro>
     *
     * Bounded by the size of expanded_expr: a near-full-length config line
     * plus a replacement pattern can be longer than the storage.
     */
    out_len = snprintf(expanded_expr, sizeof(expanded_expr), "%.*s%s%.*s",
	(int) (cfig_line_parse[CL_PRE_MACRO].rm_eo - cfig_line_parse[CL_PRE_MACRO].rm_so),
	&cfig_line[cfig_line_parse[CL_PRE_MACRO].rm_so],
	ipaddr_macro_to_pat[indx].pat,
	(int) (cfig_line_parse[CL_POST_MACRO].rm_eo - cfig_line_parse[CL_POST_MACRO].rm_so),
	&cfig_line[cfig_line_parse[CL_POST_MACRO].rm_so]);

    if(out_len < 0 || (size_t) out_len >= sizeof(expanded_expr)) {
	atre_log(LOG_WARNING, "Expansion of IP address macro \"%s\" is too long, config line nbr: %d", macro, line_nbr);
	return((char *) NULL);
    }

    return(expanded_expr);
}

/*
 * Print fatal execution errors to the log file and stderr, then bail out
 *
 * Formats the message printf-style, appends ", terminating", hands it
 * to atre_log() at LOG_CRIT, prints it to stderr, and exits with
 * status 1.  Never returns.
 */
static void
fatal_n(const char *fmt,...)
{
    static const char suffix[] = ", terminating";
    va_list ap;
    char    buf[BUFSIZ];

    buf[0] = '\0';	/* stay a valid string even if formatting fails */

    /* Leave room for the suffix, so appending it cannot overrun buf */
    va_start(ap, fmt);
    (void) vsnprintf(buf, sizeof(buf) - (sizeof(suffix) - 1), fmt, ap);
    va_end(ap);

    strcat(buf, suffix);
    atre_log(LOG_CRIT, "%s", buf);

    (void) fprintf(stderr, "%s\n", buf);
    exit(1);
}

/*
 * Local PCRE <-> POSIX abstraction layer
 *
 * Functions to simulate POSIX regcomp(), regexec(), and regfree()
 * functions using PCRE.
 *
 * The advantages of these functions, over those in the pcreposix
 * library, are:
 * 
 *     . They employ pcre_study(), which can result in a
 *       ±9% performance gain over using the pcreposix library.
 *
 *     . If the regexp starts with a specified number of any N
 *       chars to ignore: We set a start offset for future searches
 *       and truncate the regex.  That produces another ±9%
 *       performance gain for some beginning-of-line-anchored
 *       regexps.
 *
 *     . They provide much better diagnostics of manifestly broken regexps.
 *
 * The disadvantage is they're not as complete as the functions
 * in the pcreposix library.  But that's not an issue, as they're
 * complete enough for our purposes.
 *
 * N.B.: These don't take a POSIX regex_t argument, but our
 *       own struct type that has members for both a regex and a
 *       study (pcre_extra) pointer.
 */

#ifdef USE_NATIVE_PCRE

/* Status values returned by the local regcomp() and regexec() below */
#define REGEX_SUCCESS 0
#define REGEX_FAIL 1


/*
 * regcomp()
 *
 * Compile "regex" with PCRE and store the results in "preg":
 *
 *     preg->re           the compiled pattern
 *     preg->sd           pcre_study() data (may be NULL if the study
 *                        produced nothing useful)
 *     preg->cap_cnt      capture count reported by pcre_fullinfo()
 *     preg->startoffset  subject offset at which regexec() starts
 *
 * If the regexp begins with "^.{N}" followed by at least one more
 * character, that prefix is stripped, N becomes preg->startoffset and
 * only the remainder is compiled.
 *
 * NOTE(review): the remainder is compiled without PCRE_ANCHORED, so it
 * looks like it may match anywhere at or after offset N, rather than
 * exactly at N as the original expression would -- confirm intended.
 *
 * Returns REGEX_SUCCESS or REGEX_FAIL.  In the current implementation,
 * a failure return is non-specific.  It simply means failure to compile,
 * a pcre_study() error or a pcre_fullinfo() error.  On failure nothing
 * is left allocated, and preg->re and preg->sd are NULL.
 *
 * Terminates via fatal_n() if the internal start-offset-finding regexp
 * can't be compiled.
 *
 * Currently supports only REG_ICASE in cflags
 */
static int
regcomp(atre_regex_t *preg, const char *regex, int cflags)
{
    /* 9 decimal digits can't overflow an int of 32 bits or more */
    enum { SOFFSET_MAX_DIGITS = 9 };
    int erroroffset = 0;
    const char *errstr = (const char *) NULL;
    int options = 0;
    const char *regex_p = regex;
    static pcre *soffset_re = (pcre *) NULL;
    int ovector[9];
    int ret_stat = REGEX_SUCCESS;	/* assume success */

    /* compile once only */
    if(soffset_re == (pcre *) NULL) {
	if((soffset_re = pcre_compile("^\\^\\.\\{(\\d+)\\}(.+)$", 0, &errstr, &erroroffset, NULL)) == (pcre *) NULL) {
	    fatal_n("Can't compile start-offset-finding regex (%s)", errstr == (char *) NULL? "" : errstr);
	}
    }

    /* Known state, whichever way we leave */
    preg->re = (pcre *) NULL;
    preg->sd = (pcre_extra *) NULL;
    preg->startoffset = 0;	/* default */

    /*
     * If the regexp starts with a specified number of any N chars to ignore: Set a start offset
     * for future searches and truncate the regex.
     *
     * ovector[2]..ovector[3] delimit the digits of N, ovector[4] is where the remainder starts.
     */
    if(pcre_exec(soffset_re, (pcre_extra *) NULL, regex, (int) strlen(regex), 0, 0, ovector, 9) == 3) {
	int digits = ovector[3] - ovector[2];

	/*
	 * An absurdly long count is left in place for pcre_compile() to
	 * accept or reject, rather than being converted with overflow.
	 */
	if(digits <= SOFFSET_MAX_DIGITS) {
	    /* strtol() stops at the '}' that follows the digits */
	    preg->startoffset = (int) strtol(&regex[ovector[2]], (char **) NULL, 10);	/* set the start offset */
	    regex_p = &regex[ovector[4]];						/* move the regex pointer */
	}
    }

    /*
     * Translate relevant POSIX regex cflags to PCRE options
     */
    if(cflags & REG_ICASE)
	options = PCRE_CASELESS;

    if((preg->re = pcre_compile(regex_p, options, &errstr, &erroroffset, NULL)) != (pcre *) NULL) {
	/* A NULL study result is only an error if an error string was set */
	errstr = (const char *) NULL;
	if((preg->sd = pcre_study(preg->re, 0, &errstr)) == (pcre_extra *) NULL && errstr) {
	    atre_log(LOG_WARNING, "regcomp(): pcre_study() error: %s", errstr);
	    ret_stat = REGEX_FAIL;
	}

	if(pcre_fullinfo(preg->re, preg->sd, PCRE_INFO_CAPTURECOUNT, &preg->cap_cnt) == 0) {
	    if(atre_debugging > 4)
		fprintf(stderr, "regcomp(): pcre_fullinfo() returned: %d\n", preg->cap_cnt);
	} else {
	    atre_log(LOG_WARNING, "regcomp(): pcre_fullinfo() error");
	    ret_stat = REGEX_FAIL;
	}

	/* Don't leak (or leave usable) a half-initialized regex on failure */
	if(ret_stat != REGEX_SUCCESS) {
	    if(preg->sd)
		pcre_free_study(preg->sd);
	    pcre_free(preg->re);
	    preg->sd = (pcre_extra *) NULL;
	    preg->re = (pcre *) NULL;
	}

    } else {
	atre_log(LOG_WARNING, "regcomp(): pcre_compile(): Couldn't compile \"%s\": %s (at offset %d)",
		 regex_p, errstr == (const char *) NULL? "" : errstr, erroroffset);
	ret_stat = REGEX_FAIL;
    }

    return(ret_stat);
}

/*
 * regexec()
 *
 * Run the compiled pattern in "preg" against "string", starting at
 * preg->startoffset, and report up to "nmatch" (start, end) offset
 * pairs in pmatch[].  pmatch[0] is the whole match; the following
 * entries are the captured sub-strings.  Entries not filled by the
 * match get rm_so and rm_eo set to -1.  pmatch is not touched when
 * nmatch is zero or on failure.
 *
 * "eflags" is accepted for POSIX signature compatibility and ignored.
 *
 * Returns REGEX_SUCCESS on a match, REGEX_FAIL otherwise.  In the
 * current implementation, a non-zero return (failure) is non-specific.
 * It simply means failure to match.
 *
 * Terminates via fatal_n() if the work vector can't be allocated.
 */
static int
regexec(const atre_regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
{
    int ret_stat, cap_cnt;
    int *ovector = (int *) NULL;
    int ovecsize = (int) (nmatch * 3);	/* pcre_exec() wants 3 ints per pair */
    size_t filled, p_indx;

    (void) eflags;	/* unused */

    /* Allocate space for pcre_exec() ovector ints? */
    if(nmatch) {
	if((ovector = calloc(nmatch * 3, sizeof(*ovector))) == NULL)
	    fatal_n("Unable to allocate memory for pcre_exec() \"ovector\" elements");
    }

    cap_cnt = pcre_exec(preg->re, preg->sd, string, (int) strlen(string), preg->startoffset, 0, ovector, ovecsize);

    if(cap_cnt >= 0) {
	/*
	 * A return of zero is still a match: It means the pattern has more
	 * captures than ovector has room for, and that as many pairs as fit
	 * (nmatch of them) were filled in.
	 */
	filled = (cap_cnt == 0) ? nmatch : (size_t) cap_cnt;
	if(filled > nmatch)
	    filled = nmatch;

	/* Move ovector data into pmatch[]: pair N lives at ovector[2N], ovector[2N + 1] */
	for(p_indx = 0; p_indx < filled; ++p_indx) {
	    pmatch[p_indx].rm_so = ovector[2 * p_indx];
	    pmatch[p_indx].rm_eo = ovector[2 * p_indx + 1];
	}

	/* Set stat for any unfilled captures */
	for( ; p_indx < nmatch; ++p_indx) {
	    pmatch[p_indx].rm_so = -1;
	    pmatch[p_indx].rm_eo = -1;
	}

	ret_stat = REGEX_SUCCESS;	/* Success */

    } else {
	/*
	 * Unless there's unexpected cruft in the logfile, PCRE_ERROR_BADOFFSET will (should?)
	 * never occur, but you never know.
	 */
	if(cap_cnt != PCRE_ERROR_NOMATCH && cap_cnt != PCRE_ERROR_BADOFFSET)
	    atre_log(LOG_ERR, "regexec(): pcre_exec(): Couldn't execute pcre pattern - error: %d", cap_cnt);

	ret_stat = REGEX_FAIL;
    }

    free(ovector);	/* We're done w/this (free(NULL) is a no-op) */

    return(ret_stat);
}

/*
 * regfree()
 *
 * Release the compiled pattern (preg->re) and any study data (preg->sd)
 * held by "preg".  Both pointers are reset to NULL afterwards, so a
 * repeated call on the same struct is harmless.  A NULL "preg", or one
 * whose preg->re is NULL, is left untouched.
 *
 * The struct itself is not freed.
 */
static void
regfree(atre_regex_t *preg)
{
    /* Study data is only looked at when there's a compiled pattern */
    if(preg == NULL || preg->re == NULL)
	return;

    pcre_free(preg->re);
    preg->re = NULL;

    if(preg->sd) {
	pcre_free_study(preg->sd);
	preg->sd = NULL;
    }

    return;
}
#endif
