#!/usr/bin/perl -w
eval 'exec perl -S $0 "$@"'
    if 0;
#
# xfersumm.pl - Produce summaries of FTP xfer entries in xferlog
#	Copyright (C) 1998-2004 by James S. Seymour (jseymour@LinxNet.com)
#	(See "License", below.)  Release 0.0.2
#
# Usage:
#    xfersumm [-q] [-d <today|yesterday>] [file1 [file2 [filen [...]]]]
#
# Options:
#     -d today     - means just today
#     -d yesterday - means just "yesterday"
#
#     -q           - quiet: don't print headings for empty reports
#
#    If no file(s) specified, reads from stdin.  Output is to stdout.
#
# Typical usage:
#    Produce a report of previous day's activities:
#        xfersumm.pl -d yesterday /var/log/xferlog
#    A report of prior week's activities (after weekly log rotate):
#        xfersumm.pl /var/log/xferlog.1
#    What's happened so far today:
#        xfersumm.pl -d today /var/log/xferlog
#
# TBD:
#    date ranges, "lastweek", etc.?
#
# License:
#    This program is free software; you can redistribute it and/or
#    modify it under the terms of the GNU General Public License
#    as published by the Free Software Foundation; either version 2
#    of the License, or (at your option) any later version.
#    
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#    
#    You may have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#    
#    An on-line copy of the GNU General Public License can be found at
#    http://www.fsf.org/copyleft/gpl.html.

use strict;
use Getopt::Std;

use vars qw(
    $usageMsg
    $opt_q $opt_d
);

# File-scoped state shared by the main loop and the report section below.
my ($dateStr, @flds);

# Tallies for real ("r") users: transfer count and byte total keyed by user
# ID, plus a transfer count keyed by file name, for each direction.
my (%userInCnt,  %userInSize,  %fileInCnt);
my (%userOutCnt, %userOutSize, %fileOutCnt);

# The same tallies for everybody else (treated as anonymous).
my (%anonInCnt,  %anonInSize,  %anonFileInCnt);
my (%anonOutCnt, %anonOutSize, %anonFileOutCnt);

$usageMsg = "usage: xfersumm [-q] [-d <today|yesterday>]";

# Zero-based positions of the whitespace-separated fields in a log entry
# (position 6, the remote host, is not used yet):
#    size      - file size
#    fname     - file name
#    direction - "i" is incoming, "o" is outgoing
#    userType  - "r" is real, "a" is anon
#    userID    - will be login name or email address
#    agent     - i.e.: "ftp"
my ($size, $fname, $direction, $userType, $userID, $agent)
    = (7, 8, 11, 12, 13, 14);

# Give -q a defined value up front so later tests need no defined() check.
$opt_q = 0;
getopts('qd:')
    or die "$usageMsg\n";

# -d restricts the report to one day; turn it into the prefix to look for.
if (defined $opt_d) {
    $dateStr = get_datestr($opt_d);
}

# Read every log entry (from the named files, or stdin) and tally it into
# the per-user and per-file hashes.  Entries are bucketed by direction
# ("i" is a put, anything else is counted as a get) and by user type
# ("r" is a real user, anything else is counted as anonymous).
while(<>) {
    chomp;
    # With -d, only entries whose text starts with the wanted date count.
    next if(defined($dateStr) && ! /^\Q$dateStr\E/);
    @flds = split;
    # Blank or truncated lines have no agent field at all; skip them
    # quietly rather than tripping "uninitialized value" warnings.
    next unless(defined($flds[$agent]) && $flds[$agent] eq "ftp");

    # Pick the three tallies this entry belongs to.
    my ($cntFor, $sizeFor, $fileCntFor);
    if($flds[$direction] eq "i") {
        # puts
        if($flds[$userType] eq "r") {
            ($cntFor, $sizeFor, $fileCntFor) =
                (\%userInCnt, \%userInSize, \%fileInCnt);
        } else {
            ($cntFor, $sizeFor, $fileCntFor) =
                (\%anonInCnt, \%anonInSize, \%anonFileInCnt);
        }
    } else {
        # gets
        if($flds[$userType] eq "r") {
            ($cntFor, $sizeFor, $fileCntFor) =
                (\%userOutCnt, \%userOutSize, \%fileOutCnt);
        } else {
            ($cntFor, $sizeFor, $fileCntFor) =
                (\%anonOutCnt, \%anonOutSize, \%anonFileOutCnt);
        }
    }

    ++$cntFor->{$flds[$userID]};
    $sizeFor->{$flds[$userID]} += $flds[$size];
    ++$fileCntFor->{$flds[$fname]};
}

# Say which day the report covers when -d was given.
print "File xfer summaries for $dateStr\n" if defined $dateStr;

# One entry per report, in output order.  Three-element entries are
# (count hash, size hash, title) for the per-user reports; two-element
# entries are (count hash, title) for the per-file reports.
my @reports = (
    # real user summaries
    [ \%userOutCnt, \%userOutSize, 'real user FTP "gets" by username' ],
    [ \%userInCnt,  \%userInSize,  'real user FTP "puts" by username' ],
    [ \%fileOutCnt, 'real user FTP "gets" by file' ],
    [ \%fileInCnt,  'real user FTP "puts" by file' ],
    # anonymous user summaries
    [ \%anonOutCnt, \%anonOutSize, 'anonymous FTP "gets" by user' ],
    [ \%anonInCnt,  \%anonInSize,  'anonymous FTP "puts" by user' ],
    # anonymous file summaries
    [ \%anonFileOutCnt, 'anonymous FTP "gets" by file' ],
    [ \%anonFileInCnt,  'anonymous FTP "puts" by file' ],
);

foreach my $report (@reports) {
    if (@$report == 3) {
        print_user_by_cnt_vals(@$report);
    } else {
        print_file_by_cnt_vals(@$report);
    }
}


# Print one per-user report: a title followed by one row per user, ordered
# by transfer count, highest first.  Users with equal counts are listed in
# name order so the same input always yields the same report.
#
# Arguments:
#    $hashName     - ref to hash of user ID => number of transfers
#    $sizeHashName - ref to hash of user ID => total bytes transferred
#    $title        - heading text for the report
#
# When there is nothing to report, prints "<title>: none", or nothing at
# all if -q ($opt_q) is in effect.  Otherwise the title is underlined with
# dashes.  Totals above 1024 bytes are shown in whole kilobytes with a "k"
# suffix; smaller totals are shown in bytes.
sub print_user_by_cnt_vals {
    my($hashName, $sizeHashName, $title) = @_;
    my $dottedLine;
    if(%$hashName) {
        $dottedLine = "\n" . "-" x length($title);
    } else {
        return if($opt_q);
        $dottedLine = ": none";
    }
    print "\n$title$dottedLine\n";
    # Descending by count; the name tie-break keeps the order independent
    # of Perl's per-process hash ordering.
    my @users = sort {
        $hashName->{$b} <=> $hashName->{$a} || $a cmp $b
    } keys(%$hashName);
    foreach my $user (@users) {
        my $bytes = $sizeHashName->{$user};
        if($bytes > 1024) {
            printf "%3d  %6dk  %s\n", $hashName->{$user}, int($bytes / 1024), $user;
        } else {
            printf "%3d   %6d  %s\n", $hashName->{$user}, $bytes, $user;
        }
    }
}

# Print one per-file report: a title followed by one row per file, ordered
# by transfer count, highest first.  Files with equal counts are listed in
# name order so the same input always yields the same report.
#
# Arguments:
#    $hashName - ref to hash of file name => number of transfers
#    $title    - heading text for the report
#
# When there is nothing to report, prints "<title>: none", or nothing at
# all if -q ($opt_q) is in effect.  Otherwise the title is underlined with
# dashes.
sub print_file_by_cnt_vals {
    my($hashName, $title) = @_;
    my $dottedLine;
    if(%$hashName) {
        $dottedLine = "\n" . "-" x length($title);
    } else {
        return if($opt_q);
        $dottedLine = ": none";
    }
    print "\n$title$dottedLine\n";
    # Descending by count; the name tie-break keeps the order independent
    # of Perl's per-process hash ordering.
    my @files = sort {
        $hashName->{$b} <=> $hashName->{$a} || $a cmp $b
    } keys(%$hashName);
    foreach my $file (@files) {
        printf "%3d  %s\n", $hashName->{$file}, $file;
    }
}

# Return the date prefix to match at the start of a log entry, in the
# form "Www Mmm dd" (day of month space-padded to two columns), for the
# local calendar day named by the argument.
#
# Argument:
#    "today" or "yesterday"; anything else dies with the usage message.
sub get_datestr {
    my @monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat);
    my $anHour = 60 * 60;
    my $aDay = $anHour * 24;

    my $dateOpt = $_[0];

    my $time = time();
    if($dateOpt eq "yesterday") {
        # A local day is 23 or 25 hours long when daylight saving time
        # changes, so stepping back 24 hours from a time close to midnight
        # can land on today or on the day before yesterday.  Move to
        # roughly local noon first: noon minus 24 hours is always some
        # time during the previous calendar day.
        my $hour = (localtime($time))[2];
        $time += (12 - $hour) * $anHour;
        $time -= $aDay;
    } elsif($dateOpt ne "today") {
        die "$usageMsg\n";
    }
    my ($t_mday, $t_mon, $t_wday) = (localtime($time))[3,4,6];

    return sprintf("%s %s %2d", $weekDays[$t_wday], $monthNames[$t_mon],
                   $t_mday);
}
