#!/usr/bin/perl
###########################################################################
#
# Program   : Log Analyzer for e2Guardian/DansGuardian
# Author    : Jimmy Myrick (jmyrick@cherokeek12.org)
# Version   : 2.0
# Released  : January 8, 2009
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
#
# If you like it and want to send me something, that's ok too.
# How about a gift certificate to amazon.com or a donation to
# e2Guardian/DansGuardian on my behalf?
#
###########################################################################

###########################################################################
#
# Change to point to your e2Guardian/DansGuardian log directory
# NOTE: The trailing / IS REQUIRED!!
#
###########################################################################
$logdir = '/var/log/e2guardian/';

###########################################################################
#
# Log filename.  Change this to match the prefix of your log files
# This defaults to access.log and should not have to be modified.
#
# Any logfiles in $logdir that match the prefix $logfile and are gzip'ed
# with a .gz extension will also be read.  The results will be printed in
# reverse chronological filename order.
#
# Example:
#   If you have the files:  access.log  access.log.0.gz  access.log.1.gz
#   where they are newest to oldest, then any matches in
#   access.log.1.gz will be printed first, followed by access.log.0.gz
#   and then access.log
#
# No sorting is done by the program and the results are displayed in logfile
# order.  If your results are out of sequence, check the filename/dates
# to be sure they are compressed and rotated properly.  If you use
# the FreeBSD newsyslog.conf to rotate your logs, this will not be a
# problem.
#
###########################################################################
$logfile = 'access.log';

###########################################################################
#
# Log Format.  Change to indicate what format the log files are.  This
# should match what is in dansguardian.conf.  Setting it to the wrong type
# will cause strange results.
#
#   1 = DansGuardian format     2 = CSV-style format
#
###########################################################################
$logformat = 1;

###########################################################################
#
# If you need the perl modules below, download and untar them to a directory.
# Then cd to the directory and enter the commands:
#   perl Makefile.PL; make; make test; make install
#
# If you need more instructions,
# go here: http://www.cpan.org/modules/INSTALL.html
#
# Get it here: http://www.cpan.org/authors/id/LDS/CGI.pm-2.81.tar.gz
#
###########################################################################
use CGI;

###########################################################################
#
# This is needed to do gzip'ed log files on the fly
# Get it here: http://www.cpan.org/authors/id/PMQS/Compress-Zlib-1.16.tar.gz
#
###########################################################################
use Compress::Zlib;

###########################################################################
#
# This should determine where the program is called from automagically.
# If not, uncomment the first line, change to your server name/path and
# comment the second line.  You can use Apache restrictions to block
# access to this file if desired.
#
###########################################################################
#$cgipath = 'http://your.server.com/cgi-bin/dglog/dglog2.pl';
$cgipath = $ENV{SCRIPT_NAME};

###########################################################################
#
#           SHOULDN'T HAVE TO MODIFY ANYTHING BELOW THIS LINE
#
###########################################################################

# Direct method call instead of the indirect-object form "new CGI".
$q = CGI->new;

($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
$mon  = $mon + 1;       # mon starts at 0
$year = $year + 1900;   # year needs 1900 added

$pagename = 'Log Analyzer for e2Guardian/DansGuardian';

# Dispatch on the "a" request parameter:
#   i = run an inquiry against the logs, h = show help, anything else = menu
$a = $q->param('a');

if ($a eq 'i') {   # Inquiry into logs

    # These are the values that can be sent by the user through the browser
    $sIP     = "ALL";   # IP address
    $sUN     = "ALL";   # Username
    $sURL    = "ALL";   # URL to show or trace a denied site - this is the URL to trace
    $sSD     = "ALL";   # Complete start date
    $sSDY    = "ALL";   # Start date year
    $sSDM    = "ALL";   # Start date month
    $sSDD    = "ALL";   # Start date day
    $sED     = "ALL";   # Complete end date
    $sEDY    = "ALL";   # End date year
    $sEDM    = "ALL";   # End date month
    $sEDD    = "ALL";   # End date day
    $sA      = "ALL";   # Action
    $sSumCnt = "20";    # Number of summary sites to show
    $sSumDen = "off";   # Show denied summary? on/off
    $sSumAlw = "off";   # Show allowed summary? on/off
    $sSumOrd = "URL";   # Default to showing url for summary denied/allowed
    $sL      = "off";   # Turn URL's into links? on/off
    $sZ      = "off";   # Examine gziped files? on/off

    # param() is forced into scalar context wherever it appears inside an
    # argument list, so a repeated query parameter cannot inject extra
    # arguments into the validator call.
    $sIP  = &validateIP(scalar $q->param('sIP')) if $q->param('sIP') ne "";
    $sUN  = $q->param('sUN')  if $q->param('sUN')  ne "";
    $sURL = $q->param('sURL') if $q->param('sURL') ne "";

    # A date range is only applied when all six date parts were supplied
    # and none of them is the 'ALL' placeholder.
    if ($q->param('sSDY') ne "" && $q->param('sSDY') ne 'ALL' &&
        $q->param('sSDM') ne "" && $q->param('sSDM') ne 'ALL' &&
        $q->param('sSDD') ne "" && $q->param('sSDD') ne 'ALL' &&
        $q->param('sEDY') ne "" && $q->param('sEDY') ne 'ALL' &&
        $q->param('sEDM') ne "" && $q->param('sEDM') ne 'ALL' &&
        $q->param('sEDD') ne "" && $q->param('sEDD') ne 'ALL') {

        $sSDY = $q->param('sSDY');
        $sSDM = $q->param('sSDM');
        $sSDD = $q->param('sSDD');
        $sEDY = $q->param('sEDY');
        $sEDM = $q->param('sEDM');
        $sEDD = $q->param('sEDD');

        # convertDate turns year.month.day into a zero-padded yyyymmdd string
        $sSD = $sSDY.'.'.$sSDM.'.'.$sSDD;
        $sSD = convertDate($sSD);
        $sED = $sEDY.'.'.$sEDM.'.'.$sEDD;
        $sED = convertDate($sED);

        if ($sSD > $sED) {
            # The condition detects a start date that lies after the end date,
            # so the message says exactly that.
            $msg = "Start Date is greater than End Date";
            &printMenu;
        }
    }

    $sA      = &validateAction(scalar $q->param('sA')) if $q->param('sA') ne "";   # Action
    $sSumCnt = &validateSummary(scalar $q->param('sSumCnt')) if $q->param('sSumCnt') ne "";
    $sSumDen = $q->param('sSumDen') if $q->param('sSumDen') eq 'on';
    $sSumAlw = $q->param('sSumAlw') if $q->param('sSumAlw') eq 'on';
    $sSumOrd = $q->param('sSumOrd') if $q->param('sSumOrd') ne '';
    $sL      = $q->param('sL') if $q->param('sL') eq 'on';
    $sZ      = $q->param('sZ') if $q->param('sZ') eq 'on';

    # Need a few global variables to keep from passing back and forth a bunch.
    # Chained assignment: a comma-separated list followed by "= 0" would only
    # have initialised the last variable.
    $linesRead = $allowTotal = $blockTotal = $grandTotal = 0;

    &searchLog;

}
elsif ($a eq 'h') {
    &displayHelp;
}
else {
    &printMenu;
}

#############
sub searchLog
#############
{
    my $first = 0;

    &printHeader;
    print "";
    print "Report information for:
Start Date: $sSD | End Date: $sED | Username : $sUN | IP: $sIP | Action: $sA | URL: $sURL
\n";
    print "";

    # NOTE(review): the output strings in this sub carry no markup at all;
    # it looks like HTML tags were lost from this copy - confirm against the
    # upstream script before deploying.

    # Collect every file in $logdir whose name starts with $logfile, in
    # descending name order.  \Q...\E keeps the '.' in the configured
    # prefix from acting as a regex wildcard.
    my @files;
    if (opendir(my $dh, $logdir)) {
        @files = sort { $b cmp $a } grep { /^\Q$logfile\E/ } readdir($dh);
        closedir($dh);
    }
    else {
        $msg = "Cannot open $logdir. Check Permissions.";
        &printMenu;
    }

    foreach my $file (@files) {
        if ($file =~ /\.gz/) {
            # Compressed logs are only read when the user asked for it ($sZ);
            # otherwise just list the names that were skipped.
            if ($sZ eq 'off') {
                if ($first == 0) {
                    print "Ignoring gzip logfile(s) in $logdir: ";
                    $first = 1;
                }
                print "$file | ";
                next;
            }

            # Mode is passed as a quoted string rather than a bareword.
            my $gz = gzopen($logdir.$file, "rb");
            if (!$gz) {
                $msg = "Cannot open $logdir$file. Check Permissions.

Try setting directories chmod 755 and logfiles chmod 644.";
                &printMenu;
                next;   # never call methods on a failed handle
            }

            # gzreadline returns the number of bytes read, 0 at end of file
            # and a negative value on error, so test for "> 0" to avoid
            # spinning forever on a damaged archive.
            my $line;
            while ($gz->gzreadline($line) > 0) {
                &checkLine($line);
            }
            $gz->gzclose;
        }
        else {
            print "

";
            # Three-argument open on a lexical handle; the file name can no
            # longer be interpreted as an open mode.
            my $fh;
            unless (open($fh, '<', $logdir.$file)) {
                $msg = "Cannot open $logdir$file. Check Permissions.

Try setting directories chmod 755 and logfiles chmod 644.";
                &printMenu;
                next;   # nothing to read from this file
            }

            while (my $line = <$fh>) {
                &checkLine($line);
            }
            close($fh);
        }
    }

    # Summary tables are only shown when requested and when there is at
    # least one matching request of that kind.
    if ($sSumAlw eq "on" && $allowTotal != 0) {
        &showSummarySites($allowTotal,'ALLOWED',$sSumCnt,$sSumOrd,%topSites);
    }

    if ($sSumDen eq "on" && $blockTotal != 0) {
        &showSummarySites($blockTotal,'DENIED',$sSumCnt,$sSumOrd,%blockSites);
    }

    print "


Total matches: $grandTotal | Total ALL Requests: $linesRead
";
    print "
Return to Menu
";
}

#############
sub checkLine
#############
# Examine one raw log line.  The line is parsed according to $logformat,
# tested against the active filters ($sIP, $sUN, $sSD/$sED, $sURL, $sA)
# and, if it passes, either tallied for the summary tables or printed as
# a detail entry.  Updates the global counters $linesRead, $grandTotal,
# $allowTotal, $blockTotal and the hashes %topSites / %blockSites.
#
# NOTE(review): the output strings in this sub carry no markup; it looks
# like HTML tags were lost from this copy - confirm against upstream.
{
    my ($line) = @_;

    # If a line doesn't start with a digit, throw it out.  CSV lines are
    # quoted (the code below strips a leading quote from the date), so for
    # that format one leading double quote is tolerated.
    if ($logformat == 2) {
        return if ($line !~ /^"?\d/);
    }
    else {
        return if ($line !~ /^\d/);
    }

    $linesRead++;

    # Print out a '.' every 1000 log file lines read. Keep browser connect alive
    if (($linesRead % 1000) == 0) {
        print " ";
    }

    my ($date, $time, $user, $ip, $url, $toeol);

    if ($logformat == 2) {
        # If CSV format, then convert to dg format.
        # $c1=date+time, $c5=action, $c6=method, $c7=size
        my ($c1, $c5, $c6, $c7);
        ($c1,$user,$ip,$url,$c5,$c6,$c7) = split(/","/,$line,7);
        ($date,$time) = split(/ /,$c1);
        # Clean up the extra quotes - this is dirty but does the trick.  Also, by
        # doing the split above it would be possible for a line to be misread if
        # a strange URL contained such a sequence...but it gets the job done in
        # most cases.
        $date =~ s/\"//;
        $c7   =~ s/\"//;
        $toeol = $c5 . ' ' . $c6. ' ' . $c7;
    }
    else {
        ($date,$time,$user,$ip,$url,$toeol) = split(/ /,$line,6);
    }

    # Rule out the easy matches first
    return if ($sIP ne "ALL" && $sIP ne $ip);
    return if ($sUN ne "ALL" && $sUN ne $user);

    # Don't do a date comparison unless we are told to
    if ($sSD ne "ALL" || $sED ne "ALL") {
        my $dgDate = &convertDate($date);
        return if (!($dgDate ge $sSD && $dgDate le $sED));
    }

    # Captures are taken from the match's return list, so a URL that does
    # not match yields an empty host instead of values left over from an
    # earlier successful match.
    # First capture is the protocol (HTTP, FTP), second the domain part
    # without http:// or ftp://
    my (undef, $baseurl) = $url =~ /(\w+):\/\/([\w\.-]+)\/?(\S*)/;
    $baseurl = '' unless defined $baseurl;

    return if ($sURL ne "ALL" && $sURL ne $baseurl);

    # The pattern is kept on one line; with a line break inside it the
    # regex required a newline in the middle of the record and never matched.
    #   1 = *DENIED* or *EXCEPTION* etc., if exists
    #   2 = reason for #1 if exists
    #   3 = method (GET POST)
    #   4 = size
    #   5 = unknown, 6 = unknown, 7 = status, 8 = type
    my ($action, $reason, $method, $size) =
        $toeol =~ /(\*.+\*)? ?(.*)? ([A-Z]+) ([0-9]+) (.*)?/;
    foreach my $field ($action, $reason, $method, $size) {
        $field = '' unless defined $field;
    }

    if ($sA ne "ALL") {
        return if ($sA eq "denAll"          && $action ne "*DENIED*");
        return if ($sA eq "excAll"          && $action ne "*EXCEPTION*");
        return if ($sA eq "denSite"         && $reason !~ /^Banned site/);
        return if ($sA eq "denRegURL"       && $reason !~ /^Banned Regular Expression URL/);
        return if ($sA eq "denPhrase"       && $reason !~ /^Banned Phrase/);
        return if ($sA eq "denCombPhrase"   && $reason !~ /^Banned combination phrase/);
        return if ($sA eq "denWeightPhrase" && $reason !~ /^Weighted phrase limit/);
        return if ($sA eq "denExt"          && $reason !~ /^Banned extension/);
        return if ($sA eq "denMIME"         && $reason !~ /^Banned MIME Type/);
        return if ($sA eq "denICRA"         && $reason !~ /^ICRA/);
        return if ($sA eq "denBlanketIP"    && $reason !~ /^Blanket IP Block/);
        return if ($sA eq "excSite"         && $reason !~ /^Exception site/);
        return if ($sA eq "excPhrase"       && $reason !~ /^Exception phrase/);
        return if ($sA eq "excCombPhrase"   && $reason !~ /^Combination exception phrase/);
    }

    # Need to do a count for grandTotal if allowed OR denied summary selected
    if ($sSumAlw eq "on" || $sSumDen eq "on") {
        if ($action ne '*DENIED*') {
            $allowTotal++;
            $grandTotal++;
            # Don't waste memory if didn't want this, but need to count for grandTotal
            $topSites{$baseurl}++ if ($sSumAlw eq "on" && $sSumOrd eq "URL");
            $topSites{$ip}++      if ($sSumAlw eq "on" && $sSumOrd eq "IP");
            $topSites{$user}++    if ($sSumAlw eq "on" && $sSumOrd eq "User");
        }
        else {
            $blockTotal++;
            $grandTotal++;
            # Don't waste memory if didn't want this, but need to count for grandTotal
            $blockSites{$baseurl}++ if ($sSumDen eq "on" && $sSumOrd eq "URL");
            $blockSites{$ip}++      if ($sSumDen eq "on" && $sSumOrd eq "IP");
            $blockSites{$user}++    if ($sSumDen eq "on" && $sSumOrd eq "User");
        }
    }
    else {
        # No summary requested: print the matching request itself.
        print "$date   $time   ";
        print "$ip   $user
";
        if ($sL eq 'on') {
            print "$url $method $size
";
        }
        else {
            print "$url $method $size
";
        }
        if ($action ne "" && $reason ne "") {
            print "$action : $reason

";
        }
        else {
            print "

";
        }
        $grandTotal++;
    }
}

####################
sub showSummarySites
####################
# Print a ranked table of the busiest entries in %sites.
#   $subTotal   - number of requests of this kind (divisor for the first
#                 percentage column)
#   $whatToShow - label used in the headings ('ALLOWED' / 'DENIED' at the
#                 visible call sites)
#   $topNum     - maximum number of rows to print
#   $sumOrder   - what the keys of %sites are ('URL', 'IP' or 'User')
#   %sites      - key => hit count
# Also reads the globals $grandTotal and $sL.
#
# NOTE(review): in this copy the per-row statements (everything that uses
# $key) had ended up after the loop and the output strings carry no
# markup, which suggests the table HTML was lost.  The statements are
# placed back inside the loop here; confirm the markup against upstream.
{
    my ($subTotal, $whatToShow, $topNum, $sumOrder, %sites) = @_;
    my $count = 1;

    print "
";
    print "";

    # Column headings, printed once ahead of the rows.
    print "

Top $topNum $whatToShow Sites by $sumOrder
Rank URL Count \% of
$whatToShow
\% of
Total
Investigate
";

    # Highest hit count first.
    foreach my $key (sort {$sites{$b} <=> $sites{$a}} keys %sites) {
        # "last" leaves the loop once $topNum rows are out; a bare "break"
        # is not a loop exit in Perl.
        last if ($count > $topNum);

        print "$count.  ";
        if ($sL eq 'on' && $sumOrder eq 'URL') {
            print "$key";
        }
        else {
            print "$key";
        }
        print "$sites{$key}";
        printf("   %2.2f  ",($sites{$key}/$subTotal)*100);
        print "";
        printf("   %2.2f",($sites{$key}/$grandTotal)*100);
        print " Trace
";
        $count++;
    }

    # Footer line with the overall count for this kind of request.
    print "
Total $whatToShow Requests (only top $topNum sites shown) : $subTotal


";
}

###################
sub validateSummary
###################
# Return the requested number of summary rows, or 20 when the request is
# not a whole number between 0 and 100.
{
    my ($count) = @_;

    # Require plain digits: text such as "abc" compares as 0 numerically and
    # would otherwise be accepted and echoed back in the report headings.
    if (!defined($count) || $count !~ /\A\d+\z/ || $count > 100) {
        $count = 20;
    }
    return($count);
}

##############
sub validateIP
##############
# Return $checkIP when it is 'ALL' or a dotted-quad IPv4 address.
# Otherwise set $msg and hand over to printMenu.
{
    my ($checkIP) = @_;

    # One octet, 0-255 (leading zeros are tolerated, as before).
    my $octet = qr/(?:2(?:[0-4]\d|5[0-5])|1?\d{1,2})/;

    if ($checkIP eq 'ALL') {
        return('ALL');
    }
    # Anchored at both ends so nothing may follow the fourth octet; the
    # value is later printed in the report heading.
    elsif ($checkIP =~ /\A$octet(?:\.$octet){3}\z/) {
        return ($checkIP);
    }
    else {
        $msg = "Invalid IP address entered.";
        &printMenu;
    }
}

##################
sub validateAction
##################
# Map the submitted action code to the value used for filtering.
# "none" and any code that is not listed below become "ALL".
{
    my ($action) = @_;

    # These are the codes checkLine tests for.  A code outside this list
    # never filtered anything there, so treating it as "ALL" selects the
    # same lines while keeping arbitrary text out of the report heading.
    my %knownAction = map { $_ => 1 } qw(
        ALL
        denAll        excAll
        denSite       denRegURL      denPhrase   denCombPhrase
        denWeightPhrase              denExt      denMIME
        denICRA       denBlanketIP
        excSite       excPhrase      excCombPhrase
    );

    if (!defined($action) || $action eq "none") {
        return ("ALL");
    }
    return ($action) if ($knownAction{$action});
    return ("ALL");
}

###############
sub convertDate
###############
# Convert a "year.month.day" string into "yyyymmdd" with zero-padded
# month and day.  Accepts years 2000-2035.  On anything else sets $msg
# and hands over to printMenu.
{
    my ($workDate) = @_;

    # Lexical variables: the script-level $year and $mon (set from
    # localtime) must not be overwritten by a date that is merely being
    # converted.
    my ($year, $mon, $day) = split(/\./,$workDate);
    foreach my $part ($year, $mon, $day) {
        $part = '' unless defined $part;
    }

    if (length($mon) == 1) {
        $mon = '0'.$mon;
    }
    if (length($day) == 1) {
        $day = '0'.$day;
    }

    # The range checks are string comparisons, so first make sure every
    # part consists of the expected number of digits.
    if ($year =~ /\A\d{4}\z/ && $mon =~ /\A\d{2}\z/ && $day =~ /\A\d{2}\z/ &&
        ($mon  ge "01"   && $mon  le "12")   &&
        ($day  ge "01"   && $day  le "31")   &&
        ($year ge "2000" && $year le "2035")) {
        my $goodDate = $year.$mon.$day;
        return ($goodDate);
    }
    else {
        $msg = "Invalid Date Detected - $workDate - Be sure logformat is set to the correct format.";
        &printMenu;
    }
}

###############
sub buildSelect
###############
{
    my ($start, $end, $type) = @_;
    my $x = 0;
    ## print "