#!/usr/bin/perl

# domains.pl - Generate domain statistics from server log
# Created 8/10/94 by Andrew Pam <avatar@aus.xanadu.com>
# Sort bug fixed 28/10/94 by Andrew Pam
# Hosts count and totals added 01/11/94 by Andrew Pam
# Modified for common log format 09/08/95 by Andrew Pam
# Byte count added 26/11/96 by Andrew Pam
# Copyright (c) 1994-96 Serious Cybernetics

# Name of the file that holds the table of domain codes
$domcodes = 'domain.codes.txt';

# Short (unqualified) name of the local host -- not the FQDN!
$localhost = 'localhost';

# Description to show for local users
$local = 'Local Melbourne users';

##### END OF CONFIGURATION SECTION #####

# Read the domain codes into an associative array.
# Each usable line of the table is "<code> <description>" separated by
# whitespace; codes are folded to lower case and become keys of %domcodes.
# The three-argument open keeps the file name from being read as a mode.
open(DOMCODES, '<', $domcodes) || die "Can't open $domcodes: $!";
while (<DOMCODES>)
{
	# Strip the line terminator (LF or CRLF) and trailing blanks, so a
	# final line with no newline is still accepted and no stray carriage
	# return ends up in the description.
	s/\s+$//;

	# Ignore blank or malformed lines rather than storing an empty key
	next unless /^(\S+)\s+(.+)$/;
	($domain, $name) = ($1, $2);
	$domain =~ tr/A-Z/a-z/;
	$domcodes{$domain} = $name;
};
close DOMCODES;

# Add special-case entries that are not real domain codes
$domcodes{'localhost'} = $local;
$domcodes{'numeric'} = "Location unknown";
$domcodes{'other'} = "Probably errors";

# Gather data: count the requests and sum the bytes for every host named
# in the log read through <> (files on the command line, else stdin).
while (<>)
{
	# Strip the line terminator (LF or CRLF) and trailing blanks so the
	# last field really is the last thing on the line
	s/\s+$//;

	# Decode log entry.  The full common log format breakdown is kept
	# below for reference; only the first field (host) and the last
	# field (size) are actually needed.
#	($host, $date, $URL, $code, $size) =
#	/^(\S+)\s\S+\s\S+\s\[(.+)\]\s"(.+)"\s(\S+)\s(\S+)/;
	# Lines that do not parse are skipped instead of being tallied
	# against an empty host name.
	next unless /^(\S+)\s.+\s(\S+)$/;
	($host, $size) = ($1, $2);

	# A non-numeric size (common log format writes "-" when no bytes
	# were sent) counts as zero
	$size = 0 unless $size =~ /^[0-9]+$/;

	# Keep count
	$host{$host}++;
	$size{$host} += $size;
}

# Return the top level domain of a host name: the text after the last
# period, folded to lower case.  A name with no period is returned whole
# (lower-cased); an empty or undefined name gives the empty string.
sub top_level_domain
{
	my ($name) = @_;
	return '' unless defined($name);
	my @labels = split(/\./, $name);
	return @labels ? lc($labels[-1]) : '';
}

# Analyse data: roll the per-host tallies up into per-domain tallies
foreach $host (keys %host)
{
	# Set $domain to the top level domain (last part after a period).
	# The split result is taken explicitly because current Perl no
	# longer splits into @_ when split is called in void context.
	$domain = top_level_domain($host);

	# Check for special cases ($domain is already lower case, so the
	# configured local host name is folded to match)
	if ($domain eq lc($localhost))
	{	$domain = 'localhost';	}
	elsif ($domain =~ /^[0-9]+$/)
	{	$domain = 'numeric';	}
	elsif (!defined($domcodes{$domain}))
	{	$domain = 'other';	}

	# Keep count
	$hosts{$domain}++;
	$count{$domain} += $host{$host};
	$bytes{$domain} += $size{$host};
}

# Clamp the host count of the 'localhost' entry to one
if ($hosts{'localhost'} > 1)
{	$hosts{'localhost'} = 1;	}

# Release the per-host tables.  undef takes a single argument, so each
# hash needs its own call ("undef %host, %size" only cleared %host).
undef %host;
undef %size;

# Define output format.
# STDOUT_TOP is the page header; its column titles line up with the
# fields of the STDOUT record format below.
format STDOUT_TOP =
Accesses Hosts MBytes   Domain   Description
-------- ----- ------ ---------- ----------------------------------------
.
# STDOUT is the per-domain record.  It reads the package variable $domain
# and the %count, %hosts, %bytes and %domcodes tables at the moment write
# is called.  @### fields are right-justified numbers, @||| is centred
# text and @<<< is left-justified text; the byte count is divided by
# 1048576 so that it is shown in megabytes.
format STDOUT =
@####### @#### @##### @||||||||| @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$count{$domain}, $hosts{$domain}, $bytes{$domain}/1048576, $domain, $domcodes{$domain}
.

# Print results: one line per domain, busiest first, then the grand totals.
# Domains with equal access counts are ordered by name so the report does
# not change from run to run with the hash ordering.
# The loop variable has to be the package variable $domain, because that
# is what the STDOUT format reads when write is called.
foreach $domain (sort {$count{$b} <=> $count{$a} || $a cmp $b} keys %count)
{
	write;
	$bytes += $bytes{$domain};
	$total += $count{$domain};
	$hosts += $hosts{$domain};
}
print "======== ===== ======\n";
# %6.0f rather than %6d: the megabyte total is then rounded the same way
# as the @##### field of the per-domain lines instead of being truncated.
printf "%8d %5d %6.0f\n", $total, $hosts, $bytes/1048576;
