DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

Snippets has posted 5883 posts at DZone. View Full User Profile

Convert Apache Http Combined Logs Into Sql (and Import It Into A Mysql Database Eventually)

03.24.2007
| 14628 views |
  • submit to reddit
        you need to extract the data in your http server log files and put it in a database to query it with your usual tools using SQL. this perl script does just this.

it was hard to find it, that's why i put it here.

#!/usr/bin/perl -w
# Written by Aaron Jenson.
# Original source: http://www.visualprose.com/software.php
# Updated to work under Perl 5.6.1 by Edward Rudd
# Updated 24 march 2007 by Slim Amamou <slim.amamou@alpha-studios.com>
#  - output SQL with the option '--sql'
#  - added SQL create table script to the HELP
#
#  NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/)
#
use strict;
use Getopt::Long qw(:config bundling);
use DBI;
use Date::Parse;

my %options = ();
my $i = 0;
my $sql = '';
my $valuesSql = '';
my $line = '';
my $dbh = 0;
my $sth = 0;
my @parts = ();
my $part;
my $TIMESTAMP = 3;
my $REQUEST_LINE = 4;
my @cols = (
	'remote_host',			## 0
	'remote_logname',		## 1
	'remote_user',			## 2
	'request_time',			## 3.string
	'time_stamp',			## 3.posix
	'request_line',			## 5
	'request_method',		## 6
	'request_uri',			## 7
	'request_args',			## 8
	'request_protocol',		## 9
	'status',				## 10
	'bytes_sent',			## 11
	'referer',				## 12
	'agent'					## 13
);
my $col = '';

GetOptions (\%options,
		"version" => sub { VERSION_MESSAGE(); exit 0; },
		"help|?" => sub { HELP_MESSAGE(); exit 0; },
		"host|h=s",
		"database|d=s",
		"table|t=s",
		"username|u=s",
		"password|p=s",
		"logfile|f=s",
		"sql");

$options{host} ||= 'localhost';
$options{database} ||= '';
$options{username} ||= '';
$options{password} ||= '';
$options{logfile} ||= '';
$options{sql} ||= '';

if( ! ($options{database} || $options{sql}))
{
	HELP_MESSAGE();
	print "Must supply a database to connect to.\n";
	exit 1;
}

if( ! $options{table} )
{
	HELP_MESSAGE();
	print "Must supply table name.\n";
	exit 1;
}

if( $options{logfile} )
{
	if( ! -e $options{logfile} )
	{
		print  "File '$options{logfile}' doesn't exist.\n";
		exit 1;
	}
	open(STDIN, "<$options{logfile}") || die "Can't open $options{logfile} for reading.";
}

if( $options{database} )
{
	$dbh = Connect();
	if (! $dbh) {
		exit 1;
	}
}

$sql = "INSERT INTO $options{table} (";
foreach $col (@cols)
{
	$sql .= "$col," if( $col );
}
chop($sql);
$sql .= ') VALUES (';
my ($linecount,$insertcount) = (0,0);
while($line = <STDIN>)
{
	$linecount++;
	@parts = SplitLogLine( $line );
	next if( $parts[$TIMESTAMP+1] == 0 );
	$valuesSql = '';
	for( $i = 0; $i < @cols; ++$i )
	{
		$parts[$i] =~ s/\\/\\\\/g;
		$parts[$i] =~ s/'/\\'/g;
		$valuesSql .= "'$parts[$i]'," if( $cols[$i] );
	}
	chop($valuesSql);

	if( $options{database} )
	{
		$sth  = $dbh->prepare("$sql$valuesSql)");
		if( ! $sth->execute() )
		{
			print "Unable to perform specified query.\n$sql$valuesSql\n" . $sth->errstr() . "\n";
		} else {
			$insertcount++;
		}
		$sth->finish();
	}
	if( $options{sql} )
	{
		print "$sql$valuesSql);\n";
	}
}
if( ! $options{sql} )
{
	print "Parsed $linecount Log lines\n";
	print "Inserted $insertcount records\n";
	print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n";
}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Connects to a MySQL database and returns the connection.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub Connect
{
	my $dsn = "DBI:mysql:$options{database};hostname=$options{host}";
	return DBI->connect( $dsn, $options{username}, $options{password} );
}


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Splits up a log line into its parts.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub SplitLogLine
{
	my $line = shift;
	my $i = 0;
	my $inQuote = 0;
	my $char = '';
	my $part = '';
	my @parts = ();
	my $count = 0;
	chomp($line);
	for( $i = 0; $i < length($line); ++$i )
	{
		$char = substr($line, $i, 1);
		if( $char eq ' ' && ! $inQuote )
		{
			## print "Found part $part.\n";
			if( $count == $TIMESTAMP )
			{
				push(@parts, "[".$part."]");
				$part = str2time($part);
			}
			push(@parts, $part);
			if( $count == $REQUEST_LINE )
			{
				my @request = split(/[ ?]/, $part);
				push(@parts, $request[0]);
				push(@parts, $request[1]);
				if( $request[3] )
				{
					push(@parts, $request[2]);
					push(@parts, $request[3]);
				}
				else
				{
					push(@parts, '');
					push(@parts, $request[2]);
				}
				$count += 5;
			}
			else
			{
				++$count;
			}
			$part = '';
		}
		elsif( $char eq '"' || $char eq '[' || $char eq ']' )
		{
			$inQuote = !$inQuote;
		}
		else
		{
			$part .= $char;
		}
	}
	push(@parts,$part) if $part;

	return @parts;
}


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Prints the usage/help message for this program.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub HELP_MESSAGE
{
	print<<EOF;
Imports an Apache combined log into a MySQL database.
Usage: mysql_import_combined_log.pl -d <database name> -t <table name> [-h <hostname>] [-u <username>] [-p <password>] [-f <filename]
 --host|-h <host name>         The host to connect to.  Default is localhost.
 --database|-d <database name> The database to use.  Required.
 --username|-u <username>      The user to connect as.
 --password|-p <password>      The user's password.
 --table|-t <table name>       The name of the table in which to insert data.
 --logfile|-f <file name>      The file to read from.  If not given, data is read from stdin.
 --sql                         Output SQL
 --help|-?                     Print out this help message.
 --version                     Print out the version of this software.

----------------------------------
-- SQL create statements for the table
--

create table <TABLE_NAME> (
    remote_host varchar(50) ,
    remote_logname varchar(50) ,
    remote_user varchar(50) ,
    request_time char(28),
    time_stamp varchar(10) ,
    request_line varchar(255),
    request_method varchar(10) ,
    request_uri varchar(255),
    request_args varchar(255),
    request_protocol varchar(10) ,
    status varchar(10) ,
    bytes_sent varchar(10) ,
    referer varchar(255) ,
    agent varchar(255)
);

EOF
}



# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Prints the version information for this program
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub VERSION_MESSAGE
{
	print "mysql_import_combined_log.pl version 1.2\n";
	print "Version 1.0 Written by Aaron Jenson.\n";
	print "Update to work with perl 5.6.1 by Edward Rudd\n";
}

1;