mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 04:53:01 +01:00
310 lines
7.9 KiB
Perl
Executable file
310 lines
7.9 KiB
Perl
Executable file
#!/usr/bin/perl
# Copyright Abandoned 1998 TCX DataKonsult AB & Monty Program KB & Detron HB
# This file is public domain and comes with NO WARRANTY of any kind
#
# This program is brought to you by Janne-Petteri Koilo with the
# administration of Michael Widenius.

# This program takes your mails and puts them into your database. It ignores
# messages with the same from, date and message text.
# Mail files may be compressed or gzipped; such files are recognised by a
# name ending in .gz or .Z and are read through zcat.

use DBI;
use Getopt::Long;

$VER = "1.6";

# Defaults for the command line options (GetOptions stores into $opt_<name>).
$opt_db = "mail";
$opt_table = "mails";
$opt_max_mail_size = 65536;
$opt_db_engine = "mysql";
$opt_host = "localhost";
$opt_user = $opt_password = "";
$opt_help = $opt_version = $opt_test=0;

GetOptions("help","version","user=s","password=s",
           "db_engine=s","db=s","host=s","max_mail_size=s","test")
  || usage($VER);

# Test the argument count, not the truth of the first name, so that a mail
# file called "0" is accepted.
usage($VER) if ($opt_help || $opt_version || !@ARGV);

# Month abbreviation => month number, used by date_parser().
# 'Des' is kept next to the correct 'Dec' so that any input which relied on
# the old spelling keeps working.
%months= ('Jan' => 1, 'Feb' => 2, 'Mar' => 3, 'Apr' => 4, 'May' => 5,
          'Jun' => 6, 'Jul' => 7, 'Aug' => 8, 'Sep' => 9, 'Oct' => 10,
          'Nov' => 11, 'Dec' => 12, 'Des' => 12);

# Statistics, updated by the subroutines and reported at the end.
$count_no_from = $count_no_txt = $count_too_big = 0;
$count_forwarded_msgs = $count_duplicates = $no_subject = 0;
$inserted_mails = 0;

# The connection is made even with --test because update_table() calls
# $dbh->quote() while building the statements it prints.
$dbh = DBI->connect("DBI:$opt_db_engine:$opt_db:$opt_host",$opt_user,
                    $opt_password,{ PrintError => 0}) || die $DBI::errstr;
if (!$opt_test)
{
  create_table_if_needed($dbh);
}

foreach my $mail_file (@ARGV)
{
  my $stored_name= $mail_file;
  my $opened;

  if ($mail_file =~ /^(.*)\.(gz|Z)$/)	# compressed or gzipped file
  {
    # The mail is recorded under the name without the .gz/.Z suffix.
    $stored_name= $1;
    # List form: zcat is run directly, so the file name never reaches a shell.
    $opened= open(FILE, '-|', 'zcat', $mail_file);
  }
  else
  {
    $opened= open(FILE, '<', $mail_file);
  }
  if (!$opened)
  {
    warn "$0: cannot open '$mail_file': $!\n";
    next;
  }
  process_mail_file($dbh,$stored_name);
  # For the zcat pipe close() also reports a failing exit status.
  close(FILE) || warn "$0: error while reading '$mail_file'\n";
}
$dbh->disconnect;

$ignored = $count_no_from + $count_no_txt + $count_too_big + $count_duplicates + $no_subject;
print "Mails inserted:\t\t\t$inserted_mails\n";
print "Mails ignored:\t\t\t$ignored\n";
print "Mails without \"From:\" -field:\t$count_no_from\n";
print "Mails without message:\t\t$count_no_txt\n";
print "Too big mails (> $opt_max_mail_size):\t$count_too_big\n";
print "Duplicate mails:\t\t$count_duplicates\n";
print "Forwarded mails:\t\t$count_forwarded_msgs\n";
print "No subject:\t\t\t$no_subject\n";
print "Mails altogether:\t\t";
print $inserted_mails+$ignored;
print "\n";
exit(0);
# Prints the version line (when --version was given) or the full help text,
# then exits with status 0.
#
# Arguments:
#   $VER - version string to show; the global $VER is used when the caller
#          passes none.
sub usage
{
  my($VER)=@_;
  my($prog);

  # The lexical $VER above hides the global one, so fetch the global
  # explicitly when no argument was supplied.
  $VER= $main::VER if (!defined($VER));

  # Strip only a leading "./" and work on a copy: the previous pattern had an
  # unescaped, unanchored '.' which mangled absolute paths, and it rewrote $0.
  $prog= $0;
  $prog =~ s/^\.\///;

  if ($opt_version)
  {
    print "$prog version $VER\n";
  }
  else
  {
    print <<EOF;
$prog version $VER

Usage: $prog [options] file1 [file2 file3 ...]

Description: Inserts mails from file(s) into a database

Options:
--help show this help and exit
--version shows the version of the program
--db_engine=... database server (default: $opt_db_engine)
--db=... database to be used (default: $opt_db)
--host=... hostname to be used (default: $opt_host)
--password=... user password for the db server
--user=... username for the db server
--max_mail_size=# max size of a mail to be inserted into the db.
mail will be ignored if it exceeds this size
(default $opt_max_mail_size)
--test Don't connect to the database, just write the
queries to stdout
EOF
  }
  exit(0);
}
# Makes sure the mail table ($opt_table) is there: a probe query is run
# against it and, when that query cannot be executed, the table is created.
#
# Arguments:
#   $dbh - database handle
#
# Dies with the handle's error text when a statement cannot be prepared or
# when the CREATE TABLE itself fails.
sub create_table_if_needed
{
  my ($dbh)=@_;

  my $probe= $dbh->prepare("select count(*) from $opt_table")
    or die $dbh->errstr;
  return if ($probe->execute);		# probe worked, nothing to create

  my @definitions= ("msg_nro mediumint unsigned not null auto_increment",
                    "date DATETIME NOT NULL",
                    "time_zone CHAR(6) NOT NULL",
                    "mail_from char(120) not null",
                    "reply char(120)",
                    "mail_to TEXT",
                    "cc TEXT",
                    "sbj char(200)",
                    "txt MEDIUMTEXT NOT NULL",
                    "file char(32) noT NULL",
                    "hash INT NOT NULL",
                    "key (msg_nro)",
                    "primary key (mail_from, date, time_zone, hash)");
  my $statement= "CREATE TABLE $opt_table (" . join(", ", @definitions) . ")";

  my $creator= $dbh->prepare($statement) or die $dbh->errstr;
  $creator->execute() or die $dbh->errstr;
  return;
}
# Reads the mailbox that the caller has opened on the global handle FILE,
# splits it into mails and hands every mail to update_table().
#
# Arguments:
#   $dbh       - database handle, passed on to update_table()
#   $file_name - name stored with each mail, passed on to update_table()
#
# %values collects the fields of the mail being read; $type is the key the
# current line belongs to ("message" once the blank line after the headers
# has been seen, "rubbish" for headers that are not stored).
sub process_mail_file
{
  my ($dbh,$file_name)= @_;
  my (%values,$type,$check);
  # Header name (lower case) => key in %values.
  my %field_for= ('reply-to' => 'reply', 'from' => 'from', 'to' => 'to',
                  'cc' => 'cc', 'subject' => 'subject');

  %values=(); $type="";
  $check=0;

  while (<FILE>)
  {
    # chomp, not chop: a last line without a newline keeps its final char.
    chomp;
    if ($type ne "message")
    {
      if (/^(Reply-To|From|To|Cc|Subject): (.*)/i)	# fields that are stored
      {
        $type= $field_for{lc($1)};
        $values{$type}= $2;
      }
      elsif (/^Date: (.*)/i)
      {
        date_parser($1,\%values);
        $type="rubbish";
      }
      # NOTE(review): [\w\W-] matches any character, so every line that
      # contains ':' followed by whitespace is taken as a new header, even a
      # folded continuation line. Left unchanged; confirm before narrowing.
      elsif (/^[\w\W-]+:\s/)
      {
        $type="rubbish";
      }
      elsif ($_ eq "")				# blank line ends the headers
      {
        $type="message";
        $values{$type}="";
      }
      else					# continuation of current field
      {
        s/^\s*/ /;
        $values{$type}.= $_;
      }
    }
    elsif ($check!=0 && $_ ne "")	# in case of forwarded messages
    {
      # First non-empty line after a "forwarded message" marker: always part
      # of the text, even if it looks like a mail separator.
      $values{$type}.= "\n" . $_;
      $check--;
    }
    elsif (/^From .* \d\d:\d\d:\d\d\s\d\d\d\d$/)	# start of the next mail
    {
      $values{'hash'}= checksum("$values{'message'}");
      update_table($dbh,$file_name,\%values);
      %values=(); $type="";
      $check=0;
    }
    elsif (/-* forwarded message .*-*/i)	# in case of forwarded messages
    {
      $values{$type}.= "\n" . $_;
      $check++;
      $count_forwarded_msgs++;
    }
    else
    {
      $values{$type}.= "\n" . $_;
    }
  }
  # Flush the last mail. Nothing collected means there is no mail to store
  # (empty or unreadable input), so no mail is counted for it.
  if (%values)
  {
    $values{'hash'}= checksum("$values{'message'}");
    update_table($dbh,$file_name,\%values);
  }
}
########

# Converts the text of a "Date:" header into the form stored in the table.
#
# Arguments:
#   $date_raw - header text, e.g. "Mon, 18 Jan 1999 04:53:01 +0100"
#   $values   - hash reference of the mail being collected
#
# On a successful parse $values->{'date'} is set to "YEAR-MONTH-DAY TIME"
# (month number looked up in the global %months) and
# $values->{'time_zone'} to the 3-5 character zone, or "" if there is none.
# Text that does not look like a date leaves $values untouched.

sub date_parser
{
  my ($date_raw,$values)=@_;

  # The zone part used to read ([\w-+]{3-5}): "{3-5}" is not a quantifier
  # and "\w-+" is not a valid range, so the zone was never captured.
  # The result is checked so that a failed match cannot reuse stale captures.
  if ($date_raw =~ /(\d{1,2}) (\w+) (\d{2,4}) (\d+:\d+:\d+)\s*([\w+-]{3,5})?/)
  {
    my ($day,$month_name,$year,$time,$zone)= ($1,$2,$3,$4,$5);

    $values->{'date'}= $year . "-" . $months{$month_name} . "-" . "$day $time";
    $values->{'time_zone'}= defined($zone) ? $zone : "";
  }
}
#########

# This is run when a whole mail has been gathered; it is what actually puts
# the mail into the database.
#
# Arguments:
#   $dbh       - database handle (used for quoting even with --test)
#   $file_name - name of the mail file, stored with the mail
#   $values    - hash reference with the collected fields: date, time_zone,
#                from, reply, to, cc, subject, message and hash
#
# Exactly one of the global counters is updated per call. A mail is skipped
# when it lacks a subject or recipient ($no_subject), is larger than
# $opt_max_mail_size ($count_too_big), has no sender ($count_no_from) or has
# no text ($count_no_txt). With --test the statement is printed instead of
# executed. A "Duplicate entry" error from the server is counted in
# $count_duplicates; any other error aborts the program.

sub update_table
{
  my($dbh,$file_name,$values)=@_;
  my($query,$date,$time_zone);

  if (! defined($values->{'subject'}) || !defined($values->{'to'}))
  {
    $no_subject++;
    return;				# Ignore these
  }
  $values->{'message'}= "" if (!defined($values->{'message'}));
  $values->{'message'} =~ s/^\s*//;	#removes whitespaces from the beginning
  $values->{'message'} =~ s/\s*$//;	#removes whitespaces from the end

  if (length($values->{'message'}) > $opt_max_mail_size) #disables big message
  {
    $count_too_big++;
    return;
  }
  if (!defined($values->{'from'}) || $values->{'from'} eq "")
  {
    $count_no_from++;			#disables mails with no from field
    return;
  }
  # Checked before the --test branch so that a test run reports the same
  # numbers as a real one. The counter is $count_no_txt because that is the
  # one the final summary prints.
  if ($values->{'message'} eq "")	#disables mails with no message text
  {
    $count_no_txt++;
    return;
  }

  # Every text value goes through quote(); date and time zone used to be
  # pasted between literal quotes. A missing value is sent as '' for those
  # two (as before) and as NULL for the optional fields.
  my $quote_or_null= sub
  {
    return defined($_[0]) ? $dbh->quote($_[0]) : "NULL";
  };
  $date= defined($values->{'date'}) ? $values->{'date'} : "";
  $time_zone= defined($values->{'time_zone'}) ? $values->{'time_zone'} : "";

  $query= "insert into $opt_table values (NULL," .
    join(",",
         $dbh->quote($date),
         $dbh->quote($time_zone),
         $quote_or_null->($values->{'from'}),
         $quote_or_null->($values->{'reply'}),
         $quote_or_null->($values->{'to'}),
         $quote_or_null->($values->{'cc'}),
         $dbh->quote($values->{'subject'}),
         $dbh->quote($values->{'message'}),
         $dbh->quote($file_name)) .
    ",'" . $values->{'hash'} . "')";

  if ($opt_test)
  {
    print "$query\n";
    $inserted_mails++;
  }
  elsif ($dbh->do($query))
  {
    $inserted_mails++;
  }
  elsif ($dbh->errstr !~ /Duplicate entry /)	#disables duplicates
  {
    die "Aborting: Got error '" . $dbh->errstr ."' for query: '$query'\n";
  }
  else
  {
    $count_duplicates++;
  }
  return;
}
##########

# Two identical messages should be stored only once. To recognise them a
# hash number is calculated for the message text; a message with the same
# from, date and hash as a stored one is ignored.
# This function calculates that simple hash value for the message.
#
# Arguments:
#   $txt - message text
# Returns the hash, a number between 0 and 2^31-1 (0 for an empty text).

sub checksum
{
  my ($txt)= @_;
  my $mask= (1 << 31) - 1;		# keeps the low 31 bits
  my $top_bit= 1 << 30;
  my $crc= 0;

  foreach my $char (split(//, $txt))
  {
    # Shift left by one, add the character code, and add one more when the
    # old value had bit 30 set.
    my $carry= ($crc & $top_bit) ? 1 : 0;
    $crc= (($crc << 1) + ord($char) + $carry) & $mask;
  }
  return $crc;
}