#!/usr/bin/perl -w
################################################################################
# Logfile tagger for Apache HTTPd logfiles
# ------------------------------------------------------------------------------
# "Logfile tagger" is given the name of a directory which is supposed to contain
# "old" (i.e. rotated-away) httpd logfiles or error logfiles.
#
# The "logfile tagger" opens all the files that look like a logfile judging by
# their name (whether compressed or not). It then looks for the first recognizable
# datetime stamp in that file and renames the file using said datetime stamp, then
# re-compresses the file.
#
# Should a file be empty, it is deleted.
#
# Should a file not be empty but contain no recognizable datetime stamp, the
# file's inode change time is meant to be used instead.
#
# The result is a directory with logfiles that are named in a practical manner:
#
#   ssl_log_2008_06_06__12:00:56.gz
#   ssl_log_2008_06_25__12:05:57.gz
#   ssl_log_2008_06_26__09:01:53.gz
#
# instead of something impenetrably obscure like
#
#   ssl_log.1
#   ssl_log.2
#   ssl_log.3
#
# Of course, proper "logrotation" won't work because logfiles will just
# accumulate forever in the old logfile directory. You need a separate program
# which destroys old logfiles (for example, using a "find") or you have to
# destroy them manually.
#
# Note that if the program is let loose on a directory which contains rotated
# files in which no datetime stamps can be found, it still compresses them all,
# even if they were unzipped at first. This may not be what is desired!
#
# By default, Apache uses the "combined" format for traffic logfiles and the
# SSL log file. However, the error log file has a different format. To account
# for that, the string "combined" or "error" can be passed as second argument.
# The assumed default is "combined".
#
# An example for a "combined" log:
#
# 192.168.1.49 - - [25/Apr/2010:16:57:04 +0200] "GET / HTTP/1.1" 200 1012 "-....
#
# An example for the "error" log:
#
# [Sun Apr 25 08:30:12 2010] [warn] RSA server certificate is a CA certificate...
#
# Adaptations may be needed depending on the actual use to which this program
# will be put.
#
# 1) When to call.
#
# In our case, the program is called from the Red Hat "logrotate" configuration
# files for the httpd logs. For example, the logrotation configuration for
# the site "public.m-plify.net", as found in file "access_public.m-plify.net_log":
#
#   /var/log/httpd/access_public.m-plify.net_log {
#      daily
#      rotate 40
#      nomissingok
#      ifempty
#      olddir /var/log/httpd/access_public.m-plify.net_dir
#      create 640 root root
#      nocompress
#      prerotate
#         # NOP
#      endscript
#      postrotate
#         /usr/bin/logger -t httpd-logrotate -p cron.info "access_public.m-plify.net_log rotated"
#         echo "/logfile_tagger.pl /var/log/httpd/access_public.m-plify.net_dir > /dev/null" | at 'now + 5 minute'
#         /apachectl graceful
#      endscript
#   }
#
# The above schedules logfile_tagger.pl to be executed in 5 minutes, after one can be
# sure that "apachectl graceful" has worked and the just-rotated-away logfile has
# been relinquished by httpd.
#
# 2) The names of logfiles may vary between installations.
#
# The function which recognizes the logfile by likely name may have to be modified.
#
# 3) The desired format of the tag may change.
#
# The function which determines the new tag may have to be modified.
#
# -------------------------------------------------------------------------------
# Maintainer: d.tonhofer@m-plify.com
#
# Copyright 2010 M-PLIFY S.A.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

use strict;
use warnings;

# Run the tagger only when this file is executed as a program. When the file is
# loaded with "require" (for example by a test), only the subroutines below are
# defined and nothing is touched on disk.
exit(main(@ARGV)) unless caller;

# -----------------------------------------------------------------------------
# Program entry point.
#
# Arguments (normally @ARGV):
#   1) The directory to work on (mandatory).
#   2) The expected format of the logfiles, either 'combined' or 'error'
#      (case-insensitive). If missing, 'combined' is assumed.
#
# Returns the process exit code: 0 on success, 1 on bad arguments.
# -----------------------------------------------------------------------------

sub main {
    my ($dir, $format) = @_;

    if (!$dir) {
        print STDERR "You have to pass the name of a directory with logfiles as first argument -- exiting\n";
        return 1;
    }
    if (! -d $dir) {
        print STDERR "The directory '$dir' does not exist (or is not a directory) -- exiting\n";
        return 1;
    }

    # Guard against "undef" so that a missing second argument raises no warning
    $format = defined($format) ? lc($format) : '';
    if (!$format) {
        $format = 'combined';
    }
    elsif ($format ne 'combined' && $format ne 'error') {
        print STDERR "The passed format '$format' is not recognized -- exiting\n";
        return 1;
    }

    handleDir($dir, $format);
    return 0;
}

# -----------------------------------------------------------------------------
# Check whether a file's name matches the "tagged" pattern
#   _YYYY_MM_DD__HH:MM:SS
# with or without .gz, .Z or .zip appended.
#
# Returns 1 if the name matches, 0 otherwise.
# -----------------------------------------------------------------------------

sub isAlreadyTagged {
    my ($file) = @_;
    return ($file =~ /\S_\d{4}_\d{2}_\d{2}__\d{2}:\d{2}:\d{2}(?:\.gz|\.Z|\.zip)?$/) ? 1 : 0;
}

# -----------------------------------------------------------------------------
# Check whether a file's name matches the "untagged logfile" pattern
#   <name>.<number>
# with or without .gz, .Z or .zip appended.
#
# Returns 1 if the name matches, 0 otherwise.
# -----------------------------------------------------------------------------

sub isUntaggedLogfile {
    my ($file) = @_;
    return ($file =~ /^\S+\.\d+(?:\.gz|\.Z|\.zip)?$/) ? 1 : 0;
}

# -----------------------------------------------------------------------------
# Describe why a child process started with system() failed.
#
# Arguments: the value of $? and the text of $! taken right after system().
# Returns a short human-readable string.
# -----------------------------------------------------------------------------

sub _describeChildFailure {
    my ($status, $osError) = @_;
    return "could not be started: $osError"       if $status == -1;
    return 'killed by signal ' . ($status & 127)  if $status & 127;
    return 'exit code ' . ($status >> 8);
}

# -----------------------------------------------------------------------------
# Uncompress file if needed. The fully qualified name of the file to uncompress
# is passed. We look at the file ending to decide whether it's compressed or
# not (better strategies are possible)
#
# Returns a reference to an array which holds:
#
# Index 0: 'ok' or 'skip'
# Index 1: If 'ok': The fully qualified name of the uncompressed file
# Index 2: If 'ok': The fully qualified name without the ".<number>" part
# Index 3: If 'ok': The command (program plus options, whitespace-separated)
#                   to be used to re-compress: 'gzip --force'
#
# Dies if the name does not look like a rotated logfile at all.
#
# This can be extended for other compression programs.
# -----------------------------------------------------------------------------

sub uncompressIfNeeded {
    my ($fqfile) = @_;

    # The directory part may legitimately contain whitespace, hence ".+"
    if ($fqfile =~ /^(.+)(\.\d+)(\.gz|\.Z|\.zip)$/) {
        my ($fqBasename, $number, $suffix) = ($1, $2, $3);
        my $fqUncompressedFile = $fqBasename . $number;

        if ($suffix ne '.gz') {
            # Not yet handled suffix; add handling of .Z or .zip as needed
            print STDERR "Unhandled suffix '$suffix' on file '$fqfile' -- skipping file\n";
            return ['skip'];
        }

        # List form of system(): no shell is involved, so quotes or other
        # special characters in the file name cannot break the command.
        if (system('/bin/gunzip', '--force', $fqfile) != 0) {
            my $why = _describeChildFailure($?, "$!");
            print STDERR "Problem uncompressing file '$fqfile' using 'gunzip': $why -- skipping file\n";
            return ['skip'];
        }
        if (! -f $fqUncompressedFile) {
            print STDERR "Uncompressed file '$fqUncompressedFile' does not exist after 'gunzip'. Skipping file\n";
            return ['skip'];
        }
        return ['ok', $fqUncompressedFile, $fqBasename, 'gzip --force'];
    }

    if ($fqfile =~ /^(.+)(\.\d+)$/) {
        # Not compressed; gzip is used to compress it later anyway
        return ['ok', $fqfile, $1, 'gzip --force'];
    }

    die "No match on file '$fqfile'\n";
}

# -----------------------------------------------------------------------------
# Map an English three-letter month abbreviation ("Jan" .. "Dec") to 1 .. 12.
# -----------------------------------------------------------------------------

sub _monthNumber {
    my ($abbrev) = @_;
    my %numberOf = (
        Jan => 1, Feb => 2, Mar => 3, Apr => 4,  May => 5,  Jun => 6,
        Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12,
    );
    return $numberOf{$abbrev};
}

# -----------------------------------------------------------------------------
# Build the tag "YYYY_MM_DD__HH:MM:SS" from numeric date and time parts.
# -----------------------------------------------------------------------------

sub _formatTag {
    my ($year, $month, $day, $hour, $minute, $second) = @_;
    return sprintf("%04i_%02i_%02i__%02i:%02i:%02i",
                   $year, $month, $day, $hour, $minute, $second);
}

# -----------------------------------------------------------------------------
# Read the given file line by line and hand each line to the passed code
# reference. Stops at the first line for which the code reference returns a
# true value and returns that value; returns undef if no line qualifies.
#
# Dies if the file cannot be opened.
# -----------------------------------------------------------------------------

sub _findFirstDatetime {
    my ($fqFile, $parseLine) = @_;
    open(my $fh, '<', $fqFile) or die "Could not open file '$fqFile' for reading: $!\n";
    my $datetime;
    while (defined(my $line = <$fh>)) {
        $datetime = $parseLine->($line);
        last if $datetime;
    }
    close($fh);
    return $datetime;
}

# -----------------------------------------------------------------------------
# Find first datetime in "combined" format file
#
# Returns the tag "YYYY_MM_DD__HH:MM:SS" (expressed in whatever timezone the
# log line uses) or undef if no line carries a recognizable datetime.
# -----------------------------------------------------------------------------

sub findCombinedDatetime {
    my ($fqFile) = @_;
    return _findFirstDatetime($fqFile, sub {
        my ($line) = @_;
        # [09/Jul/2008:09:17:59 +0200]   (the timezone part is optional)
        if ($line =~ m{\[(\d{2})/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/(\d{4}):(\d{2}):(\d{2}):(\d{2})(?:\s+\S+)?\]}) {
            my ($day, $month, $year, $hour, $minute, $second) = ($1, $2, $3, $4, $5, $6);
            return _formatTag($year, _monthNumber($month), $day, $hour, $minute, $second);
        }
        return;
    });
}

# -----------------------------------------------------------------------------
# Find first datetime in "error" format file
#
# Returns the tag "YYYY_MM_DD__HH:MM:SS" (expressed in the server's timezone)
# or undef if no line carries a recognizable datetime.
# -----------------------------------------------------------------------------

sub findErrorDatetime {
    my ($fqFile) = @_;
    return _findFirstDatetime($fqFile, sub {
        my ($line) = @_;
        # [Sun Apr 25 11:43:00 2010]          (Apache 2.2 and earlier)
        # [Sun Apr 25 11:43:00.123456 2010]   (Apache 2.4 adds microseconds)
        if ($line =~ /^\[\S+\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})(?:\.\d+)?\s+(\d{4})\]/) {
            my ($month, $day, $hour, $minute, $second, $year) = ($1, $2, $3, $4, $5, $6);
            return _formatTag($year, _monthNumber($month), $day, $hour, $minute, $second);
        }
        return;
    });
}

# -----------------------------------------------------------------------------
# Handle the given directory
#
# Arguments:
#   1) The directory holding the rotated logfiles.
#   2) The logfile format, 'combined' or 'error'.
#
# For every plain file whose name looks like an untagged rotated logfile:
# uncompress it if needed, delete it if empty, otherwise rename it to
# "<basename>_<tag>" and compress it. The tag is the first datetime found in
# the file or, failing that, the inode change time the file had before it was
# uncompressed.
#
# Exits with code 2 if the directory cannot be read; dies if a file cannot be
# renamed or compressed.
# -----------------------------------------------------------------------------

sub handleDir {
    my ($dir, $format) = @_;

    #
    # suck in the content list of the directory
    #
    my $dh;
    if (!opendir($dh, $dir)) {
        print STDERR "Could not open directory $dir: $! -- exiting\n";
        exit 2;
    }
    my @files = readdir($dh);
    closedir($dh);

    #
    # loop over list, handling each file in turn
    #
    for my $file (@files) {
        my $fqfile = "$dir/$file";

        #
        # Skip stuff which doesn't look right
        #
        if ($file =~ /^\./) {
            # Skip hidden files or "." or ".."
            next;
        }
        if (-l $fqfile || ! -f $fqfile) {
            print STDERR "Skipped file '$fqfile' because it's not a file or it's a symlink -- you may want to check this out\n";
            next;
        }
        if (isAlreadyTagged($file)) {
            # Skip files whose name indicates that they already have been tagged, whether they are compressed or not
            print STDERR "Already tagged: $file\n";
            next;
        }
        if (!isUntaggedLogfile($file)) {
            # Skip files whose name does not indicate that they are untagged compressed or uncompressed logfiles
            print STDERR "Skipped file '$fqfile' because it's name does not indicate a logfile -- you may want to check this out\n";
            next;
        }

        #
        # Remember the inode change time now: uncompressing creates a new
        # file whose change time would simply be "now".
        #
        my $chgtime = (stat($fqfile))[10];

        #
        # Try to uncompress
        #
        my $uncompressResults = uncompressIfNeeded($fqfile);
        next if $$uncompressResults[0] ne 'ok';
        my (undef, $fqUncompressedFile, $fqBasename, $compressProg) = @$uncompressResults;

        #
        # Remove empty file (but never act on a file that cannot be examined)
        #
        my @statInfo = stat($fqUncompressedFile);
        if (!@statInfo) {
            print STDERR "Could not stat file '$fqUncompressedFile': $! -- skipping file\n";
            next;
        }
        if ($statInfo[7] == 0) {
            print STDERR "The size of file '$fqUncompressedFile' is 0 -- removing it\n";
            if (!unlink($fqUncompressedFile)) {
                print STDERR "Problem removing file '$fqUncompressedFile': $! -- continuing anyway\n";
            }
            next;
        }

        #
        # Find first "datetime" in file
        #
        my $firstDatetime;
        if ($format eq 'combined') {
            $firstDatetime = findCombinedDatetime($fqUncompressedFile);
        }
        elsif ($format eq 'error') {
            $firstDatetime = findErrorDatetime($fqUncompressedFile);
        }
        else {
            die "Unhandled case '$format'\n";
        }

        #
        # If no "datetime" was found, fall back to the inode change time
        # (expressed in local time)
        #
        if ($firstDatetime) {
            print STDERR "Found '$firstDatetime' in '$fqUncompressedFile'\n";
        }
        else {
            $chgtime = $statInfo[10] if !defined($chgtime);
            my ($sec, $min, $hour, $mday, $mon, $year) = localtime($chgtime);
            $firstDatetime = _formatTag($year + 1900, $mon + 1, $mday, $hour, $min, $sec);
            print STDERR "No datetime found in $fqUncompressedFile -- using inode change time '$firstDatetime' instead\n";
        }

        #
        # Rename, then compress
        #
        my $fqNewFile = "${fqBasename}_${firstDatetime}";
        if (!rename($fqUncompressedFile, $fqNewFile)) {
            die "Problem renaming file '$fqUncompressedFile' to '$fqNewFile': $!\n";
        }
        # $compressProg is "program options..."; split it so that system()
        # can be called in list form, bypassing the shell.
        if (system(split(' ', $compressProg), $fqNewFile) != 0) {
            my $why = _describeChildFailure($?, "$!");
            die "Problem compressing '$fqNewFile' using '$compressProg': $why\n";
        }
    }
    return;
}