#!/usr/bin/perl -w # # Copyright 2008 Niklas Edmundsson , # Tomas Ögren , # David Cameron # # Originally developed by Niklas Edmundsson and Tomas Ögren as # cleanbyage. Modified, renamed cache-clean and maintained # for ARC by David Cameron. # # Released under Apache License Version 2.0 # use Sys::Hostname; use File::Find (); use File::Path; use Getopt::Std; use Fcntl ':mode'; use DirHandle; use File::Basename; use POSIX; use strict; use warnings; # Set the variable $File::Find::dont_use_nlink if you're using AFS, # since AFS cheats. # for the convenience of &wanted calls, including -eval statements: use vars qw/*name *dir *prune/; *name = *File::Find::name; *dir = *File::Find::dir; *prune = *File::Find::prune; sub wanted; sub debug; sub printsize; sub diskspace; my(%opts); my $configfile = '/etc/arc.conf'; # max/min used percentage my $maxusedpercent = 0; my $minusedpercent = 0; my $expirytime = 0; my %files; my %expiredfiles; my $totsize = 0; my $totlocksize = 0; my $totlockfiles = 0; my $debuglevel = 'INFO'; my $currenttime = time(); getopts('hsSc:m:M:E:f:D:', \%opts); if(defined($opts{c})) { $configfile = $opts{c}; } if(defined($opts{M})) { $maxusedpercent = $opts{M}; } if(defined($opts{m})) { $minusedpercent = $opts{m}; } if(defined($opts{D})) { $debuglevel = $opts{D}; } if(defined($opts{s})) { $debuglevel = 'ERROR'; } if($maxusedpercent < 0 || $maxusedpercent > 100) { die "Bad value for -M: $maxusedpercent\n"; } if($minusedpercent < 0 || $minusedpercent > 100) { die "Bad value for -m: $minusedpercent\n"; } if($minusedpercent > $maxusedpercent) { die "-M can't be smaller than -m (now $maxusedpercent/$minusedpercent)\n"; } if(defined($opts{E})) { if ($opts{E} =~ m/^(\d+)(\D)$/) { if ($2 eq 'd') { $expirytime = $1 * 86400; } elsif ($2 eq 'h') { $expirytime = $1 * 3600; } elsif ($2 eq 'm') { $expirytime = $1 * 60; } elsif ($2 eq 's') { $expirytime = $1; } else { die "Bad format in -E option value\n"; } } elsif ($opts{E} =~ m/^\d+$/) { $expirytime = $opts{E}; } else { die "Bad format in -E option value\n"; } } sub usage { print < -M ] [-E N] [-D debug_level] [-f space_command] [ -c | [ [...]] ] -h - This help -s - Statistics mode, show cache usage stats, dont delete anything -S - Calculate cache size rather than using used file system space -c - path to an A-REX config file, xml or ini format -M NN - Maximum usage of file system. When to start cleaning the cache (percent) -m NN - Minimum usage of file system. When to stop cleaning cache (percent) -E N - Delete all files whose access time is older than N. Examples of N are 1800, 90s, 24h, 30d (default is seconds) -f command - Path and optionally arguments to a command which outputs "total_bytes used_bytes" of the file system the cache is on. The cache dir is passed as an argument to this command. -D level - Debug level, FATAL, ERROR, WARNING, INFO, VERBOSE or DEBUG. Default is INFO Caches are given by dir1, dir2.. or taken from the config file specified by -c, ARC_CONFIG or the default /etc/arc.conf. EOH exit 1; } usage() if(defined($opts{'h'}) || ((!defined($opts{'M'}) || !defined($opts{'m'})) && !defined($opts{'E'}) && !defined($opts{'s'}))); if (!$configfile && $ENV{'ARC_CONFIG'} && -e $ENV{'ARC_CONFIG'}) { $configfile = $ENV{'ARC_CONFIG'}; } my %loglevels = (FATAL => 0, ERROR => 1, WARNING => 2, INFO => 3, VERBOSE => 4, DEBUG => 5); die "Bad debug level $debuglevel\n" unless $loglevels{$debuglevel}; my $loglevel = $loglevels{$debuglevel}; # Prints message with required format sub p($) { my $s = shift; my $t = strftime "%F %T", localtime; print STDERR "[$t] [cache-clean] $s\n"; } # logging functions sub error($) { if ($loglevel >= 1) { my $s = shift; p("[ERROR] $s"); } } sub warning($) { if ($loglevel >= 2) { my $s = shift; p("[WARNING] $s"); } } sub info($) { if ($loglevel >= 3) { my $s = shift; p("[INFO] $s"); } } sub verbose($) { if ($loglevel >= 4) { my $s = shift; p("[VERBOSE] $s"); } } info('Cache cleaning started'); my @caches = @ARGV; if (!@caches) { die "No config file found and no caches specified\n" unless $configfile; die "No such configuration file $configfile\n" unless -e $configfile; open CONFIG, $configfile; my $cacheblock = 0; while (my $line = ) { if ($line =~ /^\[/) { $cacheblock = 0; } if ($line =~ /^\[arex\/cache\]/) { $cacheblock = 1; } next unless $cacheblock; if ($line =~ /^cachedir/) { $line =~ /\S+\s*=\s*([^\s]+)/; my $cache = $1; push @caches, $cache; } } close CONFIG; die "No caches found in config file '$configfile'\n" unless @caches; } foreach my $filesystem (@caches) { $filesystem =~ s|/+$|| unless $filesystem eq "/"; next if $filesystem eq ""; if ($filesystem =~ /%/) { warning("$filesystem: Warning: cache-clean cannot deal with substitutions"); next; } if (! -d $filesystem || ! -d $filesystem."/data") { info("$filesystem: Cache is empty"); next; } # follow sym links to real filesystem my $symlinkdest = $filesystem; while ($symlinkdest) { $filesystem = $symlinkdest; $symlinkdest = readlink($symlinkdest); $symlinkdest =~ s|/+$|| if $symlinkdest; } my $fsvalues = diskspace($filesystem); if(!($fsvalues)) { warning("Unable to stat $filesystem"); next; } my $fssize = $fsvalues->{total}; my $fsused = $fsvalues->{used}; my $maxfbytes=$fssize*$maxusedpercent/100; info(join("", "$filesystem: used space ", printsize($fsused), " / ", printsize($fssize), " (", sprintf("%.2f",100*$fsused/$fssize), "%)")); if ($expirytime == 0 && $fsused < $maxfbytes && !$opts{'s'}) { info(join("", "Used space is lower than upper limit (", $maxusedpercent, "%)")); next; } my $minfbytes=$fssize*$minusedpercent/100; %files = (); %expiredfiles = (); $totsize = 0; $totlocksize = 0; $totlockfiles = 0; File::Find::find({wanted => \&wanted}, $filesystem."/data"); if($opts{'s'}) { my %allfiles = (%files, %expiredfiles); print "\nUsage statistics: $filesystem\n"; print "Total deletable files found: ",scalar keys %allfiles," ($totlockfiles files locked or in use)\n"; print "Total size of deletable files found: ",printsize($totsize)," (",printsize($totlocksize)," locked or in use)\n"; print "Used space on file system: ",printsize($fsused)," / ",printsize($fssize), " (",sprintf("%.2f",100*$fsused/$fssize),"%)\n"; my $increment = $totsize / 10; if($increment < 1) { print "Total size too small to show usage histogram\n"; next; } printf "%-21s %-25s %s\n", "At size (% of total)", "Newest file", "Oldest file"; my $nextinc = $increment; my $accumulated = 0; my ($newatime, $lastatime); foreach my $fil (sort { $allfiles{$b}{atime} <=> $allfiles{$a}{atime} } keys %allfiles) { $accumulated += $allfiles{$fil}{size}; if(!$newatime) { $newatime = $allfiles{$fil}{atime}; } if($accumulated > $nextinc) { printf "%-21s %-25s %s\n", printsize($accumulated)." (".int(($accumulated/$totsize)*100)."%)", scalar localtime($newatime), scalar localtime($allfiles{$fil}{atime}); while($nextinc < $accumulated) { $nextinc += $increment; } $newatime = undef; $lastatime = undef; } else { $lastatime = $allfiles{$fil}{atime}; } } printf "%-21s %-25s %s\n", printsize($accumulated)." (100%)", "-", scalar localtime($lastatime) if($lastatime); next; } # remove expired files if ($expirytime > 0) { foreach my $fil (keys %expiredfiles) { next if (-d "$fil"); if (-e "$fil.lock") { my $atime = (stat("$fil.lock"))[8]; next if ( defined $atime && ($currenttime - $atime) <= 86400); unlink "$fil.lock"; } if ( unlink $fil ) { $fsused-=$expiredfiles{$fil}{size}; if (defined($opts{'D'}) && -e "$fil.meta") { open FILE, "$fil.meta"; my @lines = ; close FILE; my @values = split(' ', $lines[0]); verbose(join("", "Deleting expired file: $fil atime: ", scalar localtime($expiredfiles{$fil}{atime}), " size: $expiredfiles{$fil}{size} url: $values[0]")); } else { verbose(join("", "Deleting expired file: $fil atime: ", scalar localtime($expiredfiles{$fil}{atime}), " size: $expiredfiles{$fil}{size}")); } } else { warning("Error deleting file '$fil': $!"); } # not critical if this fails next if (! -e "$fil.meta"); if ( unlink "$fil.meta" ) { my $lastslash = rindex($fil, "/"); if ( rmdir(substr($fil, 0, $lastslash))) { verbose("Deleting directory ".substr($fil, 0, $lastslash)); } } else { warning("Error deleting file '$fil.meta': $!"); } } } #Are we still exceding limit after deleting expired files. if ($fsused > $maxfbytes) { # delete in order of access foreach my $fil (sort { $files{$a}{atime} <=> $files{$b}{atime} } keys %files) { last if $fsused < $minfbytes; next if (-d "$fil"); if (-e "$fil.lock") { my $atime = (stat("$fil.lock"))[8]; next if ( defined $atime && ($currenttime - $atime) <= 86400); unlink "$fil.lock" } if ( unlink $fil ) { $fsused-=$files{$fil}{size}; if (defined($opts{'D'}) && -e "$fil.meta") { open FILE, "$fil.meta"; my $line = ; close FILE; chomp($line); verbose(join("", "Deleting file: $fil atime: ", scalar localtime($files{$fil}{atime}), " size: $files{$fil}{size} url: $line")); } else { verbose(join("","Deleting file: $fil atime: ", scalar localtime($files{$fil}{atime}), " size: $files{$fil}{size}")); } } else { warning("Error deleting file '$fil': $!"); } next if (! -e "$fil.meta"); # not critical if this fails if ( unlink "$fil.meta" ) { my $lastslash = rindex($fil, "/"); if ( rmdir(substr($fil, 0, $lastslash))) { verbose("Deleting directory ".substr($fil, 0, $lastslash)); } } else { warning("Error deleting file '$fil.meta': $!"); } } } info(join("", "Cleaning finished, used space now ", printsize($fsused), " / ", printsize($fssize), " (", sprintf("%.2f",100*$fsused/$fssize),"%)")); } exit 0; sub wanted { return if $name =~ m|\.lock$|; return if $name =~ m|\.meta$|; my ($links, $atime, $blocks); ($links, $atime, $blocks) = (lstat($_))[3,8,12]; return unless defined $atime; return unless !(-d _) || -f _; if ($links != 1) { $totlocksize += 512 * $blocks; $totlockfiles++; return; } if (-e "$name.lock") { # check if lock is still valid my $lockatime = (stat("$name.lock"))[8]; if ( defined $lockatime && ($currenttime - $lockatime) <= 86400) { $totlocksize += 512 * $blocks; $totlockfiles++; return; } } if ($expirytime > 0 && ($currenttime - $atime) >= $expirytime) { $expiredfiles{$name}{atime}=$atime; $expiredfiles{$name}{size}= 512 * $blocks; } else { $files{$name}{atime}=$atime; $files{$name}{size}= 512 * $blocks; } $totsize += 512 * $blocks; } sub printsize($) { my $size = shift; if($size > 1024*1024*1024*1024) { $size = int($size/(1024*1024*1024*1024)); return "$size TB"; } if($size > 1024*1024*1024) { $size = int($size/(1024*1024*1024)); return "$size GB"; } if($size > 1024*1024) { $size = int($size/(1024*1024)); return "$size MB"; } if($size > 1024) { $size = int($size/1024); return "$size kB"; } return $size; } # # Returns disk space (total, used by cache and free) in bytes on a filesystem # Taken from arc1/trunk/src/services/a-rex/infoproviders/HostInfo.pm # Updated to calculate actual use # TODO: Put in common place # sub diskspace ($) { my $path = shift; my ($diskused, $disktotal); if ( -d "$path") { # user-specified tool if (defined($opts{f})) { my $spacecmd = $opts{f}; my @output= `$spacecmd $path`; if ($? != 0) { warning("Failed running $spacecmd"); } elsif ($output[0] =~ m/(\d+) (\d+)/) { $disktotal = $1; $diskused = $2; } else { warning("Bad output from $spacecmd: @output"); } } # check if on afs elsif ($path =~ m#/afs/#) { my @dfstring =`fs listquota $path 2>/dev/null`; if ($? != 0) { warning("Failed running: fs listquota $path"); } elsif ($dfstring[-1] =~ /\s+(\d+)\s+(\d+)\s+\d+%\s+\d+%/) { $disktotal = $1*1024; $diskused = $2*1024; } else { warning("Failed interpreting output of: fs listquota $path"); } # "ordinary" disk } else { my @dfstring =`df -k $path 2>/dev/null`; if ($? != 0) { warning("Failed running: df -k $path"); # The first column may be printed on a separate line. # The relevant numbers are always on the last line. } elsif ($dfstring[-1] =~ /\s+(\d+)\s+(\d+)\s+\d+\s+\d+%\s+\//) { $disktotal = $1*1024; $diskused = $2*1024; } else { warning("Failed interpreting output of: df -k $path"); } } # get actual used disk for caches on shared partitions if configured if (defined($opts{S})) { $diskused = undef; my @dustring =`du -ks $path 2>/dev/null`; if ($? != 0) { warning("Failed running: du -ks $path"); } elsif ($dustring[-1] =~ /(\d+)\s+[\w\/]+/) { $diskused = $1*1024; } else { warning("Failed interpreting output of: du -ks $path"); } } } else { warning("Not a directory: $path"); } return undef unless defined($disktotal) and defined($diskused); return {total => $disktotal, used => $diskused}; }