#!/usr/bin/perl
#: schedule files for deletion N days in the future
#======================================================================
# Sean M. Burke <sburke@cpan.org>
# Last Modified Time-stamp: "2009-06-19 15:20:52 AKDT sburke@cpan.org"
#======================================================================
# See the end of this file for some notes on the design of this program.

my $VERSION = "0.91"; # late beta
use strict; use warnings; use constant DEBUG => 0; 
my $Default_Days = 32;
sub usage {
  my $self = progname();
  print
qq{Usage:  $self [options] files...
$self\: schedule files for deletion N days in the future
These will all schedule "guh.txt" and "thing.mp3" for deletion in 12 days:
  $self 12        guh.txt thing.mp3
  $self -12       guh.txt thing.mp3
  $self -d12      guh.txt thing.mp3
  $self --days=12 guh.txt thing.mp3
Unless you specify a number, $self will default to $Default_Days days from now.

As usual, inserting a "--" will force everything afterward to be treated as
files to delete, no matter how much they look like switches, so that you can
delete files actually named "12" or "-d12" or "--days=12".

$self is mostly just a wrapper around 'rm' + 'at', as if:
   echo rm -f -- guh.txt thing.mp3 | at now + 12 days
So what it prints is whatever 'at' prints, namely, something like:
   job 497 at Wed Apr  9 20:22:00 2008
You can use this with atrm to cancel deletion:  atrm 497

Note: $self doesn't work on directories.  Too scary!  If you
want that, make the call yourself:
   echo rm -rf -- That\\\\ Whole\\\\ Dir | at now + 12 days

                                        [sburke\@cpan.org version $VERSION]
} ;
  exit 2;

}
#======================================================================

my $A = "\nAborting";

my(@Shellies, $Pwd, @Files, $Days);
Get_switches();
DEBUG and print "Days away: $Days\n";

die "$Days days is so very far into the future that I won't do it.$A"
 if $Days > (4*365);  # sanity
usage() unless @Files;

Check_At_Available();
Prune_env();
Get_pwd();
Ready_it_all(@Files);
Do_it_all();
exit;

#======================================================================

sub Check_At_Available {
  open my $ATV, "at -V 2>&1 |" or
   die "at(1) doesn't seem available on your system, so 'expire' can't work.$A";
  my $lines = join '', <$ATV>;
  close($ATV);

  if($lines =~ m/do not have permission to use at/) {
   die "at(1) is available on your system, but you don't have permission to use it, so 'expire' can't work.$A";
  }

  return;
}

#======================================================================
sub progname {
  my $self = $0;
  $self = $1 if $self =~ m<([^/]+)$>s;
  $self = "<thisprogram>" if $self =~ m/\bperl/;
  return $self;
}

#======================================================================

sub Get_switches {
  # We don't use a standard getopts library, because
  #  we want to tolerate some irregular stuff.
  usage() unless @ARGV;
  $Days = $Default_Days;

  # Turn "expire 12 foo bar baz" or "expire -12 ..."
  # into "expire -d12 -- foo bar baz"

  if(@ARGV and $ARGV[0] =~ m/\A -? ( \d{1,9} ) \z/smx ) {
    $Days = $1;
    shift @ARGV;
    unshift @ARGV, "-d$Days", "--";
  }

  while(@ARGV) {
    if( $ARGV[0] eq '--' ) {
      shift @ARGV;
      @Files = splice @ARGV; # pull them all and that's it.

    } elsif( $ARGV[0] =~
        m<^
           (?: -d | --days= )
           ( \d{1,7} )
          $>xsm
    ) {
      $Days = $1;
      shift @ARGV;
    } elsif( $ARGV[0] =~ m/^-/) { # including -h
      usage();
    } else {
      push @Files, shift @ARGV; # pulling just one
    }
  }

  return;
}

#======================================================================

sub Get_pwd {
  chomp($Pwd = `pwd`);

  DEBUG and print "PWD: <$Pwd>\n";

  # Sanity check;
  -e $Pwd or die "My own pwd, $Pwd, doesn't seem to exist?!$A";
  -d $Pwd or die "My own pwd, $Pwd, isn't a dir?!$A";
  -w $Pwd or die "My own pwd, $Pwd, doesn't seem to exist?!$A";
  $Pwd = shell_quote($Pwd);

  return;
}

#======================================================================

sub Ready_it_all {
  my @items = @_;
  foreach my $item (@items) {
    next unless defined $item and length $item; # sanity
    unless( -e $item ) { warn "I see no $item!  Skipping.\n";     next; }
    unless( -f $item ) { warn "But $item isn't a file!  Skipping.\n"; next; }

    my $pf  = "$Pwd/" . shell_quote($item);
    #my $shelly = qq<if [ -e $pf ]; then rm $pf ; fi\n>;
    my $shelly = qq<rm -f -- $pf\n>;
    push @Shellies, $shelly;
    DEBUG > 5 and print "Shelly:\n<<\n$shelly\n>>\n";
  }

  DEBUG > 9 and exit;

  return;
}

#======================================================================

sub Do_it_all {
  my $home = $ENV{'HOME'} || die "I have no \$HOME set?!$A";
  chdir $home or die "Can't chdir to $home !?!? $!$A";

  $ENV{PWD} = $home;

  foreach my $shelly (@Shellies) {
    DEBUG and print "at()'ing: $shelly\n";
    open my $AT, "| at  -q e  now + $Days days 2>&1 | grep -v '/bin/sh'"
     # "e" for expire.  It's just so it stands out from the usual at(1) jobs.
     # That greppy thing is to avoid being told the usual dumb phrase:
     #  "warning: commands will be executed using /bin/sh"
     or die "Can't open a channel to at(1)";
    print $AT "#\n# generated by Expire at " . localtime() ."\n#\n$shelly";
    close($AT) or die "Can't close the channel to at(): $!$A";
  }
  return;
} 

#======================================================================

sub shell_quote {
    # I'm pretty sure this is right -- SMB.
    my @in = @_;
    return '' unless @in;
    my $ret = '';
    foreach (@in) {
	if (!defined $_ or $_ eq '') {
	    $_ = "''";
	} elsif (/[^\w\d.\-\/]/) {
	    s/\'/\'\\\'\'/g;
	    $_ = "'$_'";
	    s/^''//;
	    s/''$//;
	}
	$ret .= "$_ ";
    }
    chop $ret;
    return $ret;
}

#======================================================================

sub Prune_env {
  # This isn't strictly necessary, but it means that if we look
  #  at the job in "at -c" or with gato, won't have a huge blob
  #  of irrelevant env vars preceding the actual last line of
  #  real work.

  # Also, it won't give away all sorts of things that just ~might~ be
  # security-sensitive, like some variables set by SSH.  I have no
  # idea, but I have decided to err on the side of caution.

  %ENV = ( map {; defined($ENV{$_}) ? ($_=>$ENV{$_}) : () }
    qw<SHELL LANG LC_ALL HOME TZ PATH USER USERNAME LOGNAME GROUP PWD> );
    # LANG and LC_ALL just in case there's encoding things
    #  to worry about, I guess.

  # TODO: instead maybe look for a list in NECESSARY_VARS
  #        else DISPENSIBLE_VARS 
  #         (by analogy with cron looking for at.allow/at.deny)

  my $now = scalar localtime;
  $ENV{'EXPIRE_TIME_START'} = "$^T = $now ("
         . ($ENV{TZ} || "TZ?" ) . ")";

  return;
}

#======================================================================

__END__
q~

Notes:

The above business of chdir-ing to $HOME and then rm-ing in
terms of the absolute path may seem, well, a bit more complex
than needed.

Here's the story:

at(1) prefaces whatever commands you give it with this attempt
to get the future job into the same directory as is now
current:

cd /home/sean/guh-work-files || {
	 echo 'Execution directory inaccessible' >&2
	 exit 1
}
[then your commands]
And there's apparently no way to tell at(1) to *not* do that.

And this is an annoyance, because if that "cd" fails, you get
a "Execution directory inaccessible" complaint landing in your
mailbox, which is rarely useful to you.  Or at least, not to
me, because:

In my routine use of "expire", I will often have some
large-ish temp files that I will call "expire 7" on.  But
after I run that command but *before* seven days have passed,
I might actually delete that file myself, or I might even
delete the whole directory it's in.  (Because often that whole
directory is a "work_temp" directory that I might kill because
I'm done working with any of those files.)

(But that does mean that if you rename the directory
containing the files you called "expire" on, then the "at" job
that "expire" set up WILL FAIL WITHOUT ANY ERROR MESSAGE.  If
this is a problem for you, really, just make the call to "at",
which "expire" is just a wrapper around.)

So, to avoid any sane chance of a warning about the file's
directory not being there, you can chdir to a directory that
you are confident will still exist when the at(1) job runs.
You could chdir to "/" (which is *always* there), but I figure
it's good to chdir to the user's home directory.  If *that*
directory doesn't exist when this "at" job runs, then
presumably we wouldn't want "at" running this job anyway.
(Ideally, deleting that user would delete his "at" queue, but
it doesn't hurt to check.)

cd /home/mojo || {
	 echo 'Execution directory inaccessible' >&2
	 exit 1
}
rm -f -- /home/sean/guh-work-files/thatfile.dat

And then all is well.  The -f here basically means "if it
doesn't exist, don't complain about that".  And so it does the
right thing, as far as *my* expectations of it go-- namely, it
never sends complaining email to me.  And the "--" is so you
can delete files named "-guh" or the like, if we called
"expire -- -guh"

  -- Sean M. Burke

~
