#!/usr/bin/perl require 5; # desc{ report how to fit a directory's items onto a CDR's worth of space } use strict; use constant CDR => 681_700_000; # may require fudging #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ =head1 NAME cdfit -- report how to fit a directory's items onto a CDR's worth of space =head1 SYNOPSIS % cdfit ./mp3s Considering fitting ./mp3s into 681,700,000b... Optimal solution is 7 items: [ 58,338,055: Air 90,221,893: Brian Eno 18,908,463: Duran Duran 42,602,438: Laurie Anderson 14,547,579: Luscious Jackson 98,023,464: Massive Attack 359,057,250: Philip Glass ] filling to 681,699,142b, leaving 858b free and 14 items totalling 1,804,083,465b outside. =head1 DESCRIPTION This utility says how to fit items from a given directory into a certain amount of space -- by default, a rule-of-thumb figure for the amount of space on a blank CDR. =head1 OPTIONS Usage: C]> B<[-tI]>> B<[I]>> Examples: cdfit cdfit c:/data cdfit -670,000,000 cdfit -670,000,000 ~/oggs cdfit -650,000,000 -t120 cdfit -1,440,000 -t360 /user/mojojojo/journals / Optional switch B<-I> sets the size of the CD (or whatever) that you want to fit items into. Current default is 681,700,000 bytes. You may use commas or underscores in this number. Optional switch B<-t> sets a time limit on how long this program will try searching for a match. When the time is up, the program stops searching and reports the best solution found within that time. You may use commas or underscores in this number. Note that this number is in seconds: B<-t10> is ten seconds, not ten minutes (which would be B<-t600>, of course). This switch may not be supported under MSWin. The parameter I provides the path to the items that you want to fit onto a CD. Otherwise defaults to the first existing item found from the list C< "c:/windows/desktop/to_burn", "c:/windows/desktop/mp3", "c:/windows/desktop/mp3s", "c:/windows/desktop/music", "." > (the latter, current directory, is a guaranteed fallback). Note that at time of writing, you can only provide look at one directory -- i.e., you can't do C =head1 CAVEATS F doesn't take into account per-file or per-directory overhead when imagining fitting things onto a CDR. You can work around this by just claiming that there's less space on a CDR than F's default of 681,700,000 bytes. F B take B longer time when given more items to fit. (Specifically, each item B make it take twice as long to run: if it takes 3 seconds to consider 15 items, it I take twice as long (6s) to consider 16 items, twice as long again (12s) to consider 17 items. My experience is that F runs best (i.e., in less than a minute on a decent machine, I taking exponential time with increased number of elements) when you have fewer than 30 items, where the typical item is larger than 1/10th the capacity of a CD. Having lots of little items (little relative to the size of the CD) will make this take quite a long time. When in doubt, you can always just abort the program. =head2 Implementation Notes This program has efficiency problems because this is a case of the "Knapsack Problem", which is NP-complete. F's algorithm iterates over all possible solutions, but prunes the search space when applicable. I.e., when it's considering fitting a 500M directory and a 300M directory and others into a 670M disk, it doesn't bother iterating over all possible solutions that start with having a 500M item and a 300M item, because it knows that none of those are going to work. So taking 2**N time is only a worst case -- bit it's one that we approach as we have many small items, the sum of whose sizes is just over the capacity of the CD. F's algorithm specifically traps the case where all items given will fit in the CD. This avoids a complete (and pointless) scan of the search space. Mercifully, memory requirements don't increase much with the number of items to fit. We don't keep a list of all possible solutions in memory -- we keep only the best one so far, and the current one being considered. =head1 BACKSTORY As you can probably guess, I wrote this program because I had made mp3 backups of a lot of CDs of mine, and I wanted to burn those directories to CDRs, and I wanted to pack the CDRs as tightly as possible. =head1 COPYRIGHT AND DISCLAIMER Copyright (c) 2002 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The programs and documentation in this dist are distributed in the hope that they will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Sean M. Burke, sburke@cpan.org =cut #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ use constant DEBUG => 0; my $alarm; my $length; while(@ARGV and $ARGV[0] =~ m/^-/s) { last if $ARGV[0] eq '--'; if($ARGV[0] =~ m/^-([0-9,_]+)$/s) { $length = $1; $length =~ tr/0-9//cd; $length += 0; shift @ARGV; } elsif($ARGV[0] =~ m/^-[tT]([0-9,_]+)$/) { eval {alarm 0}; die "Can't use -T with this version/build of Perl on this platform.\n" if $@; $alarm = $1; $alarm =~ tr/0-9//cd; $alarm += 0; shift @ARGV; } else { die "Unknown option $ARGV[0]\n"; } } my $where; if(@ARGV) { if(@ARGV == 1) { $where = shift; } else { die "usage: cdfit [-target_size_in_bytes] [-Tseconds_allowed] [dirspec]\n" . " Fit items in dirspec into target_size_in_bytes or fewer,\n" . " with best solution found within seconds_allowed seconds.\n"; } } my $path; foreach my $p ( $where ? $where : ( "c:/windows/desktop/to_burn", "c:/windows/desktop/mp3", "c:/windows/desktop/mp3s", "c:/windows/desktop/music", '.', ), ) { next unless -e $p; $path = $p; last; } die "Where are the CD contents?" unless defined $path; $length ||= CDR; { print "Considering fitting $path into ", commulate($length), "b...\n"; my %in = du1($path); my %in_short; @in_short{trim_common(keys %in)} = values %in; solve_fit( $length, %in_short ); } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub solve_fit { #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Make our recursive closure: my $capacity; my @item_size; my %itemnum2name; my @last_solution; my $go_deeper; $go_deeper = sub { # We could probably do this iteratively, since the search space is # pretty straight: it's 2**N where N is the number of items. # However, pruning in that situation is not totally straightforward # to implement, whereas it's a snap with recursion. # $_[0] is how deep we are. (0 = nothing yet) # $_[1] is how much we've used so far. # rest of @_ is list of items included DEBUG > 2 and print ' ' x $_[0], ' ' , $_[0] . 'd', ' ', $_[1] . 'c', ' ', '[', join(',', @_[2 .. $#_]), ']', "\n" ; if($_[0] == @item_size) { # We've got a complete and good solution DEBUG > 3 and print ' ' x $_[0], " Noting solution [@_]\n"; if($_[1] < $last_solution[1]) { DEBUG > 3 and print ' ' x $_[0], " No better.\n"; } elsif($_[1] > $last_solution[1]) { DEBUG > 1 and print ' ' x $_[0], " Better!\n"; @last_solution = @_; } else { # It's the same. Favor one with fewer parts. if(@_ > @last_solution) { DEBUG > 2 and print ' ' x $_[0], " As good as, and but more complex than.\n"; } elsif(@_ < @last_solution) { DEBUG > 2 and print ' ' x $_[0], " As good as, and simpler than!.\n"; @last_solution = @_; } else { DEBUG > 2 and print ' ' x $_[0], " As good as, and as simple as.\n"; } } } else { # We're only partial: keep recursing { DEBUG > 3 and printf "%s Considering adding a %sb item (#%s:%s)\n", ' ' x $_[0], $item_size[$_[0]], $_[0], $itemnum2name{$_[0]} ; my $x = $_[1] + $item_size[$_[0]]; if($x > $capacity) { # if there's no room. DEBUG > 3 and print ' ' x $_[0], " [prune]\n"; } else { # Consider combinations involving including this item: $go_deeper->($_[0] + 1, $x, @_[2 .. $#_], $_[0]) } } # Now consider combinations involving skipping this item: $go_deeper->($_[0] + 1, @_[1 .. $#_]); } return; }; # end of closure. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $capacity = int(shift); @item_size = (); %itemnum2name = (); while(@_) { my($name, $size) = splice @_,0,2; push @item_size, $size; $itemnum2name{ $#item_size } = $name; DEBUG and print "$#item_size => $name of size $item_size[-1]\n"; } my $sum_size = 0; foreach my $size (@item_size) { $sum_size += $size } if($sum_size <= $capacity) { DEBUG and print "They all fit!\n"; # Pretend we ran... @last_solution = ($#item_size, $sum_size, 0 .. $#item_size); } else { @last_solution = (-1,0); my $start_time = time(); my $run_time; if($alarm) { # We have a time limit. local $SIG{'ALRM'} = sub { die "alarm\n" }; eval { alarm $alarm; $go_deeper->(0,0); alarm 0; }; if ($@) { die unless $@ eq "alarm\n"; # propagate unexpected errors print "Ran out of time. Printing best solution so far...\n" if DEBUG >= 0; } } else { # Take as long as we want. $go_deeper->(0,0); } $run_time = time - $start_time; DEBUG and print "Excursion time: $run_time s\n"; } undef $go_deeper; # break cyclicity if($last_solution[0] == -1) { DEBUG and print "No solution.\n"; return(); } else { if(DEBUG < 0) { # just report it. splice @last_solution, 0, 2; } else { my $used; (undef,$used) = splice @last_solution, 0, 2; my $space_free = $capacity - $used; my $outside = $sum_size - $used; my $format = " % " . length(commulate($capacity)) . "s: %s\n"; print "Optimal solution is ", (@last_solution and @last_solution == @item_size) ? "all " : '', scalar(@last_solution), " items:\n[\n", do { use locale; map sprintf($format, commulate($item_size[$_]), $itemnum2name{$_} ), sort {$itemnum2name{$a} cmp $itemnum2name{$b}} @last_solution }, "]\nfilling to ", commulate($used), "b,\nleaving ", commulate($space_free), "b free and ", (@item_size - @last_solution) ? ( @item_size - @last_solution, " items totalling ", commulate($outside), "b " ) : "nothing ", "outside.\n" ; } @last_solution = map $itemnum2name{$_}, @last_solution; return @last_solution; } } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub du1 { my $in = $_[0]; opendir(MAIN, $in) || die "Can't opendir $in : $!"; my @items = map( ($_ ne '.' and $_ ne '..') ? "$in/$_" : (), readdir(MAIN) ); closedir(MAIN); my @chunks; foreach my $item (@items) { push @chunks, $item, _du_item($item); DEBUG and print "*** ", $chunks[-2], ' = ', $chunks[-1], "\n" } return @chunks; } sub _du_item { # recursive my $spec = $_[0]; if(-e $spec) { if(-f _) { DEBUG > 2 and print "spec $spec\n"; return -s _; } elsif(-d _) { my $u = 0; opendir(DIR, $spec) || die "Can't opendir $spec : $!"; my @items = map( ($_ ne '.' and $_ ne '..') ? "$spec/$_" : (), readdir(DIR) ); closedir(DIR); foreach my $i (@items) { $u += _du_item($i); } return $u; } } else { return 0; } } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub commulate { # add commas to a number # optimize for most common case: return $_[0] unless $_[0] and $_[0] =~ m/^\d{3,}(?:\.\d*)?$/s; my($integer, $fraction) = ($_[0], ''); $integer =~ tr/0-9.//cd; if($integer =~ /([^.]*)(\..*)/) { $integer = $1; $fraction = $2; } while( $integer =~ s/(\d)(\d\d\d)(\,|$)/${1},$2$3/g ) { } return $integer . $fraction; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub trim_common { # remove common substring from all elements my @in = @_; return @in unless @in > 1; my $common = $in[0]; foreach my $k (@in) { #print "Comparing $common to $k\n"; if( scalar($common ^ $k) =~ m/^(\x00+)/s ) { $common = substr($common, 0, length $1); } else { $common = ''; last; } } if(length $common) { #print "Removing \"$common\"\n"; $common = length $common; foreach my $in (@in) { substr($in,0,$common) = '' } } return @in; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __END__