#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use Cwd;

# don't bother displaying info for cumulative sizes smaller than total
# divided by this:
my $detail = 30;

#< options
my $divfactor  = 0;
my $help       = 0;
my $Links      = 0;
my $extensions = 1;
my @skips;
GetOptions('bytes!'        => sub { $divfactor = 1; },
           'kilobytes|kb!' => sub { $divfactor = 1024; },
           'megabytes|mb!' => sub { $divfactor = 1024 * 1024; },
           'gigabytes|gb!' => sub { $divfactor = 1024 * 1024 * 1024; },
           'extensions+'   => sub { $extensions = !$extensions; },
           'help!'         => \$help,
           'Links!'        => \$Links,
           'detail=i'      => \$detail,
           'skip=s@'       => \@skips);
#(b)ytes, (k)ilobytes, (m)egabytes, (g)igabytes, (h)uman-readable
#(d)etail
if ($help) {
    print qq(usage: duke [-bkmg] [-d n] [-s dir] [-e] [-L] [dir]
  -bkmg  - show sizes in bytes, kb, Mb, or Gb, respectively.
           (default is human-readable)
  -d n   - amount of detail: show entities at most total/n in size
           (default $detail -- by necessity this is a bit more than the
           number of items displayed)
  -s dir - skip dir.  This option may be used multiple times.
  -L     - follow symbolic links when counting sizes.
  -e     - accumulate info for files by filename extension
           (on by default; give it again to turn it off)\n);
    exit 0;
}
my %skip = ();
foreach my $s (@skips) {
    $s =~ s|/$||;
    $skip{$s} = 1;
}
#>

#< human_readable
sub human_readable {
    my $bytes = shift;
    unless ($divfactor == 0) {
        return sprintf "%1.0f", ($bytes / $divfactor);
    }
    my $kb = $bytes / 1024;
    if    ($bytes < 1000) { return sprintf " %d",    $bytes; }
    elsif ($bytes < 1024) { return sprintf "%0.2fk", $kb; }
    elsif ($kb < 10)      { return sprintf "%1.1fk", $kb; }
    else {
        my $mb = $kb / 1024;
        if    ($kb < 1000) { return sprintf "%3.0fk", $kb; }
        elsif ($kb < 1024) { return sprintf "%0.2fM", $mb; }
        elsif ($mb < 10)   { return sprintf "%1.1fM", $mb; }
        else {
            my $gb = $mb / 1024;
            if    ($mb < 1000) { return sprintf "%3.0fM", $mb; }
            elsif ($mb < 1024) { return sprintf "%0.2fG", $gb; }
            elsif ($gb < 10)   { return sprintf "%1.1fG", $gb; }
            else               { return sprintf "%3.0fG", $gb; }
        }
    }
}
#>

#< walk(dir)
sub walk {
    # for each directory I'm given, I want to return an arrayref of the form
    # [ sum_of_content_sizes, age, name,
    #   [biggest_constituent], [next_biggest_constituent], ...
    #   ... [smallest_constituent]
    # ]
    my $dirname = shift;
    my $dirsize = -s $dirname;
    my $dirage  = -M _;
    local *DIR;
    opendir DIR, $dirname or do {
        warn "WARNING: cannot open directory [$dirname]: $!\n";
        warn " this directory will be skipped: the report will be wrong.\n";
        return [$dirsize, $dirage, $dirname];
    };
    my @sizes = ();
    my @files = readdir(DIR);
    closedir DIR;
    @files = grep !/^\.\.?$/, @files;    # skip . and ..
    foreach my $file (@files) {
        # get the stat info:
        my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
            $atime, $mtime, $ctime, $blksize, $blocks)
            = lstat qq($dirname/$file);
        # we won't use most of it.  The real size is returned in $size,
        # but this isn't what actually matters to counting disk usage,
        # because the part of the block not filled is still considered
        # "used" for the purposes of storage.  So instead of the exact
        # $size, we charge the file for whole $blksize-sized blocks below.
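        # Note on the accounting: the rounding below always adds one extra
        # block, even when $size is already an exact multiple of $blksize.
        # A closer match to what du(1) reports -- assuming a POSIX-style
        # lstat where $blocks counts 512-byte units, which holds on most
        # Unix systems -- would be to trust the kernel's block count:
        #
        #     $size = $blocks * 512 if defined $blocks;
        #
        # duke uses the $blksize rounding below instead.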
        # $size = $blksize if $blksize > $size;
        unless ($blksize) {
            warn "block size is zero for $dirname/$file!\n";
            warn " skipping it...\n";
            next;
        }
        $size = $blksize * (1 + (int($size / $blksize)));
        if (not -l _ and -d _ and not $skip{$file}) {
            my $subdir = walk(qq($dirname/$file));
            push @sizes, $subdir;
            $dirsize += $subdir->[0];
            $dirage = $subdir->[1] if $subdir->[1] < $dirage;
        }
        else {
            my $age = -M _;
            push @sizes, [ $size, $age, $file ];
            $dirsize += $size;
            $dirage = $age if $age < $dirage;
        }
    }
    return [ $dirsize, $dirage, $dirname,
             sort { $b->[0] <=> $a->[0] } @sizes ];
}
#>

#< Lwalk(dir)
sub Lwalk {
    # Like walk, but follows symbolic links instead of counting their actual
    # sizes.
    # for each directory I'm given, I want to return an arrayref of the form
    # [ sum_of_content_sizes, age, name,
    #   [biggest_constituent], [next_biggest_constituent], ...
    #   ... [smallest_constituent]
    # ]
    my $dirname = shift;
    my $dirsize = -s $dirname;
    my $dirage  = -M _;
    local *DIR;
    opendir DIR, $dirname or do {
        warn "WARNING: cannot open directory [$dirname]: $!\n";
        warn " this directory will be skipped: the report will be wrong.\n";
        return [$dirsize, $dirage, $dirname];
    };
    my @sizes = ();
    my @files = readdir(DIR);
    closedir DIR;
    @files = grep !/^\.\.?$/, @files;    # skip . and ..
    foreach my $file (@files) {
        # get the stat info:
        my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
            $atime, $mtime, $ctime, $blksize, $blocks)
            = stat qq($dirname/$file);
        # we won't use most of it; see the block-size note in walk() above.
        # $size = $blksize if $blksize > $size;
        unless ($blksize) {
            warn "block size is zero for $dirname/$file!\n";
            warn " skipping it...\n";
            next;
        }
        $size = $blksize * (1 + (int($size / $blksize)));
        if (-d _ and not $skip{$file}) {
            my $subdir = Lwalk(qq($dirname/$file));
            push @sizes, $subdir;
            $dirsize += $subdir->[0];
            $dirage = $subdir->[1] if $subdir->[1] < $dirage;
        }
        else {
            my $age = -M _;
            push @sizes, [ $size, $age, $file ];
            $dirsize += $size;
            $dirage = $age if $age < $dirage;
        }
    }
    return [ $dirsize, $dirage, $dirname,
             sort { $b->[0] <=> $a->[0] } @sizes ];
}
#>

#< display(sizes)
sub display {
    my $sizes = shift;
    my $depth = shift;
    my $total = shift;
    $depth ||= 0;
    # return if $depth > 1;
    my ($size, $age, $name, @contents) = @$sizes;
    return unless $size;    # in case we had perm problems or anything.
    $name =~ s|/$||;
    $name =~ s|.*/|| if scalar @contents;
    $total ||= $size;
    return if ($size * $detail < $total);
    print " " x $depth;
    printf "%s %s (%1.1f)\n", human_readable($size), $name, $age;
    foreach my $ref (@contents) {
        last unless display($ref, $depth+1, $total);
    }
    return 1;
}
#>

#< extdisplay(sizes)
sub extdisplay {
    my $sizes = shift;
    my $depth = shift;
    my $total = shift;
    $depth ||= 0;
    # return if $depth > 1;
    my ($size, $age, $name, @contents) = @$sizes;
    return unless $size;    # in case we had perm problems or anything.
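    # From here on extdisplay does two jobs: it prints entries that meet
    # the detail cutoff, and it sums up the files that don't (those with a
    # filename extension), keyed by extension.  The summary is returned to
    # the caller as a hash of  key => { size => ..., count => ..., age => ... },
    # where a file contributes a '*.ext' key and each directory level it
    # bubbles up through prepends another '*/' (see merge() below).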
    $total ||= $size;
    $name =~ s|/$||;
    $name =~ s|.*/||;    # remove directory parts of name
    if (not scalar @contents) {
        # this is a file, not a directory
        if ($size * $detail >= $total) {
            # file is big enough to get printed
            print " " x $depth;
            printf "%s %s (%1.1f)\n", human_readable($size), $name, $age;
            return ();
        }
        elsif ($name =~ /.\./) {
            # has an extension, contribute upwards
            my $ext = $name;
            $ext =~ s/.*\.//;
            return ("*.$ext", { size => $size, count => 1, age => $age });
        }
        else {
            # file too small, doesn't have an extension.  nothing useful.
            return ();
        }
    }
    else {
        # a directory
        my %extensions = ();
        if ($size * $detail >= $total) {
            print " " x $depth;
            printf "%s %s (%1.1f)\n", human_readable($size), $name, $age;
        }
        foreach my $ref (@contents) {
            &merge(\%extensions, extdisplay($ref, $depth+1, $total));
        }
        foreach my $ext (keys %extensions) {
            if ($extensions{$ext}->{size} * $detail >= $total) {
                print " " x ($depth+1);
                printf("%s %s (%1.1f) %1d files\n",
                       human_readable($extensions{$ext}->{size}),
                       substr($ext, 2),
                       $extensions{$ext}->{age},
                       $extensions{$ext}->{count});
                delete $extensions{$ext};
            }
        }
        return %extensions;
    }
}

#< merge(\%hashref, %otherhash)
# requires: all values are of the form { size => number, count => number,
#                                        age => number }
# modifies: hashref
# effects:  for each key, adds the sizes and counts of hashref{"*/key"}
#           and otherhash{key} and takes the minimum of the ages, and
#           stores the result in hashref{"*/key"}.
sub merge {
    my $hashref = shift;
    my (%otherhash) = @_;
    foreach my $key (keys %otherhash) {
        if (defined $hashref->{"*/$key"}) {
            my $left = $hashref->{"*/$key"};
            $left->{size}  += $otherhash{$key}->{size};
            $left->{count} += $otherhash{$key}->{count};
            $left->{age} = $otherhash{$key}->{age}
                if $otherhash{$key}->{age} < $left->{age};
        }
        else {
            $hashref->{"*/$key"} = $otherhash{$key};
        }
    }
}
#>
#>

print "#size name (age in days)\n";
my $target = (shift(@ARGV) or cwd());
my $result;
if ($Links) {
    $result = Lwalk($target);
}
else {
    $result = walk($target);
}
if ($extensions) {
    extdisplay($result);
}
else {
    display($result);
}

#< documentation

=pod

=head1 Name

duke - short multilevel size and age summary

=head1 Synopsis

duke [-bkmg] [-d n] [-s dir] [-e] [-L] [dir]

=head1 Description

Most people use L<du> to see which files and subdirectories are the
biggest to find where their disk space crunch comes from.  This is only
one level deep, and often takes a long time for each level where you do
it.  Another piece of relevant information when deciding what to move
to a different storage medium is the age of the offending files.
B<duke> offers a short multilevel size and age summary to help you
quickly find the largest, oldest stuff on your disk.

=head1 Options

=over

=item C<-bkmg>

show sizes in bytes, kb, Mb, or Gb, respectively.
(default is human-readable)

=item C<-d> I<n>

amount of detail: show entities at most total/I<n> in size
(by necessity this is a bit more than the number of items displayed)

=item C<-L>

like du, dereference symbolic links when counting size.

=item C<-s> I<dir>

skip I<dir>.  This option may be used multiple times.

=item C<-e>

aggregate files by I<e>xtension.  On by default, use again to disable
(e.g. for faster evaluation of results).  If a file that's not big
enough to display on its own has a period in the name (but not at the
beginning), then everything after the last period is assumed to be a
filename extension (often indicating type -- e.g.
pdf or jpg).

After listing all of the files and directories which did make the
detail cutoff, duke will then list aggregates of file extensions which
did not make the cutoff individually, but do make it together.  These
extensions can propagate up the directory hierarchy, and "*/"-es will
be displayed for each layer that they propagate.  Once displayed, they
will not be taken into account for shallower directory entries.
Displayed single-file entries also do not count toward the aggregate
for that directory.

It might be considered a bug that these appear after all single-file
entries for a directory even when in aggregate they are larger than
some of the single-file entries that appeared.  In reality it is
probably more worthwhile to deal with large single files first, so I'm
tempted to call this a feature.

=back

=head1 See Also

L<du>

=head1 Author

Gregory A. Marton

=begin html

=head1 Download

This program is distributed under the GNU General Public License.

=end html

=cut

;
#>
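
# Example invocations (illustrative only -- the paths are made up, and
# what gets printed depends entirely on your filesystem):
#
#   duke                 # summarize the current directory, human-readable sizes
#   duke -k -d 50 /var   # report in kilobytes, with a finer cutoff (total/50)
#   duke -e -s tmp ~     # turn extension grouping off, skip directories named tmp
#   duke -L /usr/local   # follow symbolic links, like du -L
#
# Each flag corresponds to an entry in the GetOptions call near the top of
# this script (-k/--kb, -d/--detail, -e/--extensions, -s/--skip, -L/--Links).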