#!/usr/bin/perl # mailbox_deduplicate Deduplicates mail boxes by moving or deleting # and is released under the terms of the GNU GPL version 3, or any # later version, at your option. See the file README and COPYING for # more information. # Copyright 2013 by Don Armstrong . use warnings; use strict; use Getopt::Long; use Pod::Usage; use File::Copy; use IO::Dir; use IO::File; =head1 NAME mailbox_deduplicate - Deduplicates mail boxes by moving or deleting =head1 SYNOPSIS mailbox_deduplicate [options] [mailboxes] Options: --move-to Target to move mail (Default .duplicated) --delete Delete mail instead of moving it --simulate, -n Don't actually do anything --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--move-to> Mail box to move duplicate mail to. Defaults to .duplicated =item B<--delete> Delete duplicate mail instead of moving it. =item B<--debug, -d> Debug verbosity. (Default 0) =item B<--help, -h> Display brief usage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES mailbox_deduplicate =cut use vars qw($DEBUG); my %options = (debug => 0, help => 0, man => 0, simulate => 0, move_to => '.duplicated', ); GetOptions(\%options, 'simulate|n!', 'move_to|move-to=s', 'delete', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; if (0) { push @USAGE_ERRORS,"You must pass something"; } pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; if (not @ARGV) { push @ARGV,'.'; } my %messages; for my $mb (@ARGV) { if (not -d $mb) { print STDERR "No directory named '$mb'\n"; exit 1; } for my $nc (qw(new cur)) { if (not -d "$mb/$nc") { print STDERR "This probably isn't a mailbox; no '$nc' directory inside\n"; exit 1; } my $dir = IO::Dir->new("$mb/$nc") or die "Unable to open directory '$mb/$nc' for reading: $!"; my $message; while (defined ($message = $dir->read())) { my $msg_id = get_message_id("$mb/$nc/$message"); if (not defined $msg_id) { print STDERR "No message id for $mb/$nc/$message\n" if $DEBUG; next; } print STDERR "$mb/$nc/$message is $msg_id\n" if $DEBUG; push @{$messages{$msg_id}},"$mb/$nc/$message"; } } } # handle duplicated messages if (not $options{delete}) { for my $dir ('','/new','/cur') { if (not -d $options{move_to}.$dir) { mkdir($options{move_to}.$dir) or die "Unable to make directory $options{move_to}.$dir"; } } } for my $msg_id (keys %messages) { # if there is only one message with this message id, do nothing. next unless @{$messages{$msg_id}} > 1; # otherwise, we need to remove or move the message to the # duplicate folder for my $i (1..$#{$messages{$msg_id}}) { my ($nc) = $messages{$msg_id}[$i] =~ /(new|cur)/; $nc //= 'new'; if ($options{delete}) { if ($options{simulate} or $options{verbose}) { print "rm $messages{$msg_id}[$i] (duplicate of $msg_id $messages{$msg_id}[0])\n"; } if (not $options{simulate}) { unlink($messages{$msg_id}[$i]) or die "Unable to unlink message $messages{$msg_id}[$i]"; } } else { if ($options{simulate} or $options{verbose}) { print "mv $messages{$msg_id}[$i] $options{move_to}/$nc/. (duplicate of $msg_id $messages{$msg_id}[0])\n"; } if (not $options{simulate}) { move($messages{$msg_id}[$i], "$options{move_to}/$nc/." ) or die "Unable to move message $messages{$msg_id}[$i]"; } } } } sub get_message_id{ my ($message) = @_; my $fh = IO::File->new($message,'r') or die "Unable to open $message for reading: $!"; my $msg_id; while (<$fh>) { chomp; if (/^$/) { last; } if (/^Message-Id:\s+(