--- /dev/null
+#!/usr/bin/perl
+# mailbox_deduplicate Deduplicates mail boxes by moving or deleting
+# and is released under the terms of the GNU GPL version 3, or any
+# later version, at your option. See the file README and COPYING for
+# more information.
+# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+use File::Copy;
+use IO::Dir;
+use IO::File;
+
+=head1 NAME
+
+mailbox_deduplicate - Deduplicates mail boxes by moving or deleting
+
+=head1 SYNOPSIS
+
+mailbox_deduplicate [options] [mailboxes]
+
+ Options:
+ --move-to Target to move mail (Default .duplicated)
+ --delete Delete mail instead of moving it
+ --simulate, -n Don't actually do anything
+ --debug, -d debugging level (Default 0)
+ --help, -h display this help
+ --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--move-to>
+
+Mail box to move duplicate mail to. Defaults to .duplicated
+
+=item B<--delete>
+
+Delete duplicate mail instead of moving it.
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+mailbox_deduplicate
+
+=cut
+
+
+use vars qw($DEBUG);
+
+my %options = (debug => 0,
+ help => 0,
+ man => 0,
+ simulate => 0,
+ move_to => '.duplicated',
+ );
+
+GetOptions(\%options,
+ 'simulate|n!',
+ 'move_to|move-to=s',
+ 'delete',
+ 'debug|d+','help|h|?','man|m');
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (0) {
+ push @USAGE_ERRORS,"You must pass something";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+if (not @ARGV) {
+ push @ARGV,'.';
+}
+
+my %messages;
+for my $mb (@ARGV) {
+ if (not -d $mb) {
+ print STDERR "No directory named '$mb'\n";
+ exit 1;
+ }
+ for my $nc (qw(new cur)) {
+ if (not -d "$mb/$nc") {
+ print STDERR "This probably isn't a mailbox; no '$nc' directory inside\n";
+ exit 1;
+ }
+ my $dir = IO::Dir->new("$mb/$nc") or
+ die "Unable to open directory '$mb/$nc' for reading: $!";
+ my $message;
+ while (defined ($message = $dir->read())) {
+ my $msg_id = get_message_id("$mb/$nc/$message");
+ if (not defined $msg_id) {
+ print STDERR "No message id for $mb/$nc/$message\n" if $DEBUG;
+ next;
+ }
+ print STDERR "$mb/$nc/$message is $msg_id\n" if $DEBUG;
+ push @{$messages{$msg_id}},"$mb/$nc/$message";
+ }
+ }
+}
+
+# handle duplicated messages
+if (not $options{delete}) {
+ for my $dir ('','/new','/cur') {
+ if (not -d $options{move_to}.$dir) {
+ mkdir($options{move_to}.$dir) or
+ die "Unable to make directory $options{move_to}.$dir";
+ }
+ }
+}
+for my $msg_id (keys %messages) {
+ # if there is only one message with this message id, do nothing.
+ next unless @{$messages{$msg_id}} > 1;
+ # otherwise, we need to remove or move the message to the
+ # duplicate folder
+ for my $i (1..$#{$messages{$msg_id}}) {
+ my ($nc) = $messages{$msg_id}[$i] =~ /(new|cur)/;
+ $nc //= 'new';
+ if ($options{delete}) {
+ if ($options{simulate} or $options{verbose}) {
+ print "rm $messages{$msg_id}[$i] (duplicate of $msg_id $messages{$msg_id}[0])\n";
+ }
+ if (not $options{simulate}) {
+ unlink($messages{$msg_id}[$i]) or
+ die "Unable to unlink message $messages{$msg_id}[$i]";
+ }
+ } else {
+ if ($options{simulate} or $options{verbose}) {
+ print "mv $messages{$msg_id}[$i] $options{move_to}/$nc/. (duplicate of $msg_id $messages{$msg_id}[0])\n";
+ }
+ if (not $options{simulate}) {
+ move($messages{$msg_id}[$i],
+ "$options{move_to}/$nc/."
+ ) or die "Unable to move message $messages{$msg_id}[$i]";
+ }
+ }
+ }
+}
+
+sub get_message_id{
+ my ($message) = @_;
+
+ my $fh = IO::File->new($message,'r') or
+ die "Unable to open $message for reading: $!";
+ my $msg_id;
+ while (<$fh>) {
+ chomp;
+ if (/^$/) {
+ last;
+ }
+ if (/^Message-Id:\s+(<?)(\S+)\1/i) {
+ $msg_id = $2;
+ last;
+ }
+ }
+ close($fh);
+ return $msg_id;
+}
+
+
+__END__