Debbugs/MIME.pm

   1 package Debbugs::MIME;
   2
   3 use strict;
   4
   5 use base qw(Exporter);
   6 use vars qw($VERSION @EXPORT_OK);
   7
   8 BEGIN {
   9     $VERSION = 1.00;
  10
  11     @EXPORT_OK = qw(parse de_rfc1522);
  12 }
  13
  14 use File::Path;
  15 use MIME::Parser;
  16
  17 # for de_rfc1522
  18 use MIME::WordDecoder qw();
  19 use Unicode::MapUTF8 qw(to_utf8 utf8_supported_charset);
  20
  21 sub getmailbody ($);
  22 sub getmailbody ($)
  23 {
  24     my $entity = shift;
  25     my $type = $entity->effective_type;
  26     if ($type eq 'text/plain' or
  27             ($type =~ m#text/# and $type ne 'text/html') or
  28             $type eq 'application/pgp') {
  29         return $entity->bodyhandle;
  30     } elsif ($type eq 'multipart/alternative') {
  31         # RFC 2046 says we should use the last part we recognize.
  32         for my $part (reverse $entity->parts) {
  33             my $ret = getmailbody($part);
  34             return $ret if $ret;
  35         }
  36     } else {
  37         # For other multipart types, we just pretend they're
  38         # multipart/mixed and run through in order.
  39         for my $part ($entity->parts) {
  40             my $ret = getmailbody($part);
  41             return $ret if $ret;
  42         }
  43     }
  44     return undef;
  45 }
  46
  47 sub parse ($)
  48 {
  49     # header and decoded body respectively
  50     my (@headerlines, @bodylines);
  51
  52     my $parser = new MIME::Parser;
  53     mkdir "mime.tmp.$$", 0777;
  54     $parser->output_under("mime.tmp.$$");
  55     my $entity = eval { $parser->parse_data($_[0]) };
  56
  57     if ($entity and $entity->head->tags) {
  58         @headerlines = @{$entity->head->header};
  59         chomp @headerlines;
  60
  61         my $entity_body = getmailbody($entity);
  62         @bodylines = $entity_body ? $entity_body->as_lines() : ();
  63         chomp @bodylines;
  64     } else {
  65         # Legacy pre-MIME code, kept around in case MIME::Parser fails.
  66         my @msg = split /\n/, $_[0];
  67         my $i;
  68
  69         for ($i = 0; $i <= $#msg; ++$i) {
  70             $_ = $msg[$i];
  71             last unless length;
  72             while ($msg[$i + 1] =~ /^\s/) {
  73                 ++$i;
  74                 $_ .= "\n" . $msg[$i];
  75             }
  76             push @headerlines, $_;
  77         }
  78
  79         @bodylines = @msg[$i .. $#msg];
  80     }
  81
  82     rmtree "mime.tmp.$$", 0, 1;
  83
  84     # Remove blank lines.
  85     shift @bodylines while @bodylines and $bodylines[0] !~ /\S/;
  86
  87     # Strip off RFC2440-style PGP clearsigning.
  88     if (@bodylines and $bodylines[0] =~ /^-----BEGIN PGP SIGNED/) {
  89         shift @bodylines while @bodylines and length $bodylines[0];
  90         shift @bodylines while @bodylines and $bodylines[0] !~ /\S/;
  91         for my $findsig (0 .. $#bodylines) {
  92             if ($bodylines[$findsig] =~ /^-----BEGIN PGP SIGNATURE/) {
  93                 $#bodylines = $findsig - 1;
  94                 last;
  95             }
  96         }
  97         map { s/^- // } @bodylines;
  98     }
  99
 100     return { header => [@headerlines], body => [@bodylines]};
 101 }
 102
 103 # Bug #61342 et al.
 104
 105 =head2 de_rfc1522
 106
 107     de_rfc1522('=?iso-8859-1?Q?D=F6n_Armstr=F3ng?= <don@donarmstrong.com>')
 108
 109 Turn RFC-1522 names into the UTF-8 equivalent.
 110
 111 =cut
 112
 113 BEGIN {
 114     # Set up the default RFC1522 decoder, which turns all charsets that
 115     # are supported into the appropriate UTF-8 charset.
 116     MIME::WordDecoder->default(new MIME::WordDecoder(
 117         ['*' => sub {
 118             my ($data, $charset) = @_;
 119             $charset =~ s/^(UTF)\-(\d+)/$1$2/i;
 120             return $data unless utf8_supported_charset($charset);
 121             return to_utf8({
 122                 -string  => $data,
 123                 -charset => $charset,
 124             });
 125         }]));
 126 }
 127
 128 sub de_rfc1522 ($)
 129 {
 130     my ($string) = @_;
 131
 132     # unmime calls the default MIME::WordDecoder handler set up at
 133     # initialization time.
 134     return MIME::WordDecoder::unmime($string);
 135 }
 136
 137 1;