[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
package Encode::MIME::Header;
use strict;
use warnings;
no warnings 'redefine';

our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;

# Default attributes shared by every encoding object registered below.
my %seed = (
    decode_b => '1',    # decodes 'B' encoding ?
    decode_q => '1',    # decodes 'Q' encoding ?
    encode   => 'B',    # encode with 'B' or 'Q' ?
    bpl      => 75,     # bytes per line
);

# Register the three variants directly in Encode's encoding table.
$Encode::Encoding{'MIME-Header'} =
  bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;

$Encode::Encoding{'MIME-B'} = bless {
    %seed,
    decode_q => 0,
    Name     => 'MIME-B',
} => __PACKAGE__;

# NOTE(review): decode_b is inherited from %seed (enabled), so MIME-Q also
# decodes 'B' encoded words although the POD table says "B croaks".
# Left as is because changing it could break existing callers -- confirm.
$Encode::Encoding{'MIME-Q'} = bless {
    %seed,
    decode_q => 1,
    encode   => 'Q',
    Name     => 'MIME-Q',
} => __PACKAGE__;

use base qw(Encode::Encoding);

sub needs_lines { 1 }
sub perlio_ok   { 0 }

# decode($obj, $str [, $chk])
#
# Decodes every RFC 2047 encoded word found in $str and returns the
# resulting string.  Folded header lines are joined first.  Croaks when an
# encoded word uses a transfer encoding ('B' or 'Q') that is disabled on
# $obj.  When $chk is true the caller's input variable is emptied.
sub decode($$;$) {
    use utf8;
    my ( $obj, $str, $chk ) = @_;

    # zap spaces between encoded words
    $str =~ s/\?=\s+=\?/\?==\?/gos;

    # multi-line header to single line.  CRLF is listed first so that it is
    # consumed as one line break; a bare CR or LF is accepted as well.
    $str =~ s/(?:\r\n|[\r\n])[ \t]//gos;

    1 while ( $str =~
        s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
      ;    # Concat consecutive QP encoded mime headers
           # Fixes breaking inside multi-byte characters

    $str =~ s{
        =\?                  # begin encoded word
        ([0-9A-Za-z\-_]+)    # charset (encoding)
        (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
        \?([QqBb])\?         # delimiter
        (.*?)                # Base64-encoded contents
        \?=                  # end encoded word
    }{
        if (uc($2) eq 'B'){
            $obj->{decode_b} or croak qq(MIME "B" unsupported);
            decode_b($1, $3);
        }elsif(uc($2) eq 'Q'){
            $obj->{decode_q} or croak qq(MIME "Q" unsupported);
            decode_q($1, $3);
        }else{
            croak qq(MIME "$2" encoding is nonexistent!);
        }
    }egox;
    $_[1] = '' if $chk;
    return $str;
}

# decode_b($enc, $base64)
#
# Base64-decodes the payload of a 'B' encoded word and decodes the bytes
# with the encoding named $enc.  Croaks when $enc is unknown to Encode.
sub decode_b {
    my $enc  = shift;
    my $d    = find_encoding($enc) or croak qq(Unknown encoding "$enc");
    my $db64 = decode_base64(shift);
    return $d->name eq 'utf8'
      ? Encode::decode_utf8($db64)
      : $d->decode( $db64, Encode::FB_PERLQQ );
}

# decode_q($enc, $q)
#
# Undoes 'Q' encoding ("_" becomes a space, "=XX" becomes the byte XX) and
# decodes the bytes with the encoding named $enc.  Croaks when $enc is
# unknown to Encode.
sub decode_q {
    my ( $enc, $q ) = @_;
    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
    $q =~ s/_/ /go;
    $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
    return $d->name eq 'utf8'
      ? Encode::decode_utf8($q)
      : $d->decode( $q, Encode::FB_PERLQQ );
}

# Alternation of the single characters used as word separators by encode().
my $especials =
  join( '|' => map { quotemeta( chr($_) ) }
      unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );

# Matches one complete encoded word without capturing anything.
my $re_encoded_word = qr{
    (?:
        =\?                  # begin encoded word
        (?:[0-9A-Za-z\-_]+)  # charset (encoding)
        (?:\*\w+(?:-\w+)*)?  # language (RFC 2231)
        \?(?:[QqBb])\?       # delimiter
        (?:.*?)              # Base64-encoded contents
        \?=                  # end encoded word
    )
}xo;

my $re_especials = qr{$re_encoded_word|$especials}xo;

# encode($obj, $str [, $chk])
#
# Encodes $str line by line.  Each line is split on the separators above;
# pieces that contain non-ASCII characters or that already look like an
# encoded word are turned into encoded words, everything else is copied
# verbatim.  The pieces are then folded with "\n " so that a sub-line does
# not grow past $obj->{bpl} bytes where a fold is possible.  When $chk is
# true the caller's input variable is emptied.
sub encode($$;$) {
    my ( $obj, $str, $chk ) = @_;
    my @line = ();

    # CRLF first, so that it counts as ONE line break and not as two.
    for my $line ( split /\r\n|[\r\n]/o, $str ) {
        my ( @word, @subline );
        for my $word ( split /($re_especials)/o, $line ) {
            if (   $word =~ /[^\x00-\x7f]/o
                or $word =~ /^$re_encoded_word$/o )
            {
                push @word, $obj->_encode($word);
            }
            else {
                push @word, $word;
            }
        }
        my $subline = '';
        for my $word (@word) {
            use bytes ();

            # Fold only when something has been collected already;
            # otherwise an over-long first word would emit an empty
            # sub-line and the result would start with "\n ".
            if (
                length($subline)
                && ( bytes::length($subline) + bytes::length($word) >
                    $obj->{bpl} )
              )
            {
                push @subline, $subline;
                $subline = '';
            }
            $subline .= $word;
        }

        # length(), not truth: the string "0" is false but must be kept.
        length($subline) and push @subline, $subline;
        push @line, join( "\n " => @subline );
    }
    $_[1] = '' if $chk;
    return join( "\n", @line );
}

use constant HEAD   => '=?UTF-8?';
use constant TAIL   => '?=';
use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };

# _encode($o, $str)
#
# Cuts $str into chunks on character boundaries and returns one encoded
# word per chunk, using the transfer encoding selected by $o->{encode}.
sub _encode {
    my ( $o, $str ) = @_;
    my $enc  = $o->{encode};
    my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );

    # to coerce a floating-point arithmetics, the following contains
    # .0 in numbers -- dankogai
    $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
    my @result = ();
    my $chunk  = '';
    while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
        use bytes ();
        if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
            push @result, SINGLE->{$enc}($chunk);
            $chunk = '';
        }
        $chunk .= $chr;
    }

    # length(), not truth: a final chunk of "0" must still be emitted.
    length($chunk) and push @result, SINGLE->{$enc}($chunk);
    return @result;
}

# Returns the chunk as a single UTF-8 'B' (Base64) encoded word.
sub _encode_b {
    HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
}

# Returns the chunk as a single UTF-8 'Q' encoded word; every byte other
# than an ASCII letter or digit is written as "=XX".
sub _encode_q {
    my $chunk = shift;
    $chunk = encode_utf8($chunk);
    $chunk =~ s{
        ([^0-9A-Za-z])
    }{
        join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
    }egox;
    return HEAD . 'Q?' . $chunk . TAIL;
}

1;
__END__

=head1 NAME

Encode::MIME::Header -- MIME 'B' and 'Q' header encoding

=head1 SYNOPSIS

    use Encode qw/encode decode/;
    $utf8   = decode('MIME-Header', $header);
    $header = encode('MIME-Header', $utf8);

=head1 ABSTRACT

This module implements RFC 2047 Mime Header Encoding.  There are 3
variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>.  The
difference is described below

              decode()          encode()
  ----------------------------------------------
  MIME-Header Both B and Q      =?UTF-8?B?....?=
  MIME-B      B only; Q croaks  =?UTF-8?B?....?=
  MIME-Q      Q only; B croaks  =?UTF-8?Q?....?=

=head1 DESCRIPTION

When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
is extracted and decoded for I<X> encoding (B for Base64, Q for
Quoted-Printable). Then the decoded chunk is fed to
decode(I<encoding>).  So long as I<encoding> is supported by Encode,
any source encoding is fine.

When you encode, it just encodes UTF-8 string with I<X> encoding then
quoted with =?UTF-8?I<X>?....?= .  The parts that RFC 2047 forbids to
encode are left as is and long lines are folded within 76 bytes per
line.

=head1 BUGS

It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
and =?ISO-8859-1?= but that makes the implementation too complicated.
These days major mail agents all support =?UTF-8? so I think it is
just good enough.

Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
Makamaka.  There are still too many MUAs especially cellular phone
handsets which do not grok UTF-8.

The C<MIME-Q> encoding object keeps C<decode_b> enabled, so decode()
with C<MIME-Q> currently also accepts 'B' encoded words, contrary to
the table in L</ABSTRACT>.

=head1 SEE ALSO

L<Encode>

RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
locations.

=cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |