1# -*-Perl-*- 2################################################################ 3### 4### Scan.pm 5### 6### Author: Internet Message Group <img@mew.org> 7### Created: Apr 23, 1997 8### Revised: May 25, 2011 9### 10 11my $PM_VERSION = "IM::Scan.pm version 20161010(IM153)"; 12 13package IM::Scan; 14require 5.003; 15require Exporter; 16 17use IM::Config qw(allowcrlf scansbr_file scan_header_pick mail_path address 18 addresses_regex addrbook_file petname_file); 19use IM::Util; 20use IM::EncDec qw(mime_decode_string); 21use IM::Address qw(extract_addr fetch_addr); 22use IM::Japanese; 23use integer; 24use strict; 25use vars qw(@ISA @EXPORT); 26 27@ISA = qw(Exporter); 28@EXPORT = qw(set_scan_form get_header store_header parse_body parse_header 29 disp_msg read_petnames); 30 31use vars qw($WIDTH $JIS_SAFE $HEADLINELIMIT $BODYLINELIMIT 32 $MSTR2NUM @MSTR @WSTR %symbol_table 33 %multipart_mark @NEEDSAFE %NEEDSAFE_HASH 34 @STRUCTURED %STRUCTURED_HASH 35 @HANDLE 36 %REF_SYMBOL %message_id %message_id_and_subject 37 %petnames %ADDRESS_HASH 38 $SI $SO $SS2 $SS3 39 $ALLOW_CRLF); 40 41############################################ 42## 43## Environments 44## 45 46BEGIN { 47 $WIDTH = 80; 48 $JIS_SAFE = 0; 49 50 $HEADLINELIMIT = 100; 51 $BODYLINELIMIT = 30; 52 53 $MSTR2NUM = { 54 Jan => "01", Feb => "02", Mar => "03", Apr => "04", 55 May => "05", Jun => "06", Jul => "07", Aug => "08", 56 Sep => "09", Oct => "10", Nov => "11", Dec => "12", 57 }; 58 59 @MSTR = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 60 'Sep', 'Oct', 'Nov', 'Dec'); 61 62 @WSTR = ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'); 63 64 # used in 'set_scan_form' to convert scan_form() to $EVAL_SCAN_FORM 65 %symbol_table = ( 66 'n' => 'number:', 67 'd' => 'date:', 68 'f' => 'from:', 69 't' => 'to:', 70 'g' => 'newsgroups', 71 'a' => 'address:', 72 'P' => 'pureaddr:', 73 'A' => 'Address:', 74 's' => 'subject:', 75 'i' => 'indent:', 76 'b' => 'body:', 77 'm' => 'multipart:', 78 'S' => 'indent-subject:', 79 'F' => 'folder:', 80 'M' => 'mark:', 81 'p' => 'private:', 82 'D' => 'duplicate:', 83# 'B' => 'bytes:', 84 'K' => 'kbytes:', 85 86 'y' => 'year:', 87 'c' => 'month:', 88 'C' => 'monthstr:', 89 'e' => 'mday:', 90 'h' => 'hour:', 91 'E' => 'min:', 92 'G' => 'sec:', 93 ); 94 95 %multipart_mark = ( 96 'enc' => 'E', 97 'sig' => 'S', 98 ); 99 100 @NEEDSAFE = qw(from: to: cc: address: Address: 101 subject: indent-subject: body:); 102 103 %NEEDSAFE_HASH = (); 104 105 foreach (@NEEDSAFE) { 106 $NEEDSAFE_HASH{$_} = 1; 107 } 108} 109 110############################################ 111## 112## If user specifies a scan format, convert that to 'eval-form'. 113## 114 115sub set_scan_form($$$) { 116 my($scan_form, $width, $jis_safe) = @_; 117 118 $ALLOW_CRLF = allowcrlf(); 119 120 $WIDTH = $width; 121 $JIS_SAFE= $jis_safe; 122 123 my $scan_hook = scansbr_file(); 124 if ($scan_hook =~ /(.+)/) { 125 if ($main::INSECURE) { 126 im_warn("Sorry, ScanSbr is ignored for SUID root script.\n"); 127 } else { 128 if ($> != 0) { 129 $scan_hook = $1; # to pass through taint check 130 } 131 if (-f $scan_hook) { 132 require $scan_hook; 133 } else { 134 im_err("scan subroutine file $scan_hook not found.\n"); 135 } 136 } 137 } 138 139 convert_scan_form($scan_form); 140} 141 142############################################ 143## 144## get_header 145## 146 147sub get_header($) { 148 my $path = shift; 149 my %Head = (); 150 my $folder; 151 152 $Head{'path'} = $path; 153 if ($path =~ /(.*)\/([0-9]+)$/) { 154 # xxx how about news? 155 $Head{'number:'} = $2; 156 $folder = substr($1, length(mail_path()) + 1); 157 $folder = conv_iso2022jp($folder) if ($folder =~ /[\200-\377]/); 158 $Head{'folder:'} = '+' . $folder; 159 } 160 161 im_open(\*MSG, "<$path") || return; 162 163 ## 164 ## Collect file attributes 165 ## 166# $Head{'bytes:'} = -s MSG; 167 $Head{'kbytes:'} = int(((-s MSG) + 1023) / 1024); 168 169 ## 170 ## Header parse 171 ## 172 my $header; 173 if ($ALLOW_CRLF) { 174 $header = <MSG>; 175 if ($header =~ /\r/) { 176 $/ = "\r\n\r\n"; 177 } else { 178 $/ = "\n\n"; 179 } 180 $header .= <MSG>; 181 $header =~ s/\r//g; 182 } else { 183 $/ = "\n\n"; 184 $header = <MSG>; 185 } 186 store_header(\%Head, $header); 187 188 ## 189 ## Body parse 190 ## 191 $/ = "\n"; 192 $Head{'body:'} = parse_body(*MSG, 0); 193 194 close(MSG); 195 196 parse_header(\%Head); 197 198 return(%Head); 199} 200 201@STRUCTURED = qw ( 202 sender from reply-to return-path 203 resent-sender resent-from resent-reply-to 204 errors-to return-receipt-to 205 to cc bcc dcc apparently-to 206 resent-to resent-cc resent-bcc 207); 208 209%STRUCTURED_HASH = (); 210 211foreach (@STRUCTURED) { 212 $STRUCTURED_HASH{$_} = 1; 213} 214 215sub store_header($$) { 216 my($href, $header) = @_; 217 local $_; 218 my $lines = 0; 219 220 chomp($header); 221 $header =~ s/\n[ \t]+/ /g; 222 foreach (split("\n", $header)) { 223 chomp; 224 last if (++$lines > $HEADLINELIMIT); 225 next unless (/^([^:]*):\s*(.*)$/); 226 my $label = lc($1); 227 next if ($label eq 'received'); 228 if (defined($href->{$label})) { 229 if ($STRUCTURED_HASH{$label}) { 230 $href->{$label} .= ", "; 231 } else { 232 $href->{$label} .= "\n\t"; 233 } 234 $href->{$label} .= $2; 235 } else { 236 $href->{$label} = $2; 237 } 238 } 239} 240 241##### BODY parse ##### 242# 243# parse_body(HANDLER, mode) 244# HANDER: Filer Hander or Array 245# mode: 1 if HANDLER is File Handler, otherwise HANDLER is Array 246# return value: substring from body 247# 248sub parse_body(*$) { 249 local *HANDLE = shift; 250 my $mode = shift; 251 my($content, $lines) = ('', 0); 252 253 while (1) { 254 if ($mode == 0) { 255 $_ = <HANDLE>; 256 } else { 257 $_ = shift(@HANDLE); 258 } 259 last unless defined($_); 260 261 next if /^\s*\n/; 262 next if /^--/; 263 next if /^- --/; 264 next if /^=2D/; 265 next if /^\s+[\w*-]+=/; # eg. "boundary="; * = RFC2231 266 next if /^\s*[\w-]+: /; # Headers and header style citation 267 next if /^\s*[>:|\#;\/_}]/; 268 next if /^\s*[[<\/(.]+ *snip/; 269 next if /^ /; 270 next if /^\s*\w+([\'._-]+\w+)*>/; 271 next if /^\s*(On|At) .*[^.!\s\n]\s*$/; 272 next if /(:|;|\/)\s*\n$/; 273 next if /(wrote|writes?|said|says?)[^.!\n]?\s*\n$/; 274 next if /^This is a multi-part message in MIME format/i; 275 276 if (/^\s*In (message|article|mail|news|<|\"|\[|\()/i) { 277 if ($mode == 0) { 278 $_ = <HANDLE>; 279 } else { 280 $_ = shift(@HANDLE); 281 } 282 last unless defined($_); 283 next; 284 } 285 286 chomp; 287 s/^\s+//g; 288 s/\s+/ /g; 289 if ($content eq '') { 290 $content = $_; 291 } else { 292 $content .= ' '; 293 $content .= $_; 294 } 295 296 last if (length($content) > $WIDTH); 297 $lines++; 298 last if ($lines > $BODYLINELIMIT); 299 } 300 301 return substr_safe($content, $WIDTH); 302} 303 304sub parse_header($) { 305 my $href = shift; 306 307 ## 308 ## Thread related 309 ## 310 if (($href->{'in-reply-to'}) 311 && ($href->{'in-reply-to'} =~ /.*(<[^<]*>)\s*/)) { 312 $href->{'references:'} = $1; 313 } elsif ($href->{'references'}) { 314 if ($href->{'references'} =~ /.*(<[^<]*>)/) { 315 $href->{'references:'} = $1; 316 } else { 317 $href->{'references:'} = $href->{'references'}; 318 } 319 } 320 321 ## 322 ## Date 323 ## 324 my $tz; 325 if ($href->{'date'}) { 326 $href->{'date:'} = $href->{'date'}; 327 } else { 328 my($sec, $min, $hour, $mday, $mon, $year, 329 $wday, $yday, $isdst) = localtime((stat($href->{'path'}))[9]); 330 my($gsec, $gmin, $ghour, $gmday, $gmon, $gyear, 331 $gwday, $gyday, $gisdst) = gmtime((stat($href->{'path'}))[9]); 332 333 my $off = ($hour - $ghour) * 60 + $min - $gmin; 334 if ($year < $gyear) { 335 $off -= 24 * 60; 336 } elsif ($year > $gyear) { 337 $off += 24 * 60; 338 } elsif ($yday < $gyday) { 339 $off -= 24 * 60; 340 } elsif ($yday > $gyday) { 341 $off += 24 * 60; 342 } 343 if ($off == 0) { 344 $tz = "GMT"; 345 } elsif ($off > 0) { 346 $tz = sprintf("+%02d%02d", $off/60, $off%60); 347 } else { 348 $off = -$off; 349 $tz = sprintf("-%02d%02d", $off/60, $off%60); 350 } 351 352 $href->{'date:'} = sprintf "%s, %d %s %d %02d:%02d:%02d %s", 353 $WSTR[$wday], $mday, $MSTR[$mon], $year + 1900, 354 $hour, $min, $sec, $tz; 355 } 356 357 $href->{'date:'} =~ /(\d\d?)\s+([A-Za-z]+)\s+(\d+)\s/; 358 my($mday, $monthstr, $year) = ($1, "\u\L$2", $3); 359 my $mon = $MSTR2NUM->{$monthstr}; 360 361 $href->{'date:'} =~ /\s(\d\d?):(\d\d?)/; 362 my($hour, $min, $sec) = ($1, $2, 0); 363 if ($href->{'date:'} =~ /\s\d\d?:\d\d?:(\d\d?)\s/) { 364 $sec = $1; 365 } 366 367 if ($year < 50) { 368 $year += 2000; 369 } elsif ($year < 1000) { 370 $year += 1900; 371 } 372 $href->{'year:'} = $year; 373 $href->{'month:'} = $mon; 374 $href->{'monthstr:'} = $monthstr; 375 $href->{'mday:'} = $mday; 376 $href->{'hour:'} = $hour; 377 $href->{'min:'} = $min; 378 $href->{'sec:'} = $sec; 379 $href->{'date:'} = sprintf "%02d/%02d", $href->{'month:'}, $href->{'mday:'}; 380 381 ## 382 ## MIME decoding 383 ## 384 $href->{'subject:'} = &mime_decode_string($href->{'subject'}); 385 $href->{'from:'} = &mime_decode_string($href->{'from'}) 386 if $REF_SYMBOL{'from:'}; 387 $href->{'to:'} = &mime_decode_string($href->{'to'}) 388 if $REF_SYMBOL{'to:'}; 389 $href->{'cc:'} = &mime_decode_string($href->{'cc'}) 390 if $REF_SYMBOL{'cc:'}; 391 392 ## 393 ## Mark 394 ## 395 $href->{'multipart:'} = ' '; 396 if (defined($href->{'mime-version'}) && 397 defined($href->{'content-type'})) { 398 if ($href->{'content-type'} =~ /Multipart\/(...)/i) { 399 $href->{'multipart:'} = $multipart_mark{lc($1)} || 'M'; 400 } elsif ($href->{'content-type'} =~ /Message\/Partial/i) { 401 $href->{'multipart:'} = 'P'; 402 } 403 } 404 405 ## 406 ## Address related 407 ## 408 if ($REF_SYMBOL{'address:'}) { 409 $href->{'address:'} = friendly_addr($href->{'from'}, 0) 410 unless ($href->{'address:'}); 411 } 412 if ($REF_SYMBOL{'Address:'}) { 413 if (my_addr($href->{'from'})) { 414 if ($href->{'to'}) { 415 my $to = &friendly_addr($href->{'to'}, 0); 416 if ($to) { 417 $href->{'Address:'} = 'To:' . $to; 418 } 419 } elsif ($href->{'newsgroups'}) { 420 $href->{'Address:'} = 'Ng:' . $href->{'newsgroups'}; 421 } 422 } 423 $href->{'Address:'} = friendly_addr($href->{'from'}, 0) 424 unless ($href->{'Address:'}); 425 } 426 if ($REF_SYMBOL{'pureaddr:'}) { 427 if (my_addr($href->{'from'})) { 428 if ($href->{'to'}) { 429 my($to, $rest) = &fetch_addr($href->{'to'}, 1); 430 if ($to) { 431 $href->{'pureaddr:'} = 'To:' . $to; 432 } 433 } elsif ($href->{'newsgroups'}) { 434 $href->{'pureaddr:'} = 'Ng:' . $href->{'newsgroups'}; 435 } 436 } 437 $href->{'pureaddr:'} = &extract_addr($href->{'from'}) 438 unless ($href->{'pureaddr:'}); 439 } 440 if (($REF_SYMBOL{'mark:'} || $REF_SYMBOL{'private:'}) 441 && my_addr($href->{'to'}, $href->{'cc'}, $href->{'apparently-to'})) { 442 $href->{'mark:'} = $href->{'private:'} = '*'; 443 } else { 444 $href->{'mark:'} = $href->{'private:'} = ' '; 445 } 446 447 if ($::opt_dupchecktarget eq "" or $::opt_dupchecktarget eq "message-id") { 448 if ($href->{'multipart:'} ne 'P' 449 && $href->{'message-id'} && $message_id{$href->{'message-id'}}++) { 450 $href->{'mark:'} = $href->{'duplicate:'} = 'D'; 451 } else { 452 $href->{'duplicate:'} = ' '; 453 } 454 } 455 elsif ($::opt_dupchecktarget eq "message-id+subject") { 456 my $t = join(";", $href->{'message-id'}, $href->{'subject'}); 457 if ($t ne ";" and $message_id_and_subject{$t}++) { 458 $href->{'mark:'} = $href->{'duplicate:'} = 'D'; 459 } 460 else { 461 $href->{'duplicate:'} = ' '; 462 } 463 } 464 465 ## 466 ## Call user defined function 467 ## 468 &scan_sub($href) if (defined(&scan_sub)); 469} 470 471sub disp_msg($;$) { 472 my($href, $vscan) = @_; 473 474 $href->{'indent:'} = '' unless defined($href->{'indent:'}); 475 $href->{'subject:'} = '' unless defined($href->{'subject:'}); 476 $href->{'indent-subject:'} = $href->{'indent:'} . $href->{'subject:'}; 477 478 binmode(STDOUT); 479 480 if (defined &my_get_msg) { 481 print &my_get_msg($href), "\n"; 482 flush('STDOUT') unless $main::opt_buffer; 483 return; 484 } elsif (defined(&scan_form)) { 485 my $content = &scan_form($href); 486 $content =~ s/\t/ /g; 487 if ($vscan) { 488 print &substr_safe($content, $WIDTH - 1), 489 "\r $href->{'folder:'} $href->{'pnum'}\n"; 490 } else { 491 print &substr_safe($content, $WIDTH - 1), "\n"; 492 } 493 flush('STDOUT') unless $main::opt_buffer; 494 return; 495 } else { 496 im_err("no scan_form specified.\n"); 497 } 498} 499 500############################################ 501## 502## Convert into Friendly Address 503## 504 505sub friendly_addr($$) { 506 my($addr, $need_addr) = @_; 507 return '' unless $addr; 508 my $friendly = ''; 509 my($a, $f, $p); 510 while (($a, $addr, $f) = &fetch_addr($addr, 1), $a ne '') { 511 $a =~ s/\/[^@]*//; 512 if (%petnames && $petnames{lc($a)}) { 513 $p = $petnames{lc($a)}; 514 } elsif (!$need_addr && $f) { 515 $p = &mime_decode_string($f); 516 } else { 517 $p = $a; 518 } 519 if ($friendly eq '') { 520 $friendly = $p; 521 } else { 522 $friendly .= ', ' . $p; 523 } 524 } 525 return $friendly; 526} 527 528############################################ 529## 530## Read petnames entry 531## 532 533%ADDRESS_HASH = (); 534 535sub my_addr(@) { 536 my @addrs = @_; 537 my $addr; 538 539 unless (defined($ADDRESS_HASH{'init'})) { 540 $ADDRESS_HASH{'addr'} = addresses_regex(); 541 unless ($ADDRESS_HASH{'addr'}) { 542 $ADDRESS_HASH{'addr'} = '^' . quotemeta(address()) . "\$"; 543 $ADDRESS_HASH{'addr'} =~ s/(\\\s)*\\,(\\\s)*/\$|\^/g; 544 } 545 $ADDRESS_HASH{'init'} = 1; 546 } 547 return 0 if ($ADDRESS_HASH{'addr'} eq ""); 548 foreach $addr (@addrs) { 549 my $a; 550 while (($a, $addr) = fetch_addr($addr, 1), $a ne "") { 551 return 1 if ($a =~ /$ADDRESS_HASH{'addr'}/io); 552 } 553 } 554 return 0; 555} 556 557############################################ 558## 559## Convert scan_form() to 'eval-form' 560## 561 562sub convert_scan_form($) { 563 my $SCANFORM = shift; 564 565 if (!$main::INSECURE && $SCANFORM && $SCANFORM !~ /%/) { 566 do $SCANFORM; # -- require $SCAN_FORM; (sub scan_form) 567 return if defined(&scan_form); 568 } 569 570 my @symbols = (); 571 my($format, $jis_safe, $plus, $hyphen, $size, $type, $arg); 572 573 if (scan_header_pick()) { 574 my $elem; 575 foreach $elem (split /,/, scan_header_pick()) { 576 if ($elem =~ /^([a-zA-Z]+):(.*)$/) { 577 $symbol_table{$1} = "$2"; 578 } 579 } 580 } 581 582 while ($SCANFORM ne '') { 583 if ($SCANFORM =~ /^%(!?)(\+?)(-?)(\d*)([a-zA-Z]|{\w+})(.*)/) { 584 $plus = $2; 585 $hyphen = $3; 586 $size = $4; 587 $type = $5; 588 $SCANFORM = $6; 589 590 $type =~ s/{(.*)}/$1/; 591 if ($type eq 'n') { 592 if ($SCANFORM =~ /^ / || 593 $SCANFORM =~ /^%D/ || $SCANFORM =~ /^%p/ || 594 $SCANFORM =~ /^%M/) { 595 # OK 596 } else { 597 im_err("Characters in Scan form after %n should be a space or %D or %p or %M\n"); 598 } 599 } 600 601 $jis_safe = ($size ne '' && $size > 0 602 && ($1 ne '' || $NEEDSAFE_HASH{$symbol_table{$type}})) 603 ? $JIS_SAFE : 0; 604 605 $arg = '$href->{\'' . $symbol_table{$type} . '\'}'; 606 $arg = "&substr_safe(sprintf('%${hyphen}${size}s', $arg), $size)" 607 if ($jis_safe && !$plus); 608 609 push(@symbols, $arg); 610 $REF_SYMBOL{$symbol_table{$type}} = 1; 611 612 if ($size =~ /^0/) { # numerical context 613 $format .= "%${hyphen}${size}d"; 614 } else { 615 if ($jis_safe || $plus || $size eq '') { 616 $format .= "%${hyphen}${size}s"; 617 } else { 618 $format .= "%${hyphen}${size}.${size}s"; 619 } 620 } 621 } elsif ($SCANFORM =~ /^([^%]+)(.*)/) { 622 $format .= $1; 623 $SCANFORM = $2; 624 next; 625 } else { 626 im_warn("invalid scan format: $SCANFORM\n"); 627 return; 628 } 629 } 630 631 $arg = join(',', @symbols); 632 my $EVAL_SCAN_FORM = "sprintf('$format', $arg)"; 633 eval "sub scan_form { my(\$href) = shift; $EVAL_SCAN_FORM }"; 634 if ($@) { 635 im_die("Form seems to be wrong.\nPerl error message is: $@"); 636 } 637} 638 639############################################ 640## 641## Substring in Safe Manner 642## fill up spaces to specified '$len' when length doesn't reach that. 643## 644 645BEGIN { 646 $SI = "\x0f"; # Shift In Sequence 647 $SO = "\x0e"; # Shift Out Sequence 648 # for ISO-2022-CN 649 $SS2 = "\x1b\x4e"; # <ISO 2022 Single_shift two> 650 $SS3 = "\x1b\x4f"; # <ISO 2022 Single_shift three> 651} 652 653sub substr_safe($$) { 654 ($_, my $len) = @_; 655 656 # This hack makes the code a few percent faster but it's kinda ugly. 657 # Do you want leave it? 658 if (1) { 659 unless (/[^\s!-~]/) { 660 return pack("A$len", $_); 661 } 662 } 663 664 my $i = 0; # Current Index of this string 665 my $count = 0; # Readable Characters 666 my $charset = 'ascii'; # Current Character Set 667 my @res = (); # Output Result 668 my $fill_char = ' '; # Fill Spaces up to specified length 669 my $last_char = ''; # Extra Characters in double-byte-segment 670 my $shift_in = ''; # Return code to shift in 671 my $G0 = 'ascii'; # Buffer G0 672 my $G1 = ''; # Buffer G1 673 my $G2 = ''; # Buffer G2 674 my $G3 = ''; # Buffer G3 675 676 while (length($_) && $count < $len) { 677 678 if (s/(^$SI)//o) { $charset = $G0; } 679 elsif (s/(^$SO)//o) { $charset = $G1; $shift_in = $SI; } 680 elsif (s/(^$SS2)//o) { $charset = $G2; $shift_in = $SI; } 681 # This is verbose if SS3 appears only in ISO-2022-CN-EXT 682 elsif (s/(^$SS3)//o) { $charset = $G3; $shift_in = $SI; } 683 684 elsif (m/(^[^\e$SI$SO]+)/o) { 685 my $room = $len - $count; 686 my $matched_len = length($1); 687 my $avail; 688 689 # XXX: Should be parameterized. 690 if ($charset =~ /(^cns11643-plane-2)/) { 691 $avail = int(length($1) / 3) * 2; 692 } else { 693 $avail = length($1); 694 } 695 696 if ($avail >= $room) { 697 my $i; 698 699 if ($room % 2 and $charset =~ 700 /^(jisx0208|jisx0212|jisx0213|ksc5601|cns11643-plane-2|big5-1|big5-2)/) { 701 $room--; 702 $last_char = ' '; 703 } 704 if ($charset =~ /^cns11643-plane-2/) { 705 $i = $room * 3 / 2; 706 } else { 707 $i = $room; 708 } 709 $count = $len; 710 push(@res, substr($_, 0, $i)); 711 last; 712 } 713 $count += $avail; 714 push(@res, substr($_, 0, $matched_len)); 715 substr($_, 0, $matched_len) = ''; 716 next; 717 } 718 719 # for Japanese Character in rfc1554 720 elsif (s/(^\e\(B)//) { $G0 = $charset = 'ascii'; } 721 elsif (s/(^\e\$\@)//) { $G0 = $charset = 'jisx0208-1978'; } 722 elsif (s/(^\e\$\(?B)//) { $G0 = $charset = 'jisx0208-1983'; } 723 elsif (s/(^\e\(J)//) { $G0 = $charset = 'jisx0201-roman'; } 724 elsif (s/(^\e\$\(?A)//) { $G0 = $charset = 'gb2312-1980'; } 725 elsif (s/(^\e\$\(D)//) { $G0 = $charset = 'jisx0212-1990'; } 726 elsif (s/(^\e\$\(C)//) { $G1 = $charset = 'ksc5601-1987'; 727 $G0 = 'ascii'; } 728 729 elsif (s/(^\e\$\(O)//) { $G0 = $charset = 'jisx0213-1'; } 730 elsif (s/(^\e\$\(P)//) { $G0 = $charset = 'jisx0213-2'; } 731 732 elsif (s/(^\e-A)//) { $G1 = $charset = 'iso8859-1'; } 733 elsif (s/(^\e-B)//) { $G1 = $charset = 'iso8859-2'; } 734 elsif (s/(^\e-C)//) { $G1 = $charset = 'iso8859-3'; } 735 elsif (s/(^\e-D)//) { $G1 = $charset = 'iso8859-4'; } 736 elsif (s/(^\e-L)//) { $G1 = $charset = 'iso8859-5'; } 737 elsif (s/(^\e-G)//) { $G1 = $charset = 'iso8859-6'; } 738 elsif (s/(^\e-F)//) { $G1 = $charset = 'iso8859-7'; } 739 elsif (s/(^\e-H)//) { $G1 = $charset = 'iso8859-8'; } 740 elsif (s/(^\e-M)//) { $G1 = $charset = 'iso8859-9'; } 741 742 elsif (s/(^\e\.A)//) { $G2 = $charset = 'iso8859-1'; } 743 elsif (s/(^\e\.F)//) { $G2 = $charset = 'iso8859-7'; } 744 745 # for Korean Character in rfc1557 746 elsif (s/(^\e\$\)C)//) { $G1 = $charset = 'ksc5601'; 747 $G0 = 'ascii'; } 748 749 # for Chinese Character in rfc1922 750 elsif (s/(^\e\$\)A)//) { $G1 = $charset = 'gb2312'; 751 $G0 = 'ascii'; } 752 elsif (s/(^\e\$\)G)//) { $G1 = $charset = 'cns11643-plane-1'; 753 $G0 = 'ascii'; } 754 elsif (s/(^\e\$\*H)//) { $G2 = $charset = 'cns11643-plane-2'; 755 $G0 = 'ascii';} 756 757 elsif (s/(^\e\$\(0)//) { $G0 = $charset = 'big5-1';} 758 elsif (s/(^\e\$\(1)//) { $G0 = $charset = 'big5-2';} 759 760 elsif (s/(^\e)//) { 761 ; 762 } 763 else { 764 die "panic"; 765 } 766 push(@res, $1); 767 } 768 769 join ('', 770 @res, 771 ($G0 ne 'ascii') ? "\e(B" : '', 772 $shift_in, 773 $last_char, 774 $fill_char x ($len - $count), 775 ); 776} 777 778############################################ 779## 780## Read petnames entry 781## 782 783sub w2n($) { 784 my $line = shift; 785 $line =~ tr/\x20/\x0/; 786 787 return $line; 788} 789 790sub read_petnames() { 791 if (addrbook_file() && open(ADDRBOOK, addrbook_file())) { 792 my $key; my $addr; my $petname; my $a; my @addrs; 793 my $code; 794 795 while (<ADDRBOOK>) { 796 my $line = ''; 797 do { 798 chomp; 799 next if (/^[\#;]/); 800 $code = code_check($_, 0); 801 if ($code eq 'sjis') { 802 $_ = conv_euc_from_sjis($_); 803 } elsif ($code eq 'jis') { 804 $_ = conv_euc_from_jis($_); 805 } 806 s/#.*$//g; 807 $line =~ s/\\$//; 808 $line .= $_; 809 } while (/[,\\]$/ && defined($_ = <ADDRBOOK>)); 810 $_ = $line; 811 s/"([^"]+)"/w2n($1)/geo; #" 812 s/,\s+/,/g; 813 if (s/^(\S+)\s+(\S+)\s+(\S+)//) { 814 $key = $1; 815 $addr = $2; 816 $petname = $3; 817 next if ($key =~ /:$/); 818 next if $petname eq '*'; 819 } else { 820 next; 821 } 822 $petname =~ tr/\x0/\x20/; 823 $petname = conv_iso2022jp($petname, 'EUC'); 824 825 @addrs = split(/,\s*/, $addr); 826 while ($addr = shift(@addrs)) { 827 $petnames{lc($addr)} = $petname; 828 } 829 } 830 close(ADDRBOOK); 831 return; 832 } 833 my $file = petname_file(); 834 return unless $file; 835 unless (open(PETNAMES, $file)) { ## don't use im_open(). 836 im_warn("can't open petname file $file\n"); 837 return; 838 } 839 while (<PETNAMES>) { 840 next if (/^$/); 841 next if (/^#/); 842 chomp; 843 my($name, $petname); 844 if (/(\S+)\s+(.*)/) { 845 $name = $1; 846 $petname = $2; 847 } 848 $petname =~ s/^"(.*)"$/$1/; 849 $petnames{lc($name)} = $petname; 850 } 851 close(PETNAMES); 852} 853 8541; 855 856__END__ 857 858=head1 NAME 859 860IM::Scan - scan listing from mail/news message 861 862=head1 SYNOPSIS 863 864 use IM::Scan; 865 866 &set_scan_form($scan_form, $width, $use_jis); 867 &read_petnames(); 868 %Head = &get_header($mail_file); 869 &disp_msg(\%Head); 870 871=head1 DESCRIPTION 872 873The I<IM::Scan> module handles scan format and petnames format 874for mail/news message. 875 876This modules is provided by IM (Internet Message). 877 878=head1 FILES 879 880 $HOME/.im/Config the user profile 881 882=head1 PROFILE COMPONENTS 883 884 Component Explanation Example 885 886 MailDir: your mail directory Mail 887 Width: one line width 80 888 JisSafe: safely substr for ISO-2022-JP on 889 Form: scan format %+5n %m%d %8f %-30S %b 890 PetnameFile: nickname file ~/.im/Petname 891 Address: your mail addresses kazu@mew.org, kazu@wide.ad.jp 892 AddrRegex: regexp of your addresses ^kazu@.*$ 893 if necessary 894 895=head1 SCAN FORMAT 896 897'%{width}{header-type}' format is available. You can define any 898header-type as you want. Default valid header-types are 899 900 %n message number 901 %d raw Date: field 902 %f MIME decoded From: field 903 %t MIME decoded To: filed 904 %g raw Newsgroups: field 905 %a friendly From: field 906 %A If this message is originated by yourself, friendly To: 907 or raw Newsgroups: is displayed in 'To:xxx' or 'Ng:xxx' 908 format, respectively. Otherwise, friendly From: field is 909 displayed. 910 %P Similar to %A, but display raw address of mail sender 911 instead of friendly From: field, just like mh-e. 912 %i indent to display thread 913 %s MIME decoded Subject: field 914 %S indented MIME decoded Subject (same as %i+%s) 915 %b a part of body extracted with heuristic 916 %m Multipart type 917 'S'igned, 'E'ncrypt, 'M'ultipart, 'P'artial or none 918 %p mark '*' if the message is destined to you 919 %D mark 'D' if the message is duplicated 920 %M %p+%D 921 %F folder path 922 %K file block size (1024 bytes/block) 923 924 %y year 925 %c month (digit) 926 %C month (string) 927 %e mday 928 %h hour 929 %E min 930 %G sec 931 932{width} is a integer with/without '-' sign. if a '-' sign exists, content 933of a header-type will be displaied with left adjustment. If the integer 934have leading '0', the field will be padded with leading '0's. 935 936To improve processing speed, needless process on JIS character should be 937avoided. Even if 'JisSafe' is on, only %f, %t, %A, %s, %S and %b are 938processed with 'substr' routine for JIS characters by default. If you want 939to process other header-types with JIS version of 'substr', specify '!' 940just after '%' like: %!-8S. 941 942ScanForm "%+5n %m%d %-14A %-18S %b" works as same as IM default scaning. 943 944=head1 PETNAMES FORMAT 945 946Following format is valid in petnames file. 947A line beginning with '#' is ignored. 948 949 # This is comments 950 Kazu@Mew.org "Mr.Kazu" 951 nom@Mew.org "Nomsun" 952 953=head1 COPYRIGHT 954 955IM (Internet Message) is copyrighted by IM developing team. 956You can redistribute it and/or modify it under the modified BSD 957license. See the copyright file for more details. 958 959=cut 960 961### Copyright (C) 1997, 1998, 1999 IM developing team 962### All rights reserved. 963### 964### Redistribution and use in source and binary forms, with or without 965### modification, are permitted provided that the following conditions 966### are met: 967### 968### 1. Redistributions of source code must retain the above copyright 969### notice, this list of conditions and the following disclaimer. 970### 2. Redistributions in binary form must reproduce the above copyright 971### notice, this list of conditions and the following disclaimer in the 972### documentation and/or other materials provided with the distribution. 973### 3. Neither the name of the team nor the names of its contributors 974### may be used to endorse or promote products derived from this software 975### without specific prior written permission. 976### 977### THIS SOFTWARE IS PROVIDED BY THE TEAM AND CONTRIBUTORS ``AS IS'' AND 978### ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 979### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 980### PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TEAM OR CONTRIBUTORS BE 981### LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 982### CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 983### SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 984### BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 985### WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 986### OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 987### IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 988