1# -*-Perl-*-
2################################################################
3###
4###			       Scan.pm
5###
6### Author:  Internet Message Group <img@mew.org>
7### Created: Apr 23, 1997
8### Revised: May 25, 2011
9###
10
11my $PM_VERSION = "IM::Scan.pm version 20161010(IM153)";
12
13package IM::Scan;
14require 5.003;
15require Exporter;
16
17use IM::Config qw(allowcrlf scansbr_file scan_header_pick mail_path address
18		  addresses_regex addrbook_file petname_file);
19use IM::Util;
20use IM::EncDec qw(mime_decode_string);
21use IM::Address qw(extract_addr fetch_addr);
22use IM::Japanese;
23use integer;
24use strict;
25use vars qw(@ISA @EXPORT);
26
27@ISA = qw(Exporter);
28@EXPORT = qw(set_scan_form get_header store_header parse_body parse_header
29	     disp_msg read_petnames);
30
31use vars qw($WIDTH $JIS_SAFE $HEADLINELIMIT $BODYLINELIMIT
32	    $MSTR2NUM @MSTR @WSTR %symbol_table
33	    %multipart_mark @NEEDSAFE %NEEDSAFE_HASH
34	    @STRUCTURED %STRUCTURED_HASH
35	    @HANDLE
36	    %REF_SYMBOL %message_id %message_id_and_subject
37	    %petnames %ADDRESS_HASH
38	    $SI $SO $SS2 $SS3
39	    $ALLOW_CRLF);
40
41############################################
42##
43## Environments
44##
45
46BEGIN {
47    $WIDTH = 80;
48    $JIS_SAFE = 0;
49
50    $HEADLINELIMIT = 100;
51    $BODYLINELIMIT = 30;
52
53    $MSTR2NUM = {
54	Jan => "01", Feb => "02", Mar => "03", Apr => "04",
55	May => "05", Jun => "06", Jul => "07", Aug => "08",
56	Sep => "09", Oct => "10", Nov => "11", Dec => "12",
57    };
58
59    @MSTR = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
60	     'Sep', 'Oct', 'Nov', 'Dec');
61
62    @WSTR = ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat');
63
64    # used in 'set_scan_form' to convert scan_form() to $EVAL_SCAN_FORM
65    %symbol_table = (
66		     'n' => 'number:',
67		     'd' => 'date:',
68		     'f' => 'from:',
69		     't' => 'to:',
70		     'g' => 'newsgroups',
71		     'a' => 'address:',
72		     'P' => 'pureaddr:',
73		     'A' => 'Address:',
74		     's' => 'subject:',
75		     'i' => 'indent:',
76		     'b' => 'body:',
77		     'm' => 'multipart:',
78		     'S' => 'indent-subject:',
79		     'F' => 'folder:',
80		     'M' => 'mark:',
81		     'p' => 'private:',
82		     'D' => 'duplicate:',
83#		     'B' => 'bytes:',
84		     'K' => 'kbytes:',
85
86		     'y' => 'year:',
87		     'c' => 'month:',
88		     'C' => 'monthstr:',
89		     'e' => 'mday:',
90		     'h' => 'hour:',
91		     'E' => 'min:',
92		     'G' => 'sec:',
93		     );
94
95    %multipart_mark = (
96		       'enc' => 'E',
97		       'sig' => 'S',
98		       );
99
100    @NEEDSAFE = qw(from: to: cc: address: Address:
101		   subject: indent-subject: body:);
102
103    %NEEDSAFE_HASH = ();
104
105    foreach (@NEEDSAFE) {
106	$NEEDSAFE_HASH{$_} = 1;
107    }
108}
109
110############################################
111##
112## If user specifies a scan format, convert that to 'eval-form'.
113##
114
115sub set_scan_form($$$) {
116    my($scan_form, $width, $jis_safe) = @_;
117
118    $ALLOW_CRLF = allowcrlf();
119
120    $WIDTH = $width;
121    $JIS_SAFE= $jis_safe;
122
123    my $scan_hook = scansbr_file();
124    if ($scan_hook =~ /(.+)/) {
125	if ($main::INSECURE) {
126	    im_warn("Sorry, ScanSbr is ignored for SUID root script.\n");
127	} else {
128	    if ($> != 0) {
129		$scan_hook = $1;	# to pass through taint check
130	    }
131	    if (-f $scan_hook) {
132		require $scan_hook;
133	    } else {
134		im_err("scan subroutine file $scan_hook not found.\n");
135	    }
136	}
137    }
138
139    convert_scan_form($scan_form);
140}
141
142############################################
143##
144##   get_header
145##
146
147sub get_header($) {
148    my $path = shift;
149    my %Head = ();
150    my $folder;
151
152    $Head{'path'} = $path;
153    if ($path =~ /(.*)\/([0-9]+)$/) {
154	# xxx how about news?
155	$Head{'number:'} = $2;
156	$folder = substr($1, length(mail_path()) + 1);
157	$folder = conv_iso2022jp($folder) if ($folder =~ /[\200-\377]/);
158	$Head{'folder:'} = '+' . $folder;
159    }
160
161    im_open(\*MSG, "<$path") || return;
162
163    ##
164    ## Collect file attributes
165    ##
166#    $Head{'bytes:'} = -s MSG;
167    $Head{'kbytes:'} = int(((-s MSG) + 1023) / 1024);
168
169    ##
170    ## Header parse
171    ##
172    my $header;
173    if ($ALLOW_CRLF) {
174	$header = <MSG>;
175	if ($header =~ /\r/) {
176	    $/ = "\r\n\r\n";
177	} else {
178	    $/ = "\n\n";
179	}
180	$header .= <MSG>;
181	$header =~ s/\r//g;
182    } else {
183	$/ = "\n\n";
184	$header = <MSG>;
185    }
186    store_header(\%Head, $header);
187
188    ##
189    ## Body parse
190    ##
191    $/ = "\n";
192    $Head{'body:'} = parse_body(*MSG, 0);
193
194    close(MSG);
195
196    parse_header(\%Head);
197
198    return(%Head);
199}
200
201@STRUCTURED = qw (
202	sender from reply-to return-path
203	resent-sender resent-from resent-reply-to
204	errors-to return-receipt-to
205	to cc bcc dcc apparently-to
206	resent-to resent-cc resent-bcc
207);
208
209%STRUCTURED_HASH = ();
210
211foreach (@STRUCTURED) {
212    $STRUCTURED_HASH{$_} = 1;
213}
214
215sub store_header($$) {
216    my($href, $header) = @_;
217    local $_;
218    my $lines = 0;
219
220    chomp($header);
221    $header =~ s/\n[ \t]+/ /g;
222    foreach (split("\n", $header)) {
223	chomp;
224	last if (++$lines > $HEADLINELIMIT);
225	next unless (/^([^:]*):\s*(.*)$/);
226	my $label = lc($1);
227	next if ($label eq 'received');
228	if (defined($href->{$label})) {
229	    if ($STRUCTURED_HASH{$label}) {
230		$href->{$label} .= ", ";
231	    } else {
232		$href->{$label} .= "\n\t";
233	    }
234	    $href->{$label} .= $2;
235	} else {
236	    $href->{$label} = $2;
237	}
238    }
239}
240
241##### BODY parse #####
242#
243# parse_body(HANDLER, mode)
244#        HANDER: Filer Hander or Array
245#        mode: 1 if HANDLER is File Handler, otherwise HANDLER is Array
246#        return value: substring from body
247#
248sub parse_body(*$) {
249    local *HANDLE = shift;
250    my $mode = shift;
251    my($content, $lines) = ('', 0);
252
253    while (1) {
254	if ($mode == 0) {
255	    $_ = <HANDLE>;
256	} else {
257	    $_ = shift(@HANDLE);
258	}
259	last unless defined($_);
260
261	next if /^\s*\n/;
262	next if /^--/;
263	next if /^- --/;
264	next if /^=2D/;
265	next if /^\s+[\w*-]+=/;		# eg. "boundary="; * = RFC2231
266	next if /^\s*[\w-]+: /;		# Headers and header style citation
267	next if /^\s*[>:|\#;\/_}]/;
268	next if /^\s*[[<\/(.]+ *snip/;
269	next if /^   /;
270	next if /^\s*\w+([\'._-]+\w+)*>/;
271	next if /^\s*(On|At) .*[^.!\s\n]\s*$/;
272	next if /(:|;|\/)\s*\n$/;
273	next if /(wrote|writes?|said|says?)[^.!\n]?\s*\n$/;
274	next if /^This is a multi-part message in MIME format/i;
275
276	if (/^\s*In (message|article|mail|news|<|\"|\[|\()/i) {
277	    if ($mode == 0) {
278		$_ = <HANDLE>;
279	    } else {
280		$_ = shift(@HANDLE);
281	    }
282	    last unless defined($_);
283	    next;
284	}
285
286	chomp;
287	s/^\s+//g;
288	s/\s+/ /g;
289	if ($content eq '') {
290	    $content = $_;
291	} else {
292	    $content .= ' ';
293	    $content .= $_;
294	}
295
296	last if (length($content) > $WIDTH);
297	$lines++;
298	last if ($lines > $BODYLINELIMIT);
299    }
300
301    return substr_safe($content, $WIDTH);
302}
303
304sub parse_header($) {
305    my $href = shift;
306
307    ##
308    ## Thread related
309    ##
310    if (($href->{'in-reply-to'})
311	&& ($href->{'in-reply-to'} =~ /.*(<[^<]*>)\s*/))  {
312	$href->{'references:'} = $1;
313    } elsif ($href->{'references'}) {
314	if ($href->{'references'} =~ /.*(<[^<]*>)/) {
315	    $href->{'references:'} = $1;
316	} else {
317	    $href->{'references:'} = $href->{'references'};
318	}
319    }
320
321    ##
322    ## Date
323    ##
324    my $tz;
325    if ($href->{'date'}) {
326	$href->{'date:'} = $href->{'date'};
327    } else {
328	my($sec, $min, $hour, $mday, $mon, $year,
329	   $wday, $yday, $isdst) = localtime((stat($href->{'path'}))[9]);
330	my($gsec, $gmin, $ghour, $gmday, $gmon, $gyear,
331	   $gwday, $gyday, $gisdst) = gmtime((stat($href->{'path'}))[9]);
332
333	my $off = ($hour - $ghour) * 60 + $min - $gmin;
334	if ($year < $gyear) {
335	    $off -= 24 * 60;
336	} elsif ($year > $gyear) {
337	    $off += 24 * 60;
338	} elsif ($yday < $gyday) {
339	    $off -= 24 * 60;
340	} elsif ($yday > $gyday) {
341	    $off += 24 * 60;
342	}
343	if ($off == 0) {
344	    $tz = "GMT";
345	} elsif ($off > 0) {
346	    $tz = sprintf("+%02d%02d", $off/60, $off%60);
347	} else {
348	    $off = -$off;
349	    $tz = sprintf("-%02d%02d", $off/60, $off%60);
350	}
351
352	$href->{'date:'} = sprintf "%s, %d %s %d %02d:%02d:%02d %s",
353			$WSTR[$wday], $mday, $MSTR[$mon], $year + 1900,
354			$hour, $min, $sec, $tz;
355    }
356
357    $href->{'date:'} =~ /(\d\d?)\s+([A-Za-z]+)\s+(\d+)\s/;
358    my($mday, $monthstr, $year) = ($1, "\u\L$2", $3);
359    my $mon = $MSTR2NUM->{$monthstr};
360
361    $href->{'date:'} =~ /\s(\d\d?):(\d\d?)/;
362    my($hour, $min, $sec) = ($1, $2, 0);
363    if ($href->{'date:'} =~ /\s\d\d?:\d\d?:(\d\d?)\s/) {
364	$sec = $1;
365    }
366
367    if ($year < 50) {
368	$year += 2000;
369    } elsif ($year < 1000) {
370	$year += 1900;
371    }
372    $href->{'year:'} = $year;
373    $href->{'month:'} = $mon;
374    $href->{'monthstr:'} = $monthstr;
375    $href->{'mday:'} = $mday;
376    $href->{'hour:'} = $hour;
377    $href->{'min:'} = $min;
378    $href->{'sec:'} = $sec;
379    $href->{'date:'} = sprintf "%02d/%02d", $href->{'month:'}, $href->{'mday:'};
380
381    ##
382    ## MIME decoding
383    ##
384    $href->{'subject:'} = &mime_decode_string($href->{'subject'});
385    $href->{'from:'} = &mime_decode_string($href->{'from'})
386	if $REF_SYMBOL{'from:'};
387    $href->{'to:'} = &mime_decode_string($href->{'to'})
388	if $REF_SYMBOL{'to:'};
389    $href->{'cc:'} = &mime_decode_string($href->{'cc'})
390	if $REF_SYMBOL{'cc:'};
391
392    ##
393    ## Mark
394    ##
395    $href->{'multipart:'} = ' ';
396    if (defined($href->{'mime-version'}) &&
397	defined($href->{'content-type'})) {
398	if ($href->{'content-type'} =~ /Multipart\/(...)/i) {
399	    $href->{'multipart:'} = $multipart_mark{lc($1)} || 'M';
400	} elsif ($href->{'content-type'} =~ /Message\/Partial/i) {
401	    $href->{'multipart:'} = 'P';
402	}
403    }
404
405    ##
406    ## Address related
407    ##
408    if ($REF_SYMBOL{'address:'}) {
409	$href->{'address:'} = friendly_addr($href->{'from'}, 0)
410	    unless ($href->{'address:'});
411    }
412    if ($REF_SYMBOL{'Address:'}) {
413	if (my_addr($href->{'from'})) {
414	    if ($href->{'to'}) {
415	        my $to = &friendly_addr($href->{'to'}, 0);
416		if ($to) {
417		    $href->{'Address:'} = 'To:' . $to;
418		}
419	    } elsif ($href->{'newsgroups'}) {
420		$href->{'Address:'} = 'Ng:' .  $href->{'newsgroups'};
421	    }
422	}
423	$href->{'Address:'} = friendly_addr($href->{'from'}, 0)
424	      unless ($href->{'Address:'});
425    }
426    if ($REF_SYMBOL{'pureaddr:'}) {
427	if (my_addr($href->{'from'})) {
428	    if ($href->{'to'}) {
429		my($to, $rest) = &fetch_addr($href->{'to'}, 1);
430		if ($to) {
431		    $href->{'pureaddr:'} = 'To:' . $to;
432		}
433	    } elsif ($href->{'newsgroups'}) {
434		$href->{'pureaddr:'} = 'Ng:' .  $href->{'newsgroups'};
435	    }
436	}
437	$href->{'pureaddr:'} = &extract_addr($href->{'from'})
438	    unless ($href->{'pureaddr:'});
439    }
440    if (($REF_SYMBOL{'mark:'} || $REF_SYMBOL{'private:'})
441	&& my_addr($href->{'to'}, $href->{'cc'}, $href->{'apparently-to'})) {
442	$href->{'mark:'} = $href->{'private:'} = '*';
443    } else {
444	$href->{'mark:'} = $href->{'private:'} = ' ';
445    }
446
447    if ($::opt_dupchecktarget eq "" or $::opt_dupchecktarget eq "message-id") {
448	if ($href->{'multipart:'} ne 'P'
449	    && $href->{'message-id'} && $message_id{$href->{'message-id'}}++) {
450	    $href->{'mark:'} = $href->{'duplicate:'} = 'D';
451	} else {
452	    $href->{'duplicate:'} = ' ';
453	}
454    }
455    elsif ($::opt_dupchecktarget eq "message-id+subject") {
456	my $t = join(";", $href->{'message-id'}, $href->{'subject'});
457	if ($t ne ";" and $message_id_and_subject{$t}++) {
458	    $href->{'mark:'} = $href->{'duplicate:'} = 'D';
459	}
460	else {
461	    $href->{'duplicate:'} = ' ';
462	}
463    }
464
465    ##
466    ## Call user defined function
467    ##
468    &scan_sub($href) if (defined(&scan_sub));
469}
470
471sub disp_msg($;$) {
472    my($href, $vscan) = @_;
473
474    $href->{'indent:'} = '' unless defined($href->{'indent:'});
475    $href->{'subject:'} = '' unless defined($href->{'subject:'});
476    $href->{'indent-subject:'} = $href->{'indent:'} . $href->{'subject:'};
477
478    binmode(STDOUT);
479
480    if (defined &my_get_msg) {
481	print &my_get_msg($href), "\n";
482	flush('STDOUT') unless $main::opt_buffer;
483	return;
484    } elsif (defined(&scan_form)) {
485	my $content = &scan_form($href);
486	$content =~ s/\t/ /g;
487	if ($vscan) {
488	    print &substr_safe($content, $WIDTH - 1),
489	    "\r $href->{'folder:'} $href->{'pnum'}\n";
490	} else {
491	    print &substr_safe($content, $WIDTH - 1), "\n";
492	}
493	flush('STDOUT') unless $main::opt_buffer;
494	return;
495    } else {
496	im_err("no scan_form specified.\n");
497    }
498}
499
500############################################
501##
502## Convert into Friendly Address
503##
504
505sub friendly_addr($$) {
506    my($addr, $need_addr) = @_;
507    return '' unless $addr;
508    my $friendly = '';
509    my($a, $f, $p);
510    while (($a, $addr, $f) = &fetch_addr($addr, 1), $a ne '') {
511	$a =~ s/\/[^@]*//;
512	if (%petnames && $petnames{lc($a)}) {
513	    $p = $petnames{lc($a)};
514	} elsif (!$need_addr && $f) {
515	    $p = &mime_decode_string($f);
516	} else {
517	    $p = $a;
518	}
519	if ($friendly eq '') {
520	    $friendly = $p;
521	} else {
522	    $friendly .= ', ' . $p;
523	}
524    }
525    return $friendly;
526}
527
528############################################
529##
530## Read petnames entry
531##
532
533%ADDRESS_HASH = ();
534
535sub my_addr(@) {
536    my @addrs = @_;
537    my $addr;
538
539    unless (defined($ADDRESS_HASH{'init'})) {
540	$ADDRESS_HASH{'addr'} = addresses_regex();
541	unless ($ADDRESS_HASH{'addr'}) {
542	    $ADDRESS_HASH{'addr'} = '^' . quotemeta(address()) . "\$";
543	    $ADDRESS_HASH{'addr'} =~ s/(\\\s)*\\,(\\\s)*/\$|\^/g;
544	}
545	$ADDRESS_HASH{'init'} = 1;
546    }
547    return 0 if ($ADDRESS_HASH{'addr'} eq "");
548    foreach $addr (@addrs) {
549	my $a;
550	while (($a, $addr) = fetch_addr($addr, 1), $a ne "") {
551	    return 1 if ($a =~ /$ADDRESS_HASH{'addr'}/io);
552	}
553    }
554    return 0;
555}
556
557############################################
558##
559## Convert scan_form() to 'eval-form'
560##
561
562sub convert_scan_form($) {
563    my $SCANFORM = shift;
564
565    if (!$main::INSECURE && $SCANFORM && $SCANFORM !~ /%/) {
566	do $SCANFORM; # -- require $SCAN_FORM; (sub scan_form)
567	return if defined(&scan_form);
568    }
569
570    my @symbols = ();
571    my($format, $jis_safe, $plus, $hyphen, $size, $type, $arg);
572
573    if (scan_header_pick()) {
574	my $elem;
575	foreach $elem (split /,/, scan_header_pick()) {
576	    if ($elem =~ /^([a-zA-Z]+):(.*)$/) {
577		$symbol_table{$1} = "$2";
578	    }
579	}
580    }
581
582    while ($SCANFORM ne '') {
583	if ($SCANFORM =~ /^%(!?)(\+?)(-?)(\d*)([a-zA-Z]|{\w+})(.*)/) {
584	    $plus = $2;
585	    $hyphen = $3;
586	    $size = $4;
587	    $type = $5;
588	    $SCANFORM = $6;
589
590	    $type =~ s/{(.*)}/$1/;
591	    if ($type eq 'n') {
592		if ($SCANFORM =~ /^ / ||
593		    $SCANFORM =~ /^%D/ || $SCANFORM =~ /^%p/ ||
594		    $SCANFORM =~ /^%M/) {
595		    # OK
596		} else {
597		im_err("Characters in Scan form after %n should be a space or %D or %p or %M\n");
598	        }
599	    }
600
601	    $jis_safe = ($size ne '' && $size > 0
602			 && ($1 ne '' || $NEEDSAFE_HASH{$symbol_table{$type}}))
603		? $JIS_SAFE : 0;
604
605	    $arg = '$href->{\'' . $symbol_table{$type} . '\'}';
606	    $arg = "&substr_safe(sprintf('%${hyphen}${size}s', $arg), $size)"
607		if ($jis_safe && !$plus);
608
609	    push(@symbols, $arg);
610	    $REF_SYMBOL{$symbol_table{$type}} = 1;
611
612	    if ($size =~ /^0/) { # numerical context
613		$format .= "%${hyphen}${size}d";
614	    } else {
615		if ($jis_safe || $plus || $size eq '') {
616		    $format .= "%${hyphen}${size}s";
617		} else {
618		    $format .= "%${hyphen}${size}.${size}s";
619		}
620	    }
621	} elsif ($SCANFORM =~ /^([^%]+)(.*)/) {
622	    $format .= $1;
623	    $SCANFORM = $2;
624	    next;
625	} else {
626	    im_warn("invalid scan format: $SCANFORM\n");
627	    return;
628	}
629    }
630
631    $arg  = join(',', @symbols);
632    my $EVAL_SCAN_FORM = "sprintf('$format', $arg)";
633    eval "sub scan_form { my(\$href) = shift; $EVAL_SCAN_FORM }";
634    if ($@) {
635	im_die("Form seems to be wrong.\nPerl error message is: $@");
636    }
637}
638
639############################################
640##
641## Substring in Safe Manner
642## fill up spaces to specified '$len' when length doesn't reach that.
643##
644
645BEGIN {
646     $SI = "\x0f";		# Shift In Sequence
647     $SO = "\x0e";		# Shift Out Sequence
648     # for ISO-2022-CN
649     $SS2 = "\x1b\x4e";		# <ISO 2022 Single_shift two>
650     $SS3 = "\x1b\x4f";		# <ISO 2022 Single_shift three>
651}
652
653sub substr_safe($$) {
654    ($_, my $len) = @_;
655
656    # This hack makes the code a few percent faster but it's kinda ugly.
657    # Do you want leave it?
658    if (1) {
659	unless (/[^\s!-~]/) {
660	    return pack("A$len", $_);
661	}
662    }
663
664    my $i = 0;			# Current Index of this string
665    my $count = 0;		# Readable Characters
666    my $charset = 'ascii';	# Current Character Set
667    my @res = ();		# Output Result
668    my $fill_char = ' ';	# Fill Spaces up to specified length
669    my $last_char = '';		# Extra Characters in double-byte-segment
670    my $shift_in = '';		# Return code to shift in
671    my $G0 = 'ascii';		# Buffer G0
672    my $G1 = '';		# Buffer G1
673    my $G2 = '';		# Buffer G2
674    my $G3 = '';		# Buffer G3
675
676    while (length($_) && $count < $len) {
677
678	   if (s/(^$SI)//o)	{ $charset = $G0; }
679	elsif (s/(^$SO)//o)	{ $charset = $G1; $shift_in = $SI; }
680	elsif (s/(^$SS2)//o)	{ $charset = $G2; $shift_in = $SI; }
681	# This is verbose if SS3 appears only in ISO-2022-CN-EXT
682	elsif (s/(^$SS3)//o)	{ $charset = $G3; $shift_in = $SI; }
683
684	elsif (m/(^[^\e$SI$SO]+)/o) {
685	    my $room = $len - $count;
686	    my $matched_len = length($1);
687	    my $avail;
688
689	    # XXX: Should be parameterized.
690	    if ($charset =~ /(^cns11643-plane-2)/) {
691		$avail = int(length($1) / 3) * 2;
692	    } else {
693		$avail = length($1);
694	    }
695
696	    if ($avail >= $room) {
697		my $i;
698
699		if ($room % 2 and $charset =~
700		    /^(jisx0208|jisx0212|jisx0213|ksc5601|cns11643-plane-2|big5-1|big5-2)/) {
701		    $room--;
702		    $last_char = ' ';
703		}
704		if ($charset =~ /^cns11643-plane-2/) {
705		    $i = $room * 3 / 2;
706		} else {
707		    $i = $room;
708		}
709		$count = $len;
710		push(@res, substr($_, 0, $i));
711		last;
712	    }
713	    $count += $avail;
714	    push(@res, substr($_, 0, $matched_len));
715	    substr($_, 0, $matched_len) = '';
716	    next;
717	}
718
719	# for Japanese Character in rfc1554
720	elsif (s/(^\e\(B)//)	{ $G0 = $charset = 'ascii'; }
721	elsif (s/(^\e\$\@)//)	{ $G0 = $charset = 'jisx0208-1978'; }
722	elsif (s/(^\e\$\(?B)//)	{ $G0 = $charset = 'jisx0208-1983'; }
723	elsif (s/(^\e\(J)//)	{ $G0 = $charset = 'jisx0201-roman'; }
724	elsif (s/(^\e\$\(?A)//)	{ $G0 = $charset = 'gb2312-1980'; }
725	elsif (s/(^\e\$\(D)//)	{ $G0 = $charset = 'jisx0212-1990'; }
726	elsif (s/(^\e\$\(C)//)	{ $G1 = $charset = 'ksc5601-1987';
727				  $G0 = 'ascii'; }
728
729	elsif (s/(^\e\$\(O)//)	{ $G0 = $charset = 'jisx0213-1'; }
730	elsif (s/(^\e\$\(P)//)	{ $G0 = $charset = 'jisx0213-2'; }
731
732	elsif (s/(^\e-A)//)	{ $G1 = $charset = 'iso8859-1'; }
733	elsif (s/(^\e-B)//)	{ $G1 = $charset = 'iso8859-2'; }
734	elsif (s/(^\e-C)//)	{ $G1 = $charset = 'iso8859-3'; }
735	elsif (s/(^\e-D)//)	{ $G1 = $charset = 'iso8859-4'; }
736	elsif (s/(^\e-L)//)	{ $G1 = $charset = 'iso8859-5'; }
737	elsif (s/(^\e-G)//)	{ $G1 = $charset = 'iso8859-6'; }
738	elsif (s/(^\e-F)//)	{ $G1 = $charset = 'iso8859-7'; }
739	elsif (s/(^\e-H)//)	{ $G1 = $charset = 'iso8859-8'; }
740	elsif (s/(^\e-M)//)	{ $G1 = $charset = 'iso8859-9'; }
741
742	elsif (s/(^\e\.A)//)	{ $G2 = $charset = 'iso8859-1'; }
743	elsif (s/(^\e\.F)//)	{ $G2 = $charset = 'iso8859-7'; }
744
745	# for Korean Character in rfc1557
746	elsif (s/(^\e\$\)C)//)	{ $G1 = $charset = 'ksc5601';
747				  $G0 = 'ascii'; }
748
749	# for Chinese Character in rfc1922
750	elsif (s/(^\e\$\)A)//)	{ $G1 = $charset = 'gb2312';
751				  $G0 = 'ascii'; }
752	elsif (s/(^\e\$\)G)//)	{ $G1 = $charset = 'cns11643-plane-1';
753				  $G0 = 'ascii'; }
754	elsif (s/(^\e\$\*H)//)	{ $G2 = $charset = 'cns11643-plane-2';
755				  $G0 = 'ascii';}
756
757	elsif (s/(^\e\$\(0)//)	{ $G0 = $charset = 'big5-1';}
758	elsif (s/(^\e\$\(1)//)	{ $G0 = $charset = 'big5-2';}
759
760	elsif (s/(^\e)//) {
761	    ;
762	}
763	else {
764	    die "panic";
765	}
766	push(@res, $1);
767    }
768
769    join ('',
770	@res,
771	($G0 ne 'ascii') ? "\e(B" : '',
772	$shift_in,
773	$last_char,
774	$fill_char x ($len - $count),
775    );
776}
777
778############################################
779##
780## Read petnames entry
781##
782
783sub w2n($) {
784    my $line = shift;
785    $line =~ tr/\x20/\x0/;
786
787    return $line;
788}
789
790sub read_petnames() {
791    if (addrbook_file() && open(ADDRBOOK, addrbook_file())) {
792	my $key; my $addr; my $petname; my $a; my @addrs;
793	my $code;
794
795	while (<ADDRBOOK>) {
796	    my $line = '';
797	    do {
798		chomp;
799		next if (/^[\#;]/);
800		$code = code_check($_, 0);
801		if ($code eq 'sjis') {
802		    $_ = conv_euc_from_sjis($_);
803		} elsif ($code eq 'jis') {
804		    $_ = conv_euc_from_jis($_);
805		}
806		s/#.*$//g;
807		$line =~ s/\\$//;
808		$line .= $_;
809	    } while (/[,\\]$/ && defined($_ = <ADDRBOOK>));
810	    $_ = $line;
811	    s/"([^"]+)"/w2n($1)/geo;  #"
812	    s/,\s+/,/g;
813	    if (s/^(\S+)\s+(\S+)\s+(\S+)//) {
814		$key = $1;
815		$addr = $2;
816		$petname = $3;
817		next if ($key =~ /:$/);
818	        next if $petname eq '*';
819	    } else {
820		next;
821	    }
822	    $petname =~ tr/\x0/\x20/;
823            $petname = conv_iso2022jp($petname, 'EUC');
824
825	    @addrs = split(/,\s*/, $addr);
826	    while ($addr = shift(@addrs)) {
827	        $petnames{lc($addr)} = $petname;
828	    }
829	}
830	close(ADDRBOOK);
831	return;
832    }
833    my $file = petname_file();
834    return unless $file;
835    unless (open(PETNAMES, $file)) { ## don't use im_open().
836	im_warn("can't open petname file $file\n");
837	return;
838    }
839    while (<PETNAMES>) {
840	next if (/^$/);
841	next if (/^#/);
842	chomp;
843	my($name, $petname);
844	if (/(\S+)\s+(.*)/) {
845	    $name = $1;
846	    $petname = $2;
847	}
848	$petname =~ s/^"(.*)"$/$1/;
849	$petnames{lc($name)} = $petname;
850    }
851    close(PETNAMES);
852}
853
8541;
855
856__END__
857
858=head1 NAME
859
860IM::Scan - scan listing from mail/news message
861
862=head1 SYNOPSIS
863
864 use IM::Scan;
865
866 &set_scan_form($scan_form, $width, $use_jis);
867 &read_petnames();
868 %Head = &get_header($mail_file);
869 &disp_msg(\%Head);
870
871=head1 DESCRIPTION
872
873The I<IM::Scan> module handles scan format and petnames format
874for mail/news message.
875
876This modules is provided by IM (Internet Message).
877
878=head1 FILES
879
880 $HOME/.im/Config	the user profile
881
882=head1 PROFILE COMPONENTS
883
884 Component     Explanation                     Example
885
886 MailDir:      your mail directory             Mail
887 Width:        one line width                  80
888 JisSafe:      safely substr for ISO-2022-JP   on
889 Form:         scan format                     %+5n %m%d %8f %-30S %b
890 PetnameFile:  nickname file			~/.im/Petname
891 Address:      your mail addresses             kazu@mew.org, kazu@wide.ad.jp
892 AddrRegex:    regexp of your addresses        ^kazu@.*$
893               if necessary
894
895=head1 SCAN FORMAT
896
897'%{width}{header-type}' format is available. You can define any
898header-type as you want. Default valid header-types are
899
900    %n    message number
901    %d    raw Date: field
902    %f    MIME decoded From: field
903    %t    MIME decoded To: filed
904    %g    raw Newsgroups: field
905    %a    friendly From: field
906    %A    If this message is originated by yourself, friendly To:
907          or raw Newsgroups: is displayed in 'To:xxx' or 'Ng:xxx'
908          format, respectively. Otherwise, friendly From: field is
909          displayed.
910    %P    Similar to %A, but display raw address of mail sender
911          instead of friendly From: field, just like mh-e.
912    %i    indent to display thread
913    %s    MIME decoded Subject: field
914    %S    indented MIME decoded Subject (same as %i+%s)
915    %b    a part of body extracted with heuristic
916    %m    Multipart type
917              'S'igned, 'E'ncrypt, 'M'ultipart, 'P'artial or none
918    %p    mark '*' if the message is destined to you
919    %D    mark 'D' if the message is duplicated
920    %M    %p+%D
921    %F    folder path
922    %K    file block size (1024 bytes/block)
923
924    %y    year
925    %c    month (digit)
926    %C    month (string)
927    %e    mday
928    %h    hour
929    %E    min
930    %G    sec
931
932{width} is a integer with/without '-' sign. if a '-' sign exists, content
933of a header-type will be displaied with left adjustment. If the integer
934have leading '0', the field will be padded with leading '0's.
935
936To improve processing speed, needless process on JIS character should be
937avoided. Even if 'JisSafe' is on, only %f, %t, %A, %s, %S and %b are
938processed with 'substr' routine for JIS characters by default. If you want
939to process other header-types with JIS version of 'substr', specify '!'
940just after '%' like: %!-8S.
941
942ScanForm "%+5n %m%d %-14A %-18S %b" works as same as IM default scaning.
943
944=head1 PETNAMES FORMAT
945
946Following format is valid in petnames file.
947A line beginning with '#' is ignored.
948
949    # This is comments
950    Kazu@Mew.org      "Mr.Kazu"
951    nom@Mew.org       "Nomsun"
952
953=head1 COPYRIGHT
954
955IM (Internet Message) is copyrighted by IM developing team.
956You can redistribute it and/or modify it under the modified BSD
957license.  See the copyright file for more details.
958
959=cut
960
961### Copyright (C) 1997, 1998, 1999 IM developing team
962### All rights reserved.
963###
964### Redistribution and use in source and binary forms, with or without
965### modification, are permitted provided that the following conditions
966### are met:
967###
968### 1. Redistributions of source code must retain the above copyright
969###    notice, this list of conditions and the following disclaimer.
970### 2. Redistributions in binary form must reproduce the above copyright
971###    notice, this list of conditions and the following disclaimer in the
972###    documentation and/or other materials provided with the distribution.
973### 3. Neither the name of the team nor the names of its contributors
974###    may be used to endorse or promote products derived from this software
975###    without specific prior written permission.
976###
977### THIS SOFTWARE IS PROVIDED BY THE TEAM AND CONTRIBUTORS ``AS IS'' AND
978### ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
979### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
980### PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE TEAM OR CONTRIBUTORS BE
981### LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
982### CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
983### SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
984### BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
985### WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
986### OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
987### IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
988