1#!/usr/bin/perl
2
3#  edbrowse: line editor/browser
4
5use IO::Handle;
6use IO::Socket;
7use Time::Local;
8
9=head1 Author
10
11	Karl Dahlke
12	eklhad@gmail.com
13
14=HEAD1 Current Maintainer
15
16Chris Brannon
17	maintainers@edbrowse.org
18	http://edbrowse.org
19
20=head1 Copyright Notice
21
22This program is copyright (C) (C) Karl Dahlke, 2000-2003.
23It is made available, by the author, under the terms of the General Public License (GPL),
24as articulated by the Free Software Foundation.
25It may be used for any purpose, and redistributed,
26provided this copyright notice is included.
27
28=head1 Redirection
29
30This program, and its associated documentation, are becoming quite large.
31Therefore the documentation has been moved to a separate html file.
32Please visit:
33
34http://edbrowse.org/usersguide.html
35
36Realize that this documentation covers the C version of edbrowse.  Development
37of the Perl version stopped years ago, and there have been significant changes.
38
39If you have lynx on hand, you can run:
40
41lynx -dump http://edbrowse.org/usersguide.html > usersguide.txt
42
43If you are using lynx to download the actual program, do this:
44
45lynx -source 'http://edbrowse.org/edbrowse.pl' > edbrowse.pl
46
47=cut
48
49
50$version = "1.5.17";
51@agents = ("edbrowse/$version");
52$agent = $agents[0];
53
54
55#  It's tempting to let perl establish the global variables as you go.
56#  Let's try not to do this.
57#  That's where all the side effects are - that's where the bugs come in.
58#  Below are the global variables, with some explanations.
59
60$debug = 0;  # general debugging
61$errorExit = 0;
62$ismc = 0;  # is mail client
63$zapmail = 0;  # just get rid of the mail
64$maxfile = 40000000;  # Max size of an editable file.
65$eol = "\r\n";  # end-of-line, as far as http is concerned
66$doslike = 0;  # Is it a Dos-like OS?
67$doslike = 1 if $^O =~ /^(dos|win|mswin)/i;
68$errorMsg = "";  # Set this if the last operation produced an error.
69$inglob = 0;  # Are we in global mode, under a g// operation?
70$onloadSubmit = 0;
71$inscript = 0;  # plowing through javascript
72$filesize = 0;  # size of file just read or written
73$global_lhs_rhs = 0;  # remember lhs and rhs across sessions
74$caseInsensitive = 0;
75#  Do we send crnl or nl after the lines in a text buffer?
76#  What is the standard - I think it's DOS newlines.
77$textAreaCR = 1;
78$pdf_convert = 1;  # convert pdf to html
79$fetchFrames = 1;  # fetch the frames into a web page
80$allsub = 0;  # enclose all superscripts and subscripts
81$allowCookies = 1; # allow all cookies.
82%cookies = ();  # the in-memory cookie jar
83%authHist = ();  # authorization strings by domain
84$authAttempt = 0;  # count authorization attempts for this page
85$ssl_verify = 1; # By default we verify all certs.
86$ssl = undef;  # ssl connection
87$ctx = undef;  # ssl certificate
88$allowReferer = 1; # Allow referer header by default.
89$referer = "";  # refering web page
90$reroute = 1;  # follow http redirections to find the actual web page
91$rerouteCount = 0;  # but prevent infinite loops
92%didFrame = ();  # which frames have we fetched already
93$passive = 1; # ftp passive mode on by default.
94$nostack = 0;  # suppress stacking of edit sessions
95$last_z = 1;  # line count for the z command
96$endmarks = 0;  # do we print ^ $ at the start and end of lines?
97$subprint = 0;  # print lines after substitutions?
98$delprint = 0;  # print line after delete
99$dw = 0;  # directory write enabled
100$altattach = 0;  # attachments are really alternative presentations of the same email
101$do_input = 0;  # waiting for the next input from the tty
102$intFlag = 0;  # control c was hit
103$intMsg = "operation interrupted";
104
105#  Interrupt handler, for control C.
106#  Close file handle if we were reading from disk or socket.
107sub intHandler()
108{
109$intFlag = 1;
110if($do_input) {
111print "\ninterrupt, type qt to quit completely\n";
112return;
113}
114#  Reading from an http server.
115close FH if defined FH;
116# Kill ftp data connection if open.
117close FDFH if defined FDFH;
118#  and mail connection or ftp control connection
119close SERVER_FH if defined SERVER_FH;
120# And listening ftp socket.
121close FLFH if defined FLFH;
122exit 1 if $ismc;
123}  # intHandler
124
125$SIG{INT} = \&intHandler;
126
127#  A quieter form of die, without the edbrowse line number, which just confuses people.
128sub dieq($)
129{
130my $msg = shift;
131print "fatal: $msg\n";
132exit 1;
133}  # dieq
134
135@weekDaysShort = ("Sun","Mon","Tue","Wed","Thu","Fri","Sat");
136@monthsShort = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec");
137sub mailTimeString()
138{
139my ($ss, $nn, $hh, $dd, $mm, $yy, $wd) = localtime time;
140my $wds = $weekDaysShort[$wd];
141my $mths = $monthsShort[$mm];
142return sprintf "%s, %02d %s %d %02d:%02d:%02d",
143$wds, $dd, $mths, $yy+1900, $hh, $nn, $ss;
144}  # mailTimeString
145
146#  ubackup is set when the command has changed something.
147#  The previous text, stored in the save_* variables,
148#  is copied into the last* variables.  If you type u,
149#  the last* variables and current variables are swapped.
150$ubackup = 0;
151
152#  Did we successfully read the edbrowse config file?
153#  If so, set some variables.
154$myname = $annoyFile = $junkFile = $addressFile = "";
155%adbook = ();
156$adbooktime = 0;
157@inmailserver = ();  # list of pop3 servers
158$mailDir = "";
159$localMail = -1;
160$whichMail = 0;  # which account to use
161$smMail = "";
162$naccounts = 0;  # number of pop accounts
163$outmailserver = "";  # smtp
164$smtplogin = "";  # smtp login
165my $mailToSend = "";
166@pop3login = ();
167@pop3password = ();
168@replyAddress = ();
169@fromSource = ();
170@fromDest = ();
171$serverLine = "";  # line received from mail or ftp server
172
173#  web express configuration variables and arrays.
174%shortcut = ();
175%commandList = ();
176%commandCheck = ();
177$currentShortcut = "";
178$currentCommandList = "";
179
180#  Specify the start and end of a range for an operation.
181#  1,3m5 will set these variables to 1, 3, and 5.
182$startRange = $endRange = $dest = 0;
183
184#  The input command, but only the one-letter commands.
185$icmd = "";
186#  Now the command that is actually executed is in $cmd.
187#  This is usually the same as $icmd, but not always.
188#  8i becomes 7a, for instance.
189$cmd = "";
190#  The valid edbrowse commands.
191$valid_cmd = "aAbBcdefghHiIjJklmnpqrsStuvwz=^@<";
192#  Commands that can be done in browse mode.
193$browse_cmd = "AbBdefghHIjJklmnpqsuvwz=^@<";
194#  Commands for directory mode.
195$dir_cmd = "AbdefghHklnpqsvwz=^@<";
196#  Commands that work at line number 0, in an empty file.
197$zero_cmd = "aAbefhHqruwz=^@<";
198#  Commands that expect a space afterward.
199$spaceplus_cmd = "befrw";
200#  Commands that should have no text after them.
201$nofollow_cmd = "aAcdhHijlmnptu=";
202#  Commands that can be done after a g// global directive.
203$global_cmd = "dIjJlmnpst";
204#  Show the error message, not just the question mark, after these commands.
205$showerror_cmd = "Abefqrw^@";
206$helpall = 0;  #  show the error message all the time
207
208#  Remember that two successive q's will quit the session without changes.
209#  here we must track which session, by number, you were trying to quit.
210$lastq = $lastqq = -1;
211
212#  For any variable x, there are usually multiple copies of x, one per session.
213#  These are housed in an array @x.
214#  In contrast, the variable $x holds $x[$context],
215#  according to the current context.
216#  I hope this isn't too confusing.
217$context = 0;
218
219#  dot and dol, current and last line numbers.
220@dot = (0);
221$dot = $dot[0];
222@dol = (0);
223$dol = $dol[0];
224@factive = (1);  # which sessions are active
225#  Retain file names, and whether the text has been modified.
226@fname = ("");
227$fname = $fname[0];
228$baseref = "";  # usually the same as $fname
229@fmode = (0);  # file modes
230$fmode = $fmode[0];
231$binmode = 1;  # binary file
232$nlmode = 2;  # newline apended
233$browsemode = 4;  # browsing html text
234$changemode = 8;  # something has changed in this file
235$dirmode = 16;  # directory mode
236$firstopmode = 32;  # first operation issued - undo is possible
237$nobrowse = "not in browse mode";  # common error message
238$nixbrowse = "command not available in browse mode";
239$nixdir = "command not available in directory mode";
240
241sub dirBrowseCheck($)
242{
243my $cmd = shift;
244$fmode&$browsemode and $errorMsg = "$cmd $nixbrowse", $inglob = 0, return 0;
245$fmode&$dirmode and $errorMsg = "$cmd $nixdir", $inglob = 0, return 0;
246return 1;
247}  # dirBrowseCheck
248
249#  retain base directory name when scanning a directory
250@dirname = ("");
251$dirname = $dirname[0];
252
253#  Remember substitution strings.
254@savelhs = ();  # save left hand side
255$savelhs = $savelhs[0];
256@saverhs = ();  # save right hand side
257$saverhs = $saverhs[0];
258
259#  month hash, to encode dates.
260%monhash =
261(jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
262jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);
263
264$home = $ENV{HOME};
265defined $home and length $home or
266dieq 'home directory not defined by $HOME.';
267-d $home or
268dieq "$home is not a directory.";
269
270#  Establish the trash bin, for deleted files.
271$rbin = "$home/.trash";
272if(! -d $rbin) {
273$rbin = "" unless mkdir $rbin, 0700;
274}
275#  Config file for this browser.
276#  Sample file is available at http://edbrowse.org/sample.perl.ebrc
277$rcFile = "$home/.ebprc";
278#  Last http header, normally deleted before you read the web page.
279$ebhttp = "$rbin/eb.http";
280truncate $ebhttp, 0;
281#  When we need a temp file.
282$ebtmp = "$rbin/eb.tmp";
283# A file containing SSL certificates in PEM format, concatinated together.
284# This will be used for certificate verification.
285$ebcerts = "$home/.ssl-certs";
286#  file for persistant cookies.
287$ebcooks = "$home/.cookies";
288sub fillJar() ;
289fillJar();  # fill up that cooky jar
290
291#  Let's see if we can read the config file?
292if(open FH, $rcFile) {
293my $sort = 0;
294while(<FH>) {
295s/^\s+//;
296s/^#.*$//;
297next if /^$/;
298s/\s+$//;
299my ($server, $login, $passwd, $retpath, $key, $value);
300if(/^([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]*)/) {
301($server, $login, $passwd, $retpath) = ($1, $2, $3, $4);
302my $smtpbox = $5;
303if($server =~ s/^\*\s*//) {
304dieq "multiple accounts are marked as local, with a star." if $localMail >= 0;
305$localMail = $naccounts;
306$smtpbox = $server unless length $smtpbox;
307$outmailserver = $smtpbox;
308$smtplogin = $login;
309}
310$inmailserver[$naccounts] = $server;
311$pop3login[$naccounts] = $login;
312$pop3password[$naccounts] = $passwd;
313$replyAddress[$naccounts] = $retpath;
314++$naccounts;
315next;
316}  # describing a mail server
317
318#  Now look form keyword = string.
319#  Initial < is shorthand for cmd =
320s/^\</cmd =/;
321if(/^([^=]+)=\s*(.+)/) {
322$key = $1;
323$value = $2;
324$key =~ s/\s+$//;
325$myname = $value, next if $key eq "fullname";
326$addressFile = $value, next if $key eq "addressbook";
327$junkFile = $value, next if $key eq "junkfile";
328$annoyFile = $value, next if $key eq "annoyfile";
329$mailDir = $value, next if $key eq "cd";
330
331if($key eq "from") {
332if($value =~ /^\s*([^\s>]+)\s*>\s*(.+)$/) {
333push @fromSource, lc $1;
334push @fromDest, $2;
335next;
336}
337dieq "from filter \"$value\" does not look like \"emailAddress > file\".";
338}  # from
339
340if($key eq "agent") {
341push @agents, $value;
342next;
343}  # agent
344
345#  web express keywords
346if($key eq "shortcut") {
347if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) {
348dieq "shortcut $currentShortcut has not been assigned a url";
349}
350$value =~ /^[\w-]+$/ or dieq "the name of a shortcut must consist of letters digits or dashes, $value is invalid";
351$currentShortcut = $value;
352#  Start out with no post processing commands.
353$shortcut{$value}{after} = [];
354$shortcut{$value}{sort} = sprintf "%04d", $sort;
355++$sort;
356$currentCommandList = "";
357next;
358}  # shortcut
359if($key eq "cmdlist") {
360if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) {
361dieq "shortcut $currentShortcut has not been assigned a url";
362}
363$currentShortcut = "";
364my $check = 0;
365$check = 1 if $value =~ s/^\+//;
366$value =~ /^[\w-]+$/ or dieq "the name of a command list must consist of letters digits or dashes, $value is invalid.";
367$currentCommandList = $value;
368$commandList{$value} = [];
369$commandCheck{$value} = $check;
370next;
371}  # cmdlist
372if($key eq "cmd") {
373length $currentShortcut or length $currentCommandList or
374dieq "postprocessing command is not part of a command list or shortcut";
375my $cref;  # command reference
376$cref = $shortcut{$currentShortcut}{after} if length $currentShortcut;
377$cref = $commandList{$currentCommandList} if length $currentCommandList;
378#  is this a command list?
379if($value =~ /^[a-zA-Z_-]+$/ and defined $commandList{$value}) {
380my $cpush = $commandList{$value};
381push @$cref, @$cpush;
382} else {
383push @$cref, $value;
384}
385next;
386}  # cmd
387if($key eq "url") {
388length $currentShortcut or dieq "$key command without a current shortcut";
389$shortcut{$currentShortcut}{url} = $value;
390next;
391}  # url
392if($key eq "desc") {
393length $currentShortcut or dieq "$key command without a current shortcut";
394$shortcut{$currentShortcut}{desc} = $value;
395next;
396}  # desc
397
398dieq "Unrecognized keyword <$key> in config file.";
399}
400
401dieq "garbled line <$_> in config file.";
402}  # loop over lines in config file
403close FH;
404
405if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) {
406dieq "shortcut $currentShortcut has not been assigned a url";
407}
408
409if($naccounts) {
410$localMail = 0 if $naccounts == 1;
411dieq "None of the pop3 accounts is marked as local." if $localMail < 0;
412dieq "fullname not specified in the config file." if ! length $myname;
413}  # mail accounts
414}  # open succeeded
415
416#  One array holds all the lines of text (without the newlines)
417#  for all the files in all the sessions.
418#  Within a given session, the actual file is represented by a list of numbers,
419#  indexes into this large array.
420#  Note that when text is copied, we actually copy the strings in the array.
421#  I could just have different lines use the same index, thus pointing to the
422#  same string, and there would be no need to copy that string,
423#  but then I'd have to maintain reference counts on all these strings,
424#  and that would make the program very messy!
425@text = ();
426
427#  If a file has 30 lines, it is represented by 30 numbers,
428#  indexes into @text above.
429#  Should we use an array of numbers, or a string of numbers
430#  represented by decimal digits?
431#  Both are painful, in different ways.
432#  Consider inserting a block of text, a very common operation.
433#  In a list, we would have to slide all the following numbers down.
434#  Granted, that's better than copying all those lines of text down,
435#  but it's still a pain to program, and somewhat inefficient.
436#  If we use strings, we take the original string of numbers,
437#  break it at the insert point, and make a new string
438#  by concatenating these two pieces with the new block.
439#  The same issues arise when deleting text near the top of a file.
440#  This and other considerations push me towards strings.
441#  I currently use 6 characters for a line number, and a seventh for the g// flag.
442$lnwidth = 7;  # width of a line number field in $map
443$lnwidth1 = $lnwidth - 1;
444$lnformat = "%6d ";
445$lnspace = ' ' x $lnwidth;
446$lnmax = 999999;
447#  Note that line 0 never maps to anything in @text.
448@map = ($lnspace);
449$map = $map[0];
450#  The 26 labels, corresponding to the lower case letters.
451#  These are stored in a packed string, like $map above.
452#  labels also holds the filetype suffixes when in directory mode.
453@labels = ($lnspace x 26);
454$labels = $labels[0];
455# offset into $labels, where directory suffixes begin.
456$dirSufStart = 26 * $lnwidth;
457
458#  The anchor/form/input tags, for browsing.
459#  The browse tags are in an array of hashes.
460#  Each hash has tag=tagname,
461#  and attrib=value for each attrib=value in the tag.
462#  Be advised that certain special tags, such as those defining
463#  title and description and keywords, are placed in btag[0].
464@btags = ();
465$btags = $btags[0];
466
467#  When we focus on an input field, for edit or manipulation,
468#  we need its type, size, and list of options.
469$inf = "";  # current text displayed by this input field.
470$itype = "";  # Type of the input field.
471$isize = 0;  # size of the input field.
472$iopt = {};  # hash of input options in a discrete list.
473$irows = $icols = 0;  # for a text area window.
474$iwrap = "";  # Can we scroll beyond this window?
475$itag = undef;  # the input tag from which the previous variables were derived.
476$iline = 0;  # line where this input field was found.
477$ifield = 0;  # field number, within the line, the nth input field on the line.
478$itagnum = 0;  # tag number for this input field.
479$inorange = "this input directive cannot be applied to a range of lines";
480$inoglobal = "this input directive cannot be applied globally";
481
482#  last* and save* variables mirror the variables that define your session.
483#  This supports the undo command.
484$lastdot = $savedot = $lastdol = $savedol = 0;
485$lastmap = $savemap = $lastlabels = $savelabels = "";
486
487#  Variables to format text, i.e. break lines at sentence/phrase boundaries.
488$refbuf = "";  #  The new, reformatted buffer.
489$lineno = $colno = 0;  # line/column number
490$optimalLine = 80;  # optimal line length
491$cutLineAfter = 36;  # cut sentence or phrase after this column
492$paraLine = 120;  # longer lines are assumed to be self-contained paragraphs
493$longcut = 0;  # last cut of a long line
494$lspace = 3;  # last space value, 3 = paragraph
495$lperiod = $lcomma = $lright = $lany = 0;  # columns for various punctuations
496$idxperiod = $idxcomma = $idxright = $idxany = 0;
497
498#  Push the entire edit session onto a stack, for the back key.
499#  A hash will hold all the variables that make a session,
500#  such as $map, $fname, $btags, etc.
501@backup = ();
502$backup = $backup[0];
503
504$hexChars = "0123456789abcdefABCDEF";
505
506#  Valid delimiters for search/substitute.
507#  note that \ is conspicuously absent, not a valid delimiter.
508#  I alsso avoid nestable delimiters such as parentheses.
509#  And no alphanumerics please -- too confusing.
510$valid_delim = "-_=!|#*;:`\"',./?+@";
511
512#  $linePending holds a line of text that you accidentally typed in
513#  while edbrowse was in command mode.
514#  When you see the question mark, immediately type a+ to recover the line.
515$linePending = undef;
516
517
518#  That's it for the globals, here comes the code.
519#  First a few support routines.
520#  Strip white space from either side.
521sub stripWhite($)
522{
523my $line = shift;
524$$line =~ s/^\s+//;
525$$line =~ s/\s+$//;
526}  # stripWhite
527
528#  Is a filename a URL?
529#  If it is, return the transport protocol, e.g. http.
530sub is_url($)
531{
532my $line = shift;
533return 'http' if $line =~ m,^http://[^\s],i;
534return 'https' if $line =~ m,^https://[^\s],i;
535return 'gopher' if $line =~ m,^gopher://[^\s],i;
536return 'telnet' if $line =~ m,^telnet://[^\s],i;
537return 'ftp' if $line =~ m,^ftp://[^\s],i;
538# I assume that the following will be regular http.
539#  Strip off the ?this=that stuff
540$line =~ s:\?.*::;
541#  Strip off the file name and .browse suffix.
542$line =~ s:/.*::;
543$line =~ s/\.browse$//;
544$line =~ s/:\d+$//;
545return 0 if $line !~ /\w\.\w.*\w\.\w/;  # we need at least two internal dots
546#  Look for an ip address, four numbers and three dots.
547return 'http' if $line =~ /^\d+\.\d+\.\d+\.\d+$/;
548$line =~ s/.*\.//;
549return 'http' if index(".com.biz.info.net.org.gov.edu.us.uk.au.ca.de.jp.be.nz.sg.", ".$line.") >= 0;
550}  # is_url
551
552#  Apply a (possibly) relative path to a preexisting url.
553#  The new url is returned.
554#  resolveUrl("http://www.eklhad.net/linux/index.html", "app") returns
555#  "http://www.eklhad.net/linux/app"
556sub resolveUrl($$)
557{
558my ($line, $href) = @_;
559my $scheme;
560$line = "" unless defined $line;
561$line =~ s/\.browse$//;
562#  debug print - this is a very subtle routine.
563print "resolve($line, $href)\n" if $debug >= 2;
564#  Some people, or generators, actually write http://../whatever.html
565$href =~ s/^http:(\.+)/$1/i;
566$href =~ s,^http://(\.*/),$1,i;
567return $href unless length $href and length $line and ! is_url($href);
568if(substr($href, 0, 1) ne '/') {
569$line =~ s/\?.*//;  # hope this is right
570if(substr($href, 0, 1) ne '?') {
571if($line =~ s,^/[^/]*$,, or
572$line =~ s,([^/])/[^/]*$,$1,) {
573#  We stripped off the last directory
574$line .= '/';
575} else {
576if($scheme = is_url $line) {
577$line .= '/';
578} else {
579$line = "";
580}
581}  # stripping off last directory
582}  # doesn't start with ?
583} elsif($scheme = is_url $line) {
584#  Keep the scheme and server, lose the filename
585$line =~ s/\?.*//;  # hope this is right
586$line =~ s,^($scheme://[^/]*)/.*,$1,i;
587} else {
588$line = "";
589}
590return $line.$href;
591}  # resolveUrl
592
593#  Prepare a string for http transmition.
594#  No, I really don't know which characters to encode.
595#  I'm probably encoding more than I need to -- hope that's ok.
596sub urlEncode($)
597{
598$_ = shift;
599s/([^-\w .@])/sprintf('%%%02X',ord($1))/ge;
600y/ /+/;
601return $_;
602}  # urlEncode
603
604sub urlDecode($)
605{
606$_ = shift;
607y/+/ /;
608s/%([0-9a-fA-F]{2})/chr hex "$1"/ge;
609return $_;
610}  # urlDecode
611
612#  The javascript unescape function, sort of
613sub unescape($)
614{
615$_ = shift;
616s/(%|\\u00)([0-9a-fA-F]{2})/chr hex "$2"/ge;
617s/&#(\d+);/chr "$1"/ge;
618return $_;
619}  # unescape
620
621#  Pull the subject out of a sendmail url.
622sub urlSubject($)
623{
624my $href = shift;
625if($$href =~ s/\?(.*)$//) {
626my @pieces = split '&', $1;
627foreach my $j (@pieces) {
628next unless $j =~ s/^subject=//i;
629my $subj = urlDecode $j;
630stripWhite \$subj;
631return $subj;
632}  # loop
633}  # attributes after the email
634return "";
635}  # urlSubject
636
637#  Get raw text ready for html display.
638sub textUnmeta($)
639{
640my $tbuf = shift;
641return unless length $$tbuf;
642$$tbuf =~ s/&/&amp;/g;
643$$tbuf =~ s/</&lt;/g;
644$$tbuf =~ s/>/&gt;/g;
645$$tbuf =~ s/^/<P><PRE>/;
646$$tbuf =~ s/$/<\/PRE><P>\n/;
647}  # textUnmeta
648
649#  Derive the alt description for an image or hyperlink.
650sub deriveAlt($$)
651{
652my $h = shift;
653my $href = shift;
654my $alt = $$h{alt};
655$alt = "" unless defined $alt;
656stripWhite \$alt;
657#  Some alt descriptions are flat-out useless.
658$alt =~ s/^[^\w]+$//;
659return $alt if length $alt;
660if(!length $href) {
661$href = $$h{href};
662$href = "" unless defined $href;
663}
664$alt = $href;
665$alt =~ s/^javascript.*$//i;
666$alt =~ s/^\?//;
667$alt =~ s:\?.*::s;
668$alt =~ s:.*/::;
669$alt =~ s/\.[^.]*$//;
670$alt =~ s:/$::;
671return $alt;
672}  # deriveAlt
673
674#  Pull the reference out of a javascript openWindow() call.
675$foundFunc = "";
676sub javaWindow($)
677{
678my $jc = shift;  # java call
679my $page = "";
680$foundFunc = "";
681$page = $1 if $jc =~ /(?:open|location|window)[\w.]* *[(=] *["']([\w._\/:,=@&?+-]+)["']/i;
682return $page if length $page;
683return "submit" if $jc =~ /\bsubmit *\(/i;
684while($jc =~ /(\w+) *\(/g) {
685my $f = $1;
686my $href = $$btags[0]{fw}{$f};
687if($href) {
688$href =~ s/^\*//;
689$foundFunc = $f;
690$page = $href;
691}
692}
693return $page;
694}  # javaWindow
695
696#  Try to find the Java functions
697sub javaFunctions($)
698{
699my $tbuf = shift;
700my $flc = 0;  # function line count
701my $f;  # java function
702while($$tbuf =~ /(.+)/g) {
703my $line = $1;
704if($line =~ /function *(\w+)\(/) {
705$f = $1;
706print "java function $f\n" if $debug >= 6;
707$flc = 1;
708}
709my $win = javaWindow $line;
710if(length $win) {
711if($flc) {
712if(not defined $$btags[0]{fw}{$f}) {
713$$btags[0]{fw}{$f} = "*$win";
714print "$f: $win\n" if $debug >= 3;
715}
716} elsif($win ne "submit") {
717my $h = {};
718push @$btags, $h;
719$attrhidden = hideNumber($#$btags);
720$$h{ofs1} = length $refbuf;
721my $alt = deriveAlt($h, $win);
722$alt = "relocate" unless length $alt;
723createHyperLink($h, $win, $alt);
724}
725}
726next unless $flc;
727++$flc;
728$flc = 0 if $flc == 12;
729}  # loop over lines
730}  # javaFunctions
731
732#  Mixed case.
733sub mixCase($)
734{
735my $w = lc shift;
736$w =~ s/\b([a-z])/uc $1/ge;
737#  special McDonald code
738$w =~ s/Mc([a-z])/"Mc".uc $1/ge;
739return $w;
740}  # mixCase
741
742#  Create a hyperlink where there was none before.
743sub createHyperLink($$$)
744{
745my ($h, $href, $desc) = @_;
746$$h{tag} = "a";
747$$h{bref} = $baseref;
748$$h{href} = $href;
749$refbuf .= "\x80$attrhidden" . "{$desc}";
750$colno += 2 + length $desc;
751$$h{ofs2} = length $refbuf;
752$lspace = 0;
753}  # createHyperLink
754
755#  meta html characters.
756#  There's lots more -- this is just a starter.
757%charmap = (
758#  Normal ascii symbols
759gt => '>', lt => '<', quot => '"',
760plus => '+', minus => '-', colon => ':',
761apos => '`', star => '*', comma => ',',
762period => '.', dot => ".",
763dollar => '$', percnt => '%', amp => '&',
764#  International letters
765ntilde => "\xf1", Ntilde => "\xd1",
766agrave => "\xe0", Agrave => "\xc0",
767egrave => "\xe8", Egrave => "\xc8",
768igrave => "\xec", Igrave => "\xcc",
769ograve => "\xf2", Ograve => "\xd2",
770ugrave => "\xf9", Ugrave => "\xd9",
771auml => "\xe4", Auml => "\xc4",
772euml => "\xeb", Euml => "\xcb",
773iuml => "\xef", Iuml => "\xcf",
774ouml => "\xf6", Ouml => "\xd6",
775uuml => "\xfc", Uuml => "\xdc",
776yuml => "\xff", Yuml => 'Y',
777aacute => "\xe1", Aacute => "\xc1",
778eacute => "\xe9", Eacute => "\xc9",
779iacute => "\xed", Iacute => "\xcd",
780oacute => "\xf3", Oacute => "\xd3",
781uacute => "\xfa", Uacute => "\xda",
782yacute => "\xfd", Yacute => "\xdd",
783atilde => "\xe3", Atilde => "\xc3",
784itilde => 'i', Itilde => 'I',
785otilde => "\xf5", Otilde => "\xd5",
786utilde => 'u', Utilde => 'U',
787acirc => "\xe2", Acirc => "\xc2",
788ecirc => "\xea", Ecirc => "\xca",
789icirc => "\xee", Icirc => "\xce",
790ocirc => "\xf4", Ocirc => "\xd4",
791ucirc => "\xfb", Ucirc => "\xdb",
792#  Other 8-bit symbols.
793#  I turn these into their 8 bit equivalents,
794#  then a follow-on routine turns them into words for easy reading.
795#  Some speech adapters do this as well, saying "cents" for the cents sign,
796#  but yours may not, so I do some of these translations for you.
797#  But not here, because some people put the 8-bit cents sign in directly,
798#  rather then &cent;, so I've got to do that translation later.
799pound => "\xa3", cent => "\xa2",
800sdot => "\xb7",
801middot => "\xb7",
802edot => 'e',
803nbsp => ' ',
804times => "\xd7",
805divide => "\xf7",
806deg => "\xb0",
807frac14 => "\xbc",
808half => "\xbd",
809frac34 => "\xbe",
810frac13 => "1/3",
811frac23 => "2/3",
812copy => "\xa9",
813reg => "\xae",
814trade => "(TM)",
815);
816
817%symbolmap = (
818a => "945",
819b => "946",
820g => "947",
821d => "948",
822e => "949",
823z => "950",
824h => "951",
825q => "952",
826i => "953",
827k => "954",
828l => "955",
829m => "956",
830n => "957",
831x => "958",
832o => "959",
833p => "960",
834r => "961",
835s => "963",
836t => "964",
837u => "965",
838f => "966",
839c => "967",
840y => "968",
841w => "969",
842177 => "8177",  # kludge!!  I made up 8177
843198 => "8709",
844219 => "8660",
845209 => "8711",
846229 => "8721",
847206 => "8712",
848207 => "8713",
849242 => "8747",
850192 => "8501",
851172 => "8592",
852174 => "8594",
853165 => "8734",
854199 => "8745",
855200 => "8746",
85664 => "8773",
857182 => "8706",
858185 => "8800",
859162 => "8242",
860163 => "8804",
861179 => "8805",
862204 => "8834",
863205 => "8838",
864201 => "8835",
865203 => "8836",
866202 => "8839",
867208 => "8736",
868);
869
870#  map certain font=symbol characters to words
871%symbolWord = (
872176 => "degrees",
873188 => "1fourth",
874189 => "1half",
875190 => "3fourths",
876215 => "times",
877247 => "divided by",
878913 => "Alpha",
879914 => "Beta",
880915 => "Gamma",
881916 => "Delta",
882917 => "Epsilon",
883918 => "Zeta",
884919 => "Eta",
885920 => "Theta",
886921 => "Iota",
887922 => "Kappa",
888923 => "Lambda",
889924 => "Mu",
890925 => "Nu",
891926 => "Xi",
892927 => "Omicron",
893928 => "Pi",
894929 => "Rho",
895931 => "Sigma",
896932 => "Tau",
897933 => "Upsilon",
898934 => "Phi",
899935 => "Chi",
900936 => "Psi",
901937 => "Omega",
902945 => "alpha",
903946 => "beta",
904947 => "gamma",
905948 => "delta",
906949 => "epsilon",
907950 => "zeta",
908951 => "eta",
909952 => "theta",
910953 => "iota",
911954 => "kappa",
912955 => "lambda",
913956 => "mu",
914957 => "nu",
915958 => "xi",
916959 => "omicron",
917960 => "pi",
918961 => "rho",
919962 => "sigmaf",
920963 => "sigma",
921964 => "tau",
922965 => "upsilon",
923966 => "phi",
924967 => "chi",
925968 => "psi",
926969 => "omega",
9278177 => "+-",  # kludge!!  I made up 8177
9288242 => "prime",
9298501 => "aleph",
9308592 => "left arrow",
9318594 => "arrow",
9328660 => "double arrow",
9338706 => "d",
9348709 => "empty set",
9358711 => "del",
9368712 => "member of",
9378713 => "not a member of",
9388721 => "sum",
9398734 => "infinity",
9408736 => "angle",
9418745 => "intersect",
9428746 => "union",
9438747 => "integral",
9448773 => "congruent to",
9458800 => "not equal",
9468804 => "less equal",
9478805 => "greater equal",
9488834 => "proper subset of",
9498835 => "proper superset of",
9508836 => "not a subset of",
9518838 => "subset of",
9528839 => "superset of",
953);
954
955#  Map an html meta character using the above hashes.
956#  Usually run from within a global substitute.
957sub metaChar($)
958{
959my $meta = shift;
960if($meta =~ /^#(\d+)$/) {
961return chr $1 if $1 <= 255;
962return "'" if $1 == 8217;
963return "\x82$1#" if $symbolWord{$1};
964return "?";
965}
966my $real = $charmap{$meta};
967defined $real or $real = "?";
968return $real;
969}  # metaChar
970
971#  Translate <font face=symbol>number</font>.
972#  This is highly specific to my web pages - doesn't work in general!
973sub metaSymbol($)
974{
975my $meta = shift;
976$meta =~ s/^&#//;
977$meta =~ s/;$//;
978my $real = $symbolmap{$meta};
979return "?" unless $real;
980return "&#$real;";
981}  # metaSymbol
982
983#  replace VAR with $VAR, as defined by the environment.
984sub envVar($)
985{
986my $var = shift;
987my $newvar = $ENV{$var};
988if(defined $newvar) {
989#  There shouldn't be any whitespace at the front or back.
990stripWhite \$newvar;
991return $newvar if length $newvar;
992}
993length $errorMsg or
994$errorMsg = "environment variable $var not set";
995return "";
996}  # envVar
997
998#  Replace the variables in a line, using the above.
999sub envLine($)
1000{
1001my $line = shift;
1002$errorMsg = "";
1003#  $errorMsg will be set if something goes wrong.
1004$line =~ s,^~/,\$HOME/,;
1005$line =~ s/\$([a-zA-Z]\w*)/envVar($1)/ge;
1006return $line;
1007}  # envLine
1008
1009#  The filename can be specified using environment variables,
1010#  and shell meta characters such as *.
1011#  But not if it's a url.
1012sub envFile($)
1013{
1014my $filename = shift;
1015$errorMsg = "";
1016if(! is_url($filename)) {
1017$filename = envLine($filename);
1018return if length $errorMsg;
1019my @filelist;
1020#  This is real kludgy - I just don't understand how glob works.
1021if($filename =~ / / and $filename !~ /"/) {
1022@filelist = glob '"'.$filename.'"';
1023} else {
1024@filelist = glob $filename;
1025}
1026$filelist[0] = $filename if $#filelist < 0;
1027$errorMsg = "wild card expansion produces multiple files" if $#filelist;
1028$filename = $filelist[0];
1029}
1030return $filename;
1031}  # envFile
1032
1033#  Drop any active edit sessions that have no text, and no associated file.
1034#  This housecleaning routine is run on every quit or backup command.
1035sub dropEmptyBuffers()
1036{
1037foreach my $cx (0..$#factive) {
1038next if $cx == $context;
1039next unless $factive[$cx];
1040next if length $fname[$cx];
1041next if $dol[$cx];
1042$factive[$cx] = undef;
1043}
1044}  # dropEmptyBuffers
1045
1046#  Several small functions to switch between contexts, i.e. editing sessions.
1047#  In all these functions, we have to map between our context numbers,
1048#  that start with 0, and the user's session numbers, that start with 1.
1049#  C and fortran programmers will be use to this problem.
1050#  Is a context different from the currently running context?
1051sub cxCompare($)
1052{
1053my $cx = shift;
1054$errorMsg = "session 0 is invalid", return 0 if $cx < 0;
1055return 1 if $cx != $context;  # ok
1056++$cx;
1057$errorMsg = "you are already in session $cx";
1058return 0;
1059}  # cxCompare
1060
1061#  Is a context active?
1062sub cxActive($)
1063{
1064my $cx = shift;
1065return 1 if $factive[$cx];
1066++$cx;
1067$errorMsg = "session $cx is not active";
1068return 0;
1069}  # cxActive
1070
1071#  Switch to another editing session.
1072#  This assumes cxCompare has succeeded - we're moving to a different context.
1073#  Pass the context number and an interactive flag.
1074sub cxSwitch($$)
1075{
1076my ($cx, $ia) = @_;
1077#  Put the variables in a known start state if this is a virgin session.
1078cxReset($cx, 0) if ! defined $factive[$cx];
1079$dot[$context] = $dot, $dot = $dot[$cx];
1080$dol[$context] = $dol, $dol = $dol[$cx];
1081$fname[$context] = $fname, $fname = $fname[$cx];
1082$dirname[$context] = $dirname, $dirname = $dirname[$cx];
1083$map[$context] = $map, $map = $map[$cx];
1084$labels[$context] = $labels, $labels = $labels[$cx];
1085$btags = $btags[$cx];
1086$backup[$context] = $backup, $backup = $backup[$cx];
1087if(!$global_lhs_rhs) {
1088$savelhs[$context] = $savelhs, $savelhs = $savelhs[$cx];
1089$saverhs[$context] = $saverhs, $saverhs = $saverhs[$cx];
1090}
1091$fmode[$context] = $fmode, $fmode = $fmode[$cx];
1092#  But we don't replicate the last* variables per context,
1093#  so your ability to undo is destroyed if you switch contexts.
1094$fmode &= ~$firstopmode;
1095if($ia) {
1096if(defined $factive[$cx]) {
1097print ((length($fname[$cx]) ? $fname[$cx] : "no file")."\n");
1098} else {
1099print "new session\n";
1100}
1101}
1102$factive[$cx] = 1;
1103$context = $cx;
1104return 1;
1105}  # cxSwitch
1106
1107#  Can we trash the data in a context?
1108#  If so, trash it, and reset all the variables.
1109#  The second parameter is a close directive.
1110#  If nonzero, we clear out empty buffers associated with
1111#  text areas in the fill-out forms (browse mode).
1112#  A value of 1, as opposed to 2, means close down the entire session.
1113sub cxReset($$)
1114{
1115my ($cx, $close) = @_;
1116
1117if(defined $factive[$cx]) {
1118#  We might be trashing data, make sure that's ok.
1119$fname[$cx] = $fname, $fmode[$cx] = $fmode if $cx == $context;
1120if($fmode[$cx]&$changemode and
1121!( $fmode[$cx]&$dirmode) and
1122$lastq != $cx and
1123length $fname[$cx] and
1124! is_url($fname[$cx])) {
1125$errorMsg = "expecting `w'";
1126$lastqq = $cx;
1127if($cx != $context) {
1128++$cx;
1129$errorMsg .= " on session $cx";
1130}
1131return 0;
1132}  # warning message
1133
1134if($close) {
1135dropEmptyBuffers();
1136if($close&1) {
1137#  And we're closing this session.
1138$factive[$cx] = undef;
1139$backup[$cx] = undef;
1140}
1141}
1142}  # session was active
1143
1144#  reset the variables
1145$dot[$cx] = $dol[$cx] = 0;
1146$map[$cx] = $lnspace;
1147$fname[$cx] = "";
1148$dirname[$cx] = "";
1149$labels[$cx] = $lnspace x 26;
1150$btags[$cx] = [];
1151$savelhs[$cx] = $saverhs[$cx] = undef;
1152$fmode[$cx] = 0;
1153if($cx == $context) {
1154$dot = $dol = 0;
1155$map = $map[$cx];
1156$fname = "";
1157$labels = $labels[$cx];
1158$btags = $btags[$cx];
1159$global_lhs_rhs or $savelhs = $saverhs = undef;
1160$fmode = 0;
1161}  # current context
1162
1163return 1;
1164}  # cxReset
1165
1166#  Pack all the information about the current context into a hash.
1167#  This will be pushed onto a virtual stack.
1168#  When you enter the back key, it all gets unpacked again,
1169#  to restore your session.
1170sub cxPack()
1171{
1172my $h = {
1173dot =>$dot, dol => $dol, map => $map, labels => $labels,
1174lastdot =>$lastdot, lastdol => $lastdol, lastmap => $lastmap, lastlabels => $lastlabels,
1175fname => $fname, dirname => $dirname,
1176fmode => $fmode&~$changemode,
1177savelhs => $savelhs, saverhs => $saverhs,
1178btags => $btags,
1179};
1180return $h;
1181}  # cxPack
1182
1183sub cxUnpack($)
1184{
1185my $h = shift;
1186return if ! defined $h;
1187$dot = $$h{dot};
1188$lastdot = $$h{lastdot};
1189$dol = $$h{dol};
1190$lastdol = $$h{lastdol};
1191$map = $$h{map};
1192$lastmap = $$h{lastmap};
1193$labels = $$h{labels};
1194$lastlabels = $$h{lastlabels};
1195$fmode = $$h{fmode};
1196$fname = $$h{fname};
1197$dirname = $$h{dirname};
1198if(!$global_lhs_rhs) {
1199$savelhs = $$h{savelhs};
1200$saverhs = $$h{saverhs};
1201}
1202$btags[$context] = $btags = $$h{btags};
1203}  # cxUnpack
1204
1205#  find an available session and load it with some initial data.
1206#  Returns the context number.
1207sub cxCreate($$)
1208{
1209my ($text_ptr, $filename) = @_;
1210#  Look for an unused buffer
1211my ($cx, $j);
1212for($cx=0; $cx<=$#factive; ++$cx) {
1213last unless defined $factive[$cx];
1214}
1215cxReset($cx, 0);
1216$factive[$cx] = 1;
1217$fname[$cx] = $filename;
1218my $bincount = $$text_ptr =~ y/\0\x80-\xff/\0\x80-\xff/;
1219if($bincount*4 - 10 < length $$text_ptr) {
1220#  A text file - remove crlf in the dos world.
1221$$text_ptr =~ s/\r\n/\n/g if $doslike;
1222} else {
1223$fmode[$cx] |= $binmode;
1224}
1225$fmode[$cx] |= $nlmode unless $$text_ptr =~ s/\n$//;
1226$j = $#text;
1227if(length $$text_ptr) {
1228push @text, split "\n", $$text_ptr, -1;
1229}
1230if(!lineLimit(0)) {
1231my $newpiece = $lnspace;
1232++$dol[$cx], $newpiece .= sprintf($lnformat, $j) while ++$j <= $#text;
1233$map[$cx] = $newpiece;
1234$dot[$cx] = $dol[$cx];
1235} else {
1236warn $errorMsg;
1237}
1238return $cx;
1239}  # cxCreate
1240
1241#  See if @text is too big.
1242#  Pass the number of lines we will be adding.
1243sub lineLimit($)
1244{
1245my $more = shift;
1246return 0 if $#text + $more <= $lnmax;
1247$errorMsg = "Your limit of 1 million lines has been reached.\nSave your files, then exit and restart this program.";
1248return 1;
1249}  # lineLimit
1250
1251#  Hide and reveal numbers that are internal to the line.
1252#  These numbers indicate links and input fields, and are not displayed by the next routine.
1253sub hideNumber($)
1254{
1255my $n = shift;
1256$n =~ y/0-9/\x85-\x8e/;
1257return $n;
1258} # hideNumber
1259
1260sub revealNumber($)
1261{
1262my $n = shift;
1263$n =~ y/\x85-\x8f/0-9/;
1264return $n;
1265} # revealNumber
1266
1267sub removeHiddenNumbers($)
1268{
1269my $t = shift;
1270$$t =~ s/\x80[\x85-\x8f]+([<>{])/$1/g;
1271$$t =~ s/\x80[\x85-\x8f]+\*//g;
1272}  # removeHiddenNumbers
1273
1274#  Small helper function to retrieve the text for line number n.
1275#  If the second parameter is set, hidden numbers are left in place;
1276#  otherwise they are stripped out via removeHiddenNumbers().
1277sub fetchLine($$)
1278{
1279my $n = shift;
1280my $show = shift;
1281return "" unless $n;  # should never happen
1282my $t = $text[substr($map, $n*$lnwidth, $lnwidth1)];
1283removeHiddenNumbers(\$t) if $show and $fmode&$browsemode;
1284return $t;
1285}  # fetchLine
1286
1287#  Here's the same function, but for another context.
1288sub fetchLineContext($$$)
1289{
1290my $n = shift;
1291my $show = shift;
1292my $cx = shift;
1293$t = $text[substr($map[$cx], $n*$lnwidth, $lnwidth1)];
1294removeHiddenNumbers(\$t) if $show and $fmode[$cx]&$browsemode;
1295return $t;
1296}  # fetchLineContext
1297
1298#  Print size of the text in buffer.
1299sub apparentSize()
1300{
1301my $j = 0;
1302$j += length(fetchLine($_, 1)) + 1 foreach (1..$dol);
1303--$j if $fmode&$nlmode;
1304print "$j\n";
1305}  # apparentSize
1306
1307#  Read a line from stdin.
1308#  Could be a command, could be text going into the buffer.
1309sub readLine()
1310{
1311my ($i, $j, $c, $d, $line);
1312getline: {
1313$intFlag = 0;
1314$do_input = 1;
1315$line = <STDIN>;
1316$do_input = 0;
1317redo getline if $intFlag and ! defined $line;  # interrupt
1318$intFlag = 0;
1319}
1320exit 0 unless defined $line;  # EOF
1321$line =~ s/\n$//;
1322#  A bug in my keyboard causes nulls to be entered from time to time.
1323$line =~ s/\0/ /g;
1324return $line if $line !~ /~/;  # shortcut
1325#  We have to process it, character by character.
1326my $line2 = "";
1327for($i=0; $i<length($line); $line2 .= $c, ++$i) {
1328$c = substr $line, $i, 1;
1329next if $c ne '~';
1330next if $i == length($line) - 1;
1331$d = substr $line, $i+1, 1;
1332++$i, next if $d eq '~';
1333next if $i == length($line) - 2;
1334$j = index $hexChars, $d;
1335next if $j < 0;
1336$j -= 6 if $j >= 16;
1337my $val = $j*16;
1338$d = substr $line, $i+2, 1;
1339$j = index $hexChars, $d;
1340next if $j < 0;
1341$j -= 6 if $j >= 16;
1342$val += $j;
1343#  We don't use this mechanism to enter normal ascii characters.
1344next if $val >= 32 and $val < 127;
1345#  And don't stick a newline in the middle of an entered line.
1346next if $val == 10;
1347$c = chr $val;
1348$i += 2;
1349}  # loop over input chars
1350return $line2;
1351}  # readLine
1352
1353#  Read a block of lines into the buffer.
1354sub readLines()
1355{
1356my $tbuf = "";
1357#  Put the pending line in first, if it's there.
1358my $line = $linePending;
1359$line = readLine() unless defined $line;
1360while($line ne ".") {
1361$tbuf .= "$line\n";
1362$line = readLine();
1363}  # loop gathering input lines
1364return addTextToSession(\$tbuf) if length $tbuf;
1365$dot = $endRange;
1366$dot = 1 if $dot == 0 and $dol;
1367return 1;
1368}  # readLines
1369
1370#  Display a line.  Show line number if $cmd is n.
1371#  Expand binary characters if $cmd is l.
1372#  Pass the line number.
1373sub dispLine($)
1374{
1375my $ln = shift;
1376print "$ln " if $cmd eq 'n';
1377my $line = fetchLine($ln, 1);
1378#  Truncate, if the line is pathologically long.
1379$line = substr($line, 0, 500) . "..." if length($line) > 500;
1380print '^' if $endmarks and ($endmarks == 2 or $cmd eq 'l');
1381if($cmd eq 'l') {
1382$line =~ y/\10\11/<>/;
1383$line =~ s/([\0-\x1f\x80-\xff])/sprintf("~%02x",ord($1))/ge;
1384} else {
1385#  But we always remap return, null, and escape
1386$line =~ s/(\00|\r|\x1b)/sprintf("~%02x",ord($1))/ge;
1387}
1388print $line;
1389print dirSuffix($ln);
1390print '$' if $endmarks and ($endmarks == 2 or $cmd eq 'l');
1391print "\n";
1392}  # dispLine
1393
1394#  If we've printed a line in directory mode, and the entry isn't
1395#  a regular file, we've got to find and print the special character at the end.
1396#  / means directory, for example.
1397#  This is used by the previous routine, among others.
1398sub dirSuffix($)
1399{
1400my $ln = shift;
1401my $suf = "";
1402if($fmode&$dirmode) {
1403$suf = substr($labels, $dirSufStart + 2*$ln, 2);
1404$suf =~ s/ +$//;
1405}
1406return $suf;
1407}  # dirSuffix
1408
1409#  Routines to help format a string, i.e. cut at sentence boundaries.
1410#  This isn't real smart; it will happily split Mr. Flintstone.
1411sub appendWhiteSpace($$)
1412{
1413my($chunk, $breakable) = @_;
1414my $nlc = $chunk =~ y/\n//d;  # newline count
1415if($breakable) {
1416#  Don't interrogate the last few characters of a huge string -- that's inefficient.
1417my $short = substr $refbuf, -2;
1418my $l = length $refbuf;
1419$lperiod = $colno, $idxperiod = $l if $short =~ /[.!?:][)"|}]?$/;
1420$lcomma = $colno, $idxcomma = $l if $short =~ /[-,;][)"|]?$/;
1421$lright = $colno, $idxright = $l if $short =~ /[)"|]$/;
1422$lany = $colno, $idxany = $l;
1423#  Tack short fragment onto previous long line.
1424if($longcut and ($nlc or $lperiod == $colno) and $colno <= 14) {
1425substr($refbuf, $longcut, 1) = " ";
1426$chunk = "", $nlc = 1 unless $nlc;
1427}  # pasting small fragment onto previous line
1428}  # allowing line breaks
1429$nlc = 0 if $lspace == 3;
1430if($nlc) {
1431$nlc = 1 if $lspace == 2;
1432$refbuf .= "\n";
1433$refbuf .= "\n" if $nlc > 1;
1434$colno = 1;
1435$longcut = $lperiod = $lcomma = $lright = $lany = 0;
1436$lspace = 3 if $lspace >= 2 or $nlc > 1;
1437$lspace = 2 if $lspace < 2;
1438}
1439$refbuf .= $chunk;
1440$lspace = 1 if length $chunk;
1441$colno += $chunk =~ y/ / /;
1442$colno += 4 * ($chunk =~ y/\t/\t/);
1443}  # appendWhiteSpace
1444
1445sub appendPrintable($)
1446{
1447my $chunk = shift;
1448$refbuf .= $chunk;
1449$colno += length $chunk;
1450$lspace = 0;
1451return if $colno <= $optimalLine;
1452#  Oops, line is getting long.  Let's see where we can cut it.
1453my ($i, $j) = (0, 0);
1454if($lperiod > $cutLineAfter) { $i = $lperiod, $j = $idxperiod;
1455} elsif($lcomma > $cutLineAfter) { $i = $lcomma, $j = $idxcomma;
1456} elsif($lright > $cutLineAfter) { $i = $lright, $j = $idxright;
1457} elsif($lany > $cutLineAfter) { $i = $lany, $j = $idxany;
1458}
1459return unless $j;  # nothing we can do about it
1460$longcut = 0;
1461$longcut = $j if $i != $lperiod;
1462substr($refbuf, $j, 1) = "\n";
1463$colno -= $i;
1464$lperiod -= $i;
1465$lcomma -= $i;
1466$lright -= $i;
1467$lany -= $i;
1468}  # appendPrintable
1469
1470#  Break up a line using the above routines.
1471sub breakLine($)
1472{
1473my $t = shift;
1474my $ud = $$t =~ s/\r$//;
1475if($lspace eq "2l") {
1476$$t =~ s/^/\r/ if length $$t;
1477$lspace = 2;
1478}
1479$$t =~ s/^/\r/ if length $$t > $paraLine;
1480my $rc = $$t =~ y/\r/\n/;
1481$ud |= $$t =~ s/[ \t]+$//gm;
1482$ud |= $$t =~ s/([^ \t\n])[ \t]{2,}/$1 /g;
1483$ud |= $$t =~ s/([^ \t\n])\t/$1 /g;
1484$ud |= $$t =~ s/ +\t/\t/g;
1485$lspace = 2 if $lspace < 2;  # should never happen
1486$lspace = 3 unless length $$t;
1487return $ud if ! $rc and length $$t < $optimalLine;
1488$rc |= $ud;
1489#  The following 120 comes from $paraLine.
1490$$t =~ s/(\n.{120})/\n$1/g;
1491$$t =~ s/(.{120,}\n)/$1\n/g;
1492$refbuf = "";
1493$colno = 1;
1494$longcut = $lperiod =  $lcomma =  $lright =  $lany = 0;
1495while($$t =~ /(\s+|[^\s]+)/g) {
1496my $chunk = $1;
1497if($chunk =~ /\s/) { appendWhiteSpace($chunk, 1); } else { appendPrintable($chunk); }
1498}
1499if($lspace < 2) {  # line didn't have a \r at the end
1500#  We might want to paste the last word back on.
1501appendWhiteSpace("\n", 1);
1502chop $refbuf;
1503}
1504$rc = 1 if $refbuf =~ /\n/;
1505return 0 unless $rc;
1506$$t = $refbuf;
1507$lspace = "2l" if length $refbuf > $paraLine;
1508return 1;
1509}  # breakLine
1510
1511#  Check the syntax of a regular expression, before we pass it to perl.
1512#  If perl doesn't like it, it dies, and you've lost your edits.
1513#  The first char is the delimiter -- we stop at the next delimiter.
1514#  The regexp, up to the second delimiter, is returned,
1515#  along with the remainder of the string in the second return variable.
1516#  return (regexp, remainder), or return () if there is a problem.
1517#  As usual, $errorMsg will be set.
1518#  Pass the line containing the regexp, and a flag indicating
1519#  left or right side of a substitute.
1520sub regexpCheck($$)
1521{
1522my ($line, $isleft) = @_;
1523my ($c, $d);
1524#  We wouldn't be here if the line was empty.
1525my $delim = substr $line, 0, 1;
1526index($valid_delim, $delim) >= 0 or
1527$errorMsg = "invalid delimiter $delim", return ();
1528$line = substr $line, 1;  # remove lead delimiter
1529#  Remember whether a character is "on deck", ready to be modified by * etc.
1530my $ondeck = 0;
1531my $offdeck = ' ';
1532my $exp = "";
1533my $cc = 0;  # in character class
1534my $paren = 0;  # nested parentheses
1535
1536while(length $line) {
1537$c = substr $line, 0, 1;
1538if($c eq '\\') {
1539$errorMsg = "line ends in backslash", return () if length($line) == 1;
1540$d = substr $line, 1, 1;
1541$ondeck = 1;
1542$offdeck = ' ';
1543#  I can't think of any reason to remove the escape \ from any character,
1544#  except ()|, where we reverse the sense of escape,
1545#  and \& on the right, which becomes &.
1546if(index("()|", $d) >= 0 and ! $cc and $isleft) {
1547$ondeck = 0, ++$paren if $c eq '(';
1548--$paren if $c eq ')';
1549$errorMsg = "Unexpected closing )", return () if $paren < 0;
1550$c = '';
1551}
1552$c = '' if $d eq '&' and ! $isleft;
1553$exp .= "$c$d";
1554$line = substr $line, 2;
1555next;
1556}  # escape character
1557
1558#  Break out if you've hit the delimiter
1559$paren or $c ne $delim or last;
1560
1561#  Not the delimiter, I'll assume I can copy it over to $exp.
1562#  But I have to watch out for slash, which is *my* delimiter.
1563$exp .= '\\' if $c eq '/';
1564#  Then there's ()|, which I am reversing the sense of escape.
1565$exp .= '\\'  if index("()|", $c) >= 0 and $isleft;
1566#  Sometimes $ is interpolated when I don't want it to be.
1567#  Even if there is no alphanumeric following, a bare $ seems to cause trouble.
1568#  Escape it, unless followed by delimiter, or digit (rhs).
1569if($c eq '$') {
1570$exp .= '\\' if $isleft and
1571length($line) > 1 and substr($line, 1, 1) ne $delim;
1572$exp .= '\\' if ! $isleft and
1573$line !~ /^\$\d/;
1574}
1575if($c eq '^') {
1576$exp .= '\\' if $isleft and $cc != length $exp;
1577}
1578#  And we have to escape every @, to avoid interpolation.
1579#  Good thing we don't have to escape %,
1580#  or it might mess up our % remembered rhs logic.
1581$exp .= '\\' if $c eq '@';
1582#  Turn & into $&
1583$exp .= '$' if $c eq '&' and ! $isleft;
1584#  Finally push the character.
1585$exp .= $c;
1586$line = substr $line, 1;
1587
1588#  Are there any syntax checks I need to make on the rhs?
1589#  I don't think so.
1590next if ! $isleft;
1591
1592if($cc) {  # character class
1593#  All that matters here is the ]
1594$cc = 0 if $c eq ']';
1595next;
1596}
1597
1598#  Modifiers must have a preceding character.
1599#  Except ? which can reduce the greediness of the others.
1600if($c eq '?' and $offdeck ne '?') {
1601$ondeck = 0;
1602$offdeck = '?';
1603next;
1604}
1605
1606if(index("?+*", $c) >= 0 or
1607$c eq '{' and $line =~ s/^(\d+,?\d*})//) {
1608my $mod = ( $c eq '{' ? "{$1" : $c);
1609$errorMsg = "$mod modifier has no preceding character", return () if ! $ondeck;
1610$ondeck = 0;
1611$offdeck = $c;
1612$exp .= "$1" if $c eq '{';
1613next;
1614}  # modifier
1615
1616$ondeck = 1;
1617$offdeck = ' ';
1618$cc = length $exp if $c eq '[';
1619}  # loop over chars in the pattern
1620
1621$cc == 0 or
1622$errorMsg = "no closing ]", return ();
1623$paren == 0 or
1624$errorMsg = "no closing )", return ();
1625if(! length $exp and $isleft) {
1626$exp = $savelhs;
1627$errorMsg = "no remembered search string", return () if ! defined $exp;
1628}
1629$savelhs = $exp if $isleft;
1630if(! $isleft) {
1631if($exp eq '%') {
1632$exp = $saverhs;
1633$errorMsg = "no remembered replacement string", return () if ! defined $exp;
1634} elsif($exp eq '\\%') {
1635$exp = '%';
1636}
1637$saverhs = $exp;
1638}  # rhs
1639
1640return ($exp, $line);
1641}  # regexpCheck
1642
1643#  Get the start or end of a range.
1644#  Pass the line containing the address.
1645sub getRangePart($)
1646{
1647my $line = shift;
1648my $ln = $dot;
1649if($line =~ s/^(\d+)//) {
1650$ln = $1;
1651} elsif($line =~ s/^\.//) {
1652#  $ln is already set to dot
1653} elsif($line =~ s/^\$//) {
1654$ln = $dol;
1655} elsif($line =~ s/^'([a-z])//) {
1656$ln = substr $labels, (ord($1) - ord('a'))*$lnwidth, $lnwidth;
1657$errorMsg = "label $1 not set", return () if $ln eq $lnspace;
1658} elsif($line =~ m:^([/?]):) {
1659$errorMsg = "search string not found", return () if $dot == 0;
1660my $delim = $1;
1661my @pieces = regexpCheck($line, 1);
1662return () if $#pieces < 0;
1663my $exp = $pieces[0];
1664$line = $pieces[1];
1665my $icase = "";  # case independent
1666$icase = "i" if $caseInsensitive;
1667if($delim eq substr $line, 0, 1) {
1668$line = substr $line, 1;
1669if('i' eq substr $line, 0, 1) {
1670$line = substr $line, 1;
1671$icase = 'i';
1672}
1673}
1674my $incr = ($delim eq '/' ? 1 : -1);
1675#  Recompile the regexp after each command, but don't compile it on every line.
1676#  Is there a better way to do this, besides using eval?
1677my $notfound = 0;
1678eval '
1679while(1) {
1680$ln += $incr;
1681$ln = 1 if $ln > $dol;
1682$ln = $dol if $ln == 0;
1683last if fetchLine($ln, 1) =~ ' .
1684"/$exp/o$icase; " .
1685'$notfound = 1, last if $ln == $dot;
1686}  # looking for match
1687';   # end evaluated string
1688$errorMsg = "search string not found", return () if $notfound;
1689}  # search pattern
1690#  Now add or subtract from this base line number
1691while($line =~ s/^([+-])(\d*)//) {
1692my $add = ($2 eq "" ? 1 : $2);
1693$ln += ($1 eq '+' ? $add : -$add);
1694}
1695$errorMsg = "line number too large", return ()
1696if $ln > $dol;
1697$errorMsg = "negative line number", return ()
1698if $ln < 0;
1699return ($ln, $line);
1700}  # getRangePart
1701
1702#  Read the data as a string from a url.
1703#  Data is retrieved using http, https, or ftp.
1704#  Parameters: url, post data, result buffer.
1705#  You can return 0 (failure) and leave text and the buffer,
1706#  and I'll report the error, and still assimilate the buffer.
1707sub readUrl($$$)
1708{
1709my ($filename, $post, $tbuf) = @_;
1710my $rc = 1;  # return code, success
1711$lfsz = 0;  # local file size
1712my $rsize = 0;  # size read
1713my $weburl;
1714my $scheme;
1715my $encoding = "";
1716my $pagetype = "";
1717my %url_desc = (); # Description of the current URL
1718
1719#  I don't know if we need a full url encode or what??
1720#  This is a major kludge!  I just don't understand this.
1721$filename =~ s/ /%20/g;
1722$filename =~ s/[\t\r\n]//g;
1723#  I don't know what http://foo@this.that.com/file.htm means,
1724#  but I see it all the time.
1725$filename =~ s,^http://[^/]*@,http://,i;
1726
1727$$tbuf = "";  # start with a clear buffer
1728$errorMsg = "too many nested frames", return 0 unless $rerouteCount;
1729--$rerouteCount;
1730
1731#  split into machine, file, and post parameters
1732separate: {
1733my $oldname = $filename;  # remember where we started
1734my $authinfo = "";  # login password for web sites that return error 401
1735
1736$scheme = is_url $filename;  # scheme could have changed
1737$weburl = 0;
1738$weburl = 1 if $scheme =~ /^https?$/;
1739if(!length $post and $filename =~ s/^(.*?)(\?.*)$/$1/ ) {
1740$post = $2;
1741}
1742#  $post should be url encoded, but sometimes it's not, and I don't know why.
1743$post =~ y/ /+/;
1744my $postfilename = "";
1745#  We assume $post starts with ? or *, if it is present at all.
1746my $meth = "GET";
1747my $postapplic = "";
1748if(substr($post, 0, 1) eq '*') {
1749$meth = "POST";
1750} else {
1751$postfilename = $post;
1752}
1753print "$meth: $post\n" if $debug >= 2;
1754
1755$filename =~ s,^$scheme://,,i;
1756my $serverPort = 80;
1757$serverPort = 443 if $scheme eq 'https';
1758$serverPort = 21 if $scheme eq 'ftp';
1759$serverPort = 23 if $scheme eq 'telnet';
1760my $serverPortString = "";
1761my $server = $filename;
1762$server =~ s,/.*,,;
1763#  Sometimes we need to do this -- got me hanging!
1764$server =~ s/%([0-9a-fA-F]{2})/chr hex "$1"/ge;
1765if($server =~ s/:(\d+)$//) {
1766$serverPort = $1;
1767}
1768# If a server is on port 443, assume it speaks SSL.
1769#  This is a real bastardization of the html standard,
1770#  but it's the explorer standard.  Need I say more?
1771$scheme = 'https' if$serverPort == 443;
1772$serverPortString = ":$serverPort" if $serverPort != 80;
1773$filename =~ s,^[^/]*,,;
1774
1775#  Lots of http servers can't handle /./ or /../ or //
1776$filename =~ s:/{2,}:/:g;
1777#  Oops, put internal http:// back the way it was.
1778#  The bug is caused by a line like this.
1779#  <form method=post action=server/file?this=that&return=http://someOtherServer/blah>
1780#  Because it's post, the get parameters after the ? are still here.
1781#  And I just turned http:// into http:/
1782#  This is very rare, but it happened to me, so I'm trying to fix it.
1783$filename =~ s,http:/,http://,gi;
1784$filename =~ s,ftp:/,ftp://,gi;
1785$filename =~ s:^/(\.{1,2}/)+:/:;
1786$filename =~ s:/(\./)+:/:g;
17871 while $filename =~ s:/[^/]+/\.\./:/:;
1788$filename =~ s:^/(\.\./)+:/:;
1789
1790#  Ok, create some more variables so we either fetch this file
1791#  or convert it if it's pdf.
1792#  Too bad I did all this work, and the pdf converter doesn't work for crap.
1793#  Probably because pdf is irreparably inaccessible.
1794#  Thanks a lot adobe!
1795my $go_server = $server;
1796my $go_port = $serverPort;
1797my $go_portString = $serverPortString;
1798my $go_file = $filename;
1799my $go_post = $post;
1800my $go_postfilename = $postfilename;
1801my $go_meth = $meth;
1802
1803if($filename =~ /\.pdf$/ and $pdf_convert) {
1804($meth eq "GET" and $scheme eq "http") or
1805$errorMsg = "online conversion from pdf to html only works when the pdf file is accessed via the http get method\ntype pr to download pdf in raw mode", return 0;
1806$go_server="access.adobe.com";
1807$go_port = 80;
1808$go_portString = "";
1809$go_file = "/perl/convertPDF.pl";
1810#  It would be simpler if this bloody form wer get, but it's post.
1811$go_meth = "POST";
1812$go_post = "http://$server$serverPortString$filename$postfilename";
1813$go_post = "*submit=submit&url=" . urlEncode($go_post);
1814$go_postfilename = "";
1815}  # redirecting to adobe to convert pdf
1816
1817if($go_meth eq "POST") {
1818$postapplic =
1819"Pragma: no-cache$eol" .
1820"Cache-Control: no-cache$eol" .
1821"Content-Type: application/x-www-form-urlencoded$eol" .
1822"Content-Length: " . (length($go_post)-1) . $eol;
1823}
1824
1825my $newname = "";
1826
1827$authAttempt = 0;
1828makeconnect: {
1829my $chunk;
1830$lfsz = 0;
1831$$tbuf = "";
1832$go_file = "/" if ! length $go_file;
1833%url_desc = (SCHEME => $scheme, SERVER => $go_server, PORT => $go_port, PATH => $go_file, method => $go_meth);
1834$url_desc{content} = substr($go_post, 1) if length $go_post; # Kinda silly.
1835# If you're using digest authentication with the POST method,
1836# the content needs to be digestified.
1837# This is for message integrity checking, when that option is used.
1838# Consider completely replacing $go_x variables with elements of the %url_desc
1839# hash?  There is massive redundancy here.
1840my $domainCookies = "";
1841$domainCookies = fetchCookies(\%url_desc) if $allowCookies; # Grab the cookies.
1842my $send_server  = # Send this to the http server - maybe via SSL
1843"$go_meth $go_file$go_postfilename HTTP/1.0$eol" .
1844#  Do we need $go_portString here???
1845#  If I put it in, paypal doesn't work.
1846"Host: $go_server$eol" .
1847(length $referer ? "Referer: $referer$eol" : "") .
1848$domainCookies .
1849$authinfo .
1850"Accept: text/*, audio/*, image/*, application/*, message/*$eol" .
1851"Accept: audio-file, postscript-file, mail-file, default, */*;q=0.01$eol" .
1852"Accept-Encoding: gzip, compress$eol" .
1853"Accept-Language: en$eol" .
1854 "User-Agent: $agent$eol" .
1855$postapplic .
1856$eol;  # blank line at the end
1857
1858#  send data after if post method
1859$send_server .= substr($go_post, 1) if $go_meth eq "POST";
1860
1861if($debug >= 4) {
1862my $temp_server = $send_server;
1863$temp_server =~ y/\r//d;
1864print $temp_server;
1865}
1866
1867if($scheme eq 'http') {
1868#  Connect to the http server.
1869my $iaddr   = inet_aton($go_server)               or
1870$errorMsg = "cannot identify $go_server on the network", return 0;
1871my $paddr   = sockaddr_in($go_port, $iaddr);
1872my $proto   = getprotobyname('tcp');
1873socket(FH, PF_INET, SOCK_STREAM, $proto)  or
1874$errorMsg = "cannot allocate a socket", return 0;
1875connect(FH, $paddr)    or
1876$errorMsg = "cannot connect to $go_server", return 0;
1877FH->autoflush(1);
1878
1879print FH $send_server; # Send the HTTP request message
1880
1881#  Now retrieve the page and update the user after every 100K of data.
1882my $last_fk = 0;
1883STDOUT->autoflush(1) if ! $doslike;
1884while(defined($rsize = sysread FH, $chunk, 100000)) {
1885print "sockread $rsize\n" if $debug >= 5;
1886$$tbuf .= $chunk;
1887$lfsz += $rsize;
1888last if $rsize == 0;
1889my $fk = int($lfsz/100000);
1890if($fk > $last_fk) {
1891print ".";
1892$last_fk = $fk;
1893}
1894last if $lfsz >= $maxfile;
1895}
1896               close FH;
1897print "\n" if $last_fk;
1898STDOUT->autoflush(0) if ! $doslike;
1899$lfsz <= $maxfile or
1900$errorMsg = "file is too large, limit 40MB", return 0;
1901defined $rsize or
1902$$tbuf = "", $errorMsg = "error reading data from the socket", return 0;
1903
1904} elsif ($scheme eq 'https') {
1905$lfsz = do_ssl($go_server, $go_port, $send_server, $tbuf);
1906Net::SSLeay::free($ssl) if defined $ssl;
1907Net::SSLeay::CTX_free($ctx) if defined $ctx;
1908return 0 unless $lfsz;
1909
1910} elsif ($scheme eq 'ftp') {
1911$lfsz = ftp_connect($go_server, $go_port, $go_file, $tbuf);
1912return 0 unless $lfsz;
1913
1914} elsif ($scheme eq "telnet") {
1915if($go_server =~ s/^([^:@]*):([^:@]*)@//) {
1916print "This URL gives a suggested username of $1 and password of $2\n" .
1917"to be used with the telnet connection you are about to establish.\n";
1918# See RFC 1738, section 3.8.  The username and password in a telnet URL
1919# are advisory.  There is no standard method of logging into telnet services.
1920# I guess this is especially useful for public services, which offer guest accounts and such.
1921}
1922print "Starting telnet.\n";
1923system("telnet $go_server $go_port");
1924return 1;
1925
1926} else {
1927$errorMsg = "this browser cannot access $scheme URLs.", return 0;
1928}
1929
1930#  We got the web page.
1931#  But it might be a redirection to another url.
1932if($weburl and $$tbuf =~  /^http\/[\d.]+ 30[12]/i) {
1933if($$tbuf =~ /\nlocation:[ \t]+(.*[^\s])[ \t\r]*\n/i) {
1934$newname = $1;
1935print "relocate $newname\n" if $debug >= 2;
1936}}
1937
1938if($rc and
1939! length $newname and
1940#  Some web sites serve up pages with no headers at all!
1941#  aspace.whizy.com/forum/ultimate.cgi
1942$$tbuf =~ /^http/i and
1943$$tbuf =~  /^http\/[\d.]+ 404 /i) {
1944$errorMsg = "file not found on the remote server";
1945$rc = 0;
1946}  # not found
1947
1948#  there is yet another way to redirect to a url
1949if($rc and $$tbuf =~ /<meta +http-equiv=["']?refresh[^<>]*(url=|\d+;)['"]?([^'">\s]+)/i) {
1950$newname = $2;
1951print "refresh $newname\n" if $debug >= 2;
1952#  This is almost always an absolute url, even without the http prefix,
1953#  but sometimes it's relative.  Got me hanging!
1954#  Here's a looser criterion for web url.
1955if($newname =~ /^[\w,-]+\.[\w,-]+\.[\w,-]/) {
1956$newname = "http://$newname";
1957}
1958}
1959
1960#  Extract information from the http header - primarily cookies.
1961$encoding = $pagetype = "";
1962if($$tbuf =~ s/^(http\/\d.*?\r?\n\r?\n)//si) {
1963my $header = $1;
1964my @lines = split "\n", $header;
1965open BFH, ">>$ebhttp";
1966if(defined BFH) {
1967print BFH $header;
1968close BFH;
1969}
1970$authinfo = "";
1971while(my $hline = shift @lines) {
1972$hline =~ s/\r$//;
1973print "$hline\n" if $debug >= 4;
1974setCookies($hline, \%url_desc) if $hline =~ /^Set-Cookie:/i and $allowCookies;
1975$authinfo = parseWWWAuth($hline, \%url_desc) if $hline =~ /^WWW-Authenticate/i;
1976return 0 if $authinfo eq "x";
1977#  I shouldn't really discard things like charset=blablabla,
1978#  but I don't really know what to do with it anyways.
1979$hline =~s/;.*//;
1980$encoding = lc $1 if $hline =~ /^content-encoding:\s+['"]?(\w+)['"]?\s*$/i;
1981$pagetype = lc $1 if $hline =~ /^content-type:\s+['"]?([^\s'"]+)['"]?\s*$/i;
1982}  # loop over lines
1983++$authAttempt, redo makeconnect if length $authinfo;
1984} else {  # http header extracted
1985if($scheme =~ /^https?$/) {
1986$errorMsg = "http response doesn't have a head-body structure";
1987$rc = 0;
1988} else {
1989#  For now, this means ftp.
1990#  We could have retrieved an html page via ftp, but probably not.
1991#  Turn off browse command.
1992$cmd = 'e' unless $$tbuf =~ /^<[hH!]/;
1993}
1994}
1995} # makeconnect
1996
1997#  cookies that are set via http-equiv
1998#  The content of the cookie must be quoted.
1999while($$tbuf =~ /<meta +http-equiv=["']?set-cookie['" ]+content="([^"]*)"/gi) {
2000setCookies($1, \%url_desc);
2001}
2002while($$tbuf =~ /<meta +http-equiv=["']?set-cookie['" ]+content='([^']*)'/gi) {
2003setCookies($1, \%url_desc);
2004}
2005
2006if($rc and $reroute and length $newname) {
2007$newname = resolveUrl("$scheme://$server$serverPortString$filename", $newname);
2008print "becomes $newname\n" if $debug >= 2;
2009if($newname ne $oldname) {
2010#  It's not really diferent if one has :80 and the other doesn't.
2011#  I wouldn't code this up if it didn't really happen.  See www.claritin.com
2012$oldname =~ s,^HTTP://,http://,;
2013$oldname =~ s,^(http://)?([^/]*):80/,$1$2/,;
2014$oldname =~ s,^(http://)?([^/]*):80$,$1$2,;
2015$newname =~ s,^HTTP://,http://,;
2016$newname =~ s,^(http://)?([^/]*):80/,$1$2/,;
2017$newname =~ s,^(http://)?([^/]*):80$,$1$2,;
2018if($oldname ne $newname) {
2019if(--$rerouteCount) {
2020print "$newname\n" if $debug >= 1;
2021#  Post method becomes get after redirection, I think.
2022#  $post = "" if length $post and $newname =~ /\?[^\/]*$/;
2023$post = "";
2024$filename = $newname;
2025redo separate;
2026}
2027$errorMsg = "too many url redirections";
2028$rc = 0;
2029}}}  # automatic url redirection
2030
2031$changeFname = "$scheme://$server$serverPortString$filename$postfilename";
2032}  # separate
2033
2034#  Check for complressed data.
2035if($rc and $lfsz and length $encoding and $pagetype =~ /^text/i) {
2036print "$lfsz\ndecoding $encoding\n" if $debug >= 2;
2037my $program = "";
2038my $csuf = "";  # compression suffix
2039$program = "zcat", $csuf = "gz" if $encoding eq "gzip";
2040$program = "zcat", $csuf = "Z" if $encoding eq "compress";
2041length $program or
2042$errorMsg = "unrecognized compression method", return 0;
2043$cfn = "$ebtmp.$csuf";  # compressed file name
2044open FH, ">$cfn" or
2045$errorMsg = "cannot create temp file $cfn", return 0;
2046binmode FH, ':raw' if $doslike;
2047print FH $$tbuf or
2048$errorMsg = "cannot write to temp file $cfn", return 0;
2049close FH;
2050unlink $ebtmp;
2051if(! system "$program $ebtmp.$csuf >$ebtmp 2>/dev/null") {
2052#  There are web pages out there that are improperly compressed.
2053#  We'll call it good if we got any data at all.
2054$errorMsg = "could not uncompress the data", return 0 unless (stat($ebtmp))[7];
2055}
2056
2057#  Read in the uncompressed data.
2058$$tbuf = "";
2059open FH, $ebtmp or
2060$errorMsg = "cannot open the uncompressed file $ebtmp", return 0;
2061$lfsz = (stat(FH))[7];
2062$lfsz <= $maxfile or
2063$errorMsg = "uncompressed file is too large, limit 40MB", close FH, return 0;
2064binmode FH, ':raw' if $doslike;
2065$rsize = sysread FH, $$tbuf, $lfsz;
2066close FH;
2067$rsize and $rsize == $lfsz or
2068$errorMsg = "cannot read the uncompressed data from $ebtmp", return 0;
2069unlink $ebtmp;
2070}  # compressed data
2071
2072if($rc and $fetchFrames) {
2073$errorMsg = "";
2074#  This really isn't right - to do this here I mean.
2075#  If a line of javascript happens to contain a frame tag
2076#  I'm going to fetch that frame and put it in right here.
2077#  Hopefully that won't happen.
2078#  Note that the entire frame tag must be on one line.
2079$$tbuf =~ s/(<i?frame\b[^<>\0\x80-\xff]+>)/readFrame($1)/gei;
2080$rc = 0 if length $errorMsg;
2081}  # looking for frames
2082
2083return $rc;
2084}  # readUrl
2085
2086#  Read a frame.
2087sub readFrame($)
2088{
2089my $tag = shift;
2090my $saveFname = $changeFname;
2091my($tc, $fbuf, $src, $name);
2092
2093$tag =~ s/\bsrc *= */src=/gi;
2094$tag =~ s/\bname *= */name=/gi;
2095$tc = $tag;
2096if($tc =~ s/^.*\bsrc=//s) {
2097$src = $tc;
2098$src =~ s/ .*//s;
2099$src =~ s/^['"]//;
2100$src =~ s/['"]?>?$//;
2101if(length $src) {
2102print "fetch frame $src\n" if $debug >= 1;
2103$src = resolveUrl($saveFname, $src);
2104if($didFrame{$src}) {
2105print "already fetched\n" if $debug >= 2;
2106$changeFname = $saveFname;
2107return "";
2108}
2109$didFrame{$src} = 1;
2110print "* $src\n" if $debug >= 1;
2111
2112$name = "";
2113$tc = $tag;
2114if($tc =~ s/^.*\bname=//s) {
2115$tc =~ s/ .*//s;
2116$tc =~ s/^['"]//;
2117$tc =~ s/['"]?>?$//;
2118$name = urlDecode $tc if length $tc;
2119}  # name attribute
2120
2121if(readUrl($src, "", \$fbuf)) {
2122#  Replace the tag with the data, and some stuff prepended.
2123$name = " $name" if length $name;
2124$tag = "<H2> Frame$name: </H2>\n<base href=" .
2125urlEncode($changeFname) . ">\n";
2126$changeFname = $saveFname;
2127return $tag.$fbuf;
2128}  # frame read successfully
2129}}  # src attribute present
2130
2131$changeFname = $saveFname;
2132return $tag;
2133}  # readFrame
2134
2135#  Adjust the map of line numbers -- we have inserted text.
2136#  Also shift the downstream labels.
2137#  Pass the string containing the new line numbers, and the dest line number.
2138sub addToMap($$)
2139{
2140my ($newpiece, $dln) = @_;
2141my $offset = length($newpiece)/$lnwidth;
2142$offset > 0 or
2143die "negative offset in addToMap";
2144my ($i, $j);
2145foreach $i (0..25) {
2146my $ln = substr($labels, $i*$lnwidth, $lnwidth);  # line number
2147next if $ln eq $lnspace or $ln <= $dln;
2148substr($labels, $i*$lnwidth, $lnwidth) =
2149sprintf($lnformat, $ln + $offset);
2150}  # loop over 26 labels
2151$j = ($dln+1) * $lnwidth;
2152substr($map, $j, 0) = $newpiece;
2153$dot = $dln + $offset;
2154$dol += $offset;
2155$fmode |= $changemode|$firstopmode;
2156$ubackup = 1;
2157}  # addToMap
2158
2159#  Fold in the text buffer (parameter) at $endRange (global variable).
2160#  Assumes the text has the last newline on it.
2161sub addTextToSession($)
2162{
2163my $tbuf = shift;  # text buffer
2164return 1 unless length $$tbuf;
2165$fmode &= ~$nlmode if $endRange == $dol;
2166if(not $$tbuf =~ s/\n$// and
2167$endRange == $dol) {
2168$fmode |= $nlmode;
2169print "no trailing newline\n" if ! ($fmode&$binmode) and $cmd ne 'b';
2170}  # missing newline
2171my $j = $#text;
2172my $newpiece = "";
2173#  At this point $tbuf could be empty, whence split doesn't work properly.
2174#  This only happens when reading a file containing one blank line.
2175if(length $$tbuf) {
2176push @text, split "\n", $$tbuf, -1;
2177} else {
2178push @text, "";
2179}
2180$#text = $j, return 0 if lineLimit 0;
2181$newpiece .= sprintf($lnformat, $j) while ++$j <= $#text;
2182addToMap($newpiece, $endRange);
2183return 1;
2184}  # addTextToSession
2185
2186#  Read a file into memory.
2187#  As described earlier, the lines are appended to @text.
2188#  Then the indexes for those lines are pasted into $map,
2189#  using addToMap().
2190#  Check to see if the data is binary, and set $fmode accordingly.
2191#  Parameters are the filename or URL, and the post data (for URLs).
2192sub readFile($$)
2193{
2194my ($filename, $post) = @_;
2195my $tbuf;  # text buffer
2196my $rc = 1;  # return code, success
2197$filesize = 0;
2198my $rsize = 0;  # size read
2199my $j;
2200
2201if(is_url $filename) {
2202$rerouteCount = 24;
2203%didFrame = ();
2204$rc = readUrl($filename, $post, \$tbuf);
2205$filesize = length $tbuf;
2206return 0 unless $rc + $filesize;
2207} else {  # url or file
2208
2209open FH, "<$filename" or
2210$errorMsg = "cannot open $filename, $!", return 0;
2211
2212#  Check for directory here
2213if(-d FH) {
2214close FH;
2215$j = $filename;
2216$j =~ s,/$,,;
2217$j .= "/*";
2218my @dirlist;
2219if($j =~ / /) {
2220@dirlist = glob '"'.$j.'"';
2221} else {
2222@dirlist = glob $j;
2223}
2224if($#dirlist < 0) {
2225$dot = $endRange;
2226$filesize = 0;
2227return $rc;
2228}  # empty directory
2229$dirname = $j;
2230$dirname =~ s/..$//;  # get rid of /*
2231return 0 if lineLimit($#dirlist + 1);
2232$filesize = 0;
2233$tbuf = "";
2234$j = $dirSufStart;
2235substr($labels, $j, 2) = "  ";
2236foreach (@dirlist) {
2237my $entry = $_;
2238$entry =~ s,.*/,,;  # leave only the file
2239$entry =~ s/\n/\t/g;
2240my $suf = "";
2241$suf .= '@' if -l;
2242if(! -f) {
2243$suf .= '/' if -d;
2244$suf .= '|' if -p;
2245$suf .= '*' if -b;
2246$suf .= '<' if -c;
2247$suf .= '^' if -S;
2248}  # not a regular file
2249$filesize += length($entry) + length($suf) + 1;
2250if($dol) {
2251$entry .= $suf;
2252} else {
2253$suf .= "  ";
2254$j += 2;
2255substr($labels, $j, 2) = substr($suf, 0, 2);
2256}
2257$tbuf .= "$entry\n";
2258}
2259$dol or $fmode = $dirmode, print "directory mode\n";
2260return addTextToSession(\$tbuf);
2261}  # directory
2262
2263-f FH or $errorMsg = "$filename is not a regular file", close FH, return 0;
2264$filesize = (stat(FH))[7];
2265if(! $filesize) {
2266close FH;
2267$dot = $endRange;
2268$filesize = 0;
2269return $rc;
2270}  # empty file
2271$filesize <= $maxfile or
2272$errorMsg = "file is too large, limit 40MB", close FH, return 0;
2273binmode FH, ':raw' if $doslike;
2274$rsize = sysread(FH, $tbuf, $filesize) if $filesize;
2275close FH;
2276$rsize == $filesize or
2277$errorMsg = "cannot read the contents of $filename,$!", return 0;
2278}  # reading url or regular file
2279
2280my $bincount = $tbuf =~ y/\0\x80-\xff/\0\x80-\xff/;
2281if($bincount*4 - 10 < $filesize) {
2282#  A text file - remove crlf in the dos world.
2283$tbuf =~ s/\r\n/\n/g if $doslike;
2284} elsif(! ($fmode&$binmode)) {
2285#  If it wasn't before, it is now a binary file.
2286print "binary data\n";
2287$fmode |= $binmode;
2288}
2289
2290$rc &= addTextToSession(\$tbuf);
2291return $rc;
2292}  # readFile
2293
2294#  Write a range into a file.
2295#  Pass the mode and filename.
2296sub writeFile($$)
2297{
2298my ($mode, $filename) = @_;
2299$errorMsg = "cannot write to a url", return 0 if is_url($filename);
2300$dol or $errorMsg = "writing an empty file", return 0;
2301open FH, "$mode$filename" or
2302$errorMsg = "cannot create $filename, $!", return 0;
2303$filesize = 0;
2304binmode FH, ':raw' if $doslike and $fmode&$binmode;
2305if($startRange) {
2306foreach my $i ($startRange..$endRange) {
2307my $nl = ($fmode&$nlmode && $i == $dol ? "" : "\n");
2308my $suf = dirSuffix($i);
2309my $outline = fetchLine($i, 1).$suf.$nl;
2310print FH $outline or
2311$errorMsg = "cannot write to $filename, $!", close FH, return 0;
2312$filesize += length $outline;
2313}  # loop over range
2314}  # nonempty file
2315close FH;
2316#  This is not an undoable operation, nor does it change data.
2317#  In fact the data is "no longer modified" if we have written all of it.
2318$fmode &= ~$changemode if $dol == 0 or $startRange == 1 and $endRange == $dol;
2319return 1;
2320}  # writeFile
2321
2322#  Read from another context.
2323#  Pass the context number.
2324sub readContext($)
2325{
2326my $cx = shift;
2327cxCompare($cx) and cxActive($cx) or return 0;
2328my $dolcx = $dol[$cx];
2329$filesize = 0;
2330if($dolcx) {
2331return 0 if lineLimit $dolcx;
2332$fmode &= ~$nlmode if $endRange == $dol;
2333my $newpiece = "";
2334foreach my $i (1..$dolcx) {
2335my $inline = fetchLineContext($i, 1, $cx);
2336my $suf = "";
2337if($fmode[$cx] & $dirmode) {
2338$suf = substr($labels[$cx], $dirSufStart + 2*$i, 2);
2339$suf =~ s/ +$//;
2340}
2341$inline .= $suf;
2342push @text, $inline;
2343$newpiece .= sprintf $lnformat, $#text;
2344$filesize += length($inline) + 1;
2345}  # end loop copying lines
2346addToMap($newpiece, $endRange);
2347if($fmode[$cx]&$nlmode) {
2348--$filesize;
2349$fmode |= $nlmode if $endRange == $dol;
2350}
2351$fmode |= $binmode, print "binary data\n"
2352if $fmode[$cx]&$binmode and ! ($fmode&$binmode);
2353}  # nonempty buffer
2354return 1;
2355}  # readContext
2356
2357#  Write to another context.
2358#  Pass the context number.
2359sub writeContext($)
2360{
2361my $cx = shift;
2362my $dolcx = $endRange - $startRange + 1;
2363$dolcx = 0 if ! $startRange;
2364return 0 if ! cxCompare($cx) or !cxReset($cx, 1) or lineLimit $dolcx;
2365my $mapcx = $lnspace;
2366$filesize = 0;
2367if($startRange) {
2368foreach my $i ($startRange..$endRange) {
2369$outline = fetchLine($i, 0);
2370$outline .= dirSuffix($i);
2371push @text, $outline;
2372$mapcx .= sprintf $lnformat, $#text;
2373$filesize += length($outline) + 1;
2374}  # end loop copying lines
2375$fmode[$cx] = $fmode & ($binmode|$browsemode);
2376$fmode[$cx] |= $nlmode, --$filesize
2377if $fmode&$nlmode and $endRange == $dol;
2378}  # nonempty file
2379$map[$cx] = $mapcx;
2380$dot[$cx] = $dol[$cx] = $dolcx;
2381$factive[$cx] = 1;
2382$fname[$cx] = "";
2383$btags[$cx] = $btags;
2384return 1;
2385}  # writeContext
2386
2387#  Move or copy a block of text.
2388sub moveCopy()
2389{
2390$dest++;  # more convenient
2391$endr1 = $endRange+1;  # more convenient
2392$dest <= $startRange or
2393$dest >= $endr1 or
2394$errorMsg = "destination lies inside the block to be moved or copied", return 0;
2395if($cmd eq 'm' and
2396($dest == $endr1 or $dest == $startRange)) {
2397$errorMsg = "no change" if ! $inglob;
2398return 0;
2399}
2400my $starti = $startRange*$lnwidth;
2401my $endi = $endr1*$lnwidth;
2402my $desti = $dest * $lnwidth;
2403my $offset = $endr1 - $startRange;
2404my ($i, $j);
2405#  The section of the map that represents the range.
2406my $piece_r = substr $map, $starti, $endi-$starti;
2407my $piece_n = "";  # the new line numbers, if the text is copied.
2408if($cmd eq 't') {
2409return 0 if lineLimit $offset;
2410for($j=0; $j<length($piece_r); $j+=$lnwidth) {
2411push @text,
2412$text[substr($piece_r, $j, $lnwidth1)];
2413$piece_n .= sprintf $lnformat, $#text;
2414}
2415substr($map, $desti, 0) = $piece_n;
2416} elsif($dest < $startRange)  {
2417substr($map, $starti, $endi-$starti) = "";
2418substr($map, $desti, 0) = $piece_r;
2419} else {
2420substr($map, $desti, 0) = $piece_r;
2421substr($map, $starti, $endi-$starti) = "";
2422}
2423if($fmode&$nlmode) {
2424$fmode &= ~$nlmode if $dest > $dol;
2425$fmode &= ~$nlmode if $endRange == $dol and $cmd eq 'm';
2426}
2427#  Now for the labels
2428my ($lowcut, $highcut, $p2len);
2429if($dest <= $startRange) {
2430$lowcut = $dest;
2431$highcut = $endr1;
2432$p2len = $startRange - $dest;
2433} else {
2434$lowcut = $startRange;
2435$highcut = $dest;
2436$p2len = $dest - $endr1;
2437}
2438foreach $i (0..25) {
2439my $ln = substr($labels, $i*$lnwidth, $lnwidth);  # line number
2440next if $ln eq $lnspace or $ln < $lowcut;
2441if($ln >= $highcut) {
2442$ln += $offset if $cmd eq 't';
2443} elsif($ln >= $startRange and $ln <= $endRange) {
2444$ln += ($dest < $startRange ? -$p2len : $p2len) if $cmd eq 'm';
2445$ln += $offset if $cmd eq 't' and $dest < $startRange;
2446} elsif($dest < $startRange) {
2447$ln += $offset;
2448} else {
2449$ln -= $offset if $cmd eq 'm';
2450}
2451substr($labels, $i*$lnwidth, $lnwidth) = sprintf $lnformat, $ln;
2452}  # loop over labels
2453$dol += $offset if $cmd eq 't';
2454$dot = $endRange;
2455$dot +=  ($dest < $startRange ? -$p2len : $p2len) if $cmd eq 'm';
2456$dot = $dest + $offset - 1 if $cmd eq 't';
2457$fmode |= $changemode|$firstopmode;
2458$ubackup = 1;
2459return 1;
2460}  # moveCopy
2461
2462#  Delete a block of text.
2463#  Pass the range to delete.
2464sub delText($$)
2465{
2466my ($sr, $er) = @_;  # local start and end range
2467my ($i, $j);
2468$fmode &= ~$nlmode if $er == $dol;
2469$j = $er - $sr + 1;
2470substr($map, $sr*$lnwidth, $j*$lnwidth) = "";
2471#  Move the labels.
2472foreach $i (0..25) {
2473my $ln = substr($labels, $i*$lnwidth, $lnwidth);  # line number
2474next if $ln eq $lnspace or $ln < $sr;
2475substr($labels, $i*$lnwidth, $lnwidth) =
2476($ln <= $er ? $lnspace : (sprintf $lnformat, $ln - $j));
2477}  # loop over labels
2478$dol -= $j;
2479$dot = $sr;
2480--$dot if $dot > $dol;
2481$fmode |= $changemode|$firstopmode;
2482$ubackup = 1;
2483return 1;
2484}  # delText
2485
2486#  Delete files from a directory as you delete lines.
2487#  It actually moves them to your trash bin.
2488sub delFiles()
2489{
2490$dw or $errorMsg = "directories are readonly, type dw to enable directory writes", return 0;
2491$dw == 2 or length $rbin or
2492$errorMsg = "could not create .trash under your home directory, to hold the deleted files", return 0;
2493my $ln = $startRange;
2494my $cnt = $endRange - $startRange + 1;
2495while($cnt--) {
2496my $f = fetchLine($ln, 0);
2497if($dw == 2 or dirSuffix($ln) =~ /^@/) {
2498unlink "$dirname/$f" or
2499$errorMsg = "could not remove $f, $!", return 0;
2500} else {
2501rename "$dirname/$f", "$rbin/$f" or
2502$errorMsg = "Could not move $f to the trash bin, $!, set dx mode to actually remove the file", return 0;
2503}
2504delText($ln, $ln);
2505substr($labels, $dirSufStart + 2*$ln, 2) = "";
2506}
2507return 1;
2508}  # delFiles
2509
2510#  Join lines from startRange to endRange.
2511sub joinText()
2512{
2513$errorMsg = "cannot join one line", return 0 if $startRange == $endRange;
2514return 0 if lineLimit 1;
2515my ($i, $line);
2516$line = "";
2517foreach $i ($startRange..$endRange) {
2518$line .= ' ' if $cmd eq 'J' and $i > $startRange;
2519$line .= fetchLine($i, 0);
2520}
2521push @text, $line;
2522substr($map, $startRange*$lnwidth, $lnwidth) = sprintf $lnformat, $#text;
2523delText($startRange+1, $endRange);
2524$dot = $startRange;
2525return 1;
2526}  # joinText
2527
2528#  Substitute text on the lines in $startRange through $endRange.
2529#  We could be changing the text in an input field.
2530#  If so, we'll call infReplace().
2531#  Also, we might be indirectory mode, whence we must rename the file.
2532sub substituteText($)
2533{
2534my $line = shift;
2535my $whichlink = "";
2536$whichlink = $1 if $line =~ s/^(\d+)//;
2537length $line or
2538$errorMsg = "no regular expression after $icmd", return -1;
2539if($fmode&$dirmode) {
2540$dw or $errorMsg = "directories are readonly, type dw to enable directory writes", return -1;
2541}
2542my ($i, $j, $exp, $rhs, $qrhs, $lastSubst, @pieces, $blmode);
2543
2544if($line ne "bl") {
2545$blmode = 0;
2546@pieces = regexpCheck($line, 1);
2547return -1 if $#pieces < 0;
2548$exp = $pieces[0];
2549$line = $pieces[1];
2550length $line or $errorMsg = "missing delimiter", return -1;
2551@pieces = regexpCheck($line, 0);
2552return -1 if $#pieces < 0;
2553$rhs = $pieces[0];
2554$line = $pieces[1];
2555} else { $blmode = 1, $lspace = 3; }
2556
2557my $gflag = "";
2558my $nflag = 0;
2559my $iflag = "";
2560$iflag = "i" if $caseInsensitive;
2561$subprint = 1;  # default is to print the last line substituted
2562$lastSubst = 0;
2563
2564if(! $blmode) {
2565if(length $line) {
2566$subprint = 0;
2567#  necessarily starts with the delimiter
2568substr($line, 0, 1) = "";
2569while(length $line) {
2570$gflag = 'g', next if $line =~ s/^g//;
2571$subprint = 2, next if $line =~ s/^p//;
2572$iflag = 'i', next if $line =~ s/^i//;
2573if($line =~ s/^(\d+)//) {
2574! $nflag or $errorMsg = "multiple numbers after the third delimiter", return -1;
2575$nflag = $1;
2576$nflag > 0 and $nflag <= 999 or
2577$errorMsg = "numeric suffix out of range, please use [1-999]", return -1;
2578next;
2579}  # number
2580$errorMsg = "unexpected substitution suffix after the third delimiter";
2581return -1;
2582}  # loop gathering suffix flags
2583! $gflag or ! $nflag or
2584$errorMsg = "cannot use both a numeric suffix and the `g' suffix simultaneously", return -1;
2585#  s/x/y/1 is very inefficient.
2586$nflag = 0 if $nflag == 1;
2587}  # closing delimiter
2588
2589$qrhs = $rhs;  # quote-fixed right hand side
2590if($rhs =~ /^[ul]c$/) {
2591$qrhs = "$qrhs \$&";
2592$iflag .= 'e' if !$nflag;
2593} elsif($rhs eq "ue") {
2594$qrhs = "unescape \$&";
2595$iflag .= 'e' if !$nflag;
2596} elsif($rhs eq "mc") {
2597$qrhs = "mixCase \$&";
2598$iflag .= 'e' if !$nflag;
2599} else {
2600if($nflag) {
2601$qrhs =~ s/"/\\"/g;
2602$qrhs = '"'.$qrhs.'"';
2603}
2604}
2605
2606#  I don't understand it, but $&[x] means something to perl.
2607#  So when I replace j with &[x], becomeing $&[x], it blows up.
2608#  Thus I escape open brackets and braces in the rhs.
2609#  Hopefully you won't escape them on the command line - you have no reason to.
2610#  If you do they'll be doubly escaped, and that's bad.
2611$qrhs =~ s/([\[{])/\\$1/g;  #  }
2612} else {
2613$subprint = 0;
2614}  # blmode or not
2615
2616#  Substitute the input fields first.
2617if($cmd eq 'I') {
2618my $yesdot = 0;
2619my $foundFields = 0;
2620foreach $i ($startRange..$endRange) {
2621my $rc = infIndex($i, $whichlink);
2622next unless $rc;
2623$foundFields = 1;
2624$rc > 0 or $dot = $i, $inglob = 0, return -1;
2625my $newinf = $inf;
2626if(!$nflag) {
2627eval '$rc = $newinf =~ ' .
2628"s/$exp/$qrhs/$iflag$gflag; ";
2629} else {
2630$j = 0;
2631eval '$newinf =~ ' .
2632"s/$exp/++\$j == $nflag ? $qrhs : \$&/ge$iflag; ";
2633$rc = ($j >= $nflag);
2634}
2635next unless $rc;
2636$dot = $i;
2637infReplace($newinf) or return -1;
2638$yesdot = $dot;
2639}  # loop over lines
2640if(! $yesdot) {
2641if(!$inglob) {
2642$errorMsg = "no match" if $foundFields;
2643}
2644return 0;
2645}
2646dispLine($yesdot) if $subprint == 2 or ! $inglob and $subprint == 1;
2647return 1;
2648}  # input fields
2649
2650#  Not an input field, just text, so replace it.
2651#  Once again, use the eval construct.
2652#  This time we might be substituting across an entire range.
2653@pieces = ();
2654$errorMsg = "";
2655eval '
2656for($i=$startRange; $i<=$endRange; ++$i) {
2657my $temp = fetchLine($i, 0);' .
2658($blmode ? 'my $subst = breakLine(\$temp);' :
2659(!$nflag ?
2660'my $subst = $temp =~ ' .
2661"s/$exp/$qrhs/o$iflag$gflag; "
2662:
2663'my $subst = 0;
2664my $k = 0;
2665$temp =~ ' .
2666"s/$exp/++\$k == $nflag ? $qrhs : \$&/oge$iflag; " .
2667'$subst = ($k >= $nflag); '
2668)) .
2669'next unless $subst;
2670if($fmode&$dirmode) {
2671if($temp =~ m,[/\n],) {
2672$errorMsg = "cannot embed slash or newline in a directory name";
2673$inglob = 0;
2674last;
2675}
2676my $dest = "$dirname/$temp";
2677my $src = fetchLine($i, 0);
2678$src = "$dirname/$src";
2679if($src ne $dest) {
2680if(-e $dest or -l $dest) {
2681$errorMsg = "destination file already exists";
2682$inglob = 0;
2683last;
2684}
2685rename $src, $dest or
2686$errorMsg = "cannot move file to $temp", $inglob = 0, last;
2687}  # source and dest are different
2688}  # directory
2689@pieces = split "\n", $temp, -1;
2690@pieces = ("") if $temp eq "";
2691last if lineLimit $#pieces+1;
2692$j = $#text;
2693push @text, @pieces;
2694@pieces = ();
2695substr($map, $i*$lnwidth, $lnwidth) = sprintf $lnformat, ++$j;
2696if($j < $#text) {
2697my $newpiece = "";
2698$newpiece .= sprintf $lnformat, $j while ++$j <= $#text;
2699addToMap($newpiece, $i);
2700$j = length($newpiece) / $lnwidth;
2701$endRange += $j;
2702$i += $j;
2703}
2704dispLine($i) if $subprint == 2;
2705$lastSubst = $i;
2706$fmode |= $changemode|$firstopmode;
2707$ubackup = 1;
2708last if $intFlag;
2709}
2710';  # eval string
2711return 0 if length $errorMsg;
2712if(! $lastSubst) {
2713$errorMsg = ($blmode ? "no change" : "no match") if ! $inglob;
2714return 0;
2715}
2716$dot = $lastSubst;
2717dispLine($dot) if $subprint == 1 and ! $inglob;
2718if($intFlag and ! $inglob) {
2719$errorMsg = $intMsg, return 0;
2720}
2721return 1;
2722}  # substituteText
2723
2724#  Follow a hyperlink to another web page.
2725sub hyperlink($)
2726{
2727my $whichlink = shift;
2728$cmd = 'b';
2729$errorMsg = "cannot use the g$whichlink command in directory mode", return 0 if $fmode&$dirmode;
2730$startRange == $endRange or
2731$errorMsg = "go command does not expect a range", return 0;
2732
2733my $h;  # hyperlink tag
2734my @links = ();  # links on this line
2735my @bref = ();  # baseref values
2736my ($j, $line, $href);
2737
2738if($fmode&$browsemode) {
2739$line = fetchLine $endRange, 0;
2740while($line =~ /\x80([\x85-\x8f]+){/g) {
2741$j = revealNumber $1;
2742$h = $$btags[$j];
2743$href = $$h{href};
2744$errorMsg = "hyperlink found without a url?? internal error", return 0 unless defined $href;
2745push @links, $href;
2746push @bref, $$h{bref};
2747}  # loop
2748}  # browse mode
2749
2750if($#links < 0) {
2751$line = fetchLine $endRange, 1;
2752stripWhite \$line;
2753$line =~ s/[\s"']+/ /g;
2754if(length $line) {
2755while($line =~ /([^ ]+)/g) {
2756$href = $1;
2757$href =~ s/^[^\w]+//;
2758$href =~ s/[^\w]+$//;
2759if(is_url $href) {
2760push @links, $href;
2761} else {
2762$href =~ s/^mailto://i;
2763push @links, "mailto:$href" if $href =~ /^[\w.,-]+@[\w,-]+\.[\w,.-]+$/;
2764}
2765}
2766}  # loop over words
2767}  # looking for url in text mode
2768
2769$j = $#links + 1;
2770$j or $errorMsg = "no links present", return 0;
2771length $whichlink or $j == 1 or
2772$errorMsg = "multiple links, please use g [1,$j]", return 0;
2773$whichlink = 1 if ! length $whichlink;
2774if($whichlink == 0 or $whichlink > $j) {
2775$errorMsg = $j > 1 ?
2776"invalid link, please use g [1,$j]" :
2777"this line only has one link";
2778return 0;
2779}
2780--$whichlink;
2781$href = $links[$whichlink];
2782if($href =~ s/^mailto://i) {
2783$cmd = 'e';
2784return 1, "\x80mail\x80$href";
2785}  # mailto
2786$href =~ /^javascript:/i and
2787$errorMsg = "sorry, this link calls a javascript function", return 0;
2788return 1, $href if $href =~ /^#/;
2789$line = resolveUrl(($#bref >= 0 ? $bref[$whichlink] : ""), $href);
2790print "* $line\n";
2791return 1, $line;
2792}  # hyperlink
2793
2794#  Follow an internal link to a section of the document.
2795sub findSection($)
2796{
2797my $section = shift;
2798foreach my $i (1..$dol) {
2799my $t = fetchLine $i, 0;
2800while($t =~ /\x80([\x85-\x8f]+)\*/g) {
2801my $j = revealNumber $1;
2802my $h = $$btags[$j];
2803return $i if $$h{name} eq $section;
2804}
2805}
2806return 0;
2807}  # findSection
2808
2809#  Return the number of unbalanced punctuation marks at the start and end of the line.
2810sub unbalanced($$$)
2811{
2812my ($c, $d, $ln) = @_;
2813my $curline = fetchLine($ln, 1);
2814#  Escape these characters, so we know they are literal.
2815$c = "\\$c";
2816$d = "\\$d";
2817while($curline =~ s/$c[^$c$d]*$d//) { ; }
2818my $forward = $curline =~ s/$c//g;
2819$forward = 0 if $forward eq "";
2820my $backward = $curline =~ s/$d//g;
2821$backward = 0 if $backward eq "";
2822return $backward, $forward;
2823}  # unbalanced
2824
2825#  Find the line that balances the unbalanced punctuation.
2826sub balanceLine($)
2827{
2828my $line = shift;
2829my ($c, $d);  # balancing characters
2830my $openlist = "{([<`";
2831my $closelist = "})]>'";
2832my $alllist = "{}()[]<>`'";
2833my $level = 0;
2834my ($i, $direction, $forward, $backward);
2835
2836if(length $line) {
2837$line =~ /^[\{\}\(\)\[\]<>`']$/ or
2838$errorMsg = "you must specify exactly one of $alllist after the B command", return 0;
2839$c = $line;
2840if(index($openlist, $c) >= 0) {
2841$d = substr $closelist, index($openlist, $c), 1;
2842$direction = 1;
2843} else {
2844$d = $c;
2845$c = substr $openlist, index($closelist, $d), 1;
2846$direction = -1;
2847}
2848($backward, $forward) = unbalanced($c, $d, $endRange);
2849if($direction > 0) {
2850($level = $forward) or
2851$errorMsg = "line does not contain an open $c", return 0;
2852} else {
2853($level = $backward) or
2854$errorMsg = "line does not contain an open $d", return 0;
2855}
2856} else {  # character specified by the user or not?
2857#  Look for anything unbalanced, probably a brace.
2858foreach $i (0..2) {
2859$c = substr $openlist, $i, 1;
2860$d = substr $closelist, $i, 1;
2861($backward, $forward) = unbalanced($c, $d, $endRange);
2862! $backward or ! $forward or
2863$errorMsg = "both $c and $d are unbalanced on this line, try B$c or B$d", return 0;
2864($level = $backward + $forward) or next;
2865$direction = 1;
2866$direction = -1 if $backward;
2867last;
2868}
2869$level or
2870$errorMsg = "line does not contain an unbalanced brace, parenthesis, or bracket", return 0;
2871}  # explicit character passed in, or look for one
2872
2873my $selected = ($direction > 0) ? $c : $d;
2874
2875#  Now search for the balancing line.
2876$i = $endRange;
2877while(($i += $direction) > 0 and $i <= $dol) {
2878($backward, $forward) = unbalanced($c, $d, $i);
2879if($direction > 0 and $backward >= $level or
2880$direction < 0 and $forward >= $level) {
2881$dot = $i;
2882dispLine($dot);
2883return 1;
2884}
2885$level += ($forward-$backward) * $direction;
2886}  # loop over lines
2887
2888$errorMsg = "cannot find the line that balances $selected";
2889return 0;
2890}  # balanceLine
2891
2892#  Apply a regular expression to each line, and then execute
2893#  a command for each matching, or nonmatching, line.
2894#  This is the global feature, g/re/p, which gives us the word grep.
2895sub doGlobal($)
2896{
2897my $line = shift;
2898my ($i, $j, $exp, @pieces);
2899
2900length $line or
2901$errorMsg = "no regular expression after $icmd", return 0;
2902@pieces = regexpCheck($line, 1);
2903return 0 if $#pieces < 0;
2904$exp = $pieces[0];
2905$line = $pieces[1];
2906length $line or
2907$errorMsg = "missing delimiter", return 0;
2908$line =~ s/^.(i?)\s*//;
2909my $iflag = $1;
2910$iflag = "i" if $caseInsensitive;
2911
2912#  Clean up any previous stars.
2913substr($map, $_*$lnwidth+$lnwidth1, 1) = ' ' foreach (1.. $dol);
2914
2915#  Find the lines that match the pattern.
2916my $gcnt = 0;  # global count
2917eval '
2918for($i=$startRange, $j=$i*$lnwidth+$lnwidth1; $i<=$endRange; ++$i, $j+=$lnwidth) {
2919substr($map, $j, 1) = "*", ++$gcnt if
2920fetchLine($i, 1)' .
2921($cmd eq 'g' ? ' =~ ' : ' !~ ') .
2922"/$exp/o$iflag; }";
2923$gcnt or $errorMsg =
2924($cmd eq 'g' ? "no lines match the g pattern" : "all lines match the v pattern"),
2925return 0;
2926
2927#  Now apply $line to every line with a *
2928$inglob = 1;
2929$errorMsg = "";
2930$line = 'p' if ! length $line;
2931my $origdot = $dot;
2932my $yesdot = 0;
2933my $nodot = 0;
2934my $stars = 1;
2935global:while($gcnt and $stars) {
2936$stars = 0;
2937for($i=1; $i<=$dol; ++$i) {
2938last global if $intFlag;
2939next unless substr($map, $i*$lnwidth+$lnwidth1, 1) eq '*';
2940$stars = 1,--$gcnt;
2941substr($map, $i*$lnwidth+$lnwidth1, 1) = ' ';
2942$dot = $i;  # ready to run the command
2943if(evaluate($line)) {
2944$yesdot = $dot;
2945--$i if $ubackup;  # try this line again, in case we deleted or moved it
2946} else {
2947#  Subcommand might turn global flag off.
2948$nodot = $dot, $yesdot = 0, last global if ! $inglob;
2949}
2950}
2951}
2952$inglob = 0;
2953#  yesdot could be 0, even upon success, if all lines are deleted via g/re/d
2954if($yesdot or ! $dol) {
2955$dot = $yesdot;
2956dispLine($dot) if ($cmd eq 's' or $cmd eq 'I') and $subprint == 1;
2957} elsif($nodot) {
2958$dot = $nodot;
2959} else {
2960$dot = $origdot;
2961$errorMsg = "none of the marked lines were successfully modified" if $errorMsg eq "";
2962}
2963$errorMsg = $intMsg if $errorMsg eq "" and $intFlag;
2964return ! length $errorMsg;
2965}  # doGlobal
2966
2967#  Reveal the links to other web pages, or the email links.
2968sub showLinks()
2969{
2970my ($i, $j, $h, $href, $line);
2971my $addrtext = "";
2972if($fmode&$browsemode) {
2973$line = fetchLine $endRange, 0;
2974while($line =~ /\x80([\x85-\x8f]+){(.*?)}/g) {
2975$j = revealNumber $1;
2976$i = $2;
2977$h = $$btags[$j];
2978$href = $$h{href};
2979$href = "" unless defined $href;
2980if($href =~ s/^mailto://i) {
2981$addrtext .= "$i:$href\n";
2982} else {
2983$href = resolveUrl($$h{bref}, $href);
2984$addrtext .= "<A HREF=$href>\n$i\n</A>\n";
2985}
2986}  # loop
2987}  # browse mode
2988if(! length $addrtext) {
2989length $fname or $errorMsg = "no file name", return 0;
2990if(is_url($fname)) {
2991$href = $fname;
2992$href =~ s/\.browse$//;
2993$j = $href;
2994$j =~ s,^https?://,,i;
2995$j =~ s,.*/,,;
2996$addrtext = "<A HREF=$href>\n$j\n</A>\n";
2997} else {
2998$addrtext = $fname."\n";
2999}
3000}
3001$addrtext =~ s/\n$//;
3002$j = $#text;
3003push @text, split "\n", $addrtext, -1;
3004$#text = $j, return 0 if lineLimit 0;
3005$h = cxPack();
3006cxReset($context, 0) or return 0;
3007$$h{backup} = $backup if defined $backup;
3008$backup = $h;
3009print((length($addrtext)+1)."\n");
3010$dot = $dol = $#text - $j;
3011my $newpiece = $lnspace;
3012$newpiece .= sprintf($lnformat, $j) while ++$j <= $#text;
3013$map = $newpiece;
3014return 1;
3015}  # showLinks
3016
3017#  All other editors let you stack and undo hundreds of operations.
3018#  If I'm writing a new editor, why don't I do that?
3019#  I don't know; guess I don't have the time.
3020#  And in my 20 years experience, I have rarely felt the need
3021#  to undo several operations.
3022#  I'm usually undoing the last typo, and that's it.
3023#  So I allow you to undo the last operation, only.
3024#  Get ready for a possible undo command.
3025sub readyUndo()
3026{
3027return if $fmode & $dirmode;
3028$savedot = $dot, $savedol = $dol;
3029$savemap = $map, $savelabels = $labels;
3030}  # readyUndo
3031
3032sub goUndo()
3033{
3034#  swap, so we can undo our undo.  I do this alot.
3035my $temp;
3036$temp = $ dot, $dot = $lastdot, $lastdot = $temp;
3037$temp = $ dol, $dol = $lastdol, $lastdol = $temp;
3038$temp = $ map, $map = $lastmap, $lastmap = $temp;
3039$temp = $ labels, $labels = $lastlabels, $lastlabels = $temp;
3040}  # goUndo
3041
3042#  Replace labels with their lines in shell escapes.
3043sub expandLabeledLine($)
3044{
3045my$x = shift;
3046my $n = ord($x) - ord('a');
3047my $ln = substr $labels, $n*$lnwidth, $lnwidth;
3048$ln ne $lnspace or
3049$errorMsg = "label $x not set", return "";
3050return fetchLine($ln, 1);
3051}  # expandLabeledLine
3052
3053#  Run a shell escape
3054sub shellEscape($)
3055{
3056my $line = shift;
3057#  Expand 'a through 'z labels
3058$errorMsg = "";
3059$line =~ s/\B'([a-z])\b/expandLabeledLine($1)/ge;
3060return 0 if length $errorMsg;
3061$line =~ s/'_/$fname/g;
3062$line =~ s/'\./fetchLine($dot,1)/ge;
3063if($doslike) {
3064#  Just run system and hope for the best.
3065system $line;
3066} else {
3067#  Unix has a concept of shells.
3068my $shell = $ENV{SHELL};
3069$shell = "/bin/sh" if ! defined $shell;
3070if(length $line) {
3071system $shell, "-c", $line;
3072} else {
3073system $shell;
3074}
3075}  # dos or unix
3076print "ok\n";
3077return 1;
3078}  # shellEscape
3079
3080#  Implement various two letter commands.
3081#  Most of these set and clear modes.
3082sub twoLetter($)
3083{
3084my $line = shift;
3085my ($i, $j);
3086
3087if($line eq "qt") { exit 0; }
3088
3089if($line =~ s/^cd\s+// or $line =~ s/^cd$//) {
3090$cmd = 'e';  # so error messages are printed
3091if(length $line) {
3092my $temppath = `pwd`;
3093chomp $temppath;
3094if($line eq "-") {
3095$errorMsg = "you have no previous directory", return 0 unless defined $oldpath;
3096chdir $oldpath or $errorMsg = "cannot change to previous directory $oldpath", return 0;
3097} else {
3098$line = envFile($line);
3099return 0 if length $errorMsg;
3100chdir $line or $errorMsg = "invalid directory", return 0;
3101}
3102$oldpath = $temppath;
3103}
3104print `pwd`;
3105return 1;
3106}
3107
3108if($line eq "rf") {
3109$cmd = 'e';
3110if($fmode & $browsemode) {
3111$cmd = 'b';
3112$fname =~ s/.browse$//;
3113}
3114length $fname or $errorMsg = "no file name", return 0;
3115$nostack = 1;
3116return -1, "$cmd $fname";
3117}
3118
3119if($line eq "et") {
3120$cmd = 'e';
3121$fmode&$browsemode or
3122$errorMsg = $nobrowse, return 0;
3123foreach $i (1..$dol) {
3124$text[substr($map, $i*$lnwidth, $lnwidth1)] = fetchLine($i,1);
3125}
3126$fmode &= ~($browsemode|$firstopmode|$changemode);
3127$btags = [];  # don't need those any more.
3128print "editing as pure text\n" if $helpall;
3129return 1;
3130}
3131
3132if($line eq "ub") {
3133$fmode&$browsemode or
3134$errorMsg = $nobrowse, return 0;
3135dropEmptyBuffers();
3136#  Backing out.
3137$map = $$btags[0]{map1};
3138$fname = $$btags[0]{fname};
3139$fmode = $$btags[0]{fmode};
3140$labels = $$btags[0]{labels};
3141$dot = $$btags[0]{dot};
3142$dol = $$btags[0]{dol1};
3143apparentSize();
3144return 1;
3145}  # reverse browse
3146
3147if($line eq "f/" or $line eq "w/") {
3148$i = $fname;
3149$i =~ s,.*/,, or
3150$errorMsg = "filename does not contain a slash", return 0;
3151print "$i\n" if $helpall;
3152substr($line, 1, 1) = " $i";
3153return -1, $line;
3154}
3155
3156if($line =~ /^f[dkt]$/) {
3157$fmode&$browsemode or
3158$errorMsg = $nobrowse, return 0;
3159my $key = "title";
3160$key = "keywords" if $line eq "fk";
3161$key = "description" if $line eq "fd";
3162my $val = $$btags[0]{$key};
3163if(defined $val) {
3164print "$val\n";
3165} else {
3166print "no $key\n";
3167}
3168return 1;
3169}
3170
3171if($line =~ /^sm(\d*)$/) {
3172 $cmd = 'e';
3173$smMail = $1;
3174$altattach = 0;
3175$j = sendMailCurrent();
3176$j and print "ok\n";
3177return $j;
3178}
3179
3180#  simple commands
3181if($line eq "sg") { $global_lhs_rhs = 1; print "substitutions global\n" if $helpall; return 1; }
3182if($line eq "sl") { $global_lhs_rhs = 0; print "substitutions local\n" if $helpall; return 1; }
3183if($line eq "ci") { $caseInsensitive = 1; print "case insensitive\n" if $helpall; return 1; }
3184if($line eq "cs") { $caseInsensitive = 0; print "case sensitive\n" if $helpall; return 1; }
3185if($line eq "dr") { $dw = 0; print "directories readonly\n" if $helpall; return 1; }
3186if($line eq "dw") { $dw = 1; print "directories writable\n" if $helpall; return 1; }
3187if($line eq "dx") { $dw = 2; print "directories writable with delete\n" if $helpall; return 1; }
3188if($line eq "dp") { $delprint ^= 1; print ($delprint ? "delete print\n" : "delete quiet\n"); return 1; }
3189if($line eq "rh") { $reroute ^= 1; print ($reroute ? "redirect html\n" : "do not redirect html\n"); return 1; }
3190if($line eq "pm") { $passive ^= 1; print ($passive ? "passive ftp\n" : "active ftp\n"); return 1; }
3191if($line eq "ph") { $pdf_convert ^= 1; print ($pdf_convert ? "pdf to html conversion\n" : "pdf raw\n"); return 1; }
3192if($line eq "vs") { $ssl_verify ^= 1; print ($ssl_verify ? "verify ssl connections\n" : "do not verify ssl connections (less secure)\n"); return 1; }
3193if($line eq "ac") { $allowCookies ^= 1; print ($allowCookies ? "accept cookies\n" : "reject cookies\n"); return 1; }
3194if($line eq "sr") { $allowReferer ^= 1; print ($allowReferer ? "send refering web page\n" : "don't send refering web page\n"); return 1; }
3195if($line =~ s/^db *//) {
3196if($line =~ /^\d$/) {
3197$debug = $line, return 1;
3198} else {
3199$errorMsg = "please set debug level, 0 through 7", return 0;
3200}
3201}
3202if($line =~ s/^ua *//) {
3203if($line =~ /^\d+$/) {
3204$errorMsg = "Agent number $line is not defined", return 0 if ! defined$agents[$line];
3205$agent = $agents[$line], return 1;
3206} else {
3207$errorMsg = "please set user agent, 0 through ".$#agents, return 0;
3208}
3209}  # ua number
3210if($line eq "ff") { $fetchFrames ^= 1; print ($fetchFrames ? "fetch frames\n" : "do not fetch frames\n"); return 1; }
3211if($line eq "tn") { $textAreaCR ^= 1; print ($textAreaCR ? "dos newlines on text areas\n" : "unix newlines on text areas\n"); return 1; }
3212if($line eq "eo") { $endmarks = 0; print "end markers off\n" if $helpall; return 1; }
3213if($line eq "el") { $endmarks = 1; print "end markers list\n" if $helpall; return 1; }
3214if($line eq "ep") { $endmarks = 2; print "end markers on\n" if $helpall; return 1; }
3215return -1,"^".length($1) if $line =~ /^(\^+)$/;
3216return stripChild() if $line eq "ws";
3217return unstripChild() if $line eq "us";
3218
3219return -1, $line;  # no change
3220}  # twoLetter
3221
3222#  Evaluate the entered command.
3223#  This is indirectly recursive, as in g/z/ s/x/y/
3224#  Pass the command line, and return success or failure.
3225sub evaluate($)
3226{
3227my $line = shift;
3228my ($i, $j, @pieces, $h, $href);
3229my $postspace = 0;
3230my $postBrowse;
3231my $nsuf = -1;  # numeric suffix
3232my $cx;  # context specified -- always $nsuf - 1
3233my $section = "";  # section within a document
3234my $post = "";  # for post cgi method
3235$nostack = 0;  # suppress stacking of edit sessions
3236
3237$referer = "";
3238$referer = $fname if $allowReferer;
3239$referer =~ s/\.browse$//;
3240
3241$cmd = "";
3242#  We'll allow whitespace at the start of an entered command.
3243$line =~ s/^\s*//;
3244#  Watch for successive q commands.
3245$lastq = $lastqq, $lastqq = -1;
3246
3247if(!$inglob) {
3248#  We'll allow comments in an edbrowse script
3249return 1 if $line =~ /^#/;
3250
3251return shellEscape $line if $line =~ s/^!\s*//;
3252
3253#  Web express shortcuts
3254if($line =~ s/^@ *//) {
3255if(! length $line) {
3256my @shortList = ();
3257foreach $i (sort keys %shortcut) {
3258$j = $i;
3259my ($desc, $sort);
3260defined ($desc = $shortcut{$i}{desc}) and
3261$j .= " = $desc";
3262$j = "|$j";
3263defined ($sort = $shortcut{$i}{sort}) and
3264$j = "$sort$j";
3265$j .= "\n";
3266push @shortList, $j;
3267}  # loop over shortcuts
3268foreach (sort @shortList) {
3269s/^.*?\|//;
3270print $_;
3271}
3272return 1;
3273}
3274$cmd = '@';
3275($j, $line, $postBrowse) = webExpress($line);
3276return 0 unless $j;
3277$line =~ s%^%b http://%;
3278if($line =~ /\*/) {
3279$post = $line;
3280$post =~ s/.*\*/*/;
3281$line =~ s/\*.*//;
3282}
3283}
3284
3285#  Predefined command sets.
3286if($line =~ s/^< *//) {
3287if(!length $line) {
3288foreach $i (sort keys %commandList) {
3289print "$i\n";
3290}
3291return 1;
3292}
3293$i = $commandList{$line};
3294defined $i or $errorMsg = "command set $line is not recognized", return 0;
3295return evaluateSequence($i, $commandCheck{$line});
3296}  # command set
3297
3298#  Two letter commands.
3299($j, $line) = twoLetter($line);
3300return $j if $j >= 0;
3301}  # not in global
3302
3303$startRange = $endRange = $dot;  # default, if no range given
3304$line = '+' if ! length $line;
3305$line = ($dol ? 1 : 0) . $line if substr($line, 0, 1) eq ',';
3306if($line =~ /^j/i) {
3307$endRange = $dot + 1;
3308$errorMsg = "line number too large", return "" if $endRange > $dol;
3309} elsif(substr($line, 0, 1) eq '=') {
3310$startRange = $endRange = $dol;
3311} elsif($line =~ /^[wgv]/ and $line !~ /^g\s*\d*$/) {
3312$startRange = 1, $endRange = $dol;
3313$startRange = 0 if ! $dol;
3314} elsif($line =~ s/^;//) {
3315$endRange = $dol;
3316} else {
3317@pieces = getRangePart($line);
3318$inglob = 0, return 0 if $#pieces < 0;
3319$startRange = $endRange = $pieces[0];
3320$line = $pieces[1];
3321if($line =~ s/^,//) {
3322$endRange = $dol;  # new default
3323if($line =~ /^[-'.\$+\d\/?]/) {
3324@pieces = getRangePart($line);
3325$inglob = 0, return 0 if $#pieces < 0;
3326$endRange = $pieces[0];
3327$line = $pieces[1];
3328}  # second address
3329}  # comma present
3330}  # end standard range processing
3331
3332#  lc lower case, uc upper case
3333$line =~ s:^([lmu]c|ue)$:s/.*/$1/:;
3334if($line eq "bl") {  # break the line
3335dirBrowseCheck("break line") or return 0;
3336$line = "sbl";
3337}
3338
3339$cmd = substr($line, 0, 1);
3340if(length $cmd) { $line = substr($line, 1); } else { $cmd = 'p'; }
3341$icmd = $cmd;
3342$startRange <= $endRange or
3343$errorMsg = "bad range", return 0;
3344index($valid_cmd, $cmd) >= 0 or
3345$errorMsg = "unknown command $cmd", $inglob = 0, return 0;
3346
3347#  Change some of the command codes, depending on context
3348$cmd = 'I' if $cmd eq 'i' and $line =~ /^[$valid_delim\d<*]/o;
3349$cmd = 'I' if $cmd eq 's' and $fmode&$browsemode;
3350$cmd = 's' if $cmd eq 'S';
3351my $writeMode = ">";
3352if($cmd eq "w" and substr($line, 0, 1) eq "+") {
3353$writeMode = ">>";
3354$line =~ s/^.//;
3355}
3356
3357!($fmode&$dirmode) or index($dir_cmd, $cmd) >= 0 or
3358$errorMsg = "$icmd $nixdir", $inglob = 0, return 0;
3359!($fmode&$browsemode) or index($browse_cmd, $cmd) >= 0 or
3360$errorMsg = "$icmd $nixbrowse", $inglob = 0, return 0;
3361$startRange > 0 or index($zero_cmd, $cmd) >= 0 or
3362$errorMsg = "zero line number", return 0;
3363$postspace = 1 if  $line =~ s/^\s+//;
3364if(index($spaceplus_cmd, $cmd) >= 0 and
3365! $postspace and length $line and
3366$line !~ /^\d+$/) {
3367$errorMsg = "no space after command";
3368return 0;
3369}
3370
3371#  env variable and wild card expansion
3372if(index("brewf", $cmd) >= 0 and length $line) {
3373$line = envFile($line);
3374return 0 if length $errorMsg;
3375}
3376
3377if($cmd eq 'B') {
3378return balanceLine($line);
3379}
3380
3381if($cmd eq 'z') {
3382$startRange = $endRange + 1;
3383$endRange = $startRange;
3384$startRange <= $dol or
3385$errorMsg = "line number too large", return 0;
3386$cmd = 'p';
3387$line = $last_z if ! length $line;
3388if($line =~ /^(\d+)\s*$/) {
3389$last_z = $1;
3390$last_z = 1 if $last_z == 0;
3391$endRange += $last_z - 1;
3392$endRange = $dol if $endRange > $dol;
3393} else {
3394$errorMsg = "z command should be followed by a number", return 0;
3395}
3396$line = "";
3397}
3398
3399#  move/copy destination, the third address
3400if($cmd eq 'm' or $cmd eq 't') {
3401length $line or
3402$errorMsg = "no move/copy destination", $inglob = 0, return 0;
3403$line =~ /^[-'.\$+\d\/?]/ or
3404$errorMsg = "invalid move/copy destination", $inglob = 0, return 0;
3405@pieces = getRangePart($line);
3406$inglob = 0, return 0 if $#pieces < 0;
3407$dest = $pieces[0];
3408$line = $pieces[1];
3409$line =~ s/^\s*//;
3410}  # move copy destination
3411if($cmd eq 'a') {
3412($line eq "+") ? ($line = "") : ($linePending = undef);
3413} else {
3414$linePending = undef;
3415}
3416! length $line or index($nofollow_cmd, $cmd) < 0 or
3417$errorMsg = "unexpected text after the $icmd command", $inglob = 0, return 0;
3418
3419#  We don't need trailing whitespace, except for substitute or global substitute.
3420index("sgvI", $cmd) >= 0 or
3421$line =~ s/\s*$//;
3422
3423! $inglob or
3424index($global_cmd, $cmd) >= 0 or
3425$errorMsg = "the $icmd command cannot be applied globally", $inglob = 0, return 0;
3426
3427if($cmd eq 'h') {
3428$errorMsg = "no errors" if ! length $errorMsg;
3429print $errorMsg,"\n";
3430return 1;
3431}
3432
3433if($cmd eq 'H') {
3434$helpall ^= 1;
3435print "help messages on\n" if $helpall;
3436return 1;
3437}  # H
3438
3439if(index("lpn", $cmd) >= 0) {
3440foreach $i ($startRange..$endRange) {
3441dispLine($i);
3442$dot = $i;
3443last if $intFlag;
3444}
3445return 1;
3446}
3447
3448if($cmd eq '=') {
3449print $endRange,"\n";
3450return 1;
3451}
3452
3453if($cmd eq 'u') {
3454$fmode&$firstopmode or
3455$errorMsg = "nothing to undo", return 0;
3456goUndo();
3457return 1;
3458}  # u
3459
3460if($cmd eq 'k') {
3461$line =~ /^[a-z]$/ or
3462$errorMsg = "please enter k[a-z]", return 0;
3463$startRange == $endRange or
3464$errorMsg = "cannot label an entire range", return 0;
3465substr($labels, (ord($line) - ord('a'))*$lnwidth, $lnwidth) =
3466sprintf $lnformat, $endRange;
3467return 1;
3468}
3469
3470$nsuf = $line if $line =~ /^\d+$/ and ! $postspace;
3471$cx = $nsuf - 1;
3472
3473if($cmd eq 'f') {
3474if($nsuf >= 0) {
3475(cxCompare($cx) and cxActive($cx)) or return 0;
3476$j = $fname[$cx];
3477print(length($j) ? $j : "no file");
3478print " [binary]" if $fmode[$cx]&$binmode;
3479print "\n";
3480return 1;
3481}
3482if(length $line) {
3483$errorMsg = "cannot change the name of a directory", return 0 if $fmode&$dirmode;
3484$fname = $line;
3485} else {
3486print(length($fname) ? $fname : "no file");
3487print " [binary]" if $fmode&$binmode;
3488print "\n";
3489}
3490return 1;
3491}  # f
3492
3493if($cmd eq 'q') {
3494$nsuf < 0 or (cxCompare($cx) and cxActive($cx)) or return 0;
3495if($nsuf < 0) {
3496$cx = $context;
3497$errorMsg = "unexpected text after the $icmd command", return 0 if length $line;
3498}
3499cxReset($cx, 1) or return 0;
3500return 1 if $cx != $context;
3501#  look around for another active session
3502while(1) {
3503$cx = 0 if ++$cx > $#factive;
3504exit 0 if $cx == $context;
3505next if ! defined $factive[$cx];
3506cxSwitch($cx, 1);
3507return 1;
3508}
3509}  # q
3510
3511if($cmd eq 'w') {
3512if($nsuf >= 0) {
3513$writeMode eq ">" or
3514$errorMsg = "sorry, append to buffer not yet implemented", return 0;
3515return writeContext($cx)
3516}
3517$line = $fname if ! length $line;
3518if($fmode&$dirmode and $line eq $fname) {
3519$errorMsg = "cannot write to the directory; files are modified as you go";
3520return 0;
3521}
3522return writeFile($writeMode, $line) if length $line;
3523$errorMsg = "no file specified";
3524return 0;
3525}  # w
3526
3527#  goto a file in a directory
3528if($fmode&$dirmode and $cmd eq 'g' and ! length $line) {
3529$cmd = 'e';
3530$line = $dirname . '/' . fetchLine($endRange, 0);
3531}
3532
3533if($cmd eq 'e') {
3534return (cxCompare($cx) and cxSwitch($cx, 1)) if $nsuf >= 0;
3535if(!length $line) {
3536$j = $context + 1;
3537print "session $j\n";
3538return 1;
3539}
3540}  # e
3541
3542if($cmd eq 'g' and $line =~ /^\d*$/) {
3543($j, $line) = hyperlink($line);
3544return 0 unless $j;
3545#  Go on to browse the file.
3546}  # goto link
3547
3548if($cmd eq '^') {
3549! length $line or $nsuf >= 0 or
3550$errorMsg = "unexpected text after the ^ command", return 0;
3551$nsuf = 1 if $nsuf < 0;
3552while($nsuf) {
3553$errorMsg = "no previous text", return 0 if ! defined $backup;
3554cxReset($context, 2) or return 0;
3555$h = $backup;
3556$backup = $$h{backup};
3557cxUnpack($h);
3558--$nsuf;
3559}
3560#  Should this print be inside or outside the loop?
3561if($dot) { dispLine($dot); } else { print "empty file\n"; }
3562return 1;
3563}  # ^
3564
3565if($cmd eq 'A') {
3566return showLinks();
3567}  # A
3568
3569if($icmd eq 's' or $icmd eq 'S') {
3570#  A few shorthand notations.
3571if($line =~ /^([,.;:!?)"-])(\d?)$/) {
3572my $suffix = $2;
3573$line = "$1 +";
3574#  We have to escape the question mark and period
3575$line =~ s/^([?.])/\\$1/;
3576$line = "/$line/$1\\n";
3577$line .= "/$suffix" if length $suffix;
3578}
3579}  # original command was s
3580
3581readyUndo if ! $inglob;
3582
3583if($cmd eq 'g' or $cmd eq 'v') {
3584return doGlobal($line);
3585}  # global
3586
3587if($cmd eq 'I') {
3588$fmode&$browsemode or $errorMsg = $nobrowse, $inglob = 0, return 0;
3589
3590if($line =~ /^\d*\?/) {  # status
3591$inglob and $errorMsg = $inoglobal, $inglob = 0, return 0;
3592$startRange == $endRange or $errorMsg = $inorange, return 0;
3593infIndex($endRange, $line) > 0 or return 0;
3594infStatus($line);
3595return 1;
3596}  # get info on input field
3597
3598if($line =~ /^\d*([=<])/) {
3599my $asg = $1;
3600$subprint = 1;
3601my $yesdot = 0;
3602my $t = $line;
3603$t =~ s/^\d*[=<]//;
3604if($asg eq '<') {
3605if($t =~ /^\d+$/) {
3606my $cx = $t-1;
3607cxCompare($cx) and cxActive($cx) or $inglob = 0, return 0;
3608my $dolcx = $dol[$cx];
3609$dolcx == 1 or $errorMsg = "session $t should contain exactly one line", $inglob = 0, return 0;
3610$t = fetchLineContext(1, 1, $cx);
3611} else {
3612$errorMsg = "";
3613$t = envFile $t;
3614length($errorMsg) and $inglob = 0, return 0;
3615open FH, $t or $errorMsg = "cannot open $t, $!", $inglob = 0, return 0;
3616$t = <FH>;
3617defined $t or $errorMsg = "empty file", $inglob = 0, return 0;
3618if(defined <FH>) {
3619close FH;
3620$errorMsg = "file contains more than one line";
3621$inglob = 0;
3622return 0;
3623}
3624close FH;
3625$t =~ s/[\r\n]+$//;
3626}
3627}  # I<file
3628foreach $i ($startRange..$endRange) {
3629my $rc = infIndex($i, $line);
3630next unless $rc;
3631$dot = $i;
3632$rc > 0 and infReplace($t) or $inglob = 0, return 0;
3633$yesdot = $dot;
3634}  # loop over lines
3635if($yesdot) {
3636dispLine($yesdot) if ! $inglob;
3637return 1;
3638}
3639$errorMsg = "no input fields present" if ! $inglob;
3640return 0;
3641}  # i=
3642
3643if($line =~ /^\d*\*$/) {
3644$inglob and $errorMsg = $inoglobal, $inglob = 0, return 0;
3645$startRange == $endRange or $errorMsg = $inorange, return 0;
3646infIndex($endRange, $line) > 0 or return 0;
3647($j, $line, $post) = infPush();
3648#  return code of -1 means there's more to do.
3649return $j unless $j < 0;
3650} elsif( $line !~ m&^\d*[$valid_delim]&o) {
3651$errorMsg = "unknown input field directive, please use I? or I= or I/text/replacement/";
3652return 0;
3653}
3654}  # input field
3655
3656#  Pull section indicator off of a url.
3657$section = $1 if $cmd eq 'b' and $line =~ s/(#.*)//;
3658
3659if(($cmd eq 'b' or $cmd eq 'e') and length $line) {
3660$h = undef;
3661$h = cxPack() if $dol and ! $nostack;
3662cxReset($context, 0) or return 0;
3663$startRange = $endRange = 0;
3664$changeFname = "";
3665if($line =~ /^\x80mail\x80(.*)$/) {  # special code for sendmail link
3666$href = $1;
3667my $subj = urlSubject(\$href);
3668$subj = "Comments" unless length $subj;
3669if(lineLimit 2) {
3670$i = 0;
3671} else {
3672$i = 1;
3673push @text, "To: $href";
3674$map .= sprintf($lnformat, $#text);
3675push @text, "Subject: $subj";
3676$map .= sprintf($lnformat, $#text);
3677$dot = $dol = 2;
3678print "SendMail link.  Compose your mail, type sm to send, then ^ to get back.\n";
3679apparentSize();
3680}
3681} else {
3682$fname = $line;
3683$i = readFile($fname, $post);
3684$fmode &= ~($changemode|$firstopmode);
3685}
3686$filesize = -1, cxUnpack($h), return 0 if !$i and ! $dol and is_url($fname);
3687if(defined $h) {
3688$$h{backup} = $backup if defined $backup;
3689$backup = $h;
3690}
3691return 0 if ! $i;
3692$fname = $changeFname if length $changeFname;
3693$cmd = 'e' if $fmode&$binmode or ! $dol;
3694return 1 if $cmd eq 'e';
3695}
3696
3697if($cmd eq 'b') {
3698if(! ($fmode&$browsemode)) {
3699readyUndo();
3700print("$filesize\n"), $filesize = -1 if $filesize >= 0;
3701render() or return 0;
3702if(defined $postBrowse) {
3703$$btags[0]{pb} = $postBrowse;
3704evaluateSequence($postBrowse, 0);
3705if($$btags[0]{dol2} > $dol) {
3706$fmode &= ~($changemode|$firstopmode);
3707apparentSize();
3708}
3709}
3710} else {
3711$errorMsg = "already browsing", return 0 if ! length $section;
3712}
3713return 1 if ! length $section;
3714$section =~ s/^#//;
3715$j = findSection($section);
3716$errorMsg = "cannot locate section #$section", return 0 unless $j;
3717$dot = $j;
3718dispLine($dot);
3719return 1;
3720}  # b
3721
3722if($cmd eq 'm' or $cmd eq 't') {
3723return moveCopy();
3724}
3725
3726if($cmd eq 'i') {
3727$cmd = 'a';
3728--$startRange, --$endRange;
3729}
3730
3731if($cmd eq 'c') {
3732delText($startRange, $endRange) or return 0;
3733$endRange = --$startRange;
3734$cmd = 'a';
3735}
3736
3737if($cmd eq 'a') {
3738return readLines();
3739}
3740
3741if($cmd eq 'd') {
3742$i = ($endRange == $dol);
3743if($fmode & $dirmode) {
3744$j = delFiles();
3745} else {
3746$j = delText($startRange, $endRange);
3747}
3748$inglob = 0 if ! $j;
3749if($j and $delprint and ! $inglob) {
3750$i ? print "end of file\n" : dispLine($dot);
3751}
3752return $j;
3753}  # d
3754
3755if($cmd eq 'j' or $cmd eq 'J') {
3756return joinText();
3757}  # j
3758
3759if($cmd eq 'r') {
3760return readContext($cx) if $nsuf >= 0;
3761return readFile($line, "") if length $line;
3762$errorMsg = "no file specified";
3763return 0;
3764}  #  r
3765
3766if($cmd eq 's' or $cmd eq 'I') {
3767$j = substituteText($line);
3768$inglob = $j = 0 if $j < 0;
3769return $j;
3770}  # substitute
3771
3772$errorMsg = "command $icmd not yet implemented";
3773$inglob = 0;
3774return 0;
3775}  # evaluate
3776
3777sub evaluateSequence($$)
3778{
3779my $commands = shift;
3780my $check = shift;
3781foreach my $go (@$commands) {
3782$inglob = 0;
3783$intFlag = 0;
3784$filesize = -1;
3785my $rc = evaluate($go);
3786print "$filesize\n" if $filesize >= 0;
3787$rc or ! $check or
3788return 0;
3789}
3790return 1;
3791}  # evaluateSequence
3792
3793#  Hash to map html tags onto their English descriptions.
3794#  For instance, P maps to "paragraph".
3795#  Most of the tags, such as FONT, map to nothing,
3796#  whence they are thrown away.
3797#  The first two characters are not part of the description.
3798#  It forms a number that describes the nestability of the tag.
3799#  Bit 1 means the tag should be nested, like parentheses.
3800#  In fact all the bit1 tags should nest amongst eachother, unlike
3801#  <UL> <TABLE> </UL> </TABLE> (nesting error).
3802#  Bit 2 means a tag may appear inside itself, like nested lists.
3803#  Bit 4 means the tag implies a paragraph break.
3804#  Bit 8 means we retain attributes on the positive tag.
3805#  bit 16 means to close an open anchor *before* applying this tag
3806%tagdesc = (
3807sub => "11a subscript",
3808font => " 3a font",
3809center => " 3centered text",
3810sup => "11a superscript",
3811title => "17the title",
3812head => "17the html header information",
3813body => "27the html body",
3814bgsound => "24background music",
3815meta => " 8a meta tag",
3816base => " 8base reference for relative URLs",
3817img => " 8an image",
3818br => " 0a line break",
3819p => "20a paragraph",
3820blockquote => "20a quoted paragraph",
3821div => "20a divided section",
3822h => "21a header",
3823dt => "20a term",
3824dd => "20a definition",
3825hr => "16a horizontal line",
3826ul => "23a bullet list",
3827ol => "23a numbered list",
3828dl => "23a definition list",
3829li => "16a list item",
3830form => "25a form",
3831input => "24an input item",
3832a => "25an anchor",
3833frame => "28a frame",
3834map => "28An image map",
3835area => "24an image map area",
3836#  I've seen tables nested inside tables -- I don't know why!
3837table => "31a table",
3838tr => "19a table row",
3839td => "19a table entry",
3840th => "19a table heading",
3841pre => " 5a preformatted section",
3842xmp => " 5a preformatted section",
3843address => " 5a preformatted section",
3844script => " 1a script",
3845style => " 1a style block",
3846noframes => " 1noframe section",
3847select => "25an option list",
3848textarea => "25an input text area",
3849option => "24a select option",
3850#  The following tags convey formatting information that is eventually
3851#  discarded, but I'll track them for a while,
3852#  just to verify nestability.
3853em => " 1a block of emphasized text",
3854strong => " 1a block of emphasized text",
3855b => " 1a block of bold text",
3856i => " 1a block of italicized text",
3857code => " 1a block of sample code",
3858samp => " 1a block of sample code",
3859);
3860
3861#  We encode tags in a @tag attribute=value attribute=value ...@ format,
3862#  though of course we don't use the @ sign.
3863#  We use \x80, which should not appear in international text.
3864#  I simply hard code it - it makes things simpler.
3865
3866#  Support routine, to encode a tag.
3867#  Run from within a global substitute.
3868#  Pas the name of the tag, slash, and tag arguments
3869sub processTag($$$)
3870{
3871my ($tag, $slash, $attributes) = @_;
3872my $nlcount = $attributes =~ y/\n/\n/;  # newline count
3873my $doat = 0;  # do attributes
3874$tag = lc $tag;
3875my $desc = $tagdesc{$tag};
3876if(defined $desc) {
3877$doat = (substr($desc, 0, 2) & 8);
3878} else {
3879$tag = "z";
3880}
3881#  Do we need to gather up the attributes?
3882if(!$doat or $slash eq "/") {
3883#  Guess not, just return the tag.
3884return "" if $tag eq "z" and ! $nlcount;
3885return "\x80$tag$slash$nlcount\x80";
3886}
3887#  Process each whitespace separated chunk, taking quotes into account.
3888#  note that name="foo"size="1" is suppose to be two separate tags;
3889#  God help us!
3890#  Borrow a global variable, even though this may not be an input tag.
3891$itag = {tag => $tag};
3892push @$btags, $itag;
3893$attributes =~ s/(  # replace the entire matched text
3894\w+  # attribute name
3895(?>\s*=\s*  # as in name=value
3896(?>  # a sequence of choices
3897[^\s"']+  # regular printable characters
3898|
3899"[^"]*"  # double quoted string
3900|
3901'[^']*'  # single quoted string
3902)  # one of three formats
3903)?  # =value
3904)/processAttr($1)/xsge;
3905#  Capture description and keywords.
3906if($tag eq "meta") {
3907my $val = $$itag{name};
3908if(defined $val) {
3909$val = lc $val;
3910if($val eq "description" or $val eq "keywords") {
3911my $content = $$itag{content};
3912if(defined $content) {
3913stripWhite \$content;
3914$$btags[0]{$val} = $content if length $content;
3915}  # content
3916}  # description or keywords
3917}  # name=
3918pop @$btags;
3919return "" unless $nlcount;
3920return "\x80z$nlcount\x80";
3921}  # meta tag
3922my $tagnum = $#$btags;
3923return "\x80$tag$nlcount,$tagnum\x80";
3924}  # processTag
3925
3926#  Support routine, to crack attribute=value.
3927sub processAttr($)
3928{
3929my $line = shift;
3930#  Get rid of spaces around first equals.
3931$line =~ s/^([^=\s]*)\s*=\s*/$1=/;
3932#  Get rid of the quotes.
3933$line =~ s/("[^"]*"|'[^']*')/substr($1,1,-1)/sge;
3934my $attr = lc $line;
3935$attr =~ s/\s*=.*//s;
3936return "" unless $attr =~ /^\w+$/;
3937$line =~ s/^[^=]*=//s
3938or $line = "";
3939$line =~ s/&([a-zA-Z]+|#\d+);/metaChar($1)/ge;
3940$$itag{$attr} = $line;
3941return "";
3942}  # processAttr
3943
3944#  Support routine, to encode a bang tag.
3945#  Run from within a global substitute.
3946sub processBangtag($)
3947{
3948my $item = shift;
3949if($item eq "'" or $item eq '"') {
3950return (length $bangtag ? " " : $item);
3951}
3952if(substr($item, 0, 1) eq '<') {
3953return "" if length $bangtag;
3954return $item if $item eq "<";
3955$bangtag = substr $item, 1;
3956return "<z ";
3957}
3958return $item unless length $bangtag;
3959#  dashes at the front require dashes at the end.
3960#  But (apparently) they don't have to be the same number of dashes.
3961#  I really don't understand this syntax at all!
3962#  It is suppose to follow the rules in
3963#  http://www.htmlhelp.com/reference/wilbur/misc/comment.html
3964#  but real web pages hardly ever follow these rules!
3965substr($item, -1) = "";  # don't need that last >
3966my $l = length($bangtag) - 1;
3967$l &= ~1;  # back down to an even number
3968return " " if $l and ! length $item;  # lone > inside a comment
3969$bangtag = "";
3970return ">";
3971}  # processBangtag
3972
3973#  Turn <>'" in javascript into spaces, as we did above.
3974sub processScript($)
3975{
3976my $item = shift;
3977if(length($item) < 5) {
3978return ($inscript ? " " : $item);
3979}
3980#  now $item is <script or </script
3981#  Try to guard against Java code that looks like
3982#  document_write("<script bla bla bla>\n";
3983#  There's a lot of this going around.
3984$prequote = 0;
3985$prequote = 1 if $item =~ s/^\( *['"]//;
3986return ' ' if $inscript and $prequote;
3987if(substr($item, 1, 1) eq '/') {
3988--$inscript if $inscript;
3989} else {
3990++$inscript;
3991}
3992return $item;
3993}  # processScript
3994
3995sub backOverSpaces($)
3996{
3997my $trunc = shift;
3998my $j = length($refbuf) - 1;
3999--$j while $j >= 0 and substr($refbuf, $j, 1) =~ /[ \t]/;
4000++$j;
4001substr($refbuf, $j) = "" if $trunc;
4002return $j;
4003}  # backOverSpaces
4004
4005#  Recompute space value, after the buffer has been cropped.
4006#  0 = word, 1 = spaces, 2 = newline, 3 = paragraph.
4007sub computeSpace()
4008{
4009return 3 if ! length $refbuf;
4010my $last = substr $refbuf, -1;
4011return 0 if $last !~ /\s/;
4012return 1 if $last ne "\n";
4013return 2 if substr($refbuf, -2) ne "\n\n";
4014return 3;
4015}  # computeSpace
4016
4017#  Here are the common keywords for mail header lines.
4018#  These are in alphabetical order, so you can stick more in as you find them.
4019#  The more words we have, the more accurate the test.
4020#  Value = 1 means it might be just a "NextPart" mime header,
4021#  rather than a full-blown email header.
4022#  Value = 2 means it could be part of an English form.
4023#  Value = 4 means it's almost certainly a line in a mail header.
4024%mhWords = (
4025"action" => 2,
4026"arrival-date" => 4,
4027"content-transfer-encoding" => 1,
4028"content-type" => 1,
4029"date" => 2,
4030"delivered-to" => 4,
4031"errors-to" => 4,
4032"final-recipient" => 4,
4033"from" => 2,
4034"importance" => 4,
4035"last-attempt-date" => 4,
4036"list-id" => 4,
4037"mailing-list" => 4,
4038"message-id" => 4,
4039"mime-version" => 4,
4040"precedence" => 4,
4041"received" => 4,
4042"remote-mta" => 4,
4043"reply-to" => 4,
4044"reporting-mta" => 4,
4045"return-path" => 4,
4046"sender" => 4,
4047"status" => 2,
4048"subject" => 4,
4049"to" => 2,
4050"x-beenthere" => 4,
4051"x-loop" => 4,
4052"x-mailer" => 4,
4053"x-mailman-version" => 4,
4054"x-mimeole" => 4,
4055"x-ms-tnef-correlator" => 4,
4056"x-msmail-priority" => 4,
4057"x-priority" => 4,
4058"x-uidl" => 4,
4059);
4060
4061#  Get a filename from the user.
4062sub getFileName($$)
4063{
4064my $startName = shift;
4065my $isnew = shift;
4066input: {
4067print "Filename: ";
4068print "[$startName] " if defined $startName;
4069my $line = <STDIN>;
4070exit 0 unless defined $line;
4071stripWhite \$line;
4072if($line eq "") {
4073redo input if ! defined $startName;
4074$line = $startName;
4075} else {
4076$startName = undef;
4077$line = envLine $line;
4078print("$errorMsg\n"), redo input if length $errorMsg;
4079}  # blank line
4080if($isnew and -e $line) {
4081print "Sorry, file $line already exists.\n";
4082$startName = undef;
4083redo input;
4084}
4085return $line;
4086}
4087}  # getFileName
4088
4089#  Get a character from the tty, raw mode.
4090#  For some reason hitting ^c in this routine doesn't leave the tty
4091#  screwed up.  I don't know why not.
4092sub userChar
4093{
4094my $choices = shift;
4095input: {
4096#  Too bad there isn't a perl in-built for this.
4097#  I don't know how to do this in Windows.  Help anybody?
4098system "stty", "-icanon", "-echo";
4099my $c = getc;
4100system "stty", "icanon", "echo";
4101if(defined $choices and index($choices, $c) < 0) {
4102STDOUT->autoflush(1);
4103print "\a\b";
4104STDOUT->autoflush(0);
4105redo input;
4106}
4107return $c;
4108}
4109}  # userChar
4110
4111#  Encode html page or mail message.
4112#  No args, the html is stored in @text, as indicated by $map.
4113sub render()
4114{
4115$dol or $errorMsg = "empty file", return 0;
4116$errorMsg = "binary file", return 0 if $fmode&$binmode;
4117$errorMsg = "cannot render a directory", return 0 if $fmode&$dirmode;
4118
4119my ($i, $j, $k, $rc);
4120my $type = "";
4121$btags[$context] = $btags = [];
4122$$btags[0] = {tag => "special", fw => {} };
4123
4124#  If it starts with html, head, or comment, we'll call it html.
4125my $tbuf = fetchLine 1, 0;
4126if($tbuf =~ /^\s*<(?:!-|html|head|meta)/i) {
4127$type = "html";
4128}
4129
4130if(! length $type) {
4131#  Check for mail header.
4132#  There might be html tags inside the mail message, so we need to
4133#  look for mail headers first.
4134#  This is a very simple test - hopefully not too simple.
4135#  The first 20 non-indented lines have to look like mail header lines,
4136#  with at least half the keywords recognized.
4137$j = $k = 0;
4138for $i (1..$dol) {
4139my $line = fetchLine $i, 0;
4140last unless length $line;
4141next if $line =~ /^[ \t]/;  # indented
4142++$j;
4143next unless $line =~ /^([\w-]+):/;
4144my $word = lc $1;
4145my $v = $mhWords{$word};
4146++$k if $v;
4147if($k >= 4 and $k*2 >= $j) {
4148$type = "mail";
4149last;
4150}
4151last if $j > 20;
4152}
4153}
4154
4155if($type ne "mail") {
4156#  Put the lines together into one long string.
4157#  This is necessary to check for, and render, html.
4158$tbuf .= "\n";
4159$tbuf .= fetchLine($_, 0) . "\n" foreach (2..$dol);
4160}
4161
4162if(! length $type) {
4163#  Count the simple html tags, we need at least two per kilabyte.
4164$i = length $tbuf;
4165$j = $tbuf =~ s/(<\/?[a-zA-Z]{1,7}\d?[>\s])/$1/g;
4166$j = 0 if $j eq "";
4167$type = "html" if $j * 500 >= $i;
4168}
4169
4170if(! length $type) {
4171$errorMsg = "this doesn't look like browsable text";
4172return 0;
4173}
4174
4175$badHtml = 0;
4176$badHtml = 1 if is_url($fname);
4177$rc = renderMail(\$tbuf) if $type eq "mail";
4178$rc = renderHtml(\$tbuf) if $type eq "html";
4179return 0 unless $rc;
4180
4181pushRenderedText(\$tbuf) or return 0;
4182if($type eq "mail") {
4183$fmode &= ~$browsemode;  # so I can run the next command
4184evaluate(",bl");
4185$errorMsg = "";
4186$dot = $dol;
4187$fmode &= ~$changemode;
4188$fmode |= $browsemode;
4189}
4190apparentSize();
4191$tbuf = undef;
4192
4193if($type eq "mail" and $nat) {
4194print "$nat attachments.\n";
4195$j = 0;
4196foreach $curPart (@mimeParts) {
4197next unless $$curPart{isattach};
4198++$j;
4199print "Attachment $j\n";
4200my $filename = getFileName($$curPart{filename}, 1);
4201next if $filename eq "x";
4202if($filename eq "e") {
4203print "session " . (cxCreate(\$$curPart{data}, $$curPart{filename})+1) . "\n";
4204next;
4205}
4206if(open FH, ">$filename") {
4207binmode FH, ':raw' if $doslike;
4208print FH $$curPart{data}
4209or dieq "Cannot write to attachment file $filename, $!.";
4210close FH;
4211} else {
4212print "Cannot create attachment file $filename.\n";
4213}
4214}  # loop over attachments
4215print "attachments complete.\n";
4216}  # attachments present
4217
4218return 1;
4219}  # render
4220
4221#  Pass the reformatted text, without its last newline.
4222sub pushRenderedText($)
4223{
4224my $tbuf = shift;
4225
4226#  Replace common nonascii symbols
4227#  I don't know what this pair of bytes is for!
4228$$tbuf =~ s/\xe2\x81//g;
4229
4230#  Transliterate alternate forms of quote, apostrophe, etc.
4231#  We replace escape too, cuz it shouldn't be there anyways, and it messes up
4232#  some terminals, and some adapters.
4233#  Warning!!  Don't change anything in the range \x80-\x8f.
4234#  These codes are for internal use, and mus carry through.
4235$$tbuf =~ y/\x1b\x95\x99\x9c\x9d\x92\x93\x94\xa0\xad\x96\x97/_*'`''`' \55\55\55/;
4236
4237#  Sometimes the bullet list indicator is falsely separated from the subsequent text.
4238$$tbuf =~ s/\n\n\*\n\n/\n\n* /g;
4239
4240#  Turn nonascii math symbols into our encoded versions of math symbols,
4241#  to be handled like Greek letters etc, in a consistent manner,
4242#  by the next block of code.
4243$$tbuf =~ s/\xb0/\x82176#/;  # degrees
4244$$tbuf =~ s/\xbc/\x82188#/;  # 1 fourth
4245$$tbuf =~ s/\xbd/\x82189#/;  # 1 half
4246$$tbuf =~ s/\xbe/\x82190#/;  # 3 fourths
4247$$tbuf =~ s/\xd7/\x82215#/;  # times
4248$$tbuf =~ s/\xf7/\x82247#/;  # divided by
4249
4250if($$tbuf =~ /\x82\d+#/) {  # we have codes to expand.
4251#  These symbols are going to become words -
4252#  put spaces on either side, if the neighbors are also words.
4253$$tbuf =~ s/#\x82/# \x82/g;
4254$$tbuf =~ s/([a-zA-Z\d])(\x82\d+#)/$1 $2/g;
4255$$tbuf =~ s/(\x82\d+#)([a-zA-Z\d])/$1 $2/g;
4256$$tbuf =~ s/\x82(\d+)#/$symbolWord{$1}/ge;
4257}
4258
4259#  Now push into lines, for the editor.
4260my $j = $#text;
4261if(length $$tbuf) {
4262push @text, split "\n", $$tbuf, -1;
4263} else {
4264push @text, "";
4265}
4266$#text = $j, return 0 if lineLimit 0;
4267
4268$$btags[0]{map1} = $map;
4269$$btags[0]{dot} = $dot;
4270$$btags[0]{dol1} = $dol;
4271$dot = $dol = $#text - $j;
4272$$btags[0]{dol2} = $dol;
4273$map = $lnspace;
4274$map .= sprintf($lnformat, $j) while ++$j <= $#text;
4275$$btags[0]{map2} = $map;
4276$fmode &= ~$firstopmode;
4277$$btags[0]{fname} = $fname;
4278$$btags[0]{fmode} = $fmode;
4279$$btags[0]{labels} = $labels;
4280$fmode &= $changemode;  #  only the change bit retains its significance
4281$fmode |= $browsemode;
4282$labels = $lnspace x 26;
4283$fname .= ".browse" if length $fname;
4284return 1;
4285}  # pushRenderedText
4286
4287#  Pass in the text to be rendered, by reference.
4288#  The text is *replaced* with the rendered text.
4289sub renderHtml($)
4290{
4291my $tbuf = shift;
4292my ($i, $j, $ofs1, $ofs2, $h);  # variables
4293
4294$baseref = $fname;
4295
4296#  Ok, here's a real kludge.
4297#  The utility that converts pdf to html,
4298#  access.adobe.com/simple_form.html, has a few quirks.
4299#  One of the common problems in the translation is
4300#  the following meaningless string, that appears over and over again.
4301#  I'm removing it here.
4302$$tbuf =~ s/Had\strouble\sresolving\sdest\snear\sword\s(<[\w_;:\/'"().,-]+>\s)?action\stype\sis\sGoToR?//g;
4303
4304#  I don't expect any overstrikes, but just in case ...
4305$$tbuf =~ s/[^<>"'&\n]\10//g;
4306#  Get rid of any other backspaces.
4307$$tbuf =~ y/\10/ /;
4308
4309#  Make sure there aren't any \x80 characters to begin with.
4310$$tbuf =~ y/\x80/\x81/;
4311
4312#  As far as I can tell, href=// means href=http://
4313#  Is this documented anywhere??
4314$$tbuf =~ s,\bhref=(["']?)//\b,HREF=$1http://,ig;
4315
4316#  Find the simple window javascript functions
4317$refbuf = "";
4318$lineno = $colno = 1;
4319$lspace = 3;
4320javaFunctions($tbuf);
4321
4322#  Before we do the standard tags, get rid of the <!-- .. --> tags.
4323#  I turn them into <z ... > tags,
4324#  which will be disposed of later, along with all the
4325#  other unrecognized tags.
4326#  This is not a perfect implementation.
4327#  It will glom onto the <! inside <A HREF="xyz<!stuff">,
4328#  and it shouldn't; but niehter should you be writing such a perverse string!
4329$$tbuf =~ s/<!-*>//g;
4330$bangtag = "";
4331$$tbuf =~ s/(['"]|<(!-*)?|-*>)/processBangtag($1)/ge;
4332print "comments stripped\n" if $debug >= 6;
4333
4334$errorMsg = $intMsg, return 0 if $intFlag;
4335
4336#  A good web page encloses its javascript in comments <!-- ... -->,
4337#  But some don't, and the (sometimes quoted) < > characters
4338#  really mess us up.  Let's try to strip the javascript,
4339#  or any other script for that matter.
4340$inscript = 0;
4341$$tbuf =~ s/((?>(\( *['"])?<(\/?script[^>]*>)?|[>"']))/processScript($1)/gei;
4342print "javascript stripped\n" if $debug >= 6;
4343
4344$errorMsg = $intMsg, return 0 if $intFlag;
4345
4346#  I'm about to crack html tags with one regexp,
4347#  and that would be entirely doable, if people and web tools didn't
4348#  generate crappy html.
4349#  The biggest problem is unbalanced quotes, whence the open quote
4350#  swallows the rest of the document in one tag.
4351#  I'm goint to *try*, emphasis on try, to develop a few heuristics
4352#  that will detect some of the common misquotings.
4353#  This stuff should be written in C, a complex procedural algorithm.
4354#  But I don't have the time or inclination to translate this mess into C,
4355#  and perl is not the write language to write an algorithm like that.
4356#  I've seen examples of all of these syntactical nightmares on the web,
4357#  and others that I can't possibly code around.
4358#  Only one quote in the tag; get rid of it.  Tag is on one line.
4359$$tbuf =~ s/<(\/?[a-zA-Z][^<>'"]*)['"]([^<>'"]*)>/<$1$2>/g;
4360#  Two quotes before the last >, but not ="">, which would be ok.
4361$$tbuf =~ s/([^= <>])"">/$1">/g;
4362$$tbuf =~ s/([^= <>])''>/$1'>/g;
4363#  Missing quote before the last >   "word>
4364#  It's usually the last > where things screw up.
4365$$tbuf =~ s/["'](\w+)>/$1>/g;
4366#  &nbsp is suppose to have a semi after it - it often doesn't.
4367$$tbuf =~ s/&nbsp$/&nbsp;/gi;
4368$$tbuf =~ s/&nbsp([^;])/&nbsp;$1/gi;
4369#  Well that's all I can manage right now.
4370
4371#  Encode <font face=symbol> number characters.
4372#  This is kludgy as hell, but I want to be able to read my own math pages.
4373$$tbuf =~ s/<font +face=['"]?symbol['"]?> *([a-zA-Z]|&#\d+;) *<\/font>/metaSymbol($1)/gei;
4374
4375#  Now let's encode the tags.
4376#  Thanks to perl, we can do it in one regexp.
4377$$tbuf =~ s/<  # start the tag
4378(\/?)  # leading slash turns off the tag
4379([a-zA-Z]+)  # name of the tag
4380(  # remember the attributes
4381(?>  # fix each subexpression as you find it
4382[^>"']+  # unquoted stuff inside the tag
4383|
4384"[^"]*"  # stuff in double quotes
4385|
4386'[^']*'  # stuff in single quotes
4387)*  # as many of these chunks as you need
4388)  # return the set in $3
4389>  # close the html tag
4390/processTag($2, $1, $3)/xsge;
4391print "tags encoded\n" if $debug >= 6;
4392
4393$errorMsg = $intMsg, return 0 if $intFlag;
4394
4395#  Now we can crunch the meta chars without fear.
4396$$tbuf =~ s/&([a-zA-Z]+|#\d+);/metaChar($1)/ge;
4397print "meta chars translated\n" if $debug >= 6;
4398
4399$onloadSubmit = 0;
4400$longcut = $lperiod =  $lcomma =  $lright =  $lany = 0;
4401
4402my @olcount = ();  #  Where are we in each nested number list?
4403my @dlcount = ();  # definition lists
4404my $tagnest = ".";  #  Stack the nestable html tags, such as <LI> </LI>
4405my $tagLock = 0;  # other tags are locked out, semantically, until this one is done
4406my $tagStart;  # location of the tag currently under lock
4407#  Locking tags are currently: title, select, textarea
4408my $inhref = 0;  # in anchor reference
4409my $intitle = 0;
4410my $inselect = 0;
4411my $inta = 0;  # text area
4412my $optStart;  # start location of option
4413my $opt;  # hash of options
4414my $optCount;  # count of options
4415my $optSel;  # options selected
4416my $optSize;  # size of longest option
4417my $lastopt;  # last option, waiting for next <option>
4418my $premode = 0;  # preformatted mode
4419my $hrefTag;
4420my $hrefFile = "";
4421$inscript = 0;
4422my $intable = 0;  # in table
4423my $intabhead = 0;  # in table header
4424my $intabrow = 0;  # in table row
4425my $inform = 0;  # in form
4426
4427#  Global substitute is mighty powerful, but at this point
4428#  we really need to proceed token by token.
4429#  Going by chunks is better than shifting each character.
4430#  Extract a contiguous sequence of non-whitespace characters,
4431#  or whitespace, or a tag.
4432reformat:
4433while($$tbuf =~ /(\s+|\x80[\w\/,]+\x80|[^\s\x80]+)/gs) {
4434$errorMsg = $intMsg, return 0 if $intFlag;
4435my $chunk = $1;
4436
4437#  Should we ignore line breaks in table headers?
4438$chunk = ' ' if ($intabhead|$inhref) and $chunk =~ /^\x80br\/?0/;
4439
4440if($chunk =~ /^\s/) {  # whitespace
4441$j = $chunk =~ y/\n/\n/;  # count newlines
4442$lineno += $j;
4443next reformat if $inscript;
4444if(!$premode or $tagLock) {
4445next reformat if $lspace;
4446$chunk = " ";
4447$chunk = "\n" if $j and substr($refbuf, -4) =~ / [a-zA-Z]\.$/;
4448appendWhiteSpace($chunk, !($inhref + $tagLock));
4449#  Switch { and space, it looks prettier.
4450#  Hopefully this will never happen accept at the beginning of a link.
4451$inhref and $lspace == 1 and $refbuf =~ s/(\x80[\x85-\x8f]+{) $/ $1/;
4452next reformat;
4453}  # not preformatted
4454
4455#  Formfeed is a paragraph break.
4456$j = 2 if $chunk =~ s/\f/\n\n/g;
4457$colno = 1 if $j;
4458#  Keep the whitespace after nl, it's preformatted.
4459$chunk =~ s/.*\n//s;
4460#  Note that we make no effort to track colno or lperiod etc in preformat mode.
4461if($lspace == 3) {
4462backOverSpaces(1);
4463$j = 0;
4464}
4465if($j == 2) {
4466backOverSpaces(1);
4467$chunk = "\n\n".$chunk if $lspace < 2;
4468$chunk = "\n".$chunk if $lspace == 2;
4469$lspace = 3;
4470$j = 0;
4471}
4472if(!$j) {
4473$refbuf .= $chunk;
4474next reformat;
4475}
4476#  Now j = 1 and lspace < 3
4477backOverSpaces(1);
4478$refbuf .= "\n$chunk";
4479$lspace = 1 if ! $lspace;
4480++$lspace;
4481next reformat;
4482}  # whitespace
4483
4484if(substr($chunk, 0, 1) ne "\x80") {
4485next reformat if $inscript;
4486$chunk =~ y/{}/[]/ if $inhref;
4487$inhref = 2 if $inhref;
4488appendPrintable($chunk);
4489next reformat;
4490}  # token
4491
4492#  It's a tag
4493my ($tag, $slash, $nlcount, $attrnum) =
4494$chunk =~ /^.([a-z]+)(\/?)(\d+)(?:,(\d+))?.$/;
4495#  Unless we hear otherwise, the tag is assumed to contribute no visible
4496#  characters to the finished document.
4497$chunk = "";
4498
4499my $desc = $tagdesc{$tag};
4500$desc = " 0an unknown construct" if ! defined $desc;
4501my $nest = substr $desc, 0, 2;
4502$chunk = "\n\n" if $nest & 4;
4503substr($desc, 0, 2) = "";
4504#  Equivalent tags, as far as we're concerned
4505$tag = "script" if $tag eq "style";
4506$tag = "script" if $tag eq "noframes";
4507$tag = "pre" if $tag eq "xmp";
4508$tag = "pre" if $tag eq "address";
4509$tag = "frame" if $tag eq "iframe";
4510my $tag1 = ".$tag.";
4511my $tagplus = "$tag$slash";
4512$lineno += $nlcount, next reformat if $inscript and $tagplus ne "script/";
4513$attrnum = 0 if ! defined $attrnum;
4514#  A hidden version of the attribute number, to embed in the text.
4515$attrhidden = hideNumber $attrnum;
4516$h = undef;
4517if($attrnum) {
4518$h = $$btags[$attrnum];
4519$$h{lineno} = $lineno;  # source line number
4520}
4521my $openattr = 0;
4522my $openattrhidden;
4523my $openTag;
4524my $closeAnchor = 0;
4525$closeAnchor = 1 if $inhref and ($tag eq "a" or $tag eq "area" or $tag eq "frame" or $tag eq "input");
4526$closeAnchor = 1 if $inhref == 2 and $nest&16;
4527
4528#  Make sure we open and close things in order.
4529if($nest&1) {
4530if(!$slash) {
4531errorConvert("$desc begins in the middle of $desc")
4532if index($tagnest, $tag1) >= 0 and !($nest&2);
4533$tagnest = ".$tag.$attrnum" . $tagnest;
4534push @olcount, 0 if $tag eq "ol";
4535push @dlcount, 0 if $tag eq "dl";
4536} else {
4537$j = index $tagnest, $tag1;
4538if($j < 0) {
4539errorConvert("an unexpected closure of $desc, which was never opened");
4540} else {
4541if($j > 0) {
4542my $opendesc = substr $tagnest, 1;
4543$opendesc =~ s/\..*//;
4544$opendesc = $tagdesc{$opendesc};
4545substr($opendesc, 0, 2) = "";
4546errorConvert("$desc is closed inside $opendesc");
4547}  # bad nesting
4548++$j;
4549substr($tagnest, $j, length($tag)+1) = "";
4550$openattr = substr $tagnest, $j;
4551$openattr =~ s/\..*//;
4552substr($tagnest, $j, length($openattr)+1) = "";
4553$openattrhidden = hideNumber $openattr;
4554if($openattr) {  # record the offset of </tag>
4555$ofs2 = backOverSpaces(0);
4556$openTag = $$btags[$openattr];
4557#  Tweak offset for the } on the anchor
4558++$ofs2 if $closeAnchor and $inhref == 2;
4559$ofs2 = $tagStart if $tagLock;
4560$$openTag{ofs2} = $ofs2;
4561}
4562pop @olcount if $tag eq "ol";
4563pop @dlcount if $tag eq "dl";
4564}  # was this construct open or not
4565}  # /tag
4566}  # nestable tag
4567
4568#  retain the start and end of any tag worthy of attributes
4569if($attrnum) {
4570$ofs1 = backOverSpaces(0);
4571$ofs1 = $tagStart if $tagLock;
4572$$h{ofs1} = $ofs1;
4573}
4574
4575switch: {
4576
4577if($closeAnchor) {
4578if($inhref == 1) {  # no text in the hyperlink
4579if($refbuf =~ s/( *\x80[\x85-\x8f]+{[\s|]*)$//s) {
4580$j = $1;
4581$colno -= $j =~ y/ {/ {/;
4582} else {
4583warn "couldn't strip off the open anchor at line $lineno <" .
4584substr($refbuf, -10) . ">.";
4585}
4586$$hrefTag{tag} = "z";  # trash the anchor
4587} else {
4588$refbuf .= "}";
4589$refbuf =~ s/([ \n])}$/}$1/;
4590++$colno;
4591$j = $$hrefTag{href};
4592my $onc = $$hrefTag{onclick};
4593if($j =~ /^javascript/i or $onc) {
4594#  Let the onclick take precedence.
4595$j = $onc if defined $onc and length $onc;
4596#  See if this is a javascript function we can recognize and circumvent.
4597$i = javaWindow $j;
4598if($$hrefTag{form} and $i eq "submit") {
4599#  I'll assume this is a check and submit function.
4600#  If it only validates fields, we're ok.
4601#  If it reformats the data, we're screwed!
4602$i = $$hrefTag{ofs1};
4603$inf = substr $refbuf, $i;
4604$inf =~ s/{/</;
4605$inf =~ s/ +$//;
4606$inf =~ s/}$/ js\x80\x8f>/;
4607$$hrefTag{$ofs2} += 5;
4608if($$inform{action}) {
4609my $actscheme = is_url $$inform{action};
4610$actscheme = is_url $baseref unless $actscheme;
4611if($actscheme eq "https") {
4612$inf =~ s/ js/& secure/;
4613$$hrefTag{$ofs2} += 7;
4614}
4615if($$inform{action} =~ /^mailto:/i) {
4616$inf =~ s/ js/& mailform/;
4617$$hrefTag{$ofs2} += 9;
4618}
4619}
4620substr($refbuf, $i) = $inf;
4621# change it to an input field
4622$$hrefTag{tag} = "input";
4623$$hrefTag{type} = "submit";
4624$$hrefTag{value} = "submit";
4625$$inform{nnh}++;  # another non hidden field
4626$$inform{nif}++;
4627$$inform{lnh} = $h;  # last non hidden field
4628}
4629#  Is this just opening a new window, then calling the link?
4630elsif(length $i and $i ne "submit") {
4631#  Ok, I'll assume it's a new window with hyperlink
4632$$hrefTag{href} = $i;
4633} else {
4634print "unknown javascript ref $j\n" if $debug >= 3;
4635}
4636}
4637}
4638$lspace = computeSpace();
4639$inhref = 0;
4640last switch if $tagplus eq "a/";
4641}  # close the open anchor
4642
4643if($tagplus eq "sup") {
4644$refbuf .= '^';
4645last switch;
4646}  # sup
4647
4648if($tagplus eq "sup/" and defined $openTag) {
4649$ofs1 = $$openTag{ofs1};
4650++$ofs1;  # skip past ^
4651$j = substr $refbuf, $ofs1;
4652stripWhite \$j;
4653last switch unless length $j;
4654if($j =~ /^th|st|rd|nd$/i and
4655substr($refbuf, $ofs1-2) =~ /\d/) {
4656--$ofs1;
4657substr($refbuf, $ofs1, 1) = "";
4658last switch;
4659}
4660last switch if $j =~ /^(\d+|\*)$/;
4661if(not $allsub) {
4662last switch if $j =~ /^[a-zA-Z](?:\d{1,2})?$/;
4663}
4664(substr $refbuf, $ofs1) = "($j)";
4665last switch;
4666}  # sup/
4667
4668if($tagplus eq "sub/" and defined $openTag) {
4669$ofs1 = $$openTag{ofs1};
4670$j = substr $refbuf, $ofs1;
4671stripWhite \$j;
4672last switch unless length $j;
4673if(not $allsub) {
4674last switch if $j =~ /^\d{1,2}$/;
4675}
4676(substr $refbuf, $ofs1) = "[$j]";
4677last switch;
4678}  # sub/
4679
4680if($tagplus eq "title" and ! $tagLock and ! $intitle) {
4681$tagStart = length $refbuf;
4682$tagLock = $intitle = 1;
4683last switch;
4684}  # title
4685
4686if($tagplus eq "title/" and $intitle) {
4687$i = substr $refbuf, $tagStart;
4688substr($refbuf, $tagStart) = "";
4689$lspace = computeSpace();
4690$longcut = 0;
4691$colno = 1;
4692if(! defined $$btags[0]{title}) {
4693stripWhite \$i;
4694$$btags[0]{title} = $i if length $i;
4695}
4696$tagLock = 0;
4697$intitle = 0;
4698last switch;
4699}  # title/
4700
4701if($tagplus eq "li") {
4702$i = index $tagnest, ".ol.";
4703$j = index $tagnest, ".ul.";
4704if($i >= 0) {
4705if($j >= 0 and $j < $i) {
4706$chunk = "\n* ";
4707} else {
4708$j = ++$olcount[$#olcount];
4709$chunk = "\n$j. ";
4710}
4711} elsif($j >= 0) {
4712$chunk = "\n* ";
4713} else {
4714$chunk = "\n";
4715errorConvert("$desc appears outside of a list context");
4716}
4717last switch;
4718}  # li
4719
4720if($tagplus eq "dt" or $tagplus eq "dd") {
4721if(($i = $#dlcount) >= 0) {
4722$j = ($tag eq "dd" ? 1 : 0);
4723errorConvert("improper term->definition sequence") if $j != $dlcount[$i];
4724$dlcount[$i] ^= 1;
4725} else {
4726errorConvert("$desc is not contained in a definition list");
4727}
4728last switch;
4729}  # dt or dd
4730
4731#  The only thing good about an image is its alt description.
4732if($tagplus eq "img") {
4733$hrefFile = "" unless $inhref;
4734$j = deriveAlt($h, $hrefFile);
4735$j = "?" if $inhref and length($j) == 0;
4736if(length $j) {
4737$refbuf .= $j;
4738$inhref = 2 if $inhref;
4739$lspace = 0;
4740}
4741last switch;
4742}  # image
4743
4744if($tagplus eq "body") {
4745my $onl = $$h{onload};  # popup
4746$onl = $$h{onunload} unless $onl;  # popunder
4747next unless $onl;
4748if($onl =~ /submit[.\w]* *\(/i) {
4749$onloadSubmit = 1;
4750last switch;
4751}
4752$j = javaWindow $onl;
4753if(length $j and $j ne "submit") {
4754createHyperLink($h, $j, "onload");
4755$chunk = "";
4756last switch;
4757}  #  open another window
4758}  # body
4759
4760if($tagplus eq "bgsound") {
4761my $j = $$h{src};
4762if(defined $j and length $j) {
4763#  Someday we'll let you play this right from edbrowse, spawning playmidi
4764#  or mpg123 or whatever.  For now I'll let you grab the file yourself.
4765#  Maybe that's better anyways.
4766createHyperLink($h, $j, "Background music");
4767$chunk = "\n";
4768}
4769last switch;
4770}  # background music
4771
4772if($tag eq "base") {
4773$href = $$h{href};
4774$baseref = urlDecode $href if $href;
4775next reformat;
4776}  # base tag
4777
4778if($tagplus eq "a") {
4779if(defined $$h{name}) {
4780$refbuf .= "\x80$attrhidden*";
4781}  # name=
4782if(defined($hrefFile = $$h{href})) {
4783$$h{form} = $inform;
4784$inhref = 1;
4785$hrefTag = $h;
4786$$h{bref} = $baseref;
4787#  We preserve $lspace, despite pushing visible characters.
4788$refbuf .= "\x80$attrhidden".'{';
4789++$colno;
4790}  # href=
4791last switch;
4792}  # a
4793
4794if($tagplus eq "area") {
4795my ($alt, $href);
4796if(defined($href = $$h{href})) {
4797$j = javaWindow($href);
4798$href = $j if length $j and $j ne "submit";
4799$alt = deriveAlt($h, "");
4800$alt = $foundFunc if length $foundFunc and not defined $$h{alt};
4801$alt = "area" unless length $alt;
4802createHyperLink($h, $href, $alt);
4803}  # hyperlink
4804} # area
4805
4806if($tagplus eq "frame") {
4807my $name = $$h{name};
4808my $src = $$h{src};
4809if(defined $src) {
4810$name = "" if ! defined $name;
4811stripWhite \$name;
4812stripWhite \$src;
4813if(length $src) {
4814$$h{ofs1} = backOverSpaces(1);
4815$name = "???" if ! length $name;
4816$name =~ y/{}\n/[] / if $inhref;
4817$refbuf .= "frame ";
4818$colno += 6;
4819createHyperLink($h, $src, $name);
4820}}  # frame becomes hyperlink
4821last switch;
4822}  # frame
4823
4824$premode = 1, last switch if $tagplus eq "pre";
4825$premode = 0, last switch if $tagplus eq "pre/";
4826$inscript = 1, last switch if $tagplus eq "script";
4827$inscript = 0, last switch if $tagplus eq "script/";
4828$intable++, last switch if $tagplus eq "table";
4829$intable--, last switch if $tagplus eq "table/" and $intable;
4830if($tag eq "br") {
4831$chunk = "\n";
4832$chunk = "\n\n" if $lspace >= 2;
4833last switch;
4834}
4835$chunk = "\n--------------------------------------------------------------------------------\n\n", last switch if $tagplus eq "hr";
4836
4837if($tag eq "tr") {
4838errorConvert("$desc not inside a table") if ! $intable;
4839$slash ? do { --$intabrow if $intabrow } : ++$intabrow;
4840$chunk = "\n";
4841$intabhead = 0;
4842}  # tr
4843
4844if($tag eq "td" or $tag eq "th") {
4845errorConvert("$desc not inside a table row") if ! $intabrow;
4846$intabhead = 0;
4847$intabhead = 1 - length $slash if $tag eq "th";
4848if($slash) {
4849substr($refbuf, -1) = "" if $lspace == 1;
4850$refbuf .= "|";
4851$lspace = 1;
4852}
4853last switch;
4854}  # td or th
4855
4856if($tagplus eq "form" and ! ($inform + $tagLock)) {
4857$inform = $h;
4858$$h{bref} = $baseref;
4859$j = lc $$h{method};
4860$j = "get" if ! defined $j or ! length $j;  # default
4861if($j ne "post" and $j ne "get") {
4862errorConvert("form method $j not supported");
4863$j = "get";
4864}
4865$$h{method} = $j;
4866$$h{nnh} = 0;  # number of non hidden fields
4867$$h{nif} = 0;  # number of input fields
4868last switch;
4869}  # form
4870
4871if($tagplus eq "form/" and $inform) {
4872#  Handle the case with only one visible input field.
4873if($onloadSubmit or
4874($$inform{action} or $$inform{onchange}) and
4875$$inform{nnh} <= 1 and $$inform{nif} and (
4876$$inform{nnh} == 0 or
4877($h = $$inform{lnh}) and
4878$$h{type} ne "submit")) {
4879$refbuf .= " " if $lspace == 0;
4880$itag = {tag => "input",
4881type => "submit", form => $inform,
4882size => 2, value => "Go"};
4883push @$btags, $itag;
4884$j = hideNumber $#$btags;
4885$refbuf .= "\x80$j<Go\x80\x8f>";
4886$lspace = 0;
4887$onloadSubmit = 0;
4888}  # submit button created out of thin air
4889$inform = 0;
4890last switch;
4891}  # form/
4892
4893my $noform = "$desc is not inside a form";
4894if($tagplus eq "select" and ! $tagLock) {
4895errorConvert($noform) if ! $inform;
4896$inselect = $h;
4897$$inform{onchange} = 1 if $inform and $$h{onchange};
4898$tagLock = 1;
4899$tagStart = length $refbuf;
4900$optCount = $optSel = $optSize = 0;
4901$lastopt = undef;
4902$$h{opt} = $opt = {};
4903last switch;
4904}  # select
4905
4906if(($tagplus eq "select/" or $tagplus eq "option") and $inselect) {
4907if(defined $lastopt) {
4908$j = substr $refbuf, $optStart;
4909stripWhite \$j;
4910if(length $j) {
4911$lastopt =~ s/NoOptValue$/$j/;
4912$$opt{$j} = $lastopt;
4913if($optCount < 999) {
4914++$optCount;
4915} else {
4916errorConvert("too many options, limit 999");
4917}
4918++$optSel if substr($lastopt, 3, 1) eq '+';
4919$j = length $j;
4920$optSize = $j if $j > $optSize;
4921}}
4922
4923if($tagplus eq "select/") {
4924$inselect = 0;
4925$tagLock = 0;
4926substr($refbuf, $tagStart) = "";
4927$lspace = computeSpace();
4928$colno = 1;
4929$optCount or errorConvert("no options in the select statement");
4930my $mult = 0;  # multiple select
4931$mult = 1 if defined $$openTag{multiple};
4932my $mse = 0;  # multiple select error
4933$optSel <= 1 or $mult or
4934$mse = 1, errorConvert("multiple options preselected");
4935$$inform{nnh}++;  # another non hidden field
4936$$inform{nif}++;
4937$$inform{lnh} = $openTag;  # last non hidden field
4938#  Display selected item(s)
4939$refbuf .= "\x80$openattrhidden<";
4940my $buflen = length $refbuf;
4941$i = 0;
4942foreach (%{$opt}) {
4943$i ^= 1;
4944$j = $_, next if $i;
4945if($mult and $j =~ /,/) {
4946errorConvert("sorry, option string cannot contain a comma");
4947$$opt{$j} = "";  # can't delete from hash
4948next;
4949}
4950substr($_, 3, 1) = '-' if $mse;
4951next unless substr($_, 3, 1) eq '+';
4952$refbuf .= ',' unless substr($refbuf, -1) eq '<';
4953$refbuf .= $j;
4954}
4955#  This is really an input tag.
4956$$openTag{tag} = "input";
4957$$openTag{type} = "select";
4958$$openTag{size} =  ($mult ? 0 : $optSize);
4959$$openTag{value} = substr $refbuf, $buflen;
4960$$openTag{form} = $inform if $inform;
4961$refbuf .= "\x80\x8f>";
4962$lspace = 0;
4963$$openTag{ofs2} = length $refbuf;
4964last switch;
4965}}  # select/
4966
4967if($tagplus eq "option") {
4968if(! $inselect) {
4969errorConvert("$desc is not inside a select statement")
4970} else {
4971$lastopt = $$h{value};
4972$lastopt = "NoOptValue" unless defined $lastopt;
4973$lastopt = (defined $$h{selected} ? "+" : "-") . $lastopt;
4974$lastopt = sprintf("%03d", $optCount) . $lastopt;
4975$optStart = length $refbuf;
4976$$h{tag} = "z";
4977}  # in select or not
4978last switch;
4979}  # option
4980
4981if($tagplus eq "textarea" and ! $tagLock) {
4982errorConvert($noform) if ! $inform;
4983$inta = $h;
4984$tagLock = 1;
4985$tagStart = length $refbuf;
4986last switch;
4987}  # textarea
4988
4989if($tagplus eq "textarea/" and $inta) {
4990#  Gather up the original, unformatted text.
4991$i = "";
4992foreach $j ($$inta{lineno}..$lineno) {
4993  $i .= fetchLine($j, 0);
4994$i .= "\n";
4995}
4996#  Strip off textarea tags.
4997#  I'm not using the s suffix, textarea tags should not cover multiple lines.
4998$i =~ s/^.*<textarea[^<>]*>\n?//i;
4999$i =~ s/<\/textarea.*\n$//i;
5000$i .= "\n" if length $i and substr($i, -1) ne "\n";
5001$inta = 0;
5002$tagLock = 0;
5003substr($refbuf, $tagStart) = "";
5004my $cx = cxCreate(\$i, "");
5005$colno = 1;
5006$$openTag{cx} = $cx;
5007++$cx;
5008$$inform{nnh}++;  # another non hidden field
5009$$inform{nif}++;
5010$$inform{lnh} = $openTag;  # last non hidden field
5011$refbuf .= "\x80$openattrhidden<buffer $cx\x80\x8f>";
5012$lspace = 0;
5013#  This is really an input tag.
5014$$openTag{tag} = "input";
5015$$openTag{ofs1} = $tagStart;
5016$$openTag{ofs2} = length($refbuf);
5017$$openTag{type} = "area";
5018$$openTag{form} = $inform if $inform;
5019$j = $$openTag{rows};
5020$j = 0 if ! defined $j;
5021$j = 0 unless $j =~ /^\d+$/;
5022$$openTag{rows} = $j;
5023$j = $$openTag{cols};
5024$j = 0 if ! defined $j;
5025$j = 0 unless $j =~ /^\d+$/;
5026$$openTag{cols} = $j;
5027last switch;
5028}  # textarea/
5029
5030if($tagplus eq "input") {
5031errorConvert($noform) if ! $inform;
5032$i = lc $$h{type};
5033$i = "text" unless defined $i and length $i;
5034$i = "text" if $i eq "password";
5035#  I should verify that the input is a number,
5036#  but I'm too busy right now to implement that.
5037$i = "text" if $i eq "number";
5038#  Be on the lookout for new, advanced types.
5039index(".text.checkbox.radio.submit.image.button.reset.hidden.", ".$i.") >= 0 or
5040errorConvert("unknown input type $i");
5041$j = $$h{value};
5042$j = "" unless defined $j;
5043$$h{saveval} = $j;
5044if($i eq "radio" or $i eq "checkbox") {
5045$j = (defined $$h{checked} ? '+' : '-');
5046}
5047if($i eq "image") {
5048$i = "submit";
5049$$h{image} = 1;
5050length $j or $j = deriveAlt($h, "submit");
5051}  # submit button is represented by an icon
5052if($i ne "hidden") {
5053#  I don't think there should be newlines in the value field.
5054$j =~ y/\n/ /;
5055}
5056if($i eq "button") {
5057#  Hopefully we can turn this into a hyperlink.
5058#  If not, it's no use to us.
5059my $onc = $$h{onclick};
5060my $page = javaWindow $onc;
5061$j = "button" unless length $j;  # alt=button
5062if(not length $page and
5063$onc =~ /self\.location *= *['"]([\w._\/:,=@&?+-]+)["'] *(\+?)/i) {
5064$page = "$1$2";
5065}
5066$i = $page, $page = "" if $page eq "submit";
5067if(length $page) {
5068createHyperLink($h, $page, $j);
5069last switch;
5070}
5071}  # button
5072$$h{type} = $i;
5073$$h{value} = $j;
5074$$inform{nif}++;
5075if($i ne "hidden") {
5076$$inform{nnh}++;  # another non hidden field
5077$$inform{lnh} = $h;  # last non hidden field
5078$refbuf .= "\x80$attrhidden<$j";
5079if($i eq "submit") {
5080$refbuf .= " js" if $$inform{onsubmit} or $$h{onclick};
5081if($$inform{action}) {
5082my $actscheme = is_url $$inform{action};
5083$actscheme = is_url $baseref unless $actscheme;
5084$refbuf .= " secure" if $actscheme eq "https";
5085$refbuf .= " mailform" if $$inform{action} =~ /^mailto:/i;
5086}
5087}
5088$refbuf .= "\x80\x8f>";
5089$lspace = 0;
5090$j = $$h{maxlength};
5091$j = 0 unless defined $j and $j =~ /^\d+$/;
5092$j = 1 if $i eq "checkbox" or $i eq "radio";
5093$$h{size} = $j;
5094}
5095if($inform and ! $tagLock) {
5096$$h{form} = $inform;
5097$$h{ofs2} = length $refbuf;
5098}
5099last switch;
5100}  # input
5101
5102}  # switch on $tag
5103
5104$lineno += $nlcount;
5105next reformat unless length $chunk;
5106
5107#  Apparently the tag has forced a line break or paragraph break.
5108#  I've decided to honor this, even in preformat mode,
5109#  because that's what lynx does.
5110$colno = 1;
5111$longcut = $lperiod = $lcomma = $lright = $lany = 0;
5112backOverSpaces(1);
5113
5114#  Get rid of a previous line of pipes.
5115#  This is usually a table or table row of images -- of no use to me.
5116if($intable and $lspace < 2) {
5117$j = length($refbuf) - 1;
5118while($j >= 0 and substr($refbuf, $j, 1) =~ /[|\s]/) {
5119last if $j > 0 and substr($refbuf, $j-1, 2) eq "\n\n";
5120--$j;
5121}
5122++$j;
5123if($j < length $refbuf) {
5124substr($refbuf, $j) = "";
5125$lspace = computeSpace();
5126$colno = 1;
5127}
5128}  # end of line tag inside a table
5129
5130if($chunk eq "\n\n") {
5131next reformat if $lspace == 3;
5132$chunk = "\n" if $lspace == 2;
5133$lspace = 3;
5134$refbuf .= $chunk;
5135next reformat;
5136}  # tag paragraph
5137
5138#  It's a line break.
5139substr($chunk, 0, 1) = "" if $lspace > 1;
5140$lspace = 2 if $lspace < 2;
5141next reformat unless length $chunk;
5142$refbuf .= $chunk;
5143$chunk =~ s/^\n//;
5144next reformat unless length $chunk;
5145#  It's either a list item indicator or a horizontal line
5146$inhref = 2 if $inhref;
5147if($chunk =~ /^--/) {
5148#  Again I'm following the lynx convention.
5149#  hr implies a line break before, and aparagraph break after.
5150$lspace = 3;
5151} else {
5152$colno += length $chunk;
5153$lspace = 1;
5154}
5155}  # loop over tokens in the buffer
5156
5157$$tbuf = undef;
5158print "tags rendered\n" if $debug >= 6;
5159
5160if(length($tagnest) > 1 and $tagnest ne ".body.0.") {
5161my $opendesc = substr $tagnest, 1;
5162$opendesc =~ s/\..*//;
5163$opendesc = $tagdesc{$opendesc};
5164substr($opendesc, 0, 2) = "";
5165--$lineno;
5166errorConvert("$opendesc is not closed at EOF");
5167}
5168
5169$errorMsg = $intMsg, return 0 if $intFlag;
5170
5171$refbuf =~ s/\s+$//;  # don't need trailing blank lines
5172
5173#  In order to fit all the links on one screen, many web sites place
5174#  several links on a line.  Sometimes they are separated
5175#  by whitespace, sometimes commas, sometimes hyphens.
5176#  Sometimes they are arranged in a table, and thanks to the
5177#  table rendering software in this program, they will be pipe separated.
5178#  In any case, there is no advantage in having multiple
5179#  links on a line, and it's downright inconvenient when you want to use
5180#  the g or A command.  We introduce line breaks between links.
5181#  We use alphanum [punctuation] right brace to locate the end of a link.
5182#  We use { optional '" alphanum for the start of a link.
5183#  These aren't guaranteed to be right, but they probably are most of the time.
5184#  Let's start with link space link or link separater link
5185$refbuf =~ s/} ?[-,|]? ?(\x80[\x85-\x8f]+{['"]?\w)/}\n$1/g;
5186#  Separating punctuation at the end of the line.
5187$refbuf =~ s/^({[^{}]+} ?),$/$1/mg;
5188#  Delimiter at the start of line, before the first link.
5189$refbuf =~ s/\n[-,|] ?(\x80[\x85-\x8f]+{['"]?\w)/\n$1/g;
5190#  word delimiting punctuation space link.
5191$refbuf =~ s/([a-zA-Z]{2,}[-:|]) (\x80[\x85-\x8f]+{['"]?\w)/$1\n$2/g;
5192#  Link terminating punctuation words
5193$refbuf =~ s/(\w['"!]?}) ?[-|:] ?(\w\w)/$1\n$2/g;
5194print "links rearranged\n" if $debug >= 6;
5195
5196if(! $badHtml) {
5197#  Verify internal links.
5198intlink:
5199foreach $h (@$btags) {
5200$tag = $$h{tag};
5201next unless $tag eq "a";
5202$j = $$h{href};
5203next if ! defined $j;
5204next unless $j =~ s/^#//;
5205$refbuf .= "";  # reset match position.  Isn't there a better way??
5206while($refbuf =~ /\x80([\x85-\x8f]+)\*/g) {
5207$i = revealNumber $1;
5208next intlink if $$btags[$i]{name} eq $j;
5209}
5210$lineno = $$h{lineno};
5211errorConvert("internal link #$j not found");
5212last;
5213}  # loop
5214print "internal links verified\n" if $debug >= 6;
5215}
5216
5217#  Find the uncalled javascript functions.
5218my $fw = $$btags[0]{fw};  # pointer to function window hash
5219my $orphans = 0;
5220foreach $i (keys %$fw) {
5221$j = $$fw{$i};
5222next unless $j =~ s/^\*//;
5223$orphans = 1, $refbuf .= "\n" unless $orphans;
5224print "orphan java function $i\n" if $debug >= 3;
5225$itag = {tag => "a", href => $j, bref => $baseref};
5226push @$btags, $itag;
5227my $hn = hideNumber $#$btags;
5228$refbuf .= "\n jf: $i\x80$hn" . "{$j}";
5229#  I don't think we need to mess with ofs1 and ofs2?
5230$$itag{ofs2} = length $refbuf;
5231}
5232
5233$$tbuf = $refbuf;  # replace
5234return 1;
5235}  # renderHtml
5236
5237#  Report the first html syntax error.
5238#  $lineno tracks the line number, where text is being processed.
5239sub errorConvert($)
5240{
5241$badHtml and return;
5242my $msg = shift;
5243#  Look at the following print statement, and you'll see the little things
5244#  I try to anticipate when I write software for the blind.
5245#  The first physical line of output is for the sighted user, or the
5246#  new blind user -- but the experienced blind user doesn't need to read it.
5247#  He can read the last line of output, one keystroke in my adaptive software,
5248#  and hear exactly what he want to know.
5249print "The html text contains syntax errors.  The first one is at line\n$lineno: $msg.\n";
5250#  Put the bad line number in label e.
5251substr($labels, 4*$lnwidth, $lnwidth) =
5252sprintf $lnformat, $lineno;
5253$badHtml = 1;
5254}  # errorConvert
5255
5256#  Strip redundent stuff off the start and end of a web page,
5257#  relative to its parent.
5258sub stripChild()
5259{
5260$fmode&$browsemode or $errorMsg = $nobrowse, return 0;
5261defined $backup or $errorMsg = "no previous web page", return 0;
5262my $p_fmode = $$backup{fmode};
5263$p_fmode&$browsemode or $errorMsg = "no previous web page", return 0;
5264#  Parent and child file names should come from the same server.
5265my $p_fname = $$backup{fname};
5266my $c_fname = $fname;
5267is_url($p_fname) and is_url($c_fname) or $errorMsg = "web pages do not come from web servers", return 0;
5268$p_fname =~ s,^https?://,,i;
5269$c_fname =~ s,^https?://,,i;
5270$p_fname =~ s,/.*,,;
5271$c_fname =~ s,/.*,,;
5272$p_fname =~ s/\.browse$//;
5273$c_fname =~ s/\.browse$//;
5274$p_fname eq $c_fname or $errorMsg = "parent web page comes from a different server", return 0;
5275$$btags[0]{dol2} == $dol or $errorMsg = "web page already stripped or modified", return 0;
5276my $p_dol = $$backup{btags}[0]{dol2};
5277my $c_dol = $dol;
5278if($p_dol > 10 and $c_dol > 10) {
5279my $pb = $$backup{btags}[0]{pb};
5280if(defined $pb) {
5281evaluateSequence($pb, 0);
5282if($$btags[0]{dol2} > $dol) {
5283$fmode &= ~($changemode|$firstopmode);
5284apparentSize();
5285$$btags[0]{pb} = $pb;
5286return 1;
5287}  # successful post browse from the parent page
5288}  # attempting post browse from the parent page
5289my $p_map = $$backup{btags}[0]{map2};
5290my $c_map = $map;
5291my $start = 1;
5292my $oneout = 0;
5293while($start <= $p_dol and $start <= $c_dol) {
5294if(!sameChildLine(\$p_map, $start, \$c_map, $start)) {
5295last if $oneout;
5296$oneout = $start;
5297}
5298++$start;
5299}
5300$start = $oneout if $oneout and $start < $oneout + 5;
5301my $delcount = --$start;
5302my $p_end = $p_dol;
5303my $c_end = $c_dol;
5304while($p_end > $start and $c_end > $start) {
5305last unless sameChildLine(\$p_map, $p_end, \$c_map, $c_end);
5306++$delcount;
5307--$p_end, --$c_end;
5308}
5309if($delcount == $dol) {
5310my $ln = substr($map, $lnwidth, $lnwidth1);
5311$text[$ln] = "This web page contains no new information - you've seen it all before.";
5312print "71\n";
5313$dol = $dot = 1;
5314$labels = $lnspace x 26;
5315$fmode &= ~$firstopmode;
5316return 1;
5317}
5318if($delcount > 5) {
5319++$c_end;
5320delText($c_end, $dol)  if $c_end <= $dol;
5321delText(1, $start) if $start;
5322$labels = $lnspace x 26;
5323$fmode &= ~($changemode|$firstopmode);
5324apparentSize();
5325return 1;
5326}
5327}
5328$errorMsg = "nothing to strip";
5329return 0;
5330}  # stripChild
5331
5332sub sameChildLine($$$$)
5333{
5334my ($m1, $l1, $m2, $l2) = @_;
5335my $t1 = $text[substr($$m1, $l1*$lnwidth, $lnwidth1)];
5336my $t2 = $text[substr($$m2, $l2*$lnwidth, $lnwidth1)];
5337removeHiddenNumbers \$t1;
5338removeHiddenNumbers \$t2;
5339$t1 =~ y/a-zA-Z0-9//cd;
5340$t2 =~ y/a-zA-Z0-9//cd;
5341return ($t1 eq $t2);
5342}  # sameChildLine
5343
5344sub unstripChild()
5345{
5346$fmode&$browsemode or
5347$errorMsg = $nobrowse, return 0;
5348my $dol2 = $$btags[0]{dol2};
5349$dol2 > $dol or $errorMsg = "nothing stripped from this web page", return 0;
5350#  Backing out.
5351$map = $$btags[0]{map2};
5352$fmode &= ~$firstopmode;
5353$labels = $lnspace x 26;
5354$dot = 1;
5355$dol = $dol2;
5356apparentSize();
5357return 1;
5358}  # unstripChild
5359
5360#  Returns the index of the input field to be modified.
5361#  Sets $inf to the text of that field.
5362#  Sets $itag, $isize, and the other globals that establish an input field.
5363#  Returns 0 for no input fields on the line, -1 for some other error.
5364sub infIndex($$)
5365{
5366my ($ln, $line) = @_;
5367my ($i, $j, $idx);
5368my @fields = ();
5369my @fieldtext = ();
5370#  Here's some machinery to remember the index if there's only one
5371#  input field of the desired type.
5372my $holdInput = 0;
5373my $t = fetchLine $ln, 0;
5374#  Bug in perl mandates the use of the no-op (?=) below.
5375#  You'll see this other places in the code too.
5376#  This bug was fixed in September 2001, patch 12120.
5377while($t =~ /\x80([\x85-\x8f]+)<(.*?)(?=)\x80\x8f>/g) {
5378$j = revealNumber $1;
5379$i = $2;
5380push @fields, $j;
5381push @fieldtext, $i;
5382$itag = $$btags[$j];
5383$itype = $$itag{type};
5384next if $itype eq "area";
5385if($line =~ /^\d*\*/) {
5386if($itype eq "submit" or $itype eq "reset") {
5387$holdInput = -1 if $holdInput > 0;
5388$holdInput = $#fields+1 if $holdInput == 0;
5389}
5390} else {
5391if($itype ne "submit" and $itype ne "reset") {
5392$holdInput = -1 if $holdInput > 0;
5393$holdInput = $#fields+1 if $holdInput == 0;
5394}
5395}
5396}
5397$j = $#fields + 1;
5398if(!$j) {
5399$errorMsg = "no input fields present" if ! $inglob;
5400return 0;
5401}
5402$idx = -1;
5403$idx = $1 if $line =~ /^(\d+)/;
5404$idx = $holdInput if $holdInput > 0 and $idx < 0;
5405$idx >= 0 or $j == 1 or
5406$errorMsg = "multiple input fields, please use $icmd [1,$j]", return -1;
5407$idx = 1 if $idx < 0;
5408if($idx == 0 or $idx > $j) {
5409$errorMsg = $j > 1 ?
5410"invalid field, please use $icmd [1,$j]" :
5411"line only has one input field";
5412return -1;
5413}
5414$j = $fields[$idx-1];
5415$inf = $fieldtext[$idx-1];
5416$ifield = $idx;
5417$itagnum = $j;
5418$itag = $$btags[$j];
5419$iline = $ln;
5420$itype = $$itag{type};
5421$isize = $$itag{size};
5422$irows = $$itag{rows};
5423$icols = $$itag{cols};
5424$iwrap = $$itag{wrap};
5425$iwrap = "" if ! defined $iwrap;
5426$iwrap = lc $iwrap;
5427$iopt = $$itag{opt};
5428return $idx;
5429}  # infIndex
5430
5431#  Get status on an input field, including its options.
5432sub infStatus($)
5433{
5434my $line = shift;
5435$line =~ s/^\d*\?//;
5436$line = lc $line;
5437print $itype;
5438print "[$isize]" if $isize;
5439if($itype eq "area" and $irows and $icols) {
5440print "[${irows}x$icols";
5441print " recommended" if $iwrap eq "virtual";
5442print "]";
5443}
5444print " many" if defined $$itag{multiple};
5445print " <$inf>";
5446my $name = $$itag{name};
5447print " [$name]" if defined $name and length $name;
5448print "\n";
5449return unless $itype eq "select";
5450
5451#  Display the options in a pick list.
5452#  If a string is given, display only those options containing the string.
5453my $i = 0;
5454my @pieces = ();
5455my $j;
5456foreach my $v (%{$iopt}) {
5457$i ^= 1;
5458$j = $v, next if $i;
5459$_ = $v;
5460next unless s/^(...)[-+]//;
5461next if length $line and index(lc $j, $line) < 0;
5462push @pieces,  "$1$j\n";
5463}
5464if($#pieces < 0) {
5465print(length($line) ? "No options contain the string \"$line\"\n" :
5466"No options found\n");
5467return;
5468}
5469foreach (sort @pieces) {
5470print((substr($_, 0, 3) + 1) . ": " . substr($_, 3));
5471last if $intFlag;
5472}
5473}  # infStatus
5474
5475#  Replace an input field with new text.
5476sub infReplace($)
5477{
5478my $newtext = shift;
5479my ($i, $j, $k, $t);
5480
5481#  Sanity checks on the input.
5482$itype ne "submit" and $itype ne "reset" or
5483$errorMsg = "field is a $itype button, use * to push the button", return 0;
5484$itype ne "area" or
5485$errorMsg = "field is a text area, you must edit it from another session", return 0;
5486not defined $$itag{readonly} or
5487$errorMsg = "readonly field", return 0;
5488$newtext =~ /\n/ and
5489$errorMsg = "input field cannot contain a newline character", return 0;
5490return 0 if lineLimit 2;
5491
5492if($ifield) {
5493my $newlen = length $newtext;
5494! $isize or $newlen <= $isize or
5495$errorMsg = "input field too long, limit $isize", return 0;
5496
5497if($itype eq "checkbox" or $itype eq "radio") {
5498$newtext eq "+" or $newtext eq "-" or
5499$errorMsg = "field requires + (active) or - (inactive)", return 0;
5500$itype eq "checkbox" or $newtext eq '+' or $inf eq '-' or
5501$errorMsg = "at least one radio button must be set", return 0;
5502}
5503}  # not from reset button
5504
5505if($itype eq "select") {
5506my @opts = $newtext;
5507@opts = split(',', $newtext) if defined $$itag{multiple};
5508$newtext = "";
5509option:
5510foreach my $newopt (@opts) {
5511$newtext .= "," if length $newtext;
5512$j = $$iopt{$newopt};
5513#  If you type in the option exactly, that's grand.
5514$newtext .= $newopt, next if defined $j and length $j;
5515#  Maybe it's a menu number.
5516if($newopt =~ /^\d+$/) {
5517$j = sprintf("%03d", $newopt-1);
5518#  reverse hash lookup.
5519my $revcnt = 0;
5520my $revkey;
5521foreach (%{$iopt}) {
5522$revcnt ^= 1;
5523$revkey = $_, next if $revcnt;
5524next unless substr($_, 0, 3) eq $j;
5525$newtext .= $revkey;
5526next option;
5527}
5528} else {  # menu number conversion
5529#  See if this text is a piece of one and only one option.
5530#  Or if it is exactly one and only one option.
5531$j = lc $newopt;
5532my $matchCount = 0;
5533my $matchLevel = 0;
5534my $bestopt = "";
5535foreach $k (keys %{$iopt}) {
5536my $klow = lc $k;  # k lower case
5537next unless index($klow, $j) >= 0;
5538if($j eq $klow) {
5539$matchCount = 0, $matchLevel = 2 if $matchLevel < 2;
5540++$matchCount;
5541$bestopt = $k;
5542} else {
5543next if $matchLevel == 2;
5544$matchCount = 0, $matchLevel = 1 unless $matchLevel;
5545++$matchCount;
5546$bestopt = $k;
5547}
5548}
5549$newtext .= $bestopt, next option if $matchCount == 1;
5550$errorMsg = "$j matches more than one entry in the list", return 0 if $matchCount > 1;
5551}
5552$errorMsg = "$newopt is not an option, type i$ifield? for the list";
5553return 0;
5554}  # loop over options in the new list
5555}  # select
5556
5557#  Definitely making a change.
5558$fmode |= $firstopmode;
5559$ubackup = 1;
5560$dot = $iline;
5561
5562return 1 if $newtext eq $inf;  # no change
5563
5564#  Find and replace the text.
5565$t = fetchLine $iline, 0;
5566my $itaghidden = hideNumber $itagnum;
5567$t =~ s/\x80$itaghidden<.*?(?=)\x80\x8f>/\x80$itaghidden<$newtext\x80\x8f>/;
5568push @text, $t;
5569substr($map, $iline*$lnwidth, $lnwidth) =
5570sprintf $lnformat, $#text;
5571
5572if($itype eq "radio") {  # find and undo the other radio button
5573my $radioname = $$itag{name};
5574if(defined $radioname and length $radioname) {
5575my $form = $$itag{form};
5576lineloop:
5577foreach $k (1..$dol) {
5578$t = fetchLine $k, 0;
5579while($t =~ /\x80([\x85-\x8f]+)<\+\x80\x8f>/g) {
5580$jh = $1;
5581$j = revealNumber $1;
5582next if $j == $itagnum;  # already changed this one
5583my $h = $$btags[$j];
5584next unless $$h{form} eq $form;
5585#  Input field is part of our form.
5586next unless $$h{type} eq "radio";
5587my $name = $$h{name};
5588next unless defined $name and $name eq $radioname;
5589#  It's another radio button in our set.
5590$t =~ s/\x80$jh<\+\x80\x8f>/\x80$jh<-\x80\x8f>/;
5591push @text, $t;
5592substr($map, $k*$lnwidth, $lnwidth) =
5593sprintf $lnformat, $#text;
5594last lineloop;
5595}  # loop over input fields on this line
5596}  # loop over lines
5597}  # radio button has a name
5598}  # radio
5599
5600return 1;
5601}  # infReplace
5602
5603#  Push the submit or reset button.
5604sub infPush()
5605{
5606my $button = $itag;
5607my $buttontype = $itype;
5608$buttontype eq "submit" or $buttontype eq "reset" or
5609$errorMsg = "this is not a submit or reset button", return 0;
5610$cmd = 'b';  # this has become a browse command
5611my $formh = $$itag{form};
5612defined $formh or
5613$errorMsg = "field is not part of a form", return 0;
5614my $buttonvalue = $inf;
5615$buttonvalue =~ s/ secure$//;
5616$buttonvalue =~ s/ mailform$//;
5617$buttonvalue =~ s/ js$//;
5618my $domail = 0;  # sendmail link
5619
5620my $bref = $$formh{bref};
5621my $action = $$formh{action};
5622if(! defined $action or ! length $action) {
5623#  If no form program is specified, the default is the current url.
5624$action = $bref;
5625$action =~ s/\?.*//;
5626}
5627$domail = 1 if $action =~ s/^mailto://i;
5628#  We should check for $form{encoding}.
5629
5630my ($name, $val, $i, $j, $cx, $h, @pieces);
5631my $post = "";
5632my $origdot = $dot;
5633
5634#  Loop over all tags, keeping those in the input form.
5635$itagnum = -1;
5636foreach $h (@$btags) {
5637++$itagnum;
5638next unless $$h{tag} eq "input";
5639#  Overwrite the global input variables, so infReplace will work properly.
5640#  $itagnum is already set.
5641$itag = $h;
5642$itype = $$h{type};
5643$j = $$h{form};
5644next unless defined $j and $j eq $formh;
5645#  Input field is part of our form.
5646$iopt = $$h{opt};
5647$isize = $$h{size};
5648
5649if($itag eq $button and $itype eq "submit") {
5650$name = $$button{name};
5651if(defined $name and length $name) {
5652if($domail) {
5653$post .= "\n" if length $post;
5654} else {
5655$post .= '&' if length $post;
5656$name = urlEncode $name;
5657}
5658if($$button{image}) {
5659$post .= $domail ?
5660"$name.x=\n0\n\n$name.y=\n0\n" :
5661"$name.x=0&$name.y=0";
5662} else {
5663if(defined $buttonvalue and length $buttonvalue) {
5664if($domail) {
5665$post .= "$name=\n$buttonvalue\n";
5666} else {
5667$buttonvalue = urlEncode $buttonvalue;
5668$post .= "$name=$buttonvalue";
5669}
5670} else {
5671$post .= $domail ?
5672"$name=\nSubmit\n" :
5673"$name=Submit";
5674}
5675}
5676}
5677}  # submit button
5678
5679next if $itype eq "reset" or $itype eq "submit";
5680
5681if($itype eq "hidden") {
5682$inf = $$h{value};
5683$iline = $ifield = 0;
5684} else {
5685#  Establish the line number, field number, and field value.
5686#  This is crude and inefficient, but it doesn't happen very often.
5687findField:
5688for($iline=1; $iline<=$dol; ++$iline) {
5689$j = fetchLine $iline, 0;
5690$ifield = 0;
5691while($j =~ /\x80([\x85-\x8f]+)<(.*?)(?=)\x80\x8f>/g) {
5692$i = revealNumber $1;
5693$inf = $2;
5694++$ifield;
5695last findField if $i == $itagnum;
5696}
5697}
5698$iline <= $dol or $errorMsg = "input field $itagnum is lost", return 0;
5699}
5700
5701if($buttontype eq "submit") {
5702if($itype eq "area") {
5703$cx = $$h{cx};
5704$val = "";
5705if(defined $factive[$cx] and $dol[$cx]) {
5706#  Send all the lines of text in the secondary buffer.
5707for(my $ln=1; $ln<=$dol[$cx]; ++$ln) {
5708$val .= fetchLineContext($ln, 1, $cx);
5709next if $ln == $dol[$cx];
5710$val .= ($textAreaCR ? $eol : "\n");
5711}
5712}
5713} else {  # text area or field
5714$val = $inf;
5715if($itype eq "radio" or $itype eq "checkbox") {
5716next if $val eq '-';
5717$val = $$h{saveval};
5718#  I thought it had to say "on"; now I'm not sure.
5719$val = "on" if $itype eq "checkbox" and  ! length $val;
5720}  # radio
5721}  # text area or input field
5722#  Turn option descriptions into option codes for transmission
5723if($itype eq "select") {
5724@pieces = $val;
5725@pieces = split ',', $val if defined $$h{multiple};
5726$val = "";
5727foreach (@pieces) {
5728$val .= "," if length $val;
5729my $code = $$iopt{$_};
5730if(defined $code) {
5731$code = substr($code, 4);
5732} else {
5733$code = $_;
5734}
5735$val .= $code;
5736}  # loop over options
5737}  # select
5738
5739$name = $$h{name};
5740defined $name or $name = "";
5741if(! $domail) {
5742#  Encode punctuation marks for http transmition
5743$name = urlEncode($name);
5744$name .= '=';
5745$val = urlEncode($val);
5746}
5747if($itype eq "select" and defined $$h{multiple}) {
5748#  This is kludgy as hell.
5749#  comma has been turned into %2C
5750@pieces = split '%2C', $val;
5751foreach $val (@pieces) {
5752$post .= ($domail ? "\n" : '&') if length $post;
5753$post .= $name;
5754$post .= "\n" if $domail and length $name;
5755$post .= $val;
5756$post .= "\n" if $domail and length $val;
5757}
5758} else {
5759$post .= ($domail ? "\n" : '&') if length $post;
5760$post .= $name;
5761$post .= "\n" if $domail and length $name;
5762$post .= $val;
5763$post .= "\n" if $domail and length $val;
5764}
5765
5766} else {  # submit or reset
5767
5768next if $itype eq "hidden";
5769if($itype eq "area") {
5770$cx = $$h{cx};
5771cxReset($cx, 2);
5772$factive[$cx] = 1;
5773} else {
5774$ifield = 0;  # zero skips some of the field checks in infReplace
5775$val = $$h{value};
5776infReplace($val);
5777}  # field or text area
5778
5779}  # submit or reset
5780}  # loop over tags
5781
5782$dot = $origdot, return 1 if $buttontype eq "reset";
5783print "submit: $post\n" if $debug >= 2;
5784
5785length $action or
5786$errorMsg = "form does not specify a program to run", return 0;
5787
5788if($domail) {
5789my $subj = urlSubject(\$action);
5790$subj = "html form" unless length $subj;
5791$post = "Subject: $subj\n\n$post";
5792print "$action\n";
5793my @tolist = ($action);
5794my @atlist = ();
5795$mailToSend = "form";
5796$altattach = 0;
5797$whichMail = $localMail;
5798sendMail(\@tolist, \$post, \@atlist) or return 0;
5799print "Form has been mailed, watch for a reply.\n";
5800return 1;
5801}  # sendmail
5802
5803$line = resolveUrl($bref, $action);
5804print "* $line\n";
5805$post = ($$formh{method} eq "get" ? '?' : '*') . $post;
5806return -1, $line, $post;
5807}  # infPush
5808
5809sub renderMail($)
5810{
5811my $tbuf = shift;
5812$badenc = $bad64 = 0;
5813$fhLevel = 0;
5814$nat = 0;  # number of attachments
5815@mimeParts = ();
5816
5817#  Copy lines into @msg.
5818#  The original cleanMail routine was built upon @msg,
5819#  And when I folded it into edbrowse, I was too lazy to change it.
5820@msg = ();
5821push @msg, fetchLine($_, 0) foreach (1..$dol);
5822
5823findHeaders(0, $#msg);
5824deGreater();
5825nullForwarding();
5826--$#msg while $#msg >= 0 and
5827$msg[$#msg] !~ /[a-zA-Z0-9]/;
5828
5829#  Last chance to interrupt a browse operation
5830$errorMsg = $intMsg, return 0 if $intFlag;
5831
5832$$tbuf = "";
5833$$tbuf .= "$_\n" foreach (@msg);
5834chomp $$tbuf if length $$tbuf;
5835$$tbuf =~ y/\x92\x93\x94\xa0\xad/'`' -/;
5836return 1;
5837}  # renderMail
5838
5839#  Insert this text line between mail headers.
5840$mailBreak = "{NextMailHeader}";
5841#  Insert this text line between mime headers.
5842$mimeBreak = "{NextMimeSection}, Level";
5843#  Max lines in a "to unsubscribe" trailer?
5844$unsHorizon = 7;
5845
5846#  Hash the annoying commercials.
5847%annoy = ();
5848if(length $annoyFile) {
5849open FH, $annoyFile
5850or dieq "Cannot open file of annoying commercials $annoyFile.";
5851while(<FH>) {
5852stripWhite \$_;
5853$annoy{lc $_} = "" if length $_;
5854}
5855close FH;
5856}  # annoy
5857
5858#  Today timestamp, so old "junk" subjects can expire.
5859$junkToday = int time / (60*60*24);
5860$junkHorizon = 14;
5861$oldSubjects = 0;
5862%junkSubjects = ();
5863
5864#  Now load the junk subjects, which we aren't interested in reading.
5865if(length $junkFile) {
5866open FH, $junkFile
5867or dieq "Cannot open file of junk subjects $junkFile.";
5868while(<FH>) {
5869s/\n$//;  # don't need nl
5870($jtime = $_) =~ s/:.*//;
5871($jsubject = $_) =~ s/^\d+:\s*(.*)\s*$/$1/;
5872if($jsubject =~ /^`/) {
5873$junkSubjects{$jsubject} = $junkToday;
5874} else {
5875$oldSubjects = 1, next if $jtime < $junkToday - $junkHorizon;
5876$junkSubjects{$jsubject} = $jtime;
5877}
5878}
5879close FH;
5880}  # junkFile
5881
5882#  Add a subject to the junk list.
5883#  This updates the junk file.
5884sub markSubject ($)
5885{
5886my $s = shift;
5887die "No subject to junk." if $s eq "";
5888$junkSubjects{$s} = $junkToday;
5889if($oldSubjects) {
5890open FH, ">$junkFile"
5891or dieq "Cannot rewrite file of junk subjects $junkFile.";
5892$iskey = 0;
5893foreach (%junkSubjects) {
5894($iskey ^= 1) ?
5895($savekey = $_) :
5896print FH "$_:$savekey\n";
5897}
5898$oldSubjects = 0;
5899} else {
5900open FH, ">>$junkFile"
5901or dieq "Cannot add to file of junk subjects $junkFile.";
5902print FH "$junkToday:$s\n";
5903}
5904close FH;
5905}  # markSubject
5906
5907#  Build an array for base64 decoding.
5908{
5909my ($j, $c);
5910$c = 'A', $j = 0;
5911$b64_map[ord $c] = $j, ++$c, ++$j until $j == 26;
5912$c = 'a';
5913$b64_map[ord $c] = $j, ++$c, ++$j until $j == 52;
5914$c = '0';
5915$b64_map[ord $c] = $j, ++$c, ++$j until $j == 62;
5916$b64_map[ord '+'] = $j++;
5917$b64_map[ord '/'] = $j++;
5918}
5919
5920#  The following routine decodes the Quoted-Printable mime standard.
5921#  If one line ends in an equals sign, it must be joined to the next.
5922#  All other =xx sequences become the 8-bit value defined by hex xx.
5923#  Pass the start and end offsets -- what do you want to dequote?
5924#  Also pass the boundary, if any, and stop there.
5925sub qp_lowlevel($$$)
5926{
5927my ($start, $end, $boundary) = @_;
5928print "qp $start-$end<$boundary\n" if $debug >= 6;
5929return if $end < $start;
5930
5931my $tbuf = "";
5932foreach my $i ($start..$end) {
5933if(length $boundary) {
5934my $line = $msg[$i];
5935$line =~ s/^[ \t>]*-*//;
5936$line =~ s/-*$//;
5937$end = $i-1, last if $line eq $boundary;
5938}
5939$msg[$i] =~ s/[ \t]+$//;
5940$tbuf .= $msg[$i]."\n";
5941}
5942chomp $tbuf;
5943print "qp ends at $end, length " . length($tbuf) . "\n" if $debug >= 6;
5944
5945#  Now undo quoted-printable encoding.
5946#  Use global substitutions on the concatenated texts, it's faster.
5947my $join =
5948$tbuf =~ s/=\n//g;
5949$join = 0 unless $join;
5950print "qp joins $join lines\n" if $debug >= 6;
5951$tbuf =~ s/=([0-9a-fA-F]{2})/chr hex "$1"/ge;
5952#  Split the text back into lines, and back into @msg.
5953$end -= $join;
5954if(length $tbuf) {
5955@msg[$start.. $end] = split("\n", $tbuf, $end-$start+1);
5956} else {  # split problem
5957$msg[$start] = "";  # probably was "" already
5958}
5959#  Fill the empty spaces with blank lines
5960$msg[++$end] = '' while $join--;
5961}  # qp_lowlevel
5962
5963#  If the message includes any lines with leading > signs,
5964#  break paragraphs whenever the nesting level changes.
5965#  A change in the number of > symbols indicates a different speaker,
5966#  hence a new paragraph.
5967#  But watch out!
5968#  Some mail intermediaries cut long lines,
5969#  leaving a dangling fragment without a > nesting level.
5970#  This fragment does not represent a new paragraph;
5971#  it is part of the previous sentence.
5972#  But watch watch out!
5973#  Some people deliberately interject short comments, as in:
5974#
5975#  > I really think the Tigers are the hotest baseball team ever, really great,
5976#  Horse feathers!
5977#  > a team to look up to.
5978#
5979#  There is virtually no way to distinguish between the two cases.
5980#  Most of the time a short fragment in the midst of indented text
5981#  actually belongs to the previous line,
5982#  so I treat it as such and hope for the best.
5983sub deGreater()
5984{
5985my (@nestlev, @newmsg, $i, $j, $state, $temp);
5986my $lastsubject = "";
5987
5988#  Push some blank lines, to avoid eof conditions.
5989push @msg, '', '', '';
5990
5991#  Establish the nest level of each line.
5992foreach (@msg) {
5993$temp = $_;
5994#  Count > signs.
5995$temp =~ s/^([ \t>]*).*/$1/;
5996$j = $temp =~ y/>/>/;
5997push @nestlev, $j;
5998}
5999
6000my $lastlev = 0;
6001my $newlev;
6002for($i=0; $i<=$#msg; ++$i) {
6003$newlev = $nestlev[$i];
6004
6005#  Let's get right at the tricky part, a drop in level.
6006#  It's a fragment if the next line, or line after,
6007#  has the same nest level as the previous line.
6008if($newlev < $lastlev and
6009(($nextlev = $nestlev[$i+1]) == $lastlev or
6010$nextlev < $lastlev && $nestlev[$i+2] == $lastlev)) {
6011$temp = $msg[$i];
6012$temp =~ s/^[ \t>]*//;
6013if($j = length $temp) {
6014if($nextlev == $lastlev) {
6015$newmsg[$#newmsg] .= " $temp";
6016next;
6017}
6018#  It must be that the line after next has the previous nest level.
6019my $temp2 = $msg[$i+1];
6020$temp2 =~ s/^[ \t>]*//;
6021if($j = length $temp2) {
6022$newmsg[$#newmsg] .= " $temp $temp2";
6023++$i;
6024next;
6025}  # next line is nonempty
6026}  # this line is nonempty
6027
6028$newlev = $lastlev if $j == 0 and $nextlev == $lastlev;
6029}  # bracketed between larger nest levels
6030
6031if($msg[$i] =~ /^$mailBreak/o) {
6032$newlev = $ lastlev = 0;
6033push @newmsg, $mailBreak;
6034my ($subject, $from, $date, $reply);
6035$temp = $msg[$i];
6036($subject = $temp) =~ s/\n.*//s if $temp =~ s/.*\nSubject: //s;
6037($from = $temp) =~ s/\n.*//s if $temp =~ s/.*\nFrom: //s;
6038($date = $temp) =~ s/\n.*//s if $temp =~ s/.*\nDate: //s;
6039($reply = $temp) =~ s/\n.*//s if $temp =~ s/.*\nReply-to: //s;
6040if(defined $subject or defined $from or defined $date or defined $reply) {
6041if($#newmsg == 0) {  # Read the first header differently.
6042push @newmsg, "Subject: $subject" if defined $subject;
6043push @newmsg, "From $from" if defined $from;
6044} else {
6045$temp = "Message";
6046if(defined $from) {
6047$temp .= " from $from,";
6048if(defined $subject) {
6049$temp .= (
6050$subject eq $lastsubject ?
6051" same subject." :
6052" with subject, $subject.");
6053} else {
6054$temp .= " with no subject.";
6055}  # subject or not
6056} else {
6057if(defined $subject) {
6058$temp .= (
6059$subject eq $lastsubject ?
6060" with the same subject." :
6061" with subject, $subject.");
6062} else {
6063$temp .= " with no subject.";
6064}  # subject or not
6065}  # from line or not
6066push @newmsg, $temp;
6067}  # top header or internal
6068push @newmsg, "Mail sent $date" if defined $date;
6069push @newmsg, "Reply to $reply" if defined $reply;
6070push @newmsg, "" if $#newmsg;
6071}
6072$subject = "" if ! defined $subject;
6073$lastsubject = $subject;
6074next;
6075}  # mail header
6076
6077if($newlev != $lastlev) {
6078push @newmsg, "", "Indent $newlev.";
6079}  # change in level
6080
6081#  Strip off leading >
6082$temp = $msg[$i];
6083$temp =~ s/^[ \t]*>[ \t>]*//;
6084push @newmsg, $temp;
6085$lastlev = $newlev;
6086}  # loop over lines
6087
6088#  Push a mime separater on, to make the unsubscribe test work.
6089push @newmsg, "$mimeBreak 1";
6090
6091#  Now put the lines back into @msg, compressing blank lines.
6092#  Also, Try to remove any "unsubscribe" trailers.
6093$#msg = -1;
6094my $unslast = -1;
6095my $unscount = 0;
6096my $unstest;
6097$state = 1;
6098$j = 0;
6099
6100foreach my $line (@newmsg) {
6101#  Check for "to unsubscribe"
6102if($line =~ /^ *to unsubscribe/i) {
6103$unslast = $j if $unslast < 0 or $unscount > $unsHorizon;
6104$unscount = 0;
6105}  # unsubscribe line
6106
6107#  Check for mime/mail separater.
6108$unstest = 0;
6109$temp = lc $line;
6110$temp =~ s/\s+$//;
6111if($line =~ /^$mimeBreak \d/o or
6112$line eq $mailBreak or
6113defined $annoy{$temp} or
6114$temp =~ /^-+\s*original message\s*-+\s*$/) {
6115$line = "";  # no need to read that
6116$unstest = 1;
6117}
6118
6119$unstest = 1 if $line =~ /^$mailBreak/o;
6120
6121if($line =~ /^Indent \d/) {
6122$unstest = 1;
6123if($j > 0 and $msg[$j-1] =~ /^Indent \d/) {
6124--$j, --$#msg;
6125$unstest = 0;
6126}  # sequential indents
6127}  # indent line
6128
6129if($unstest and $unslast >= 0 and $unscount <= $unsHorizon) {
6130#  Remove unsubscribe section
6131$j = $unslast - 1;
6132$unslast = -1;
6133--$j while $j >= 0 and
6134$msg[$j] !~ /[a-zA-Z0-9]/;
6135$#msg = $j;
6136++$j;
6137$state = ($j == 0);
6138next;
6139}  # crunching unsubscribe
6140
6141if(length $line) {
6142++$unscount if $line =~ /[a-zA-Z0-9]/;
6143$msg[$j++] = $line;
6144$state = 0;
6145$state = 1 if $line =~ /^Indent \d/;
6146} elsif(! $state) {
6147$msg[$j++] = $line;
6148$state = 1;
6149}
6150}  # loop over lines
6151
6152--$j;
6153--$j if $j >= 0 and $state;
6154$#msg = $j;
6155}  # deGreater
6156
6157#  No need to read vacuous forwardings.
6158sub nullForwarding()
6159{
6160my $lf = -1;  # last forwarding
6161my $j = 0;
6162foreach my $line (@msg) {
6163if($line =~ /^Message/) {
6164$j = $lf if $lf >= 0 and $j - $lf <= 4;
6165$lf = $j;
6166}
6167$msg[$j++] = $line;
6168}  # loop over lines
6169--$j;
6170$#msg = $j;
6171}  # nullForwarding
6172
6173#  Decide whether a line, and eventually a paragraph, is an email header.
6174#  Realize that these headers might be pasted in almost anywhere.
6175#  They don't always appear at the top of the message,
6176#  or even the top of a mime section.
6177#  They may even be indented, or prepended with leading greater than signs,
6178#  if a mail message is manually forwarded, or pasted inside a larger
6179#  mail message.
6180#  We willhowever assume that a header block, once begun,
6181#  continues until we reach a blank line.
6182#  If you've manually pasted a header and body together, sorry,
6183#  but the body is going to get thrown away.
6184#  This routine is recursive, so make sure the appropriate variables are auto.
6185#  Pass the start and end offsets -- a sub-message inside the entire message.
6186sub findHeaders($$) ;
6187sub findHeaders($$)
6188{
6189my ($start, $end) = @_;
6190++$fhLevel;
6191my $startLine = -1;
6192my $boundaryCut = "";
6193my ($i, $j, $temp, $line, $state);
6194my ($reply, $from, $subject, $date);
6195my ($boundary, $content, $encoding, $encfile);
6196my $expand64 = 0;
6197
6198$line = $msg[$start];
6199if($line =~ s/^$mailBreak.*\nboundary=//so) {
6200$line =~ s/\n$//;
6201$boundaryCut = $line;
6202}
6203
6204print "findheaders$fhLevel $start-$end<$boundaryCut\n" if $debug >= 6;
6205
6206foreach $i ($start..$end) {
6207$line = $msg[$i];
6208
6209#  Strip away whitespace and leading greater than signs.
6210$line =~ s/^[ \t>]+//;
6211$line =~ s/\s+$//;
6212
6213#  Are we expanding binary data?
6214if($expand64) {
6215$expand64 = 0 if $line eq "";
6216if(length $boundaryCut and $expand64) {
6217$temp = $line;
6218$temp =~ s/^-+//;
6219$temp =~ s/-+$//;
6220$expand64 = 0 if $temp eq $boundaryCut;
6221}
6222if($expand64) {
6223my ($c, $leftover, $rem);
6224#  We don't really need the padding equals to run the algorithm properly.
6225#  Sometimes it ends in =9  I don't know what that means!
6226$line =~ s/=+9*$//;
6227if($line =~ y;+/a-zA-Z0-9;;cd && !$bad64) {
6228warn "Invalid base64 encoding at line $i";
6229$bad64 = 1;
6230}
6231for($j=0; $j < length $line; ++$j) {
6232$c = $b64_map[ord substr($line,$j,1)];
6233$rem = $j & 3;
6234if($rem == 0) {
6235$leftover = $c<<2;
6236} elsif($rem == 1) {
6237$$curPart{data} .= chr($leftover | ($c>>4));
6238$leftover = ($c & 0xf) <<4;
6239} elsif($rem == 2) {
6240$$curPart{data} .= chr($leftover | ($c>>2));
6241$leftover = ($c & 3) <<6;
6242} else {
6243$$curPart{data} .= chr($leftover | $c);
6244}
6245}
6246$msg[$i] = "";
6247next;
6248}
6249}
6250
6251#  Look for mailKeyWord:
6252#  We check for a header until we have established a boundary,
6253#  and then we only crack the header at the start of each section.
6254if(($startLine >= 0 or  # inside a header
6255! length $boundaryCut or  # no boundary yet
6256$msg[$i-1] eq "$mimeBreak $fhLevel")  # top of the mime section
6257and
6258$line =~ /^\$?[a-zA-Z][\w-]*:/) {  # keyword:
6259if($startLine < 0) {
6260$startLine = $i;
6261$state = 0;
6262$reply = $from = $subject = $date = "";
6263$boundary = $content = $encoding = $encfile = "";
6264}
6265($headKey = $line) =~ s/:.*//;
6266$headKey = lc $headKey;
6267($headVal = $line) =~ s/^[^:]+:\s*//;
6268my $headKeyType = $mhWords{$headKey};
6269$state |= $headKeyType if defined $headKeyType;
6270if($headVal ne "") {
6271$from = $headVal if $headKey eq "from";
6272$reply = $headVal if $headKey eq "reply-to";
6273$subject = $headVal if $headKey eq "subject";
6274$date = $headVal if $headKey eq "date";
6275$date = $headVal if $headKey eq "sent";
6276if($headKey eq "content-transfer-encoding") {
6277$encoding = lc $headVal;
6278}
6279if($headKey eq "content-type") {
6280$content = lc $headVal;
6281$content =~ s/;.*//;
6282}
6283}  # something after keyword:
6284}  # keyword: mail/mime header line
6285
6286if($startLine >= 0) {
6287#  boundary= is a special attribute within a mail header
6288$temp = $line;
6289if($temp =~ s/.*boundary *= *//i) {
6290($temp =~ s/^"//) ?
6291($temp =~ s/".*//) :
6292($temp =~ s/,.*//);
6293$boundary = $temp;
6294$boundary =~ s/^-+//;
6295$boundary =~ s/-+$//;
6296$boundaryCut = $boundary if length $boundary and ! length $boundaryCut;
6297}  # boundary keyword detected
6298#  filename is similarly set.
6299$temp = $line;
6300if($temp =~ s/.*(?:file)?name *= *//i) {
6301($temp =~ s/^"//) ?
6302($temp =~ s/".*//) :
6303($temp =~ s/,.*//);
6304$encfile = $temp;
6305}
6306
6307} else {
6308
6309next if ! length $boundaryCut;
6310#  Strip away leading and trailing hyphens -- helps us look for boundary
6311$line =~ s/^-+//;
6312$line =~ s/-+$//;
6313next if $line ne $boundaryCut;
6314$msg[$i] = "$mimeBreak $fhLevel";
6315next;
6316}  # body
6317
6318#  Now we know we're inside a mail header.
6319next if length $line;
6320
6321#  We've got a blank line -- that ends the header.
6322#  But it's not really a header if we've just got English keywords.
6323if($state&5) {
6324
6325if(length $boundary) {
6326#  Skip the preamble.
6327foreach $j ($i+1..$#msg) {
6328$temp = $msg[$j];
6329$temp =~ s/^-+//;
6330$temp =~ s/-+$//;
6331last if $temp eq $boundary;
6332$msg[$j] = "";
6333}
6334}
6335
6336#  Handle the various encodings.
6337$encoding = "" if length $boundary and $startLine == $start;
6338$encoding = "" if $encoding eq "8bit" or $encoding eq "7bit" or $encoding eq "binary";
6339if($encoding eq "quoted-printable") {
6340qp_lowlevel($i+1, $end, $boundaryCut);
6341$encoding = "";
6342}
6343if($encoding eq "base64") {  # binary attachment
6344$expand64 = 1;
6345$curPart = { data => "", filename => $encfile, isattach => 1};
6346push @mimeParts, $curPart;
6347++$nat;
6348$encoding = "";
6349}
6350if($encoding and !$badenc) {
6351warn "Unknown encoding at line $i $encoding";
6352$badenc = 1;
6353}
6354
6355$j = $startLine;
6356if($state & 4 or length $boundary) {
6357#  Process from/reply lines.
6358$reply = $from if ! length $reply;
6359$from = $reply if ! length $from;
6360$from =~ s/".*// if $from =~ s/^"//;
6361$from =~ s/\s*<.*>.*$//;
6362$reply =~ s/^.*<(.*)>.*$/$1/;
6363$reply = ""
6364if length $reply and (
6365$reply =~ /[\s<>]/ or $reply !~ /\w@\w/ or $reply !~ /\w\.\w/);
6366#  Strip away  re:  and  fwd:
6367while($subject =~ s/^(re|fd|fwd)[,:]\s*//i) { }
6368
6369$mailSubject = $subject,
6370$mailFrom = $from,
6371$mailReply = $reply,
6372$mailDate = $date
6373if $startLine == 0;  # top of the message
6374
6375#  Consolodate the header.
6376$line = "$mailBreak\n";
6377$line .= "Subject: $subject\n" if length $subject;
6378$line .= "From: $from\n" if length $from;
6379$line .= "Date: $date\n" if length $date;
6380$line .= "Reply-to: $reply\n" if length $reply;
6381$line .= "boundary=$boundary\n" if length $boundary;
6382$msg[$j++] = $line;
6383}  # mail header
6384$msg[$j++] = "" while $j <= $i;
6385
6386#  Decode html, if specified in the header.
6387#  Or turn it into an attachment, if anything other than plain text.
6388if(length $content and ! $expand64) {
6389if($content eq "text/html" or length $encfile) {
6390mailHtml($i+1, $end, $startLine-1, $boundaryCut, $encfile);
6391$content = "text/plain";
6392}
6393}
6394
6395}  # mail or mime header
6396
6397$startLine = -1;
6398}  # loop over lines in the message
6399
6400if(length $boundaryCut) {
6401#  Still more work to do.
6402#  Reprocess each section.
6403$boundary = "$mimeBreak $fhLevel";
6404$j = -1;
6405foreach $i ($start..$end) {
6406next unless $msg[$i] eq $boundary;
6407findHeaders($j+1, $i-1) if $j >= 0;
6408$j = $i;
6409}  # loop over lines
6410}  # bounhdary encountered
6411
6412--$fhLevel;
6413}  # findHeaders
6414
6415#  process an html mime section within a mail message.
6416sub mailHtml($$$$$)
6417{
6418my ($start, $end, $breakLine, $boundary, $filename) = @_;
6419return if $end < $start;  # should never happen
6420my ($i, $line);
6421
6422my $tbuf = "";
6423
6424foreach $i ($start..$end) {
6425$line = $msg[$i];
6426$line =~ s/^[ \t>]*//;
6427
6428#  boundary may end this section.
6429if(length $boundary) {
6430my $temp = $line;
6431$temp =~ s/^-+//;
6432$temp =~ s/-+$//;
6433$end = $i-1, last if $temp eq $boundary;
6434}
6435
6436$tbuf .= "$line\n";
6437$msg[$i] = "";
6438}  # loop over lines
6439
6440if(length $filename) {  # present as attachment
6441$curPart = { data => $tbuf, filename => $filename, isattach => 1};
6442push @mimeParts, $curPart;
6443++$nat;
6444return;
6445}
6446
6447my $cx = cxCreate(\$tbuf, $filename);
6448my $precx = $context;
6449cxSwitch $cx, 0;
6450readyUndo();
6451#  $tbuf still holds the html attachment
6452$badHtml = 1;
6453renderHtml(\$tbuf) and
6454pushRenderedText(\$tbuf);
6455cxSwitch $precx, 0;
6456
6457++$cx;
6458print "switch to session $cx for the html version of this mail\n" unless $ismc;
6459}  # mailHtml
6460
6461#  Connect to the mail server.
6462sub pop3connect($$)
6463{
6464my $remote = shift;
6465my $port = shift;
6466my $iaddr   = inet_aton($remote)               or
6467$errorMsg = "cannot locate the mail server $remote", return 0;
6468my $paddr   = sockaddr_in($port, $iaddr);
6469my $proto   = getprotobyname('tcp');
6470socket(SERVER_FH, PF_INET, SOCK_STREAM, $proto)  or
6471$errorMsg = "Cannot establish TCP socket", return 0;
6472connect(SERVER_FH, $paddr)    or
6473$errorMsg = "Cannot connect to mail server $remote", return 0;
6474SERVER_FH->autoflush(1);
6475return 1;
6476}  # pop3connect
6477
6478#  Put and get lines from the mail server.
6479sub serverPutLine ($)
6480{
6481my $line = shift;
6482if($debug >= 7) {
6483my $t = $line;
6484$t =~ s/\r\n/\n/g;
6485print "$t\n";
6486}
6487print SERVER_FH $line.$eol or
6488$errorMsg = "Could not write to the mail socket", return 0;
6489return 1;
6490}  # serverPutLine
6491
6492sub serverGetLine()
6493{
6494defined($serverLine = <SERVER_FH>)
6495or $errorMsg = "could not read from the mail socket", return 0;
6496#  strip trailing newline indicator(s)
6497$serverLine =~ s/[\r\n]+$//;
6498print "< $serverLine\n" if $debug >= 7;
6499return 1;
6500}  # serverGetLine
6501
6502sub serverClose($)
6503{
6504my $scheme = shift;
6505#  Should we make $scheme global instead of passing it around?
6506if($scheme =~ /(smtp|pop3)/i) {
6507serverPutLine("quit");
6508} elsif ($scheme =~ /ftp/i) {
6509serverPutLine "abor${eol}quit";
6510# Nope, abor is not a typo.
6511my @disposeOf = <SERVER_FH>;
6512close FDFH if defined FDFH;
6513close FLFH if defined FLFH;
6514}
6515sleep 2;
6516close SERVER_FH;
6517}  # serverClose
6518
6519# This subroutine was taken from MIME::Base64 by Gisle Aas.
6520sub encodeBase64($$$)
6521{
6522my($in, $eol, $out) = @_;
6523my $inl = length $$in;
6524#  uuencode is pretty close
6525$$out = pack 'u', $$in;
6526#  get rid of first and last char
6527$$out =~ s/^.//;
6528chop $$out;
6529#  Get rid of newlines inside
6530$$out =~ s/\n.//g;
6531#  Over to base 64 char set
6532    $$out =~ tr|` -_|AA-Za-z0-9+/|;
6533    # fix padding at the end
6534    my $padding = (3 - $inl%3) % 3;
6535    $$out =~ s/.{$padding}$/'=' x $padding/e if $padding;
6536    # break encoded string into lines of no more than 76 characters each
6537    if (length $eol) {
6538	$$out =~ s/(.{1,72})/$1$eol/g;
6539    }
6540}  # encodeBase64
6541
6542#  Read the file into memory, mime encode it,
6543#  and return the type of encoding and the encoded data.
6544#  Last three parameters are result parameters.
6545sub encodeAttachment($$$$$)
6546{
6547my ($atfile, $isMail, $res_enc, $res_type, $res_data) = @_;
6548my ($subline, $buffer, $fsize, $rsize);
6549
6550if(!$isMail) {
6551if($atfile =~ /^\d+$/) {  # edbrowse session
6552my $cx = $atfile - 1;
6553$buffer = "";
6554for(my $ln=1; $ln<=$dol[$cx]; ++$ln) {
6555$buffer .= fetchLineContext($ln, 1, $cx);
6556$buffer .= "\n" if $ln < $dol[$cx];
6557}
6558$fsize = $rsize = length $buffer;
6559} else {
6560open FH, $atfile or
6561$errorMsg = "cannot open attachment file $atfile,$!", return 0;
6562binmode FH, ':raw' if $doslike;
6563$fsize = (stat(FH))[7];
6564$rsize = 0;
6565$buffer = "";
6566$rsize = sysread(FH, $buffer, $fsize) if $fsize;
6567close FH;
6568$rsize == $fsize or
6569$errorMsg = "cannot read the contents of $atfile,$!", return 0;
6570}
6571} else {
6572$buffer = $$atfile;
6573#  We just made a copy of the mail to send; hope it wasn't too big.
6574$atfile = $mailToSend;
6575$fsize = $rsize = length $buffer;
6576$buffer =~ s/^\s*subject\s*:\s*/Subject: /i or
6577$errorMsg = "$atfile does not begin with a line `Subject: subject of your mail'", return 0;
6578$buffer =~ s/\r\n/\n/g;
6579$buffer .= "\n" if substr($buffer, -1) ne "\n";
6580$buffer .= ':';  # temporary
6581#  Extra blank line after subject.
6582$buffer =~ s/^(.*\n)(.)/$1\n$2/;
6583$buffer =~ /^(.*)\n/;
6584$subline = $1;
6585substr($buffer, -1) = "";  # get rid of :
6586length $subline < 90 or
6587$errorMsg = "subject line too long, limit 80 characters", return 0;
6588}  # primary mail message
6589
6590my $newbuf = "";
6591my ($c, $col, $j, $ctype, $enc);
6592
6593#  Count nonascii characters.
6594my $nacount = $buffer =~ y/\x80-\xff/\x80-\xff/;
6595#  Count null characters.
6596my $nullcount = $buffer =~ y/\0/\0/;
6597$nacount += $nullcount;
6598
6599if($nacount*5 > $fsize and $fsize > 20) {
6600! $isMail or
6601$errorMsg = "cannot mail the binary file $atfile - perhaps this should be an attachment?", return 0;
6602
6603encodeBase64(\$buffer, "\n", \$newbuf);
6604
6605$ctype = "application/octet-stream";
6606$ctype = "application/PostScript" if $atfile =~ /\.ps$/i;
6607$ctype = "image/jpeg" if $atfile =~ /\.jpeg$/i;
6608$ctype = "image/gif" if $atfile =~ /\.gif$/i;
6609$ctype = "audio/basic" if $atfile =~ /\.wav$/i;
6610$ctype = "video/mpeg" if $atfile =~ /\.mpeg$/i;
6611$enc = "base64";
6612$$res_type = $ctype;
6613$$res_enc = $enc;
6614$$res_data = $newbuf;
6615return 1;
6616}  # base 64 encode
6617
6618#  Use the filename of the edbrowse session to determine type.
6619if($atfile =~ /^\d+$/) {
6620$atfile = $fname[$atfile-1];
6621}
6622$ctype = "text/plain";
6623$ctype = "text/html" if $atfile =~ /\.(htm|html|shtml|asp)$/i;
6624$ctype = "text/richtext" if $atfile =~ /\.rtf$/i;
6625
6626#  Switch to unix newlines - we'll switch back to dos later.
6627$buffer =~ s/\r\n/\n/g;
6628$fsize = length $buffer;
6629
6630if($nacount*20 < $fsize) {
6631#  Looks like it's almost all ascii, but we still have to switch to qp
6632#  if the lines are too long.
6633$col = 0;
6634for($j =0; $j < $fsize; ++$j) {
6635$c = substr $buffer, $j, 1;
6636$col = 0, next if $c eq "\n";
6637++$col;
6638$nacount = $fsize, last if $col > 500 or $col > 120 and ! $isMail;
6639}
6640}
6641
6642if($nullcount or $nacount*20 >= $fsize) {
6643$buffer =~ s/([^\t\n-<>-~])/sprintf("=%02X", ord $1)/ge;
6644$buffer =~ s/ $/=20/m;
6645$buffer =~ s/\t$/=09/m;
6646#  Cut long lines, preferably after a space, but wherever we can.
6647$fsize = length $buffer;
6648my $spaceCol = 0;
6649$col = 0;
6650for($j =0; $j < $fsize; ++$j) {
6651$c = substr $buffer, $j, 1;
6652$newbuf .= $c;
6653if($c eq "\n") {  # new line, column 0
6654$spaceCol = $col = 0;
6655next;
6656}
6657++$col;
6658if($c eq " " || $c eq "\t") {
6659$spaceCol = length $newbuf;
6660}
6661next if $col < 72;
6662#  Don't break an = triplet.
6663next if $c eq '=';
6664next if substr($newbuf, -2, 1) eq '=';
6665#  If we're near the end, don't worry about it.
6666next if $j == $fsize - 1;
6667#  If newline's coming up anyways, don't force another one.
6668$c = substr $buffer, $j+1, 1;
6669next if $c eq "\n";
6670#  Ok, it's a long line, we need to cut it.
6671$spaceCol = length $newbuf if ! $spaceCol;
6672substr($newbuf, $spaceCol, 0) = "=\n";
6673$spaceCol += 2;
6674$col = length($newbuf) - $spaceCol;
6675$spaceCol = 0;
6676}
6677
6678if($isMail) {
6679#  Don't qp the subject.
6680$newbuf =~ s/^.*/$subline/;
6681}
6682
6683$enc = "quoted-printable";
6684$$res_type = $ctype;
6685$$res_enc = $enc;
6686$$res_data = $newbuf;
6687return 1;
6688}
6689
6690#  Almost all ascii, short lines, no problems.
6691$enc = ($nacount ? "8bit" : "7bit");
6692$$res_type = $ctype;
6693$$res_enc = $enc;
6694$$res_data = $buffer;
6695return 1;
6696}  # encodeAttachment
6697#  Don't forget to turn lf into crlf before you send this on to smtp.
6698
6699#  Send mail to the smtp server.
6700#  sendMail(recipients, mailtext, attachments)
6701#  Everything passed by reference.
6702sub sendMail($$$)
6703{
6704my ($tolist, $main, $atlist) = @_;
6705length $outmailserver or
6706$errorMsg = "No mail server specified - check your $home/.ebprc file", return 0;
6707
6708my $proto = 'smtp';
6709my $reply = $replyAddress[$whichMail];
6710$altattach == 0 or $altattach == $#$atlist+1 or
6711$errorMsg = 'either none or all of the attachments must be declared "alternative"', return 0;
6712
6713#  Read and/or refresh the address book.
6714if(length $addressFile and -e $addressFile) {
6715my $newtime = (stat($addressFile))[9];
6716if($newtime  > $adbooktime) {
6717%adbook = ();
6718$adbooktime = $newtime;
6719my ($alias, $email);
6720open FH, $addressFile or
6721$errorMsg = "Cannot open address book $addressFile.", return 0;
6722while(<FH>) {
6723s/\n$//;  # don't need nl
6724next if /^\s*#/;  # comment line
6725next if /^\s*$/;  # blank line
6726($alias = $_) =~ s/:.*//;
6727($email = $_) =~ s/^[^:]*:([^:]*).*/$1/;
6728$adbook{$alias} = $email;
6729}
6730close FH;
6731}
6732}
6733
6734#  Resolve recipients against address book.
6735foreach my $who (@$tolist) {
6736next if $who =~ /@/;
6737my $real = $adbook{$who};
6738if(defined $real and length $real) {
6739#  Remember that $who is a by reference variable, being in the for loop.
6740$who = $real;
6741next;
6742}
6743length $addressFile or
6744$errorMsg = "No address book specified - check your $home/.ebprc file", return 0;
6745$errorMsg = "alias $who not found in your address book";
6746return 0;
6747}
6748
6749#  Verify attachments are readable.
6750foreach my $f (@$atlist) {
6751if($f =~ /^\d+$/) {
6752my $cx = $f - 1;
6753cxCompare($cx) or return 0;
6754defined $factive[$cx] and $dol[$cx] or
6755$errorMsg = "session $f is empty - cannot atach", return 0;
6756} else {
6757-r $f or
6758$errorMsg = "cannot access attachment $f", return 0;
6759}
6760}
6761
6762my $mustmime = $#$atlist + 1;
6763my ($sendEnc, $sendType, $sendData);
6764encodeAttachment($main, 1, \$sendEnc, \$sendType, \$sendData) or return 0;
6765$mustmime = 1 if $sendEnc =~ /^q/;
6766
6767#  Boundary, for sending attachments.
6768my $sendBound = rand;
6769$sendBound =~ s/^0./nextpart-domail/;
6770
6771#  Looks good - let's get going.
6772pop3connect($outmailserver, 25) or return 0;
6773
6774normal: {
6775serverGetLine() or last normal;
6776while($serverLine =~ /^220-/) {
6777serverGetLine() or last normal;
6778}
6779$serverLine =~ /^220 / or
6780$errorMsg = "Unexpected prompt <$serverLine> at the start of the sendmail session", last normal;
6781
6782serverPutLine "helo $smtplogin" or last normal;
6783serverGetLine() or last normal;
6784$serverLine =~ /^250 / or
6785$errorMsg = "The mail server doesn't recognize $smtplogin", last normal;
6786
6787serverPutLine "mail from: $reply" or last normal;
6788serverGetLine() or last normal;
6789$serverLine =~ /^250 / or
6790$errorMsg = "mail server rejected $reply <$serverLine>", last normal;
6791
6792my $reclist = "";  # list of recipients
6793my $reccount = 0;  # count recipients
6794foreach my $f (@$tolist) {
6795$f = "\"$f\"" if $f =~ /[^\w,.@=_-]/;
6796$reclist .= ", " if $reccount;
6797++$reccount;
6798$reclist .= $f;
6799serverPutLine "rcpt to: $f" or last normal;
6800serverGetLine() or last normal;
6801$serverLine =~ /^250 / or
6802$errorMsg = "mail server rejected $f <$serverLine>", last normal;
6803}  # loop over recipients
6804
6805serverPutLine "data" or last normal;
6806serverGetLine() or last normal;
6807$serverLine =~ /^354 / or
6808$errorMsg = "The mail server is not ready to accept email data <$serverLine>", last normal;
6809serverPutLine "To: $reclist$eol" .
6810"From: $myname <$reply>$eol" .
6811"Reply-To: $myname <$reply>$eol" .
6812"Date: " . mailTimeString() . $eol .
6813"Mime-Version: 1.0" or last normal;
6814
6815#  dot alone tells smtp we're done.
6816#  Make sure there isn't a dot line in the middle of the mail.
6817$sendData =~ s/^\.$/ ./gm;
6818#  serverPutLine() routine already adds the last newline.
6819substr($sendData, -1) = "" if substr($sendData, -1) eq "\n";
6820#  smtp requires crlf.
6821$sendData =~ s/\n/\r\n/g;
6822
6823if(! $mustmime) {
6824serverPutLine "Content-Type: $sendType$eol" .
6825"Content-Transfer-Encoding: $sendEnc" or last normal;
6826} else {
6827$sendData =~ s/^(.*\r\n)// or
6828$errorMsg = "could not pull subject line out of sendData", last normal;
6829my $subline = $1;
6830serverPutLine $subline .
6831"Content-Type: multipart/" .
6832($altattach ? "alternative" : "mixed") .
6833"; boundary=$sendBound$eol" .
6834"Content-Transfer-Encoding: 7bit$eol" .
6835$eol .
6836"This message is in MIME format. Since your mail reader does not understand$eol" .
6837"this format, some or all of this message may not be legible.$eol" .
6838$eol .
6839"--$sendBound$eol" .
6840"Content-Type: $sendType$eol" .
6841"Content-Transfer-Encoding: $sendEnc" or last normal;
6842}
6843serverPutLine $sendData or last normal;
6844
6845if($mustmime) {
6846foreach my $f (@$atlist) {
6847encodeAttachment($f, 0, \$sendEnc, \$sendType, \$sendData) or last normal;
6848serverPutLine "$eol--$sendBound$eol" .
6849"Content-Type: $sendType" .
6850#  If the filename has a quote in it, forget it.
6851#  Also, suppress filename if this is an alternative presentation.
6852#  Also, suppress filename if you pulled it out of an edbrowse session.
6853(($altattach or $f =~ /"/ or $f =~ /^\d+$/) ?
6854"" : "; name=\"$f\"") . $eol .
6855"Content-Transfer-Encoding: $sendEnc$eol" or last normal;
6856
6857$sendData =~ s/^\.$/ ./gm;
6858substr($sendData, -1) = "" if substr($sendData, -1) eq "\n";
6859$sendData =~ s/\n/\r\n/g;
6860serverPutLine $sendData or last normal;
6861}  # loop over attachments
6862#  Last boundary.
6863serverPutLine "$eol--$sendBound--" or last normal;
6864}  # mime parts
6865
6866serverPutLine "." or last normal;
6867serverGetLine() or last normal;
6868$serverLine =~ /message (accepted|received)/i or
6869$serverLine =~ /^250/ or
6870$errorMsg = "Could not send mail message <$serverLine>", last normal;
6871serverClose($proto);
6872return 1;
6873}  # normal processing
6874
6875close SERVER_FH;
6876return 0;  # failed
6877}  # sendMail
6878
6879#  Send the current session as outgoing mail.
6880sub sendMailCurrent()
6881{
6882dirBrowseCheck("send mail") or return 0;
6883$fmode&$binmode and $errorMsg = "cannot mail a binary file - should this be an attachment?", return 0;
6884$dol or $errorMsg = "cannot mail an empty file", return 0;
6885$whichMail = $localMail;
6886
6887#  Gather recipients and attachments, until we reach subject:
6888my @tolist = ();
6889my @atlist = ();
6890my ($ln, $t);
6891my $subject = 0;
6892for($ln=1; $ln<=$dol; ++$ln) {
6893$t = fetchLine $ln, 0;
6894$t =~ s/^reply[ -]to:* /to:/i;
6895$t =~ s/^mailto:/to:/i;
6896push(@tolist, $1), next if $t =~ /^to\s*:\s*(.*?)[ \t]*$/i;
6897if($t =~ /^(attach|alt)\s*:\s*(.*?)[ \t]*$/i) {
6898$altattach++ if lc($1) eq "alt";
6899push(@atlist, $2);
6900next;
6901}
6902$whichMail = $1, next if $t =~ /^account\s*:\s*(\d+)[ \t]*$/i;
6903$subject = 1 if $t =~ /^subject\s*:/i;
6904last;
6905}
6906$whichMail = $smMail if length $smMail;
6907$subject or $errorMsg = "line $ln, should begin with to: attach: or subject:", return 0;
6908$#tolist >= 0 or $errorMsg = "no recipients specified - place `To: emailAddress' at the top of your file", return 0;
6909$whichMail <= $#inmailserver or $errorMsg = "account $whichMail is out of range", return 0;
6910
6911my $tbuf = "";
6912$tbuf .= fetchLine($_, 0) . "\n" foreach ($ln..$dol);
6913$mailToSend = "buffer";
6914return sendMail(\@tolist, \$tbuf, \@atlist);
6915}  # sendMailCurrent
6916
6917
6918#  runtime code starts here.
6919#  Think of this code as being inside main(){}
6920
6921if($doslike) {
6922#  Buffered I/O messes me up when this runs on NT, over telnet.
6923STDOUT->autoflush(1);
6924#  The shell doesn't expand wild cards, let's do it here.
6925my @arglist = ();
6926push @arglist, glob($_) foreach (@ARGV);
6927@ARGV=@arglist;
6928}
6929
6930if($#ARGV >= 0 and $ARGV[0] eq "-v") {
6931print "$version\n";
6932exit 0;
6933}
6934
6935#  debug option
6936if($#ARGV >= 0 and $ARGV[0] =~ /^-d(\d*)$/) {
6937$debug = (length $1 ? $1 : 4);
6938shift @ARGV;
6939}
6940
6941#  error exit option
6942if($#ARGV >= 0 and $ARGV[0] eq '-e') {
6943$errorExit = 1;
6944shift @ARGV;
6945}
6946
6947#  -m is a special flag; run as a mail client.
6948if($#ARGV >= 0 and $ARGV[0] =~ /^-(u?)m(\d+)$/) {
6949$ismc = 1;  # running as a mail client
6950my $unformat = length $1;
6951my $account = $2;
6952shift @ARGV;
6953$#inmailserver >= 0 or
6954dieq "there are no mail accounts in your .ebprc config file.";
6955$account <= $#inmailserver or
6956dieq "account designator $account is out of range.";
6957$whichMail = $account;
6958my @atfiles = ();
6959my $mailBuf = "";
6960
6961if($#ARGV == 0 and $ARGV[0] eq "-Zap") {
6962$zapmail = 1;
6963shift @ARGV;
6964}
6965
6966while($#ARGV>= 0) {
6967my $arg = pop @ARGV;
6968if($arg =~ s/^([-+])//) {
6969++$altattach if $1 eq '-';
6970open FH, $arg or
6971dieq "cannot access attachment $arg.";
6972close FH;
6973unshift @atfiles, $arg;
6974} else {
6975$mailToSend = $arg;
6976open FH, $mailToSend
6977or dieq "Cannot access send file $mailToSend.";
6978dieq "Send file $mailToSend has zero size." if -z FH;
6979binmode FH, ':raw' if $doslike;
6980my $fsize = (stat(FH))[7];
6981my $rsize = sysread(FH, $mailBuf, $fsize);
6982close FH;
6983$rsize == $fsize or
6984dieq "cannot read the contents of $mailToSend,$!";
6985last;
6986}
6987}  # loop looking for files to transmit
6988
6989if(length $mailToSend or $#atfiles >= 0) {
6990#  Mail client is in send mode.
6991length $mailToSend or
6992dieq "all arguments are attachments - you must include a plain send file.";
6993$#ARGV >= 0 or dieq "No recipients specified.";
6994sendMail(\@ARGV, \$mailBuf, \@atfiles) or dieq $errorMsg;
6995exit 0;
6996}  # send mail
6997
6998#  Move to the mail directory.
6999length $mailDir or dieq "mailbox directory not specified in your .ebprc file.";
7000chdir $mailDir or dieq "Cannot change directory to $mailDir.";
7001
7002#  Now fetch the mail and process it,
7003#  and ask the user what to do with it.
7004#  Begin with the pop3 login/password sequence.
7005my $proto = "pop3";
7006pop3connect($inmailserver[$whichMail], 110) or dieq $errorMsg;
7007serverGetLine();
7008$serverLine =~ /^\+OK /
7009or dieq "Unexpected pop3 introduction <$serverLine>.";
7010my $login = $pop3login[$whichMail];
7011my $password = $pop3password[$whichMail];
7012serverPutLine("user $login");
7013serverGetLine();
7014#  perhaps we require a password?
7015if($password) {
7016serverPutLine("pass $password");
7017serverGetLine();
7018}  # sending password
7019$serverLine =~ /^\+OK/
7020or dieq "Could not complete the pop3 login/password sequence <$serverLine>.";
7021
7022#  determine number of messages
7023serverPutLine("stat");
7024serverGetLine();
7025$serverLine =~ /^\+OK /
7026or dieq "Could not obtain status information on your mailbox <$serverLine>.";
7027my $nmsgs = substr($serverLine, 4);
7028$nmsgs =~ s/ .*//;
7029
7030if(!$nmsgs) {
7031print "No mail\n";
7032serverClose($proto);
7033exit 0;
7034}
7035
7036my $mailHuge = "Mail message consumes more than a million lines; you won't be able to use this client.";
7037print "$nmsgs messages\n";
7038if($zapmail) {
7039$nmsgs = 300 if $nmsgs > 300;
7040}
7041
7042#  Iterate over messages.
7043foreach my $m (1..$nmsgs) {
7044my ($filename, $j, $curpart, $rendered);
7045#  Is this mail automatically going somewhere else?
7046my $redirect = "";
7047my $delFlag = 0;
7048
7049if($zapmail) {
7050$delFlag = 1;
7051} else {
7052
7053#  Clear out the editor before we read in the next message.
7054foreach $j (0..$#factive) {
7055cxReset $j, 1;
7056}
7057$context = 0;  # probably not necessary
7058$factive[0] = 1;  # mail goes into session 0
7059$#text = 1;
7060$text[0] = "";
7061$text[1] = "--------------------------------------------------------------------------------";
7062
7063#  retrieve the entire mth message from the server.
7064serverPutLine("retr $m");
7065my $exact_msg = "";  # an exact copy of the email
7066# Throw first line away, it's from the pop3 server, not part of the mail.
7067serverGetLine();
7068$j = 1;
7069serverGetLine();
7070while($serverLine ne ".") {
7071$exact_msg .= "$serverLine\n";
7072lineLimit 1 and dieq $mailHuge;
7073push @text, $serverLine;
7074++$j;
7075$map .= sprintf($lnformat, $j);
7076serverGetLine();
7077}
7078$dot = $dol = $j-1;
7079
7080if(not $unformat) {
7081#  Browse the mail message for display.
7082$btags[0] = $btags = [];
7083$$btags[0] = {tag => "special", fw => {} };
7084$badHtml = 1;
7085$mailSubject = $mailFrom = $mailReply = $mailDate = "";
7086renderMail(\$rendered) and pushRenderedText(\$rendered) or
7087dieq $errorMsg;
7088$rendered = undef;  # don't need it any more
7089
7090#  Break the lines in the buffer.
7091$fmode &= ~$browsemode;  # so I can run the next command
7092evaluate(",bl");
7093$errorMsg = "";
7094$dot = $dol;
7095$fmode |= $browsemode;
7096
7097#  Let user know about attachments.
7098my $unat = 0;  # unnamed attachments
7099my $exat = 0;  # attachment already exists
7100if($nat) {
7101print "$nat attachments.\n";
7102$j = 0;
7103foreach $curPart (@mimeParts) {
7104next unless $$curPart{isattach};
7105++$j;
7106$filename = $$curPart{filename};
7107++$unat, next unless length $filename;
7108print "$j = $filename";
7109if(-e $filename) {
7110print " exists";
7111$exat = 1;
7112}
7113print "\n";
7114}
7115}
7116
7117#  Paste on the html segments.
7118foreach $j (1..$#factive) {
7119next unless $factive[$j];
7120next unless $dol[$j];
7121$map .= sprintf($lnformat, 0) if $dol;
7122if($dol > 4) {
7123$map .= sprintf($lnformat, 1);
7124$map .= sprintf($lnformat, 0);
7125}
7126$map .= substr($map[$j], $lnwidth);
7127$dot = $dol = length($map)/$lnwidth - 1;
7128}
7129foreach my $t (@text) {
7130removeHiddenNumbers \$t;
7131}
7132
7133#  See if the mail is redirected.
7134if(length $mailReply and $#fromSource >= 0) {
7135my $lowReply = lc $mailReply;
7136foreach my $j (0..$#fromSource) {
7137next unless index($lowReply, $fromSource[$j]) >= 0;
7138$redirect = $fromDest[$j];
7139last;
7140}
7141}
7142
7143#  I'm not going to redirect mail if there are unamed or existing attachments.
7144$redirect = "" if $redirect ne "x" and $unat + $exat;
7145}  # formatting the mail message
7146
7147my $dispLine = 1;
7148if(length $redirect) {
7149$delFlag = 1;
7150#  Replace % date/time fields.
7151if($redirect =~ /%[ymdhns]{2,}/) {
7152my ($ss, $nn, $hh, $dd, $mm, $yy) = localtime time;
7153$mm++;
7154$yy += 1900;
7155$redirect =~ s/%yyyy/sprintf "%4d", $yy/ge;
7156$redirect =~ s/%yy/sprintf "%02d", $yy%100/ge;
7157$redirect =~ s/%mm/sprintf "%02d", $mm/ge;
7158$redirect =~ s/%dd/sprintf "%02d", $dd/ge;
7159$redirect =~ s/%hh/sprintf "%02d", $hh/ge;
7160$redirect =~ s/%nn/sprintf "%02d", $nn/ge;
7161$redirect =~ s/%ss/sprintf "%02d", $ss/ge;
7162}
7163print "$mailReply > $redirect\n";
7164}
7165
7166#  display the next page of mail and get an input character.
7167dispInput: {
7168if(! $delFlag) {
7169print("skipped\n"), $delFlag = 1, last if  ! $unformat and length $mailSubject and defined $junkSubjects{$mailSubject};
7170foreach $j (keys %junkSubjects) {
7171next unless $j =~ /^`/;
7172my $trash = $j;
7173$trash =~ s/^`//;
7174next unless index($exact_msg, $trash) >= 0;
7175print("trash\n"), $delFlag = 1, last dispInput;
7176}
7177if($dispLine <= $dol) {
7178foreach $j (1..20) {
7179last if $dispLine > $dol;
7180my $line = fetchLine $dispLine, 0;
7181#  Don't print date and return address, but they will be recorded,
7182#  if you save the file.
7183next if $line =~ /^Mail sent /;
7184next if $line =~ /^Reply to /;
7185print "$line\n";
7186} continue { ++$dispLine; }
7187}  # display next page
7188}  # not being deleted
7189
7190getkey: {
7191my $key;
7192if($delFlag) {
7193last if $redirect eq "x";
7194$key = 'w';
7195} else {
7196#  Interactive prompt depends on whether there is more text or not.
7197STDOUT->autoflush(1);
7198print ($dispLine > $dol ? "? " : "* ");
7199STDOUT->autoflush(0);
7200
7201$key = userChar("qx? nwkuJdA");
7202print "\b\b";
7203
7204exit 0 if $key eq 'x';
7205print("quit\n"), serverClose($proto), exit 0 if $key eq 'q';
7206print("next\n"), last dispInput if $key eq 'n';
7207print("delete\n"), $delFlag = 1, last dispInput if $key eq 'd';
7208
7209if($key eq ' ') {
7210print "End of message\n" if $dispLine > $dol;
7211redo dispInput;
7212}
7213
7214if($key eq '?') {
7215print "?\tprint this help message.
7216q\tquit this program.
7217x\texit without changing anything on the mail server.
7218space\tread more of this mail message.
7219n\tmove on to the next mail message.
7220A\tadd the sender to your address book.
7221d\tdelete this message.
7222J\tjunk this subject, and delete any mail with this subject.
7223w\twrite this message to a file and delete it.
7224k\tkeep this message in a file, but don't delete it.
7225u\twrite this message unformatted to a file, and delete it.\n";
7226redo;
7227}
7228
7229if($key eq 'J') {
7230print "No subject to junk\n", redo if $mailSubject eq "";
7231print "No junkfile specified in your .ebprc file\n", redo unless length $junkFile;
7232print "junk\n";
7233markSubject($mailSubject);
7234$delFlag = 1;
7235last dispInput;
7236}  # J
7237
7238if($key eq 'A') {
7239print "No addressbook specified in your .ebprc file\n", redo unless length $addressFile;
7240print "Cannot establish sender's name and/or email address.", redo unless length $mailFrom and length $mailReply;
7241open FH, ">>$addressFile"
7242or dieq "Cannot append to $addressFile.";
7243$_ = lc $mailFrom;
7244s/\s/./g;
7245print "$_:$mailReply\n";
7246print FH "$_:$mailReply\n";
7247close FH;
7248redo;
7249}  # A
7250}  # delFlag or not
7251
7252#  At this point we're saving the mail somewhere.
7253$delFlag = 1 if $key ne 'k';
7254
7255if(length $redirect) {
7256$filename = $redirect;
7257} else {
7258$filename = getFileName(undef, 0);
7259}
7260if($filename ne "x") {
7261my $append = (-e $filename);
7262open FH, ">>$filename"
7263or dieq "Cannot create mail file $filename.";  # should not happen
7264my $fsize = 0;
7265if($key eq 'u'or $unformat) {
7266print FH $exact_msg
7267or dieq "Cannot write to mail file $filename.";
7268$fsize = length $exact_msg;
7269} else {
7270foreach $j (1..$dol) {
7271my $line = fetchLine $j, 0;
7272print FH "$line\n"
7273or dieq "Cannot write to mail file $filename.";
7274$fsize += length($line) + 1;
7275}
7276}
7277close FH;
7278print "mail saved, $fsize bytes";
7279print " appended" if $append;
7280print "\n";
7281}
7282
7283if($key ne 'u' and $redirect ne 'x') {
7284#  Ask the user about any attachments.
7285$j = 0;
7286foreach $curPart (@mimeParts) {
7287next unless $$curPart{isattach};
7288++$j;
7289$filename = $$curPart{filename};
7290if(length $redirect) {
7291print "attach $filename\n";
7292} else {
7293print "Attachment $j ";
7294$filename = getFileName($filename, 1);
7295next if $filename eq "x";
7296}
7297open FH, ">$filename"
7298or dieq "Cannot create attachment file $filename.";
7299binmode FH, ':raw' if $doslike;
7300print FH $$curPart{data}
7301or dieq "Cannot write to attachment file $filename.";
7302close FH;
7303}  # loop over attachments
7304}  # key other than 'u'
7305
7306}  # input key
7307}  # display and input
7308}  # interactive or zap
7309
7310if($delFlag) {  #  Delete the message.
7311#  Remember, it isn't really gone until you quit the session.
7312#  So if you didn't want to delete, type x to exit abruptly,
7313#  then fetch your mail again.
7314serverPutLine("dele $m");
7315serverGetLine() or
7316dieq "Sorry, you took too long; mail server hung up.";
7317$serverLine =~ /^\+OK/
7318or dieq "Unable to delete message <$serverLine>.";
7319}  # Del
7320
7321}  # loop over messages
7322
7323print "$nmsgs\n" if $zapmail;
7324
7325serverClose($proto);  # that's all folks!
7326exit 0;
7327}  # end mail client
7328
7329#  Initial set of commands.
7330if($commandList{init}) {
7331evaluateSequence($commandList{init}, $commandCheck{init});
7332}
7333
7334#  Process the command line arguments.
7335foreach my $cx (0..$#ARGV) {
7336my $file = $ARGV[$cx];
7337cxSwitch($cx, 0) if $cx;
7338$changeFname = "";
7339my $rc = readFile($file, "");
7340print "$filesize\n";
7341$rc or print $errorMsg,"\n";
7342$fname = $file;
7343$fname = $changeFname if length $changeFname;
7344$fmode &= ~($changemode|$firstopmode);
7345if($rc and $filesize and is_url($fname)) {
7346#  Go ahead and browse it.
7347$inglob = $intFlag = 0;
7348$filesize = -1;
7349$rc = evaluate("b");
7350print "$filesize\n" if $filesize >= 0;
7351$rc or print "$errorMsg\n";
7352}  # open of url
7353}  # loop over args on the command line
7354cxSwitch(0, 0) if $context;
7355print "edbrowse ready\n" if ! length $fname;
7356
7357#  get user commands.
7358while(1) {
7359my $line = readLine();
7360my $saveLine = $line;
7361$inglob = 0;
7362$intFlag = 0;
7363$filesize = -1;
7364my $rc = evaluate($line);
7365print "$filesize\n" if $filesize >= 0;
7366if(!$rc) {
7367print ((($helpall or $cmd =~ /[$showerror_cmd]/o) ? $errorMsg : "?"), "\n");
7368exit 1 if $errorExit;
7369}
7370$linePending = $saveLine;
7371if($ubackup) {
7372$lastdot = $savedot, $lastdol = $savedol;
7373$lastmap = $savemap, $lastlabels = $savelabels;
7374$ubackup = 0;
7375}
7376}   # infinite loop
7377
7378#*********************************************************************
7379#  The following code is written and maintained by Chris Brannon,
7380#  cbrannon@wilnet1.com
7381#  It manages secure http and ftp connections.
7382#*********************************************************************
7383
7384sub do_ssl($$$$)
7385{
7386# Do the SSL thing.  This takes four arguments: server, port, message,
7387# and buffer reference.
7388# <message> is a scalar containing http headers.  <buffer reference> is
7389# a reference to a scalar.  We tack each chunk of received data onto that
7390# scalar.  Thusly, we don't have to return a variable containing twenty
7391# MB of data.
7392# I borrow heavily from Karl's plain http connection code.
7393unless(eval { require Net::SSLeay }) {
7394$errorMsg = "you must have the Net::SSLeay module and OpenSSL toolkit to speak https", return 0;
7395}
7396# Should I error-check these values?  I don't know.  Probably.
7397my $server = shift;
7398my $port = shift;
7399my $message = shift;
7400my $bufref = shift;
7401my $iaddr = inet_aton($server) or
7402$errorMsg = "Cannot identify $server on the network", return 0;
7403my $paddr = sockaddr_in($port, $iaddr);
7404my $proto = getprotobyname('tcp');
7405socket(FH, PF_INET, SOCK_STREAM, $proto) or
7406$errorMsg = "cannot allocate a socket", return 0;
7407connect(FH, $paddr) or
7408$errorMsg = "cannot connect to $server on $port: $!", return 0;
7409Net::SSLeay::load_error_strings();
7410Net::SSLeay::SSLeay_add_ssl_algorithms();
7411Net::SSLeay::randomize();
7412$ctx = Net::SSLeay::CTX_new();
7413Net::SSLeay::CTX_set_options($ctx, &Net::SSLeay::OP_ALL);
7414if($ssl_verify) {
7415Net::SSLeay::CTX_load_verify_locations($ctx, $ebcerts, '') or
7416$errorMsg = "Error opening certificate file $ebcerts: $!", return 0;
7417Net::SSLeay::CTX_set_verify($ctx, &Net::SSLeay::VERIFY_PEER, 0);
7418}
7419# Should the user be warned somehow when SSL certificate verification has
7420# been turned off?  Accepting unverifiable certificates can be a security
7421# risk.  But some servers, like https://listman.redhat.com can't be verified
7422# with my certificate bundle.  So I make verification the default, but optional.
7423$ssl = Net::SSLeay::new($ctx);
7424Net::SSLeay::set_fd($ssl, fileno(FH)) or
7425$errorMsg = Net::SSLeay::ERR_error_string(Net::SSLeay::ERR_get_error()), return 0;
7426if(Net::SSLeay::connect($ssl) == -1) {
7427$errorMsg = Net::SSLeay::ERR_error_string(Net::SSLeay::ERR_get_error());
7428return 0;
7429}
7430Net::SSLeay::ssl_write_all($ssl, $message) or
7431$errorMsg = &Net::SSLeay::ERR_error_string(&Net::SSLeay::ERR_get_error()), return 0;
7432my ($chunk, $filesize, $rsize, $last_fk, $fk);
7433$fk = $last_fk = 0;
7434STDOUT->autoflush(1) if ! $doslike;
7435while($chunk = Net::SSLeay::ssl_read_all($ssl, 100000)) {
7436$$bufref .= $chunk; # how cute!
7437$rsize = length($chunk); # Is this computationally expensive??
7438$filesize += $rsize;
7439last if $rsize == 0;
7440$fk = int($filesize / 100000);
7441if($fk > $last_fk) {
7442print ".";
7443$last_fk = $fk;
7444}
7445last if($filesize >= $maxfile);
7446}
7447close(FH);
7448print "\n" if $last_fk > $fk;
7449STDOUT->autoflush(0) if ! $doslike;
7450$filesize <= $maxfile or
7451$errorMsg = "file is too large, limit 40MB", return 0;
7452defined $rsize or
7453$errorMsg = "error reading data from the socket", return 0;
7454#  There's no way to distinguish between a read error and reading a zero
7455#  length file.  I guess that's ok.
7456if(defined($filesize)) {
7457return $filesize;
7458} else {
7459return 0;
7460}
7461}   # do_ssl
7462
7463sub ftp_connect($$$$)
7464{
7465my($host, $port, $path, $bufref) = @_;
7466my $proto = 'ftp';
7467my ($tempbuf, @disposeOf);
7468my $filesize = 0;
7469my $login = "anonymous";
7470my $password = 'some-user@edbrowse.net';
7471my $dataOpen = (
7472$passive ? \&pasvOpen : \&ftpListen);
7473if($host =~ s/^([^:@]*):([^:@]*)@//) {
7474$login = $1, $password = $2;
7475}
7476# Do an ftp connect, prompting for username & password.
7477my $iaddr = inet_aton($host) or
7478$errorMsg = "cannot identify $host on the network", return 0;
7479my $paddr = sockaddr_in($port, $iaddr);
7480socket(SERVER_FH, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or
7481$errorMsg = "cannot allocate a socket", return 0;
7482connect(SERVER_FH, $paddr) or
7483$errorMsg = "cannot connect to $host", return 0;
7484SERVER_FH->autoflush(1);
7485STDOUT->autoflush(1) if !$doslike;
7486serverGetLine or serverClose($proto), return 0;
7487serverClose($proto), return 0 if ftpError($serverLine, 220, "server sent \"$serverLine\" while attempting login");
7488serverPutLine "user $login";
7489do {
7490serverGetLine or serverClose($proto), return 0;
7491}
7492while($serverLine =~ /^220/);
7493serverClose($proto), return 0 if ftpError($serverLine, 331, "invalid username: server sent $serverLine");
7494serverPutLine "pass $password";
7495serverGetLine or serverClose($proto), return 0;
7496serverClose($proto), return 0 if ftpError($serverLine, 230, "bad password: server sent $serverLine");
7497my $wmsg = "";  # welcome message
7498if($serverLine =~ s/^\s*230\s*?-\s*//) {
7499# We got a welcome message.
7500$wmsg = "$serverLine\n";
7501while(serverGetLine) {
7502last if $serverLine =~ /^\s*230\s*?[^-]/;
7503$serverLine =~ s/^\s*230\s*?-\s*//;
7504$wmsg .= "$serverLine\n";
7505}
7506}
7507$wmsg = "" unless $path eq "/";  # trash the welcome message, we're going somewhere else
7508serverPutLine "CWD $path";
7509serverGetLine or serverClose($proto), return 0;
7510if($serverLine =~ /^\s*250\s*/) {
7511if($serverLine =~ s/^\s*250\s*?-\s*//) {
7512# Its a directory-specific greeting.
7513$wmsg = "$serverLine\n";
7514while(serverGetLine) {
7515last if $serverLine =~ /^\s*250\s*?[^-]/;
7516$serverLine =~ s/^\s*250\s*?-\s*//;
7517$wmsg .= "$serverLine\n";
7518}
7519}
7520serverPutLine "type a";
7521serverGetLine or serverClose($proto), return 0;
7522serverClose($proto), return 0 if ftpError($serverLine, 200, "ASCII transfers not supported by server: received \"$serverLine\"");
7523&$dataOpen or
7524serverClose($proto), return 0;
7525serverPutLine "list";
7526serverGetLine or serverClose($proto), return 0;
7527serverClose($proto), return 0 if ftpError($serverLine, "150", "error retrieving directory listing");
7528$tempbuf = "";
7529ftpRead(\$tempbuf) or
7530serverClose($proto), return 0;
7531ParseList: {
7532serverPutLine "syst";
7533serverGetLine or serverClose($proto), return 0;
7534if($serverLine =~ /unix/i) {
7535# Good.  Let's try to htmlize this file listing.
7536my $base_filename = "ftp://$host";
7537$base_filename .= ":$port" if $port != 21;
7538$base_filename .= $path;
7539$base_filename .= '/' if $base_filename !~ m,/$,;
7540# Yah, I know.  That looks disgusting.
7541textUnmeta(\$wmsg);
7542$$bufref = "http/1.0 200 ok$eol$eol<html><head><title>Directory Listing</title>\n</head>\n<body>\n$wmsg<ul>\n";
7543my @lines = split("$eol", $tempbuf);
7544shift(@lines); # Ditch the "total: xxx" line from Unix ls
7545foreach $line (@lines) {
7546# Extract the filename and length from ls -l format
7547my @listItems = split /\s+/, $line;
7548my $mode = $listItems[0];
7549my $extracted = $listItems[$#listItems];
7550my $extlen = $listItems[$#listItems-4];
7551$extlen = "/" if $mode =~ /^d/;
7552$$bufref .= "<li><a href=\"$base_filename$extracted\">$extracted</a> $extlen\n";
7553}
7554$$bufref .= "</ul>\n</body>\n</html>\n";
7555$$bufref =~ s/<ul>\n<\/ul>/This ftp directory is empty./;
7556$filesize = length($$bufref);
7557} else {
7558$$bufref = $tempbuf; # Oh well...
7559}
7560serverPutLine "quit";
7561@disposeOf = <SERVER_FH>;
7562close SERVER_FH;
7563return 0 if !$filesize;
7564return $filesize;
7565} # ParseList
7566} else {
7567# Try to retr.  If unable, the path was bogus.
7568serverPutLine "type i";
7569serverGetLine or serverClose($proto), return 0;
7570serverClose($proto), return 0 if ftpError($serverLine, 200, "binary transfers unsupported by server: received \"$serverLine\"");
7571&$dataOpen or
7572serverClose($proto), return 0;
7573serverPutLine "retr $path";
7574serverGetLine or serverClose($proto), return 0;
7575serverClose($proto), return 0 if ftpError($serverLine, "150",
7576"the path you specified in this URL is neither a filename nor a directory");
7577# Let's read our data.
7578$filesize = ftpRead($bufref);
7579serverClose($proto);
7580# The problem is, the ftp server will get an extraneous abor command when
7581# we close connection.  I only want these sent after an error condition, to
7582# abort a transfer.
7583return 0 if !$filesize;
7584return $filesize;
7585}
7586} # ftp_connect
7587
7588sub ftpRead($)
7589{
7590# I don't like the fact that this subroutine returns 0 on error.  Seems wrong.
7591my $bufref = shift;
7592my $rsize = 0;
7593my $filesize = 0;
7594my $last_fk = 0;
7595my $chunk;
7596my $proto = 'ftp';
7597if(!$passive) {
7598my $check = '';
7599vec($check, fileno(FLFH), 1) = 1;
7600select($check, undef, undef, 10) or
7601$errorMsg = "ftp data connection timed out", $filesize =  0, goto Cleanup;
7602socket(FDFH, PF_INET, SOCK_STREAM, getprotobyname('TCP')) or
7603$errorMsg = "unable to allocate a socket", $filesize =  0, goto Cleanup;
7604accept(FDFH, FLFH);
7605shutdown(FDFH, 1);
7606}
7607while(defined($rsize = sysread(FDFH, $chunk, 100000))) {
7608print "sockread $rsize\n" if $debug >= 5;
7609$$bufref .= $chunk;
7610$filesize += $rsize;
7611last if $rsize == 0;
7612my $fk = int($filesize / 100000);
7613if($fk > $last_fk) {
7614print ".";
7615$last_fk = $fk;
7616}
7617last if $filesize >= $maxfile;
7618}
7619my $line;
7620serverGetLine or return 0;
7621close FDFH;
7622close FLFH;
7623# ignore it; it should read 226 transfer complete
7624print "\n" if $last_fk;
7625defined($rsize) or
7626$errorMsg = "error reading data from the socket", $filesize =  0, goto Cleanup;
7627$filesize <= $maxfile or
7628$errorMsg = "file to large: 4-1M limit", $filesize =  0, goto Cleanup;
7629$filesize > 0 or
7630$errorMsg = "empty file", $filesize = 0, goto Cleanup;
7631Cleanup: {
7632close FDFH if defined FDFH;
7633close FLFH if defined FLFH;
7634return $filesize;
7635}
7636} # ftpRead
7637
7638sub ftpError($$$)
7639{
7640# This subroutine matches an ftp response against an status code.  The code can
7641# be specified as a regexp.  So, 25[0-9] as the status code will let us match
7642# any of the 25X status codes.
7643# It returns 1 on error. This subroutine used to do  cleanup, but
7644# I'm leaving this job to the main ftp subroutine.
7645my ($input, $statcode, $errmsg) = @_;
7646$errorMsg = $errmsg, return 1 if($input !~ /^\s*$statcode/);
7647return 0;
7648} # ftpError
7649
7650sub pasvOpen()
7651{
7652my ($line, $packed_ftpaddr, $ipaddr, $port);
7653serverPutLine "pasv";
7654serverGetLine or return 0;
7655return 0 if ftpError($serverLine, '227', "server doesn't support passive mode: received \"$serverLine\"");
7656if($serverLine =~ /([0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+)/) {
7657$packed_ftpaddr = pack("C6", split(',', $1));
7658} else {
7659$errorMsg = "cannot make ftp data connection: server sent \"$serverLine\"";
7660return 0;
7661}
7662$ipaddr = substr($packed_ftpaddr, 0, 4);
7663$port = unpack("n", substr($packed_ftpaddr, 4, 2));
7664# The address for ftp data connections is written this way:
7665# 127,0,0,1,100,100
7666# We turn those decimal notations into a packed string of unsigned chars..
7667# The first four characters are the IP address in network byte order.  They
7668# are fed directly to sockaddr_in.  The last two are unpacked as an
7669# unsigned short in NBO.  The new decimal representation is fed to sockaddr_in.
7670my $saddr = sockaddr_in($port, $ipaddr);
7671socket(FDFH, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or
7672$errorMsg = "cannot allocate a socket", return 0;
7673connect(FDFH, $saddr) or
7674$errorMsg = "cannot open ftp data connection", return 0;
7675shutdown(FDFH, 1); # Hmm.  My server hangs if this doesn't happen...
7676return 1;
7677} # pasvOpen
7678
7679sub ftpListen {
7680my $ctladdr = (sockaddr_in(getsockname(SERVER_FH)))[1];
7681$errorMsg = "unable to obtain address of control connection; cannot initiate data connection",
7682return 0 if !$ctladdr;
7683my $port = int(rand(64510) + 1025);
7684socket(FLFH, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or
7685$errorMsg = "unable to allocate a socket", return 0;
7686my $saddr = sockaddr_in($port, $ctladdr);
7687bind(FLFH, $saddr) or
7688$errorMsg = "unable to bind socket: port $port $!", return 0;
7689listen(FLFH, 1) or
7690$errorMsg = "unable to listen on ftp data socket", return 0;
7691serverPutLine sprintf("port %d,%d,%d,%d,%d,%d", unpack('C4', $ctladdr), $port >> 8, $port & 255);
7692serverGetLine or return 0;
7693return 0 if ftpError($serverLine, '200', "ftp server does not support port command, received \"$serverLine\"");
7694shutdown(FLFH, 1);
7695return 1;
7696} # ftpListen
7697
7698
7699
7700# Cookie support
7701sub setCookies($$)
7702{
7703# We only support Netscape-style cookies presently.  The newer style will
7704# be supported eventually.  It offers some functionality that Netscape's
7705# doesn't.
7706my $cookie = shift;
7707print "incoming cookie: $cookie\n" if $debug >= 4;
7708$cookie =~ s/^Set-Cookie:\s+//i;
7709stripWhite \$cookie;
7710return unless length $cookie;
7711my $url_desc = shift;
7712my @cook_array = split(';', $cookie);
7713# We should have the cookie into its component parts.
7714my($name, $value, $path, $domain, $expires, $secure);
7715($name, $value) = split('=', shift(@cook_array), 2);
7716$value = "" unless defined $value;
7717my $crumb;
7718while($crumb = shift(@cook_array)) {
7719stripWhite \$crumb;
7720$crumb = "secure=" if $crumb =~ /^secure$/i;
7721if($crumb =~ s/^domain=//i) {
7722# Do some work on $crumb to protect us from general maliciousness/idiocy.
7723my $workingserver = $$url_desc{SERVER};
7724next unless $$url_desc{SERVER} =~ /\Q$crumb\E$/i;
7725my $l = length $crumb;
7726next if length($workingserver) > $l and substr($crumb, 0, 1) ne '.' and substr($workingserver, -$l-1, 1) ne '.';
7727# We simply won't use a bogus domain attribute.  We ignore it, and the domain
7728# eventually is set to the default.
7729#  In other words, we don't want somebody sending us a cookie for all of .com.
7730my $numfields = $crumb =~ y/././;
7731++$numfields unless substr($crumb, 0, 1) eq '.';
7732if($crumb =~ /\.(net|com|gov|edu|mil|org|int|tv|bus)$/i) {
7733# One nasty regexp, oh well.  Domain attributes from these domains may
7734# have a minimum of two fields.
7735next if $numfields < 2;
7736} else {
7737# Everyone else needs three fields.
7738next if $numfields < 3;
7739}
7740$domain = $crumb;
7741} elsif($crumb =~ s/^path=//i) {
7742$path = $crumb;
7743} elsif($crumb =~ s/^expires=?\s*//i) {
7744# Squeeze a time_t out of the date string, hopefully!  If not, then "-1"
7745# is used as the date, so the cookie will expire on quit.
7746$expires = cookieDate($crumb);
7747} elsif($crumb =~ s/^max-age=?\s*//i) {
7748if($crumb =~ /^\d+$/ and not defined $expires) {
7749$expires = time() + $crumb;
7750}
7751} elsif($crumb =~ s/^secure=//i) {
7752# SSL-only cookie.
7753$secure = 1;
7754} else {
7755print STDERR "Error processing cookie with element $crumb\n"; # debugging statement
7756}
7757}
7758$domain = $$url_desc{SERVER} if !defined $domain;
7759#  Here's what it should be, according to the standard.
7760#  $path = $$url_desc{PATH} if !defined $path;
7761#  Here's what some sites require, such as http://tdzk.net
7762#  This is apparently what Explorer does.
7763#  Oh well, who the hell needs standards;
7764#  when you're a monopoly you set the standards.
7765$path = "/" if !defined $path;
7766$expires = -1 if !defined $expires;
7767$secure = 0 if !defined $secure; # For secure cookies, it will have been set to 1
7768# Put the cookie into the master cookie jar.
7769print "into jar: $domain $path $expires $name $value\n" if $debug >= 4;
7770$cookies{$domain}{$path}{$name} =
7771{value => $value, expires => $expires, secure => $secure};
7772# If a server sends two cookies of the same path and name, with different values,
7773# the former will be quashed by the latter.  This is proper behavior.
7774if($expires != -1) { # Persistent cookie.
7775my $chmodFlag = 0;
7776$chmodFlag = 1 unless -f $ebcooks;
7777# Now, append to the cookie file.
7778# I learned the format for Netscape's cookie file from lynx's source.  Thank you, lynx team.
7779if(!open(COOKFILE, ">>$ebcooks")) {
7780warn "unable to open cookie jar for append: $!";
7781} else {
7782chmod 0600, $ebcooks if $chmodFlag;
7783print COOKFILE join("\t", $domain, 'FALSE', $path,
7784$secure ? 'TRUE' : 'FALSE', $expires, $name, $value) . "\n";
7785# A note.  Lynx defines a field, "what".  I don't know what its used
7786# for.  But all the Netscape cookie files I've seen have it set to "FALSE".
7787# so will we.
7788# Maybe its proprietary to Netscape's browser.
7789close COOKFILE;
7790}
7791}
7792}  # setCookies
7793
7794sub fetchCookies($)
7795{
7796my $url_desc = shift;
7797my $cur_scheme = $$url_desc{SCHEME};
7798my $cur_domain = $$url_desc{SERVER};
7799my $cur_path = $$url_desc{PATH};
7800my ($domainm, $pathm, $cookiem); # The 'm' at the end stands for 'match'
7801my @sendable = (); # Sendable cookie strings.
7802foreach $domainm (keys(%cookies)) {
7803next unless $cur_domain =~ /\Q$domainm\E$/i;
7804my $l = length $domainm;
7805next if length($cur_domain) > $l and substr($domainm, 0, 1) ne '.' and substr($cur_domain, -$l-1, 1) ne '.';
7806foreach $pathm (keys(%{$cookies{$domainm}})) {
7807next unless $cur_path =~ /^\Q$pathm\E/;
7808foreach $cookiem (keys(%{$cookies{$domainm}{$pathm}})) {
7809my $deref = $cookies{$domainm}{$pathm}{$cookiem};
7810# $deref is a simple hash reference, containing the description of one cookie.
7811# We can do the rest of our matching painlessly, without dereferencing
7812# the whole nasty data structure every time.
7813next if $$deref{secure} and  ($cur_scheme !~ /https/);
7814my $j = join('=', $cookiem, $$deref{value});
7815$j =~ s/=$//;
7816push @sendable, $j;
7817print "outgoing cookie: $domainm $pathm $j\n" if $debug >= 4;
7818}
7819}
7820}
7821return "" if $#sendable < 0;  # no cookies
7822my $outgoing = 'Cookie: ' . join("; ", @sendable);
7823#  Lynx prepends a cookie2: directive.
7824#  I don't know what it means or what it's for.  Here it is.
7825return "Cookie2: \$Version=1$eol$outgoing$eol";
7826}  # fetchCookies
7827
7828sub cookieDate($)
7829{
7830# This might become a general http date decoder, if we ever find
7831# places where dates are useful.
7832my $datestring = shift;
7833stripWhite \$datestring;
7834if($datestring =~ /^[a-z]{3,9},\s+(\d\d)[- ]([a-z]{3})[- ](\d\d(?:\d\d)?)\s+(\d\d):(\d\d):(\d\d)\s+GMT/i) {
7835my ($day, $mon, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
7836if(($year < 100) and ($year > 0)) {
7837# two digit.
7838if($year >= 70) {
7839$year += 1900;
7840} else { $year += 2000; }
7841}
7842$mon = $monhash{lc($mon)} - 1;
7843#  We should probably range-check all the fields,
7844#  but year is definitely necessary.
7845$year = 2035 if $year > 2035;
7846$year = 1970 if $year < 1970;
7847my $time = timegm($sec, $min, $hour, $day, $mon, $year);
7848return $time;
7849} else {
7850return -1;
7851}
7852}  # cookyDate
7853
7854sub fillJar()
7855{
7856# Initialize the cookie jar.
7857my $writeFlag = 0; # Write revised cookie file?
7858open(COOKFILE, "+<$ebcooks") or return;
7859my $inline;
7860my $nowtime = time;
7861while($inline = <COOKFILE>) {
7862chomp $inline;
7863my ($domain, $what, $path, $secure, $expires, $name, $value) = split("\t", $inline);
7864$writeFlag = 1 if exists $cookies{$domain}{$path}{$name};
7865if($expires > $nowtime) {
7866$cookies{$domain}{$path}{$name} =
7867{value => $value, secure => $secure eq "TRUE" ? 1 : 0, expires => $expires}
7868} else {
7869$writeFlag = 1;
7870} # cookies expired.
7871}  # loop reading
7872if($writeFlag) {
7873seek COOKFILE, 0, 0;
7874truncate COOKFILE, 0;
7875my ($odomain, $opath, $ocook); # o for out
7876foreach $odomain (keys(%cookies)) {
7877foreach $opath (keys(%{$cookies{$odomain}})) {
7878foreach $ocook (keys(%{$cookies{$odomain}{$opath}})) {
7879my %deref = %{$cookies{$odomain}{$opath}{$ocook}};
7880print COOKFILE join("\t", $odomain, 'FALSE', $opath,
7881$deref{secure} ? "TRUE" : "FALSE", $deref{expires}, $ocook,
7882$deref{value}), "\n" if $deref{expires} > $nowtime;
7883}
7884}
7885}
7886}  # rewrite file
7887close COOKFILE;
7888}  # fillJar
7889
7890#*********************************************************************
7891#  Web Express features.  For more on Web Express, visit
7892#  http://www.webexpresstech.com/WebXP/WebExpressTutorial.html
7893#*********************************************************************
7894
7895sub webExpress($)
7896{
7897my $line = shift;
7898stripWhite \$line;
7899$line =~ s/\s+/ /g;
7900my $code = $line;
7901$code =~ s/ .*//;
7902$line =~ s/.*? //;
7903defined $shortcut{$code} or
7904$errorMsg = "shortcut $code is not recognized", return 0;
7905my $newurl = $shortcut{$code}{url};
7906
7907#  Step through $line and extract options, indicated by -
7908#  This isn't implemented yet.
7909
7910#  Done with options, what remains is the search argument.
7911my $arg = urlEncode $line;
7912length $arg or
7913$errorMsg = "shortcut is given no search argument", return 0;
7914
7915#  Put the argument into the url.
7916$newurl =~ s/\$1/$arg/;
7917
7918return 1, $newurl, $shortcut{$code}{after};
7919}  # webExpress
7920
7921
7922#  return "x" if an error is encountered
7923sub parseWWWAuth($$)
7924{
7925my ($authline, $url_desc) = @_;
7926my ($qop_auth, $qop_authint) = (0, 1); # this would be an enum in C
7927my ($username, $pass);
7928
7929#  parse the authorization request line
7930my @challenges = ();
7931my ($attribname, $value);
7932stripWhite(\$authline);
7933$authline =~ s/^WWW-Authenticate:\s*//i;
7934while($authline =~ s/^\s*([^\s]+)\s+//) {
7935my %challenge = (authscheme => $1);
7936while($authline =~ s/^([^=]+)=//) {
7937$attribname = lc($1);
7938if($authline =~ s/^"//) {
7939# value of attribute is a quoted string.
7940$authline =~ s/^([^"]+)"((,\s*)|$)//;
7941$value = $1;
7942} else {
7943$authline =~ s/^([^,]+)((,\s*)|$)//;
7944$value = $1;
7945}
7946$challenge{$attribname} = $value;
7947}
7948if($challenge{authscheme} =~ /^digest/i && defined($challenge{qop})) {
7949my ($q, $newq) = undef;
7950my @qop = split(/\s*,\s*/, $challenge{qop});
7951foreach $q (@qop) {
7952$newq = $qop_authint, last if $q =~ /^auth-int$/i;
7953}
7954if(!defined($newq)) {
7955foreach $q (@qop) {
7956$newq = $qop_auth, last if $q =~ /^auth$/i;
7957}
7958}
7959$errorMsg = "Server sent a bad qop value in digest authentication", return "x" unless defined $newq;
7960$challenge{qop} = $newq;
7961}
7962push(@challenges, {%challenge});
7963}
7964my ($c, $used_challenge) = undef;
7965# Server may have sent multiple challenges with multiple auth schemes.
7966# Spec says that we use the strongest scheme supported by the server.
7967foreach $c (@challenges) {
7968$used_challenge = $c, last if $$c{authscheme} =~ /^Digest$/i;
7969}
7970if(!defined($used_challenge)) {
7971foreach $c (@challenges) {
7972$used_challenge = $c if($$c{authscheme} =~ /Basic/);
7973}
7974}
7975$errorMsg = "no usable challenges were found", return "x" unless defined $used_challenge;
7976if($$used_challenge{authscheme} =~ /Basic/i) {
7977($username, $pass) = getUserPass($$url_desc{SERVER} . "\x01" . $$url_desc{PORT} . "\x01" . $$used_challenge{realm});
7978return "x" if $username eq "x";
7979my $do64x = "$username:$pass";
7980my $do64y = "";
7981encodeBase64(\$do64x, "", \$do64y);
7982return "Authorization: Basic $do64y$eol";
7983}
7984else { # Not Basic, must be Digest.
7985unless(eval { require Digest::MD5 }) {
7986$errorMsg = "You need to download the Digest::MD5 module from CPAN to do digest authentication.", return "x";
7987}
7988$errorMsg = "Unsupported algorithm for digest authentication", return "x" if(defined($$used_challenge{algorithm}) && $$used_challenge{algorithm} !~ /^md5$/i);
7989$errorMsg = "unable to perform digest authentication", return "x" if(!defined($$used_challenge{realm})
7990|| !defined($$used_challenge{nonce}));
7991($username, $pass) = getUserPass($$url_desc{SERVER} . "\x01" . $$url_desc{PORT} . "\x01" . $$used_challenge{realm});
7992return "x" if $username eq "x";
7993srand(time());
7994my $nc = "00000001";
7995my $cnonce = sprintf("%08x%08x", int(rand(0xffffffff)), int(rand(0xffffffff)));
7996# pseudorandoms are fine here.  The cnonce is used to thwart chosen plaintext
7997# attacks when checking integrity of message content.  Probably not much
7998# of a threat for MD5.  Maybe it will be someday, and when it is, I'll
7999# dream up a better way to create a random cnonce.
8000my ($a1, $a2);
8001$a1 = "$username:$$used_challenge{realm}:$pass";
8002if($$used_challenge{qop} == $qop_authint) {
8003$a2 = $$url_desc{method} . ':' . $$url_desc{PATH} . Digest::MD5::md5_hex($$url_desc{content});
8004} else {
8005$a2 = $$url_desc{method} . ':' . $$url_desc{PATH};
8006}
8007my $response;
8008if(defined($$used_challenge{qop})) {
8009$response = Digest::MD5::md5_hex(Digest::MD5::md5_hex($a1) . ':' . $$used_challenge{nonce} . ':' .
8010 $nc . ':' . $cnonce . ':' .
8011($$used_challenge{qop} == $qop_auth ? "auth" : "auth-int") . ':' . Digest::MD5::md5_hex($a2)) ;
8012} else {
8013$response = Digest::MD5::md5_hex(Digest::MD5::md5_hex($a1) . ':' . $$used_challenge{nonce} . ':' . Digest::MD5::md5_hex($a2)) ;
8014}
8015my $out = "Authorization: Digest username=\"$username\", realm=\"$$used_challenge{realm}\", " .
8016"nonce=\"$$used_challenge{nonce}\", uri=\"$$url_desc{PATH}\", response=\"$response\"";
8017$out .= ", opaque=\"$$used_challenge{opaque}\"" if defined($$used_challenge{opaque});
8018$out .= ", algorithm=\"$$used_challenge{algorithm}\"" if defined($$used_challenge{algorithm});
8019if(defined($$used_challenge{qop})) {
8020$out .= ", qop=";
8021$out .= "\"auth\"" if $$used_challenge{qop} == $qop_auth;
8022$out .= "\"auth-int\"" if $$used_challenge{qop} == $qop_authint;
8023$out .= ", nc=$nc, cnonce=\"$cnonce\"";
8024}
8025$out .= "$eol";
8026return $out;
8027}
8028}  # parseWWWAuth
8029
8030sub getUserPass($)
8031{
8032my $request = shift;
8033my $abort = "login password sequence aborted";
8034if(! $authAttempt and defined $authHist{$request}) {
8035return split ":", $authHist{$request};
8036}
8037my ($server, $port, $realm) = split(":", $request);
8038print "Server $server requests authentication for $realm.  (type x to abort)\n";
8039print "Username: ";
8040my $username = <STDIN>;
8041chomp $username;
8042$errorMsg = $abort, return ("x","x") if $username eq "x";
8043print "Password: ";
8044my $pass = <STDIN>;
8045chomp $pass;
8046$errorMsg = $abort, return ("x","x") if $pass eq "x";
8047$authHist{$request} = "$username:$pass";
8048return ($username, $pass);
8049}  # getUserPass
8050
8051
8052