1#!/usr/bin/perl 2 3# edbrowse: line editor/browser 4 5use IO::Handle; 6use IO::Socket; 7use Time::Local; 8 9=head1 Author 10 11 Karl Dahlke 12 eklhad@gmail.com 13 14=HEAD1 Current Maintainer 15 16Chris Brannon 17 maintainers@edbrowse.org 18 http://edbrowse.org 19 20=head1 Copyright Notice 21 22This program is copyright (C) (C) Karl Dahlke, 2000-2003. 23It is made available, by the author, under the terms of the General Public License (GPL), 24as articulated by the Free Software Foundation. 25It may be used for any purpose, and redistributed, 26provided this copyright notice is included. 27 28=head1 Redirection 29 30This program, and its associated documentation, are becoming quite large. 31Therefore the documentation has been moved to a separate html file. 32Please visit: 33 34http://edbrowse.org/usersguide.html 35 36Realize that this documentation covers the C version of edbrowse. Development 37of the Perl version stopped years ago, and there have been significant changes. 38 39If you have lynx on hand, you can run: 40 41lynx -dump http://edbrowse.org/usersguide.html > usersguide.txt 42 43If you are using lynx to download the actual program, do this: 44 45lynx -source 'http://edbrowse.org/edbrowse.pl' > edbrowse.pl 46 47=cut 48 49 50$version = "1.5.17"; 51@agents = ("edbrowse/$version"); 52$agent = $agents[0]; 53 54 55# It's tempting to let perl establish the global variables as you go. 56# Let's try not to do this. 57# That's where all the side effects are - that's where the bugs come in. 58# Below are the global variables, with some explanations. 59 60$debug = 0; # general debugging 61$errorExit = 0; 62$ismc = 0; # is mail client 63$zapmail = 0; # just get rid of the mail 64$maxfile = 40000000; # Max size of an editable file. 65$eol = "\r\n"; # end-of-line, as far as http is concerned 66$doslike = 0; # Is it a Dos-like OS? 67$doslike = 1 if $^O =~ /^(dos|win|mswin)/i; 68$errorMsg = ""; # Set this if the last operation produced an error. 69$inglob = 0; # Are we in global mode, under a g// operation? 70$onloadSubmit = 0; 71$inscript = 0; # plowing through javascript 72$filesize = 0; # size of file just read or written 73$global_lhs_rhs = 0; # remember lhs and rhs across sessions 74$caseInsensitive = 0; 75# Do we send crnl or nl after the lines in a text buffer? 76# What is the standard - I think it's DOS newlines. 77$textAreaCR = 1; 78$pdf_convert = 1; # convert pdf to html 79$fetchFrames = 1; # fetch the frames into a web page 80$allsub = 0; # enclose all superscripts and subscripts 81$allowCookies = 1; # allow all cookies. 82%cookies = (); # the in-memory cookie jar 83%authHist = (); # authorization strings by domain 84$authAttempt = 0; # count authorization attempts for this page 85$ssl_verify = 1; # By default we verify all certs. 86$ssl = undef; # ssl connection 87$ctx = undef; # ssl certificate 88$allowReferer = 1; # Allow referer header by default. 89$referer = ""; # refering web page 90$reroute = 1; # follow http redirections to find the actual web page 91$rerouteCount = 0; # but prevent infinite loops 92%didFrame = (); # which frames have we fetched already 93$passive = 1; # ftp passive mode on by default. 94$nostack = 0; # suppress stacking of edit sessions 95$last_z = 1; # line count for the z command 96$endmarks = 0; # do we print ^ $ at the start and end of lines? 97$subprint = 0; # print lines after substitutions? 98$delprint = 0; # print line after delete 99$dw = 0; # directory write enabled 100$altattach = 0; # attachments are really alternative presentations of the same email 101$do_input = 0; # waiting for the next input from the tty 102$intFlag = 0; # control c was hit 103$intMsg = "operation interrupted"; 104 105# Interrupt handler, for control C. 106# Close file handle if we were reading from disk or socket. 107sub intHandler() 108{ 109$intFlag = 1; 110if($do_input) { 111print "\ninterrupt, type qt to quit completely\n"; 112return; 113} 114# Reading from an http server. 115close FH if defined FH; 116# Kill ftp data connection if open. 117close FDFH if defined FDFH; 118# and mail connection or ftp control connection 119close SERVER_FH if defined SERVER_FH; 120# And listening ftp socket. 121close FLFH if defined FLFH; 122exit 1 if $ismc; 123} # intHandler 124 125$SIG{INT} = \&intHandler; 126 127# A quieter form of die, without the edbrowse line number, which just confuses people. 128sub dieq($) 129{ 130my $msg = shift; 131print "fatal: $msg\n"; 132exit 1; 133} # dieq 134 135@weekDaysShort = ("Sun","Mon","Tue","Wed","Thu","Fri","Sat"); 136@monthsShort = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"); 137sub mailTimeString() 138{ 139my ($ss, $nn, $hh, $dd, $mm, $yy, $wd) = localtime time; 140my $wds = $weekDaysShort[$wd]; 141my $mths = $monthsShort[$mm]; 142return sprintf "%s, %02d %s %d %02d:%02d:%02d", 143$wds, $dd, $mths, $yy+1900, $hh, $nn, $ss; 144} # mailTimeString 145 146# ubackup is set when the command has changed something. 147# The previous text, stored in the save_* variables, 148# is copied into the last* variables. If you type u, 149# the last* variables and current variables are swapped. 150$ubackup = 0; 151 152# Did we successfully read the edbrowse config file? 153# If so, set some variables. 154$myname = $annoyFile = $junkFile = $addressFile = ""; 155%adbook = (); 156$adbooktime = 0; 157@inmailserver = (); # list of pop3 servers 158$mailDir = ""; 159$localMail = -1; 160$whichMail = 0; # which account to use 161$smMail = ""; 162$naccounts = 0; # number of pop accounts 163$outmailserver = ""; # smtp 164$smtplogin = ""; # smtp login 165my $mailToSend = ""; 166@pop3login = (); 167@pop3password = (); 168@replyAddress = (); 169@fromSource = (); 170@fromDest = (); 171$serverLine = ""; # line received from mail or ftp server 172 173# web express configuration variables and arrays. 174%shortcut = (); 175%commandList = (); 176%commandCheck = (); 177$currentShortcut = ""; 178$currentCommandList = ""; 179 180# Specify the start and end of a range for an operation. 181# 1,3m5 will set these variables to 1, 3, and 5. 182$startRange = $endRange = $dest = 0; 183 184# The input command, but only the one-letter commands. 185$icmd = ""; 186# Now the command that is actually executed is in $cmd. 187# This is usually the same as $icmd, but not always. 188# 8i becomes 7a, for instance. 189$cmd = ""; 190# The valid edbrowse commands. 191$valid_cmd = "aAbBcdefghHiIjJklmnpqrsStuvwz=^@<"; 192# Commands that can be done in browse mode. 193$browse_cmd = "AbBdefghHIjJklmnpqsuvwz=^@<"; 194# Commands for directory mode. 195$dir_cmd = "AbdefghHklnpqsvwz=^@<"; 196# Commands that work at line number 0, in an empty file. 197$zero_cmd = "aAbefhHqruwz=^@<"; 198# Commands that expect a space afterward. 199$spaceplus_cmd = "befrw"; 200# Commands that should have no text after them. 201$nofollow_cmd = "aAcdhHijlmnptu="; 202# Commands that can be done after a g// global directive. 203$global_cmd = "dIjJlmnpst"; 204# Show the error message, not just the question mark, after these commands. 205$showerror_cmd = "Abefqrw^@"; 206$helpall = 0; # show the error message all the time 207 208# Remember that two successive q's will quit the session without changes. 209# here we must track which session, by number, you were trying to quit. 210$lastq = $lastqq = -1; 211 212# For any variable x, there are usually multiple copies of x, one per session. 213# These are housed in an array @x. 214# In contrast, the variable $x holds $x[$context], 215# according to the current context. 216# I hope this isn't too confusing. 217$context = 0; 218 219# dot and dol, current and last line numbers. 220@dot = (0); 221$dot = $dot[0]; 222@dol = (0); 223$dol = $dol[0]; 224@factive = (1); # which sessions are active 225# Retain file names, and whether the text has been modified. 226@fname = (""); 227$fname = $fname[0]; 228$baseref = ""; # usually the same as $fname 229@fmode = (0); # file modes 230$fmode = $fmode[0]; 231$binmode = 1; # binary file 232$nlmode = 2; # newline apended 233$browsemode = 4; # browsing html text 234$changemode = 8; # something has changed in this file 235$dirmode = 16; # directory mode 236$firstopmode = 32; # first operation issued - undo is possible 237$nobrowse = "not in browse mode"; # common error message 238$nixbrowse = "command not available in browse mode"; 239$nixdir = "command not available in directory mode"; 240 241sub dirBrowseCheck($) 242{ 243my $cmd = shift; 244$fmode&$browsemode and $errorMsg = "$cmd $nixbrowse", $inglob = 0, return 0; 245$fmode&$dirmode and $errorMsg = "$cmd $nixdir", $inglob = 0, return 0; 246return 1; 247} # dirBrowseCheck 248 249# retain base directory name when scanning a directory 250@dirname = (""); 251$dirname = $dirname[0]; 252 253# Remember substitution strings. 254@savelhs = (); # save left hand side 255$savelhs = $savelhs[0]; 256@saverhs = (); # save right hand side 257$saverhs = $saverhs[0]; 258 259# month hash, to encode dates. 260%monhash = 261(jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6, 262jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12); 263 264$home = $ENV{HOME}; 265defined $home and length $home or 266dieq 'home directory not defined by $HOME.'; 267-d $home or 268dieq "$home is not a directory."; 269 270# Establish the trash bin, for deleted files. 271$rbin = "$home/.trash"; 272if(! -d $rbin) { 273$rbin = "" unless mkdir $rbin, 0700; 274} 275# Config file for this browser. 276# Sample file is available at http://edbrowse.org/sample.perl.ebrc 277$rcFile = "$home/.ebprc"; 278# Last http header, normally deleted before you read the web page. 279$ebhttp = "$rbin/eb.http"; 280truncate $ebhttp, 0; 281# When we need a temp file. 282$ebtmp = "$rbin/eb.tmp"; 283# A file containing SSL certificates in PEM format, concatinated together. 284# This will be used for certificate verification. 285$ebcerts = "$home/.ssl-certs"; 286# file for persistant cookies. 287$ebcooks = "$home/.cookies"; 288sub fillJar() ; 289fillJar(); # fill up that cooky jar 290 291# Let's see if we can read the config file? 292if(open FH, $rcFile) { 293my $sort = 0; 294while(<FH>) { 295s/^\s+//; 296s/^#.*$//; 297next if /^$/; 298s/\s+$//; 299my ($server, $login, $passwd, $retpath, $key, $value); 300if(/^([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]*)/) { 301($server, $login, $passwd, $retpath) = ($1, $2, $3, $4); 302my $smtpbox = $5; 303if($server =~ s/^\*\s*//) { 304dieq "multiple accounts are marked as local, with a star." if $localMail >= 0; 305$localMail = $naccounts; 306$smtpbox = $server unless length $smtpbox; 307$outmailserver = $smtpbox; 308$smtplogin = $login; 309} 310$inmailserver[$naccounts] = $server; 311$pop3login[$naccounts] = $login; 312$pop3password[$naccounts] = $passwd; 313$replyAddress[$naccounts] = $retpath; 314++$naccounts; 315next; 316} # describing a mail server 317 318# Now look form keyword = string. 319# Initial < is shorthand for cmd = 320s/^\</cmd =/; 321if(/^([^=]+)=\s*(.+)/) { 322$key = $1; 323$value = $2; 324$key =~ s/\s+$//; 325$myname = $value, next if $key eq "fullname"; 326$addressFile = $value, next if $key eq "addressbook"; 327$junkFile = $value, next if $key eq "junkfile"; 328$annoyFile = $value, next if $key eq "annoyfile"; 329$mailDir = $value, next if $key eq "cd"; 330 331if($key eq "from") { 332if($value =~ /^\s*([^\s>]+)\s*>\s*(.+)$/) { 333push @fromSource, lc $1; 334push @fromDest, $2; 335next; 336} 337dieq "from filter \"$value\" does not look like \"emailAddress > file\"."; 338} # from 339 340if($key eq "agent") { 341push @agents, $value; 342next; 343} # agent 344 345# web express keywords 346if($key eq "shortcut") { 347if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) { 348dieq "shortcut $currentShortcut has not been assigned a url"; 349} 350$value =~ /^[\w-]+$/ or dieq "the name of a shortcut must consist of letters digits or dashes, $value is invalid"; 351$currentShortcut = $value; 352# Start out with no post processing commands. 353$shortcut{$value}{after} = []; 354$shortcut{$value}{sort} = sprintf "%04d", $sort; 355++$sort; 356$currentCommandList = ""; 357next; 358} # shortcut 359if($key eq "cmdlist") { 360if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) { 361dieq "shortcut $currentShortcut has not been assigned a url"; 362} 363$currentShortcut = ""; 364my $check = 0; 365$check = 1 if $value =~ s/^\+//; 366$value =~ /^[\w-]+$/ or dieq "the name of a command list must consist of letters digits or dashes, $value is invalid."; 367$currentCommandList = $value; 368$commandList{$value} = []; 369$commandCheck{$value} = $check; 370next; 371} # cmdlist 372if($key eq "cmd") { 373length $currentShortcut or length $currentCommandList or 374dieq "postprocessing command is not part of a command list or shortcut"; 375my $cref; # command reference 376$cref = $shortcut{$currentShortcut}{after} if length $currentShortcut; 377$cref = $commandList{$currentCommandList} if length $currentCommandList; 378# is this a command list? 379if($value =~ /^[a-zA-Z_-]+$/ and defined $commandList{$value}) { 380my $cpush = $commandList{$value}; 381push @$cref, @$cpush; 382} else { 383push @$cref, $value; 384} 385next; 386} # cmd 387if($key eq "url") { 388length $currentShortcut or dieq "$key command without a current shortcut"; 389$shortcut{$currentShortcut}{url} = $value; 390next; 391} # url 392if($key eq "desc") { 393length $currentShortcut or dieq "$key command without a current shortcut"; 394$shortcut{$currentShortcut}{desc} = $value; 395next; 396} # desc 397 398dieq "Unrecognized keyword <$key> in config file."; 399} 400 401dieq "garbled line <$_> in config file."; 402} # loop over lines in config file 403close FH; 404 405if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) { 406dieq "shortcut $currentShortcut has not been assigned a url"; 407} 408 409if($naccounts) { 410$localMail = 0 if $naccounts == 1; 411dieq "None of the pop3 accounts is marked as local." if $localMail < 0; 412dieq "fullname not specified in the config file." if ! length $myname; 413} # mail accounts 414} # open succeeded 415 416# One array holds all the lines of text (without the newlines) 417# for all the files in all the sessions. 418# Within a given session, the actual file is represented by a list of numbers, 419# indexes into this large array. 420# Note that when text is copied, we actually copy the strings in the array. 421# I could just have different lines use the same index, thus pointing to the 422# same string, and there would be no need to copy that string, 423# but then I'd have to maintain reference counts on all these strings, 424# and that would make the program very messy! 425@text = (); 426 427# If a file has 30 lines, it is represented by 30 numbers, 428# indexes into @text above. 429# Should we use an array of numbers, or a string of numbers 430# represented by decimal digits? 431# Both are painful, in different ways. 432# Consider inserting a block of text, a very common operation. 433# In a list, we would have to slide all the following numbers down. 434# Granted, that's better than copying all those lines of text down, 435# but it's still a pain to program, and somewhat inefficient. 436# If we use strings, we take the original string of numbers, 437# break it at the insert point, and make a new string 438# by concatenating these two pieces with the new block. 439# The same issues arise when deleting text near the top of a file. 440# This and other considerations push me towards strings. 441# I currently use 6 characters for a line number, and a seventh for the g// flag. 442$lnwidth = 7; # width of a line number field in $map 443$lnwidth1 = $lnwidth - 1; 444$lnformat = "%6d "; 445$lnspace = ' ' x $lnwidth; 446$lnmax = 999999; 447# Note that line 0 never maps to anything in @text. 448@map = ($lnspace); 449$map = $map[0]; 450# The 26 labels, corresponding to the lower case letters. 451# These are stored in a packed string, like $map above. 452# labels also holds the filetype suffixes when in directory mode. 453@labels = ($lnspace x 26); 454$labels = $labels[0]; 455# offset into $labels, where directory suffixes begin. 456$dirSufStart = 26 * $lnwidth; 457 458# The anchor/form/input tags, for browsing. 459# The browse tags are in an array of hashes. 460# Each hash has tag=tagname, 461# and attrib=value for each attrib=value in the tag. 462# Be advised that certain special tags, such as those defining 463# title and description and keywords, are placed in btag[0]. 464@btags = (); 465$btags = $btags[0]; 466 467# When we focus on an input field, for edit or manipulation, 468# we need its type, size, and list of options. 469$inf = ""; # current text displayed by this input field. 470$itype = ""; # Type of the input field. 471$isize = 0; # size of the input field. 472$iopt = {}; # hash of input options in a discrete list. 473$irows = $icols = 0; # for a text area window. 474$iwrap = ""; # Can we scroll beyond this window? 475$itag = undef; # the input tag from which the previous variables were derived. 476$iline = 0; # line where this input field was found. 477$ifield = 0; # field number, within the line, the nth input field on the line. 478$itagnum = 0; # tag number for this input field. 479$inorange = "this input directive cannot be applied to a range of lines"; 480$inoglobal = "this input directive cannot be applied globally"; 481 482# last* and save* variables mirror the variables that define your session. 483# This supports the undo command. 484$lastdot = $savedot = $lastdol = $savedol = 0; 485$lastmap = $savemap = $lastlabels = $savelabels = ""; 486 487# Variables to format text, i.e. break lines at sentence/phrase boundaries. 488$refbuf = ""; # The new, reformatted buffer. 489$lineno = $colno = 0; # line/column number 490$optimalLine = 80; # optimal line length 491$cutLineAfter = 36; # cut sentence or phrase after this column 492$paraLine = 120; # longer lines are assumed to be self-contained paragraphs 493$longcut = 0; # last cut of a long line 494$lspace = 3; # last space value, 3 = paragraph 495$lperiod = $lcomma = $lright = $lany = 0; # columns for various punctuations 496$idxperiod = $idxcomma = $idxright = $idxany = 0; 497 498# Push the entire edit session onto a stack, for the back key. 499# A hash will hold all the variables that make a session, 500# such as $map, $fname, $btags, etc. 501@backup = (); 502$backup = $backup[0]; 503 504$hexChars = "0123456789abcdefABCDEF"; 505 506# Valid delimiters for search/substitute. 507# note that \ is conspicuously absent, not a valid delimiter. 508# I alsso avoid nestable delimiters such as parentheses. 509# And no alphanumerics please -- too confusing. 510$valid_delim = "-_=!|#*;:`\"',./?+@"; 511 512# $linePending holds a line of text that you accidentally typed in 513# while edbrowse was in command mode. 514# When you see the question mark, immediately type a+ to recover the line. 515$linePending = undef; 516 517 518# That's it for the globals, here comes the code. 519# First a few support routines. 520# Strip white space from either side. 521sub stripWhite($) 522{ 523my $line = shift; 524$$line =~ s/^\s+//; 525$$line =~ s/\s+$//; 526} # stripWhite 527 528# Is a filename a URL? 529# If it is, return the transport protocol, e.g. http. 530sub is_url($) 531{ 532my $line = shift; 533return 'http' if $line =~ m,^http://[^\s],i; 534return 'https' if $line =~ m,^https://[^\s],i; 535return 'gopher' if $line =~ m,^gopher://[^\s],i; 536return 'telnet' if $line =~ m,^telnet://[^\s],i; 537return 'ftp' if $line =~ m,^ftp://[^\s],i; 538# I assume that the following will be regular http. 539# Strip off the ?this=that stuff 540$line =~ s:\?.*::; 541# Strip off the file name and .browse suffix. 542$line =~ s:/.*::; 543$line =~ s/\.browse$//; 544$line =~ s/:\d+$//; 545return 0 if $line !~ /\w\.\w.*\w\.\w/; # we need at least two internal dots 546# Look for an ip address, four numbers and three dots. 547return 'http' if $line =~ /^\d+\.\d+\.\d+\.\d+$/; 548$line =~ s/.*\.//; 549return 'http' if index(".com.biz.info.net.org.gov.edu.us.uk.au.ca.de.jp.be.nz.sg.", ".$line.") >= 0; 550} # is_url 551 552# Apply a (possibly) relative path to a preexisting url. 553# The new url is returned. 554# resolveUrl("http://www.eklhad.net/linux/index.html", "app") returns 555# "http://www.eklhad.net/linux/app" 556sub resolveUrl($$) 557{ 558my ($line, $href) = @_; 559my $scheme; 560$line = "" unless defined $line; 561$line =~ s/\.browse$//; 562# debug print - this is a very subtle routine. 563print "resolve($line, $href)\n" if $debug >= 2; 564# Some people, or generators, actually write http://../whatever.html 565$href =~ s/^http:(\.+)/$1/i; 566$href =~ s,^http://(\.*/),$1,i; 567return $href unless length $href and length $line and ! is_url($href); 568if(substr($href, 0, 1) ne '/') { 569$line =~ s/\?.*//; # hope this is right 570if(substr($href, 0, 1) ne '?') { 571if($line =~ s,^/[^/]*$,, or 572$line =~ s,([^/])/[^/]*$,$1,) { 573# We stripped off the last directory 574$line .= '/'; 575} else { 576if($scheme = is_url $line) { 577$line .= '/'; 578} else { 579$line = ""; 580} 581} # stripping off last directory 582} # doesn't start with ? 583} elsif($scheme = is_url $line) { 584# Keep the scheme and server, lose the filename 585$line =~ s/\?.*//; # hope this is right 586$line =~ s,^($scheme://[^/]*)/.*,$1,i; 587} else { 588$line = ""; 589} 590return $line.$href; 591} # resolveUrl 592 593# Prepare a string for http transmition. 594# No, I really don't know which characters to encode. 595# I'm probably encoding more than I need to -- hope that's ok. 596sub urlEncode($) 597{ 598$_ = shift; 599s/([^-\w .@])/sprintf('%%%02X',ord($1))/ge; 600y/ /+/; 601return $_; 602} # urlEncode 603 604sub urlDecode($) 605{ 606$_ = shift; 607y/+/ /; 608s/%([0-9a-fA-F]{2})/chr hex "$1"/ge; 609return $_; 610} # urlDecode 611 612# The javascript unescape function, sort of 613sub unescape($) 614{ 615$_ = shift; 616s/(%|\\u00)([0-9a-fA-F]{2})/chr hex "$2"/ge; 617s/&#(\d+);/chr "$1"/ge; 618return $_; 619} # unescape 620 621# Pull the subject out of a sendmail url. 622sub urlSubject($) 623{ 624my $href = shift; 625if($$href =~ s/\?(.*)$//) { 626my @pieces = split '&', $1; 627foreach my $j (@pieces) { 628next unless $j =~ s/^subject=//i; 629my $subj = urlDecode $j; 630stripWhite \$subj; 631return $subj; 632} # loop 633} # attributes after the email 634return ""; 635} # urlSubject 636 637# Get raw text ready for html display. 638sub textUnmeta($) 639{ 640my $tbuf = shift; 641return unless length $$tbuf; 642$$tbuf =~ s/&/&/g; 643$$tbuf =~ s/</</g; 644$$tbuf =~ s/>/>/g; 645$$tbuf =~ s/^/<P><PRE>/; 646$$tbuf =~ s/$/<\/PRE><P>\n/; 647} # textUnmeta 648 649# Derive the alt description for an image or hyperlink. 650sub deriveAlt($$) 651{ 652my $h = shift; 653my $href = shift; 654my $alt = $$h{alt}; 655$alt = "" unless defined $alt; 656stripWhite \$alt; 657# Some alt descriptions are flat-out useless. 658$alt =~ s/^[^\w]+$//; 659return $alt if length $alt; 660if(!length $href) { 661$href = $$h{href}; 662$href = "" unless defined $href; 663} 664$alt = $href; 665$alt =~ s/^javascript.*$//i; 666$alt =~ s/^\?//; 667$alt =~ s:\?.*::s; 668$alt =~ s:.*/::; 669$alt =~ s/\.[^.]*$//; 670$alt =~ s:/$::; 671return $alt; 672} # deriveAlt 673 674# Pull the reference out of a javascript openWindow() call. 675$foundFunc = ""; 676sub javaWindow($) 677{ 678my $jc = shift; # java call 679my $page = ""; 680$foundFunc = ""; 681$page = $1 if $jc =~ /(?:open|location|window)[\w.]* *[(=] *["']([\w._\/:,=@&?+-]+)["']/i; 682return $page if length $page; 683return "submit" if $jc =~ /\bsubmit *\(/i; 684while($jc =~ /(\w+) *\(/g) { 685my $f = $1; 686my $href = $$btags[0]{fw}{$f}; 687if($href) { 688$href =~ s/^\*//; 689$foundFunc = $f; 690$page = $href; 691} 692} 693return $page; 694} # javaWindow 695 696# Try to find the Java functions 697sub javaFunctions($) 698{ 699my $tbuf = shift; 700my $flc = 0; # function line count 701my $f; # java function 702while($$tbuf =~ /(.+)/g) { 703my $line = $1; 704if($line =~ /function *(\w+)\(/) { 705$f = $1; 706print "java function $f\n" if $debug >= 6; 707$flc = 1; 708} 709my $win = javaWindow $line; 710if(length $win) { 711if($flc) { 712if(not defined $$btags[0]{fw}{$f}) { 713$$btags[0]{fw}{$f} = "*$win"; 714print "$f: $win\n" if $debug >= 3; 715} 716} elsif($win ne "submit") { 717my $h = {}; 718push @$btags, $h; 719$attrhidden = hideNumber($#$btags); 720$$h{ofs1} = length $refbuf; 721my $alt = deriveAlt($h, $win); 722$alt = "relocate" unless length $alt; 723createHyperLink($h, $win, $alt); 724} 725} 726next unless $flc; 727++$flc; 728$flc = 0 if $flc == 12; 729} # loop over lines 730} # javaFunctions 731 732# Mixed case. 733sub mixCase($) 734{ 735my $w = lc shift; 736$w =~ s/\b([a-z])/uc $1/ge; 737# special McDonald code 738$w =~ s/Mc([a-z])/"Mc".uc $1/ge; 739return $w; 740} # mixCase 741 742# Create a hyperlink where there was none before. 743sub createHyperLink($$$) 744{ 745my ($h, $href, $desc) = @_; 746$$h{tag} = "a"; 747$$h{bref} = $baseref; 748$$h{href} = $href; 749$refbuf .= "\x80$attrhidden" . "{$desc}"; 750$colno += 2 + length $desc; 751$$h{ofs2} = length $refbuf; 752$lspace = 0; 753} # createHyperLink 754 755# meta html characters. 756# There's lots more -- this is just a starter. 757%charmap = ( 758# Normal ascii symbols 759gt => '>', lt => '<', quot => '"', 760plus => '+', minus => '-', colon => ':', 761apos => '`', star => '*', comma => ',', 762period => '.', dot => ".", 763dollar => '$', percnt => '%', amp => '&', 764# International letters 765ntilde => "\xf1", Ntilde => "\xd1", 766agrave => "\xe0", Agrave => "\xc0", 767egrave => "\xe8", Egrave => "\xc8", 768igrave => "\xec", Igrave => "\xcc", 769ograve => "\xf2", Ograve => "\xd2", 770ugrave => "\xf9", Ugrave => "\xd9", 771auml => "\xe4", Auml => "\xc4", 772euml => "\xeb", Euml => "\xcb", 773iuml => "\xef", Iuml => "\xcf", 774ouml => "\xf6", Ouml => "\xd6", 775uuml => "\xfc", Uuml => "\xdc", 776yuml => "\xff", Yuml => 'Y', 777aacute => "\xe1", Aacute => "\xc1", 778eacute => "\xe9", Eacute => "\xc9", 779iacute => "\xed", Iacute => "\xcd", 780oacute => "\xf3", Oacute => "\xd3", 781uacute => "\xfa", Uacute => "\xda", 782yacute => "\xfd", Yacute => "\xdd", 783atilde => "\xe3", Atilde => "\xc3", 784itilde => 'i', Itilde => 'I', 785otilde => "\xf5", Otilde => "\xd5", 786utilde => 'u', Utilde => 'U', 787acirc => "\xe2", Acirc => "\xc2", 788ecirc => "\xea", Ecirc => "\xca", 789icirc => "\xee", Icirc => "\xce", 790ocirc => "\xf4", Ocirc => "\xd4", 791ucirc => "\xfb", Ucirc => "\xdb", 792# Other 8-bit symbols. 793# I turn these into their 8 bit equivalents, 794# then a follow-on routine turns them into words for easy reading. 795# Some speech adapters do this as well, saying "cents" for the cents sign, 796# but yours may not, so I do some of these translations for you. 797# But not here, because some people put the 8-bit cents sign in directly, 798# rather then ¢, so I've got to do that translation later. 799pound => "\xa3", cent => "\xa2", 800sdot => "\xb7", 801middot => "\xb7", 802edot => 'e', 803nbsp => ' ', 804times => "\xd7", 805divide => "\xf7", 806deg => "\xb0", 807frac14 => "\xbc", 808half => "\xbd", 809frac34 => "\xbe", 810frac13 => "1/3", 811frac23 => "2/3", 812copy => "\xa9", 813reg => "\xae", 814trade => "(TM)", 815); 816 817%symbolmap = ( 818a => "945", 819b => "946", 820g => "947", 821d => "948", 822e => "949", 823z => "950", 824h => "951", 825q => "952", 826i => "953", 827k => "954", 828l => "955", 829m => "956", 830n => "957", 831x => "958", 832o => "959", 833p => "960", 834r => "961", 835s => "963", 836t => "964", 837u => "965", 838f => "966", 839c => "967", 840y => "968", 841w => "969", 842177 => "8177", # kludge!! I made up 8177 843198 => "8709", 844219 => "8660", 845209 => "8711", 846229 => "8721", 847206 => "8712", 848207 => "8713", 849242 => "8747", 850192 => "8501", 851172 => "8592", 852174 => "8594", 853165 => "8734", 854199 => "8745", 855200 => "8746", 85664 => "8773", 857182 => "8706", 858185 => "8800", 859162 => "8242", 860163 => "8804", 861179 => "8805", 862204 => "8834", 863205 => "8838", 864201 => "8835", 865203 => "8836", 866202 => "8839", 867208 => "8736", 868); 869 870# map certain font=symbol characters to words 871%symbolWord = ( 872176 => "degrees", 873188 => "1fourth", 874189 => "1half", 875190 => "3fourths", 876215 => "times", 877247 => "divided by", 878913 => "Alpha", 879914 => "Beta", 880915 => "Gamma", 881916 => "Delta", 882917 => "Epsilon", 883918 => "Zeta", 884919 => "Eta", 885920 => "Theta", 886921 => "Iota", 887922 => "Kappa", 888923 => "Lambda", 889924 => "Mu", 890925 => "Nu", 891926 => "Xi", 892927 => "Omicron", 893928 => "Pi", 894929 => "Rho", 895931 => "Sigma", 896932 => "Tau", 897933 => "Upsilon", 898934 => "Phi", 899935 => "Chi", 900936 => "Psi", 901937 => "Omega", 902945 => "alpha", 903946 => "beta", 904947 => "gamma", 905948 => "delta", 906949 => "epsilon", 907950 => "zeta", 908951 => "eta", 909952 => "theta", 910953 => "iota", 911954 => "kappa", 912955 => "lambda", 913956 => "mu", 914957 => "nu", 915958 => "xi", 916959 => "omicron", 917960 => "pi", 918961 => "rho", 919962 => "sigmaf", 920963 => "sigma", 921964 => "tau", 922965 => "upsilon", 923966 => "phi", 924967 => "chi", 925968 => "psi", 926969 => "omega", 9278177 => "+-", # kludge!! I made up 8177 9288242 => "prime", 9298501 => "aleph", 9308592 => "left arrow", 9318594 => "arrow", 9328660 => "double arrow", 9338706 => "d", 9348709 => "empty set", 9358711 => "del", 9368712 => "member of", 9378713 => "not a member of", 9388721 => "sum", 9398734 => "infinity", 9408736 => "angle", 9418745 => "intersect", 9428746 => "union", 9438747 => "integral", 9448773 => "congruent to", 9458800 => "not equal", 9468804 => "less equal", 9478805 => "greater equal", 9488834 => "proper subset of", 9498835 => "proper superset of", 9508836 => "not a subset of", 9518838 => "subset of", 9528839 => "superset of", 953); 954 955# Map an html meta character using the above hashes. 956# Usually run from within a global substitute. 957sub metaChar($) 958{ 959my $meta = shift; 960if($meta =~ /^#(\d+)$/) { 961return chr $1 if $1 <= 255; 962return "'" if $1 == 8217; 963return "\x82$1#" if $symbolWord{$1}; 964return "?"; 965} 966my $real = $charmap{$meta}; 967defined $real or $real = "?"; 968return $real; 969} # metaChar 970 971# Translate <font face=symbol>number</font>. 972# This is highly specific to my web pages - doesn't work in general! 973sub metaSymbol($) 974{ 975my $meta = shift; 976$meta =~ s/^&#//; 977$meta =~ s/;$//; 978my $real = $symbolmap{$meta}; 979return "?" unless $real; 980return "&#$real;"; 981} # metaSymbol 982 983# replace VAR with $VAR, as defined by the environment. 984sub envVar($) 985{ 986my $var = shift; 987my $newvar = $ENV{$var}; 988if(defined $newvar) { 989# There shouldn't be any whitespace at the front or back. 990stripWhite \$newvar; 991return $newvar if length $newvar; 992} 993length $errorMsg or 994$errorMsg = "environment variable $var not set"; 995return ""; 996} # envVar 997 998# Replace the variables in a line, using the above. 999sub envLine($) 1000{ 1001my $line = shift; 1002$errorMsg = ""; 1003# $errorMsg will be set if something goes wrong. 1004$line =~ s,^~/,\$HOME/,; 1005$line =~ s/\$([a-zA-Z]\w*)/envVar($1)/ge; 1006return $line; 1007} # envLine 1008 1009# The filename can be specified using environment variables, 1010# and shell meta characters such as *. 1011# But not if it's a url. 1012sub envFile($) 1013{ 1014my $filename = shift; 1015$errorMsg = ""; 1016if(! is_url($filename)) { 1017$filename = envLine($filename); 1018return if length $errorMsg; 1019my @filelist; 1020# This is real kludgy - I just don't understand how glob works. 1021if($filename =~ / / and $filename !~ /"/) { 1022@filelist = glob '"'.$filename.'"'; 1023} else { 1024@filelist = glob $filename; 1025} 1026$filelist[0] = $filename if $#filelist < 0; 1027$errorMsg = "wild card expansion produces multiple files" if $#filelist; 1028$filename = $filelist[0]; 1029} 1030return $filename; 1031} # envFile 1032 1033# Drop any active edit sessions that have no text, and no associated file. 1034# This housecleaning routine is run on every quit or backup command. 1035sub dropEmptyBuffers() 1036{ 1037foreach my $cx (0..$#factive) { 1038next if $cx == $context; 1039next unless $factive[$cx]; 1040next if length $fname[$cx]; 1041next if $dol[$cx]; 1042$factive[$cx] = undef; 1043} 1044} # dropEmptyBuffers 1045 1046# Several small functions to switch between contexts, i.e. editing sessions. 1047# In all these functions, we have to map between our context numbers, 1048# that start with 0, and the user's session numbers, that start with 1. 1049# C and fortran programmers will be use to this problem. 1050# Is a context different from the currently running context? 1051sub cxCompare($) 1052{ 1053my $cx = shift; 1054$errorMsg = "session 0 is invalid", return 0 if $cx < 0; 1055return 1 if $cx != $context; # ok 1056++$cx; 1057$errorMsg = "you are already in session $cx"; 1058return 0; 1059} # cxCompare 1060 1061# Is a context active? 1062sub cxActive($) 1063{ 1064my $cx = shift; 1065return 1 if $factive[$cx]; 1066++$cx; 1067$errorMsg = "session $cx is not active"; 1068return 0; 1069} # cxActive 1070 1071# Switch to another editing session. 1072# This assumes cxCompare has succeeded - we're moving to a different context. 1073# Pass the context number and an interactive flag. 1074sub cxSwitch($$) 1075{ 1076my ($cx, $ia) = @_; 1077# Put the variables in a known start state if this is a virgin session. 1078cxReset($cx, 0) if ! defined $factive[$cx]; 1079$dot[$context] = $dot, $dot = $dot[$cx]; 1080$dol[$context] = $dol, $dol = $dol[$cx]; 1081$fname[$context] = $fname, $fname = $fname[$cx]; 1082$dirname[$context] = $dirname, $dirname = $dirname[$cx]; 1083$map[$context] = $map, $map = $map[$cx]; 1084$labels[$context] = $labels, $labels = $labels[$cx]; 1085$btags = $btags[$cx]; 1086$backup[$context] = $backup, $backup = $backup[$cx]; 1087if(!$global_lhs_rhs) { 1088$savelhs[$context] = $savelhs, $savelhs = $savelhs[$cx]; 1089$saverhs[$context] = $saverhs, $saverhs = $saverhs[$cx]; 1090} 1091$fmode[$context] = $fmode, $fmode = $fmode[$cx]; 1092# But we don't replicate the last* variables per context, 1093# so your ability to undo is destroyed if you switch contexts. 1094$fmode &= ~$firstopmode; 1095if($ia) { 1096if(defined $factive[$cx]) { 1097print ((length($fname[$cx]) ? $fname[$cx] : "no file")."\n"); 1098} else { 1099print "new session\n"; 1100} 1101} 1102$factive[$cx] = 1; 1103$context = $cx; 1104return 1; 1105} # cxSwitch 1106 1107# Can we trash the data in a context? 1108# If so, trash it, and reset all the variables. 1109# The second parameter is a close directive. 1110# If nonzero, we clear out empty buffers associated with 1111# text areas in the fill-out forms (browse mode). 1112# A value of 1, as opposed to 2, means close down the entire session. 1113sub cxReset($$) 1114{ 1115my ($cx, $close) = @_; 1116 1117if(defined $factive[$cx]) { 1118# We might be trashing data, make sure that's ok. 1119$fname[$cx] = $fname, $fmode[$cx] = $fmode if $cx == $context; 1120if($fmode[$cx]&$changemode and 1121!( $fmode[$cx]&$dirmode) and 1122$lastq != $cx and 1123length $fname[$cx] and 1124! is_url($fname[$cx])) { 1125$errorMsg = "expecting `w'"; 1126$lastqq = $cx; 1127if($cx != $context) { 1128++$cx; 1129$errorMsg .= " on session $cx"; 1130} 1131return 0; 1132} # warning message 1133 1134if($close) { 1135dropEmptyBuffers(); 1136if($close&1) { 1137# And we're closing this session. 1138$factive[$cx] = undef; 1139$backup[$cx] = undef; 1140} 1141} 1142} # session was active 1143 1144# reset the variables 1145$dot[$cx] = $dol[$cx] = 0; 1146$map[$cx] = $lnspace; 1147$fname[$cx] = ""; 1148$dirname[$cx] = ""; 1149$labels[$cx] = $lnspace x 26; 1150$btags[$cx] = []; 1151$savelhs[$cx] = $saverhs[$cx] = undef; 1152$fmode[$cx] = 0; 1153if($cx == $context) { 1154$dot = $dol = 0; 1155$map = $map[$cx]; 1156$fname = ""; 1157$labels = $labels[$cx]; 1158$btags = $btags[$cx]; 1159$global_lhs_rhs or $savelhs = $saverhs = undef; 1160$fmode = 0; 1161} # current context 1162 1163return 1; 1164} # cxReset 1165 1166# Pack all the information about the current context into a hash. 1167# This will be pushed onto a virtual stack. 1168# When you enter the back key, it all gets unpacked again, 1169# to restore your session. 1170sub cxPack() 1171{ 1172my $h = { 1173dot =>$dot, dol => $dol, map => $map, labels => $labels, 1174lastdot =>$lastdot, lastdol => $lastdol, lastmap => $lastmap, lastlabels => $lastlabels, 1175fname => $fname, dirname => $dirname, 1176fmode => $fmode&~$changemode, 1177savelhs => $savelhs, saverhs => $saverhs, 1178btags => $btags, 1179}; 1180return $h; 1181} # cxPack 1182 1183sub cxUnpack($) 1184{ 1185my $h = shift; 1186return if ! defined $h; 1187$dot = $$h{dot}; 1188$lastdot = $$h{lastdot}; 1189$dol = $$h{dol}; 1190$lastdol = $$h{lastdol}; 1191$map = $$h{map}; 1192$lastmap = $$h{lastmap}; 1193$labels = $$h{labels}; 1194$lastlabels = $$h{lastlabels}; 1195$fmode = $$h{fmode}; 1196$fname = $$h{fname}; 1197$dirname = $$h{dirname}; 1198if(!$global_lhs_rhs) { 1199$savelhs = $$h{savelhs}; 1200$saverhs = $$h{saverhs}; 1201} 1202$btags[$context] = $btags = $$h{btags}; 1203} # cxUnpack 1204 1205# find an available session and load it with some initial data. 1206# Returns the context number. 1207sub cxCreate($$) 1208{ 1209my ($text_ptr, $filename) = @_; 1210# Look for an unused buffer 1211my ($cx, $j); 1212for($cx=0; $cx<=$#factive; ++$cx) { 1213last unless defined $factive[$cx]; 1214} 1215cxReset($cx, 0); 1216$factive[$cx] = 1; 1217$fname[$cx] = $filename; 1218my $bincount = $$text_ptr =~ y/\0\x80-\xff/\0\x80-\xff/; 1219if($bincount*4 - 10 < length $$text_ptr) { 1220# A text file - remove crlf in the dos world. 1221$$text_ptr =~ s/\r\n/\n/g if $doslike; 1222} else { 1223$fmode[$cx] |= $binmode; 1224} 1225$fmode[$cx] |= $nlmode unless $$text_ptr =~ s/\n$//; 1226$j = $#text; 1227if(length $$text_ptr) { 1228push @text, split "\n", $$text_ptr, -1; 1229} 1230if(!lineLimit(0)) { 1231my $newpiece = $lnspace; 1232++$dol[$cx], $newpiece .= sprintf($lnformat, $j) while ++$j <= $#text; 1233$map[$cx] = $newpiece; 1234$dot[$cx] = $dol[$cx]; 1235} else { 1236warn $errorMsg; 1237} 1238return $cx; 1239} # cxCreate 1240 1241# See if @text is too big. 1242# Pass the number of lines we will be adding. 1243sub lineLimit($) 1244{ 1245my $more = shift; 1246return 0 if $#text + $more <= $lnmax; 1247$errorMsg = "Your limit of 1 million lines has been reached.\nSave your files, then exit and restart this program."; 1248return 1; 1249} # lineLimit 1250 1251# Hide and reveal numbers that are internal to the line. 1252# These numbers indicate links and input fields, and are not displayed by the next routine. 1253sub hideNumber($) 1254{ 1255my $n = shift; 1256$n =~ y/0-9/\x85-\x8e/; 1257return $n; 1258} # hideNumber 1259 1260sub revealNumber($) 1261{ 1262my $n = shift; 1263$n =~ y/\x85-\x8f/0-9/; 1264return $n; 1265} # revealNumber 1266 1267sub removeHiddenNumbers($) 1268{ 1269my $t = shift; 1270$$t =~ s/\x80[\x85-\x8f]+([<>{])/$1/g; 1271$$t =~ s/\x80[\x85-\x8f]+\*//g; 1272} # removeHiddenNumbers 1273 1274# Small helper function to retrieve the text for line number n. 1275# If the second parameter is set, hidden numbers are left in place; 1276# otherwise they are stripped out via removeHiddenNumbers(). 1277sub fetchLine($$) 1278{ 1279my $n = shift; 1280my $show = shift; 1281return "" unless $n; # should never happen 1282my $t = $text[substr($map, $n*$lnwidth, $lnwidth1)]; 1283removeHiddenNumbers(\$t) if $show and $fmode&$browsemode; 1284return $t; 1285} # fetchLine 1286 1287# Here's the same function, but for another context. 1288sub fetchLineContext($$$) 1289{ 1290my $n = shift; 1291my $show = shift; 1292my $cx = shift; 1293$t = $text[substr($map[$cx], $n*$lnwidth, $lnwidth1)]; 1294removeHiddenNumbers(\$t) if $show and $fmode[$cx]&$browsemode; 1295return $t; 1296} # fetchLineContext 1297 1298# Print size of the text in buffer. 1299sub apparentSize() 1300{ 1301my $j = 0; 1302$j += length(fetchLine($_, 1)) + 1 foreach (1..$dol); 1303--$j if $fmode&$nlmode; 1304print "$j\n"; 1305} # apparentSize 1306 1307# Read a line from stdin. 1308# Could be a command, could be text going into the buffer. 1309sub readLine() 1310{ 1311my ($i, $j, $c, $d, $line); 1312getline: { 1313$intFlag = 0; 1314$do_input = 1; 1315$line = <STDIN>; 1316$do_input = 0; 1317redo getline if $intFlag and ! defined $line; # interrupt 1318$intFlag = 0; 1319} 1320exit 0 unless defined $line; # EOF 1321$line =~ s/\n$//; 1322# A bug in my keyboard causes nulls to be entered from time to time. 1323$line =~ s/\0/ /g; 1324return $line if $line !~ /~/; # shortcut 1325# We have to process it, character by character. 1326my $line2 = ""; 1327for($i=0; $i<length($line); $line2 .= $c, ++$i) { 1328$c = substr $line, $i, 1; 1329next if $c ne '~'; 1330next if $i == length($line) - 1; 1331$d = substr $line, $i+1, 1; 1332++$i, next if $d eq '~'; 1333next if $i == length($line) - 2; 1334$j = index $hexChars, $d; 1335next if $j < 0; 1336$j -= 6 if $j >= 16; 1337my $val = $j*16; 1338$d = substr $line, $i+2, 1; 1339$j = index $hexChars, $d; 1340next if $j < 0; 1341$j -= 6 if $j >= 16; 1342$val += $j; 1343# We don't use this mechanism to enter normal ascii characters. 1344next if $val >= 32 and $val < 127; 1345# And don't stick a newline in the middle of an entered line. 1346next if $val == 10; 1347$c = chr $val; 1348$i += 2; 1349} # loop over input chars 1350return $line2; 1351} # readLine 1352 1353# Read a block of lines into the buffer. 1354sub readLines() 1355{ 1356my $tbuf = ""; 1357# Put the pending line in first, if it's there. 1358my $line = $linePending; 1359$line = readLine() unless defined $line; 1360while($line ne ".") { 1361$tbuf .= "$line\n"; 1362$line = readLine(); 1363} # loop gathering input lines 1364return addTextToSession(\$tbuf) if length $tbuf; 1365$dot = $endRange; 1366$dot = 1 if $dot == 0 and $dol; 1367return 1; 1368} # readLines 1369 1370# Display a line. Show line number if $cmd is n. 1371# Expand binary characters if $cmd is l. 1372# Pass the line number. 1373sub dispLine($) 1374{ 1375my $ln = shift; 1376print "$ln " if $cmd eq 'n'; 1377my $line = fetchLine($ln, 1); 1378# Truncate, if the line is pathologically long. 1379$line = substr($line, 0, 500) . "..." if length($line) > 500; 1380print '^' if $endmarks and ($endmarks == 2 or $cmd eq 'l'); 1381if($cmd eq 'l') { 1382$line =~ y/\10\11/<>/; 1383$line =~ s/([\0-\x1f\x80-\xff])/sprintf("~%02x",ord($1))/ge; 1384} else { 1385# But we always remap return, null, and escape 1386$line =~ s/(\00|\r|\x1b)/sprintf("~%02x",ord($1))/ge; 1387} 1388print $line; 1389print dirSuffix($ln); 1390print '$' if $endmarks and ($endmarks == 2 or $cmd eq 'l'); 1391print "\n"; 1392} # dispLine 1393 1394# If we've printed a line in directory mode, and the entry isn't 1395# a regular file, we've got to find and print the special character at the end. 1396# / means directory, for example. 1397# This is used by the previous routine, among others. 1398sub dirSuffix($) 1399{ 1400my $ln = shift; 1401my $suf = ""; 1402if($fmode&$dirmode) { 1403$suf = substr($labels, $dirSufStart + 2*$ln, 2); 1404$suf =~ s/ +$//; 1405} 1406return $suf; 1407} # dirSuffix 1408 1409# Routines to help format a string, i.e. cut at sentence boundaries. 1410# This isn't real smart; it will happily split Mr. Flintstone. 1411sub appendWhiteSpace($$) 1412{ 1413my($chunk, $breakable) = @_; 1414my $nlc = $chunk =~ y/\n//d; # newline count 1415if($breakable) { 1416# Don't interrogate the last few characters of a huge string -- that's inefficient. 1417my $short = substr $refbuf, -2; 1418my $l = length $refbuf; 1419$lperiod = $colno, $idxperiod = $l if $short =~ /[.!?:][)"|}]?$/; 1420$lcomma = $colno, $idxcomma = $l if $short =~ /[-,;][)"|]?$/; 1421$lright = $colno, $idxright = $l if $short =~ /[)"|]$/; 1422$lany = $colno, $idxany = $l; 1423# Tack short fragment onto previous long line. 1424if($longcut and ($nlc or $lperiod == $colno) and $colno <= 14) { 1425substr($refbuf, $longcut, 1) = " "; 1426$chunk = "", $nlc = 1 unless $nlc; 1427} # pasting small fragment onto previous line 1428} # allowing line breaks 1429$nlc = 0 if $lspace == 3; 1430if($nlc) { 1431$nlc = 1 if $lspace == 2; 1432$refbuf .= "\n"; 1433$refbuf .= "\n" if $nlc > 1; 1434$colno = 1; 1435$longcut = $lperiod = $lcomma = $lright = $lany = 0; 1436$lspace = 3 if $lspace >= 2 or $nlc > 1; 1437$lspace = 2 if $lspace < 2; 1438} 1439$refbuf .= $chunk; 1440$lspace = 1 if length $chunk; 1441$colno += $chunk =~ y/ / /; 1442$colno += 4 * ($chunk =~ y/\t/\t/); 1443} # appendWhiteSpace 1444 1445sub appendPrintable($) 1446{ 1447my $chunk = shift; 1448$refbuf .= $chunk; 1449$colno += length $chunk; 1450$lspace = 0; 1451return if $colno <= $optimalLine; 1452# Oops, line is getting long. Let's see where we can cut it. 1453my ($i, $j) = (0, 0); 1454if($lperiod > $cutLineAfter) { $i = $lperiod, $j = $idxperiod; 1455} elsif($lcomma > $cutLineAfter) { $i = $lcomma, $j = $idxcomma; 1456} elsif($lright > $cutLineAfter) { $i = $lright, $j = $idxright; 1457} elsif($lany > $cutLineAfter) { $i = $lany, $j = $idxany; 1458} 1459return unless $j; # nothing we can do about it 1460$longcut = 0; 1461$longcut = $j if $i != $lperiod; 1462substr($refbuf, $j, 1) = "\n"; 1463$colno -= $i; 1464$lperiod -= $i; 1465$lcomma -= $i; 1466$lright -= $i; 1467$lany -= $i; 1468} # appendPrintable 1469 1470# Break up a line using the above routines. 1471sub breakLine($) 1472{ 1473my $t = shift; 1474my $ud = $$t =~ s/\r$//; 1475if($lspace eq "2l") { 1476$$t =~ s/^/\r/ if length $$t; 1477$lspace = 2; 1478} 1479$$t =~ s/^/\r/ if length $$t > $paraLine; 1480my $rc = $$t =~ y/\r/\n/; 1481$ud |= $$t =~ s/[ \t]+$//gm; 1482$ud |= $$t =~ s/([^ \t\n])[ \t]{2,}/$1 /g; 1483$ud |= $$t =~ s/([^ \t\n])\t/$1 /g; 1484$ud |= $$t =~ s/ +\t/\t/g; 1485$lspace = 2 if $lspace < 2; # should never happen 1486$lspace = 3 unless length $$t; 1487return $ud if ! $rc and length $$t < $optimalLine; 1488$rc |= $ud; 1489# The following 120 comes from $paraLine. 1490$$t =~ s/(\n.{120})/\n$1/g; 1491$$t =~ s/(.{120,}\n)/$1\n/g; 1492$refbuf = ""; 1493$colno = 1; 1494$longcut = $lperiod = $lcomma = $lright = $lany = 0; 1495while($$t =~ /(\s+|[^\s]+)/g) { 1496my $chunk = $1; 1497if($chunk =~ /\s/) { appendWhiteSpace($chunk, 1); } else { appendPrintable($chunk); } 1498} 1499if($lspace < 2) { # line didn't have a \r at the end 1500# We might want to paste the last word back on. 1501appendWhiteSpace("\n", 1); 1502chop $refbuf; 1503} 1504$rc = 1 if $refbuf =~ /\n/; 1505return 0 unless $rc; 1506$$t = $refbuf; 1507$lspace = "2l" if length $refbuf > $paraLine; 1508return 1; 1509} # breakLine 1510 1511# Check the syntax of a regular expression, before we pass it to perl. 1512# If perl doesn't like it, it dies, and you've lost your edits. 1513# The first char is the delimiter -- we stop at the next delimiter. 1514# The regexp, up to the second delimiter, is returned, 1515# along with the remainder of the string in the second return variable. 1516# return (regexp, remainder), or return () if there is a problem. 1517# As usual, $errorMsg will be set. 1518# Pass the line containing the regexp, and a flag indicating 1519# left or right side of a substitute. 1520sub regexpCheck($$) 1521{ 1522my ($line, $isleft) = @_; 1523my ($c, $d); 1524# We wouldn't be here if the line was empty. 1525my $delim = substr $line, 0, 1; 1526index($valid_delim, $delim) >= 0 or 1527$errorMsg = "invalid delimiter $delim", return (); 1528$line = substr $line, 1; # remove lead delimiter 1529# Remember whether a character is "on deck", ready to be modified by * etc. 1530my $ondeck = 0; 1531my $offdeck = ' '; 1532my $exp = ""; 1533my $cc = 0; # in character class 1534my $paren = 0; # nested parentheses 1535 1536while(length $line) { 1537$c = substr $line, 0, 1; 1538if($c eq '\\') { 1539$errorMsg = "line ends in backslash", return () if length($line) == 1; 1540$d = substr $line, 1, 1; 1541$ondeck = 1; 1542$offdeck = ' '; 1543# I can't think of any reason to remove the escape \ from any character, 1544# except ()|, where we reverse the sense of escape, 1545# and \& on the right, which becomes &. 1546if(index("()|", $d) >= 0 and ! $cc and $isleft) { 1547$ondeck = 0, ++$paren if $c eq '('; 1548--$paren if $c eq ')'; 1549$errorMsg = "Unexpected closing )", return () if $paren < 0; 1550$c = ''; 1551} 1552$c = '' if $d eq '&' and ! $isleft; 1553$exp .= "$c$d"; 1554$line = substr $line, 2; 1555next; 1556} # escape character 1557 1558# Break out if you've hit the delimiter 1559$paren or $c ne $delim or last; 1560 1561# Not the delimiter, I'll assume I can copy it over to $exp. 1562# But I have to watch out for slash, which is *my* delimiter. 1563$exp .= '\\' if $c eq '/'; 1564# Then there's ()|, which I am reversing the sense of escape. 1565$exp .= '\\' if index("()|", $c) >= 0 and $isleft; 1566# Sometimes $ is interpolated when I don't want it to be. 1567# Even if there is no alphanumeric following, a bare $ seems to cause trouble. 1568# Escape it, unless followed by delimiter, or digit (rhs). 1569if($c eq '$') { 1570$exp .= '\\' if $isleft and 1571length($line) > 1 and substr($line, 1, 1) ne $delim; 1572$exp .= '\\' if ! $isleft and 1573$line !~ /^\$\d/; 1574} 1575if($c eq '^') { 1576$exp .= '\\' if $isleft and $cc != length $exp; 1577} 1578# And we have to escape every @, to avoid interpolation. 1579# Good thing we don't have to escape %, 1580# or it might mess up our % remembered rhs logic. 1581$exp .= '\\' if $c eq '@'; 1582# Turn & into $& 1583$exp .= '$' if $c eq '&' and ! $isleft; 1584# Finally push the character. 1585$exp .= $c; 1586$line = substr $line, 1; 1587 1588# Are there any syntax checks I need to make on the rhs? 1589# I don't think so. 1590next if ! $isleft; 1591 1592if($cc) { # character class 1593# All that matters here is the ] 1594$cc = 0 if $c eq ']'; 1595next; 1596} 1597 1598# Modifiers must have a preceding character. 1599# Except ? which can reduce the greediness of the others. 1600if($c eq '?' and $offdeck ne '?') { 1601$ondeck = 0; 1602$offdeck = '?'; 1603next; 1604} 1605 1606if(index("?+*", $c) >= 0 or 1607$c eq '{' and $line =~ s/^(\d+,?\d*})//) { 1608my $mod = ( $c eq '{' ? "{$1" : $c); 1609$errorMsg = "$mod modifier has no preceding character", return () if ! $ondeck; 1610$ondeck = 0; 1611$offdeck = $c; 1612$exp .= "$1" if $c eq '{'; 1613next; 1614} # modifier 1615 1616$ondeck = 1; 1617$offdeck = ' '; 1618$cc = length $exp if $c eq '['; 1619} # loop over chars in the pattern 1620 1621$cc == 0 or 1622$errorMsg = "no closing ]", return (); 1623$paren == 0 or 1624$errorMsg = "no closing )", return (); 1625if(! length $exp and $isleft) { 1626$exp = $savelhs; 1627$errorMsg = "no remembered search string", return () if ! defined $exp; 1628} 1629$savelhs = $exp if $isleft; 1630if(! $isleft) { 1631if($exp eq '%') { 1632$exp = $saverhs; 1633$errorMsg = "no remembered replacement string", return () if ! defined $exp; 1634} elsif($exp eq '\\%') { 1635$exp = '%'; 1636} 1637$saverhs = $exp; 1638} # rhs 1639 1640return ($exp, $line); 1641} # regexpCheck 1642 1643# Get the start or end of a range. 1644# Pass the line containing the address. 1645sub getRangePart($) 1646{ 1647my $line = shift; 1648my $ln = $dot; 1649if($line =~ s/^(\d+)//) { 1650$ln = $1; 1651} elsif($line =~ s/^\.//) { 1652# $ln is already set to dot 1653} elsif($line =~ s/^\$//) { 1654$ln = $dol; 1655} elsif($line =~ s/^'([a-z])//) { 1656$ln = substr $labels, (ord($1) - ord('a'))*$lnwidth, $lnwidth; 1657$errorMsg = "label $1 not set", return () if $ln eq $lnspace; 1658} elsif($line =~ m:^([/?]):) { 1659$errorMsg = "search string not found", return () if $dot == 0; 1660my $delim = $1; 1661my @pieces = regexpCheck($line, 1); 1662return () if $#pieces < 0; 1663my $exp = $pieces[0]; 1664$line = $pieces[1]; 1665my $icase = ""; # case independent 1666$icase = "i" if $caseInsensitive; 1667if($delim eq substr $line, 0, 1) { 1668$line = substr $line, 1; 1669if('i' eq substr $line, 0, 1) { 1670$line = substr $line, 1; 1671$icase = 'i'; 1672} 1673} 1674my $incr = ($delim eq '/' ? 1 : -1); 1675# Recompile the regexp after each command, but don't compile it on every line. 1676# Is there a better way to do this, besides using eval? 1677my $notfound = 0; 1678eval ' 1679while(1) { 1680$ln += $incr; 1681$ln = 1 if $ln > $dol; 1682$ln = $dol if $ln == 0; 1683last if fetchLine($ln, 1) =~ ' . 1684"/$exp/o$icase; " . 1685'$notfound = 1, last if $ln == $dot; 1686} # looking for match 1687'; # end evaluated string 1688$errorMsg = "search string not found", return () if $notfound; 1689} # search pattern 1690# Now add or subtract from this base line number 1691while($line =~ s/^([+-])(\d*)//) { 1692my $add = ($2 eq "" ? 1 : $2); 1693$ln += ($1 eq '+' ? $add : -$add); 1694} 1695$errorMsg = "line number too large", return () 1696if $ln > $dol; 1697$errorMsg = "negative line number", return () 1698if $ln < 0; 1699return ($ln, $line); 1700} # getRangePart 1701 1702# Read the data as a string from a url. 1703# Data is retrieved using http, https, or ftp. 1704# Parameters: url, post data, result buffer. 1705# You can return 0 (failure) and leave text and the buffer, 1706# and I'll report the error, and still assimilate the buffer. 1707sub readUrl($$$) 1708{ 1709my ($filename, $post, $tbuf) = @_; 1710my $rc = 1; # return code, success 1711$lfsz = 0; # local file size 1712my $rsize = 0; # size read 1713my $weburl; 1714my $scheme; 1715my $encoding = ""; 1716my $pagetype = ""; 1717my %url_desc = (); # Description of the current URL 1718 1719# I don't know if we need a full url encode or what?? 1720# This is a major kludge! I just don't understand this. 1721$filename =~ s/ /%20/g; 1722$filename =~ s/[\t\r\n]//g; 1723# I don't know what http://foo@this.that.com/file.htm means, 1724# but I see it all the time. 1725$filename =~ s,^http://[^/]*@,http://,i; 1726 1727$$tbuf = ""; # start with a clear buffer 1728$errorMsg = "too many nested frames", return 0 unless $rerouteCount; 1729--$rerouteCount; 1730 1731# split into machine, file, and post parameters 1732separate: { 1733my $oldname = $filename; # remember where we started 1734my $authinfo = ""; # login password for web sites that return error 401 1735 1736$scheme = is_url $filename; # scheme could have changed 1737$weburl = 0; 1738$weburl = 1 if $scheme =~ /^https?$/; 1739if(!length $post and $filename =~ s/^(.*?)(\?.*)$/$1/ ) { 1740$post = $2; 1741} 1742# $post should be url encoded, but sometimes it's not, and I don't know why. 1743$post =~ y/ /+/; 1744my $postfilename = ""; 1745# We assume $post starts with ? or *, if it is present at all. 1746my $meth = "GET"; 1747my $postapplic = ""; 1748if(substr($post, 0, 1) eq '*') { 1749$meth = "POST"; 1750} else { 1751$postfilename = $post; 1752} 1753print "$meth: $post\n" if $debug >= 2; 1754 1755$filename =~ s,^$scheme://,,i; 1756my $serverPort = 80; 1757$serverPort = 443 if $scheme eq 'https'; 1758$serverPort = 21 if $scheme eq 'ftp'; 1759$serverPort = 23 if $scheme eq 'telnet'; 1760my $serverPortString = ""; 1761my $server = $filename; 1762$server =~ s,/.*,,; 1763# Sometimes we need to do this -- got me hanging! 1764$server =~ s/%([0-9a-fA-F]{2})/chr hex "$1"/ge; 1765if($server =~ s/:(\d+)$//) { 1766$serverPort = $1; 1767} 1768# If a server is on port 443, assume it speaks SSL. 1769# This is a real bastardization of the html standard, 1770# but it's the explorer standard. Need I say more? 1771$scheme = 'https' if$serverPort == 443; 1772$serverPortString = ":$serverPort" if $serverPort != 80; 1773$filename =~ s,^[^/]*,,; 1774 1775# Lots of http servers can't handle /./ or /../ or // 1776$filename =~ s:/{2,}:/:g; 1777# Oops, put internal http:// back the way it was. 1778# The bug is caused by a line like this. 1779# <form method=post action=server/file?this=that&return=http://someOtherServer/blah> 1780# Because it's post, the get parameters after the ? are still here. 1781# And I just turned http:// into http:/ 1782# This is very rare, but it happened to me, so I'm trying to fix it. 1783$filename =~ s,http:/,http://,gi; 1784$filename =~ s,ftp:/,ftp://,gi; 1785$filename =~ s:^/(\.{1,2}/)+:/:; 1786$filename =~ s:/(\./)+:/:g; 17871 while $filename =~ s:/[^/]+/\.\./:/:; 1788$filename =~ s:^/(\.\./)+:/:; 1789 1790# Ok, create some more variables so we either fetch this file 1791# or convert it if it's pdf. 1792# Too bad I did all this work, and the pdf converter doesn't work for crap. 1793# Probably because pdf is irreparably inaccessible. 1794# Thanks a lot adobe! 1795my $go_server = $server; 1796my $go_port = $serverPort; 1797my $go_portString = $serverPortString; 1798my $go_file = $filename; 1799my $go_post = $post; 1800my $go_postfilename = $postfilename; 1801my $go_meth = $meth; 1802 1803if($filename =~ /\.pdf$/ and $pdf_convert) { 1804($meth eq "GET" and $scheme eq "http") or 1805$errorMsg = "online conversion from pdf to html only works when the pdf file is accessed via the http get method\ntype pr to download pdf in raw mode", return 0; 1806$go_server="access.adobe.com"; 1807$go_port = 80; 1808$go_portString = ""; 1809$go_file = "/perl/convertPDF.pl"; 1810# It would be simpler if this bloody form wer get, but it's post. 1811$go_meth = "POST"; 1812$go_post = "http://$server$serverPortString$filename$postfilename"; 1813$go_post = "*submit=submit&url=" . urlEncode($go_post); 1814$go_postfilename = ""; 1815} # redirecting to adobe to convert pdf 1816 1817if($go_meth eq "POST") { 1818$postapplic = 1819"Pragma: no-cache$eol" . 1820"Cache-Control: no-cache$eol" . 1821"Content-Type: application/x-www-form-urlencoded$eol" . 1822"Content-Length: " . (length($go_post)-1) . $eol; 1823} 1824 1825my $newname = ""; 1826 1827$authAttempt = 0; 1828makeconnect: { 1829my $chunk; 1830$lfsz = 0; 1831$$tbuf = ""; 1832$go_file = "/" if ! length $go_file; 1833%url_desc = (SCHEME => $scheme, SERVER => $go_server, PORT => $go_port, PATH => $go_file, method => $go_meth); 1834$url_desc{content} = substr($go_post, 1) if length $go_post; # Kinda silly. 1835# If you're using digest authentication with the POST method, 1836# the content needs to be digestified. 1837# This is for message integrity checking, when that option is used. 1838# Consider completely replacing $go_x variables with elements of the %url_desc 1839# hash? There is massive redundancy here. 1840my $domainCookies = ""; 1841$domainCookies = fetchCookies(\%url_desc) if $allowCookies; # Grab the cookies. 1842my $send_server = # Send this to the http server - maybe via SSL 1843"$go_meth $go_file$go_postfilename HTTP/1.0$eol" . 1844# Do we need $go_portString here??? 1845# If I put it in, paypal doesn't work. 1846"Host: $go_server$eol" . 1847(length $referer ? "Referer: $referer$eol" : "") . 1848$domainCookies . 1849$authinfo . 1850"Accept: text/*, audio/*, image/*, application/*, message/*$eol" . 1851"Accept: audio-file, postscript-file, mail-file, default, */*;q=0.01$eol" . 1852"Accept-Encoding: gzip, compress$eol" . 1853"Accept-Language: en$eol" . 1854 "User-Agent: $agent$eol" . 1855$postapplic . 1856$eol; # blank line at the end 1857 1858# send data after if post method 1859$send_server .= substr($go_post, 1) if $go_meth eq "POST"; 1860 1861if($debug >= 4) { 1862my $temp_server = $send_server; 1863$temp_server =~ y/\r//d; 1864print $temp_server; 1865} 1866 1867if($scheme eq 'http') { 1868# Connect to the http server. 1869my $iaddr = inet_aton($go_server) or 1870$errorMsg = "cannot identify $go_server on the network", return 0; 1871my $paddr = sockaddr_in($go_port, $iaddr); 1872my $proto = getprotobyname('tcp'); 1873socket(FH, PF_INET, SOCK_STREAM, $proto) or 1874$errorMsg = "cannot allocate a socket", return 0; 1875connect(FH, $paddr) or 1876$errorMsg = "cannot connect to $go_server", return 0; 1877FH->autoflush(1); 1878 1879print FH $send_server; # Send the HTTP request message 1880 1881# Now retrieve the page and update the user after every 100K of data. 1882my $last_fk = 0; 1883STDOUT->autoflush(1) if ! $doslike; 1884while(defined($rsize = sysread FH, $chunk, 100000)) { 1885print "sockread $rsize\n" if $debug >= 5; 1886$$tbuf .= $chunk; 1887$lfsz += $rsize; 1888last if $rsize == 0; 1889my $fk = int($lfsz/100000); 1890if($fk > $last_fk) { 1891print "."; 1892$last_fk = $fk; 1893} 1894last if $lfsz >= $maxfile; 1895} 1896 close FH; 1897print "\n" if $last_fk; 1898STDOUT->autoflush(0) if ! $doslike; 1899$lfsz <= $maxfile or 1900$errorMsg = "file is too large, limit 40MB", return 0; 1901defined $rsize or 1902$$tbuf = "", $errorMsg = "error reading data from the socket", return 0; 1903 1904} elsif ($scheme eq 'https') { 1905$lfsz = do_ssl($go_server, $go_port, $send_server, $tbuf); 1906Net::SSLeay::free($ssl) if defined $ssl; 1907Net::SSLeay::CTX_free($ctx) if defined $ctx; 1908return 0 unless $lfsz; 1909 1910} elsif ($scheme eq 'ftp') { 1911$lfsz = ftp_connect($go_server, $go_port, $go_file, $tbuf); 1912return 0 unless $lfsz; 1913 1914} elsif ($scheme eq "telnet") { 1915if($go_server =~ s/^([^:@]*):([^:@]*)@//) { 1916print "This URL gives a suggested username of $1 and password of $2\n" . 1917"to be used with the telnet connection you are about to establish.\n"; 1918# See RFC 1738, section 3.8. The username and password in a telnet URL 1919# are advisory. There is no standard method of logging into telnet services. 1920# I guess this is especially useful for public services, which offer guest accounts and such. 1921} 1922print "Starting telnet.\n"; 1923system("telnet $go_server $go_port"); 1924return 1; 1925 1926} else { 1927$errorMsg = "this browser cannot access $scheme URLs.", return 0; 1928} 1929 1930# We got the web page. 1931# But it might be a redirection to another url. 1932if($weburl and $$tbuf =~ /^http\/[\d.]+ 30[12]/i) { 1933if($$tbuf =~ /\nlocation:[ \t]+(.*[^\s])[ \t\r]*\n/i) { 1934$newname = $1; 1935print "relocate $newname\n" if $debug >= 2; 1936}} 1937 1938if($rc and 1939! length $newname and 1940# Some web sites serve up pages with no headers at all! 1941# aspace.whizy.com/forum/ultimate.cgi 1942$$tbuf =~ /^http/i and 1943$$tbuf =~ /^http\/[\d.]+ 404 /i) { 1944$errorMsg = "file not found on the remote server"; 1945$rc = 0; 1946} # not found 1947 1948# there is yet another way to redirect to a url 1949if($rc and $$tbuf =~ /<meta +http-equiv=["']?refresh[^<>]*(url=|\d+;)['"]?([^'">\s]+)/i) { 1950$newname = $2; 1951print "refresh $newname\n" if $debug >= 2; 1952# This is almost always an absolute url, even without the http prefix, 1953# but sometimes it's relative. Got me hanging! 1954# Here's a looser criterion for web url. 1955if($newname =~ /^[\w,-]+\.[\w,-]+\.[\w,-]/) { 1956$newname = "http://$newname"; 1957} 1958} 1959 1960# Extract information from the http header - primarily cookies. 1961$encoding = $pagetype = ""; 1962if($$tbuf =~ s/^(http\/\d.*?\r?\n\r?\n)//si) { 1963my $header = $1; 1964my @lines = split "\n", $header; 1965open BFH, ">>$ebhttp"; 1966if(defined BFH) { 1967print BFH $header; 1968close BFH; 1969} 1970$authinfo = ""; 1971while(my $hline = shift @lines) { 1972$hline =~ s/\r$//; 1973print "$hline\n" if $debug >= 4; 1974setCookies($hline, \%url_desc) if $hline =~ /^Set-Cookie:/i and $allowCookies; 1975$authinfo = parseWWWAuth($hline, \%url_desc) if $hline =~ /^WWW-Authenticate/i; 1976return 0 if $authinfo eq "x"; 1977# I shouldn't really discard things like charset=blablabla, 1978# but I don't really know what to do with it anyways. 1979$hline =~s/;.*//; 1980$encoding = lc $1 if $hline =~ /^content-encoding:\s+['"]?(\w+)['"]?\s*$/i; 1981$pagetype = lc $1 if $hline =~ /^content-type:\s+['"]?([^\s'"]+)['"]?\s*$/i; 1982} # loop over lines 1983++$authAttempt, redo makeconnect if length $authinfo; 1984} else { # http header extracted 1985if($scheme =~ /^https?$/) { 1986$errorMsg = "http response doesn't have a head-body structure"; 1987$rc = 0; 1988} else { 1989# For now, this means ftp. 1990# We could have retrieved an html page via ftp, but probably not. 1991# Turn off browse command. 1992$cmd = 'e' unless $$tbuf =~ /^<[hH!]/; 1993} 1994} 1995} # makeconnect 1996 1997# cookies that are set via http-equiv 1998# The content of the cookie must be quoted. 1999while($$tbuf =~ /<meta +http-equiv=["']?set-cookie['" ]+content="([^"]*)"/gi) { 2000setCookies($1, \%url_desc); 2001} 2002while($$tbuf =~ /<meta +http-equiv=["']?set-cookie['" ]+content='([^']*)'/gi) { 2003setCookies($1, \%url_desc); 2004} 2005 2006if($rc and $reroute and length $newname) { 2007$newname = resolveUrl("$scheme://$server$serverPortString$filename", $newname); 2008print "becomes $newname\n" if $debug >= 2; 2009if($newname ne $oldname) { 2010# It's not really diferent if one has :80 and the other doesn't. 2011# I wouldn't code this up if it didn't really happen. See www.claritin.com 2012$oldname =~ s,^HTTP://,http://,; 2013$oldname =~ s,^(http://)?([^/]*):80/,$1$2/,; 2014$oldname =~ s,^(http://)?([^/]*):80$,$1$2,; 2015$newname =~ s,^HTTP://,http://,; 2016$newname =~ s,^(http://)?([^/]*):80/,$1$2/,; 2017$newname =~ s,^(http://)?([^/]*):80$,$1$2,; 2018if($oldname ne $newname) { 2019if(--$rerouteCount) { 2020print "$newname\n" if $debug >= 1; 2021# Post method becomes get after redirection, I think. 2022# $post = "" if length $post and $newname =~ /\?[^\/]*$/; 2023$post = ""; 2024$filename = $newname; 2025redo separate; 2026} 2027$errorMsg = "too many url redirections"; 2028$rc = 0; 2029}}} # automatic url redirection 2030 2031$changeFname = "$scheme://$server$serverPortString$filename$postfilename"; 2032} # separate 2033 2034# Check for complressed data. 2035if($rc and $lfsz and length $encoding and $pagetype =~ /^text/i) { 2036print "$lfsz\ndecoding $encoding\n" if $debug >= 2; 2037my $program = ""; 2038my $csuf = ""; # compression suffix 2039$program = "zcat", $csuf = "gz" if $encoding eq "gzip"; 2040$program = "zcat", $csuf = "Z" if $encoding eq "compress"; 2041length $program or 2042$errorMsg = "unrecognized compression method", return 0; 2043$cfn = "$ebtmp.$csuf"; # compressed file name 2044open FH, ">$cfn" or 2045$errorMsg = "cannot create temp file $cfn", return 0; 2046binmode FH, ':raw' if $doslike; 2047print FH $$tbuf or 2048$errorMsg = "cannot write to temp file $cfn", return 0; 2049close FH; 2050unlink $ebtmp; 2051if(! system "$program $ebtmp.$csuf >$ebtmp 2>/dev/null") { 2052# There are web pages out there that are improperly compressed. 2053# We'll call it good if we got any data at all. 2054$errorMsg = "could not uncompress the data", return 0 unless (stat($ebtmp))[7]; 2055} 2056 2057# Read in the uncompressed data. 2058$$tbuf = ""; 2059open FH, $ebtmp or 2060$errorMsg = "cannot open the uncompressed file $ebtmp", return 0; 2061$lfsz = (stat(FH))[7]; 2062$lfsz <= $maxfile or 2063$errorMsg = "uncompressed file is too large, limit 40MB", close FH, return 0; 2064binmode FH, ':raw' if $doslike; 2065$rsize = sysread FH, $$tbuf, $lfsz; 2066close FH; 2067$rsize and $rsize == $lfsz or 2068$errorMsg = "cannot read the uncompressed data from $ebtmp", return 0; 2069unlink $ebtmp; 2070} # compressed data 2071 2072if($rc and $fetchFrames) { 2073$errorMsg = ""; 2074# This really isn't right - to do this here I mean. 2075# If a line of javascript happens to contain a frame tag 2076# I'm going to fetch that frame and put it in right here. 2077# Hopefully that won't happen. 2078# Note that the entire frame tag must be on one line. 2079$$tbuf =~ s/(<i?frame\b[^<>\0\x80-\xff]+>)/readFrame($1)/gei; 2080$rc = 0 if length $errorMsg; 2081} # looking for frames 2082 2083return $rc; 2084} # readUrl 2085 2086# Read a frame. 2087sub readFrame($) 2088{ 2089my $tag = shift; 2090my $saveFname = $changeFname; 2091my($tc, $fbuf, $src, $name); 2092 2093$tag =~ s/\bsrc *= */src=/gi; 2094$tag =~ s/\bname *= */name=/gi; 2095$tc = $tag; 2096if($tc =~ s/^.*\bsrc=//s) { 2097$src = $tc; 2098$src =~ s/ .*//s; 2099$src =~ s/^['"]//; 2100$src =~ s/['"]?>?$//; 2101if(length $src) { 2102print "fetch frame $src\n" if $debug >= 1; 2103$src = resolveUrl($saveFname, $src); 2104if($didFrame{$src}) { 2105print "already fetched\n" if $debug >= 2; 2106$changeFname = $saveFname; 2107return ""; 2108} 2109$didFrame{$src} = 1; 2110print "* $src\n" if $debug >= 1; 2111 2112$name = ""; 2113$tc = $tag; 2114if($tc =~ s/^.*\bname=//s) { 2115$tc =~ s/ .*//s; 2116$tc =~ s/^['"]//; 2117$tc =~ s/['"]?>?$//; 2118$name = urlDecode $tc if length $tc; 2119} # name attribute 2120 2121if(readUrl($src, "", \$fbuf)) { 2122# Replace the tag with the data, and some stuff prepended. 2123$name = " $name" if length $name; 2124$tag = "<H2> Frame$name: </H2>\n<base href=" . 2125urlEncode($changeFname) . ">\n"; 2126$changeFname = $saveFname; 2127return $tag.$fbuf; 2128} # frame read successfully 2129}} # src attribute present 2130 2131$changeFname = $saveFname; 2132return $tag; 2133} # readFrame 2134 2135# Adjust the map of line numbers -- we have inserted text. 2136# Also shift the downstream labels. 2137# Pass the string containing the new line numbers, and the dest line number. 2138sub addToMap($$) 2139{ 2140my ($newpiece, $dln) = @_; 2141my $offset = length($newpiece)/$lnwidth; 2142$offset > 0 or 2143die "negative offset in addToMap"; 2144my ($i, $j); 2145foreach $i (0..25) { 2146my $ln = substr($labels, $i*$lnwidth, $lnwidth); # line number 2147next if $ln eq $lnspace or $ln <= $dln; 2148substr($labels, $i*$lnwidth, $lnwidth) = 2149sprintf($lnformat, $ln + $offset); 2150} # loop over 26 labels 2151$j = ($dln+1) * $lnwidth; 2152substr($map, $j, 0) = $newpiece; 2153$dot = $dln + $offset; 2154$dol += $offset; 2155$fmode |= $changemode|$firstopmode; 2156$ubackup = 1; 2157} # addToMap 2158 2159# Fold in the text buffer (parameter) at $endRange (global variable). 2160# Assumes the text has the last newline on it. 2161sub addTextToSession($) 2162{ 2163my $tbuf = shift; # text buffer 2164return 1 unless length $$tbuf; 2165$fmode &= ~$nlmode if $endRange == $dol; 2166if(not $$tbuf =~ s/\n$// and 2167$endRange == $dol) { 2168$fmode |= $nlmode; 2169print "no trailing newline\n" if ! ($fmode&$binmode) and $cmd ne 'b'; 2170} # missing newline 2171my $j = $#text; 2172my $newpiece = ""; 2173# At this point $tbuf could be empty, whence split doesn't work properly. 2174# This only happens when reading a file containing one blank line. 2175if(length $$tbuf) { 2176push @text, split "\n", $$tbuf, -1; 2177} else { 2178push @text, ""; 2179} 2180$#text = $j, return 0 if lineLimit 0; 2181$newpiece .= sprintf($lnformat, $j) while ++$j <= $#text; 2182addToMap($newpiece, $endRange); 2183return 1; 2184} # addTextToSession 2185 2186# Read a file into memory. 2187# As described earlier, the lines are appended to @text. 2188# Then the indexes for those lines are pasted into $map, 2189# using addToMap(). 2190# Check to see if the data is binary, and set $fmode accordingly. 2191# Parameters are the filename or URL, and the post data (for URLs). 2192sub readFile($$) 2193{ 2194my ($filename, $post) = @_; 2195my $tbuf; # text buffer 2196my $rc = 1; # return code, success 2197$filesize = 0; 2198my $rsize = 0; # size read 2199my $j; 2200 2201if(is_url $filename) { 2202$rerouteCount = 24; 2203%didFrame = (); 2204$rc = readUrl($filename, $post, \$tbuf); 2205$filesize = length $tbuf; 2206return 0 unless $rc + $filesize; 2207} else { # url or file 2208 2209open FH, "<$filename" or 2210$errorMsg = "cannot open $filename, $!", return 0; 2211 2212# Check for directory here 2213if(-d FH) { 2214close FH; 2215$j = $filename; 2216$j =~ s,/$,,; 2217$j .= "/*"; 2218my @dirlist; 2219if($j =~ / /) { 2220@dirlist = glob '"'.$j.'"'; 2221} else { 2222@dirlist = glob $j; 2223} 2224if($#dirlist < 0) { 2225$dot = $endRange; 2226$filesize = 0; 2227return $rc; 2228} # empty directory 2229$dirname = $j; 2230$dirname =~ s/..$//; # get rid of /* 2231return 0 if lineLimit($#dirlist + 1); 2232$filesize = 0; 2233$tbuf = ""; 2234$j = $dirSufStart; 2235substr($labels, $j, 2) = " "; 2236foreach (@dirlist) { 2237my $entry = $_; 2238$entry =~ s,.*/,,; # leave only the file 2239$entry =~ s/\n/\t/g; 2240my $suf = ""; 2241$suf .= '@' if -l; 2242if(! -f) { 2243$suf .= '/' if -d; 2244$suf .= '|' if -p; 2245$suf .= '*' if -b; 2246$suf .= '<' if -c; 2247$suf .= '^' if -S; 2248} # not a regular file 2249$filesize += length($entry) + length($suf) + 1; 2250if($dol) { 2251$entry .= $suf; 2252} else { 2253$suf .= " "; 2254$j += 2; 2255substr($labels, $j, 2) = substr($suf, 0, 2); 2256} 2257$tbuf .= "$entry\n"; 2258} 2259$dol or $fmode = $dirmode, print "directory mode\n"; 2260return addTextToSession(\$tbuf); 2261} # directory 2262 2263-f FH or $errorMsg = "$filename is not a regular file", close FH, return 0; 2264$filesize = (stat(FH))[7]; 2265if(! $filesize) { 2266close FH; 2267$dot = $endRange; 2268$filesize = 0; 2269return $rc; 2270} # empty file 2271$filesize <= $maxfile or 2272$errorMsg = "file is too large, limit 40MB", close FH, return 0; 2273binmode FH, ':raw' if $doslike; 2274$rsize = sysread(FH, $tbuf, $filesize) if $filesize; 2275close FH; 2276$rsize == $filesize or 2277$errorMsg = "cannot read the contents of $filename,$!", return 0; 2278} # reading url or regular file 2279 2280my $bincount = $tbuf =~ y/\0\x80-\xff/\0\x80-\xff/; 2281if($bincount*4 - 10 < $filesize) { 2282# A text file - remove crlf in the dos world. 2283$tbuf =~ s/\r\n/\n/g if $doslike; 2284} elsif(! ($fmode&$binmode)) { 2285# If it wasn't before, it is now a binary file. 2286print "binary data\n"; 2287$fmode |= $binmode; 2288} 2289 2290$rc &= addTextToSession(\$tbuf); 2291return $rc; 2292} # readFile 2293 2294# Write a range into a file. 2295# Pass the mode and filename. 2296sub writeFile($$) 2297{ 2298my ($mode, $filename) = @_; 2299$errorMsg = "cannot write to a url", return 0 if is_url($filename); 2300$dol or $errorMsg = "writing an empty file", return 0; 2301open FH, "$mode$filename" or 2302$errorMsg = "cannot create $filename, $!", return 0; 2303$filesize = 0; 2304binmode FH, ':raw' if $doslike and $fmode&$binmode; 2305if($startRange) { 2306foreach my $i ($startRange..$endRange) { 2307my $nl = ($fmode&$nlmode && $i == $dol ? "" : "\n"); 2308my $suf = dirSuffix($i); 2309my $outline = fetchLine($i, 1).$suf.$nl; 2310print FH $outline or 2311$errorMsg = "cannot write to $filename, $!", close FH, return 0; 2312$filesize += length $outline; 2313} # loop over range 2314} # nonempty file 2315close FH; 2316# This is not an undoable operation, nor does it change data. 2317# In fact the data is "no longer modified" if we have written all of it. 2318$fmode &= ~$changemode if $dol == 0 or $startRange == 1 and $endRange == $dol; 2319return 1; 2320} # writeFile 2321 2322# Read from another context. 2323# Pass the context number. 2324sub readContext($) 2325{ 2326my $cx = shift; 2327cxCompare($cx) and cxActive($cx) or return 0; 2328my $dolcx = $dol[$cx]; 2329$filesize = 0; 2330if($dolcx) { 2331return 0 if lineLimit $dolcx; 2332$fmode &= ~$nlmode if $endRange == $dol; 2333my $newpiece = ""; 2334foreach my $i (1..$dolcx) { 2335my $inline = fetchLineContext($i, 1, $cx); 2336my $suf = ""; 2337if($fmode[$cx] & $dirmode) { 2338$suf = substr($labels[$cx], $dirSufStart + 2*$i, 2); 2339$suf =~ s/ +$//; 2340} 2341$inline .= $suf; 2342push @text, $inline; 2343$newpiece .= sprintf $lnformat, $#text; 2344$filesize += length($inline) + 1; 2345} # end loop copying lines 2346addToMap($newpiece, $endRange); 2347if($fmode[$cx]&$nlmode) { 2348--$filesize; 2349$fmode |= $nlmode if $endRange == $dol; 2350} 2351$fmode |= $binmode, print "binary data\n" 2352if $fmode[$cx]&$binmode and ! ($fmode&$binmode); 2353} # nonempty buffer 2354return 1; 2355} # readContext 2356 2357# Write to another context. 2358# Pass the context number. 2359sub writeContext($) 2360{ 2361my $cx = shift; 2362my $dolcx = $endRange - $startRange + 1; 2363$dolcx = 0 if ! $startRange; 2364return 0 if ! cxCompare($cx) or !cxReset($cx, 1) or lineLimit $dolcx; 2365my $mapcx = $lnspace; 2366$filesize = 0; 2367if($startRange) { 2368foreach my $i ($startRange..$endRange) { 2369$outline = fetchLine($i, 0); 2370$outline .= dirSuffix($i); 2371push @text, $outline; 2372$mapcx .= sprintf $lnformat, $#text; 2373$filesize += length($outline) + 1; 2374} # end loop copying lines 2375$fmode[$cx] = $fmode & ($binmode|$browsemode); 2376$fmode[$cx] |= $nlmode, --$filesize 2377if $fmode&$nlmode and $endRange == $dol; 2378} # nonempty file 2379$map[$cx] = $mapcx; 2380$dot[$cx] = $dol[$cx] = $dolcx; 2381$factive[$cx] = 1; 2382$fname[$cx] = ""; 2383$btags[$cx] = $btags; 2384return 1; 2385} # writeContext 2386 2387# Move or copy a block of text. 2388sub moveCopy() 2389{ 2390$dest++; # more convenient 2391$endr1 = $endRange+1; # more convenient 2392$dest <= $startRange or 2393$dest >= $endr1 or 2394$errorMsg = "destination lies inside the block to be moved or copied", return 0; 2395if($cmd eq 'm' and 2396($dest == $endr1 or $dest == $startRange)) { 2397$errorMsg = "no change" if ! $inglob; 2398return 0; 2399} 2400my $starti = $startRange*$lnwidth; 2401my $endi = $endr1*$lnwidth; 2402my $desti = $dest * $lnwidth; 2403my $offset = $endr1 - $startRange; 2404my ($i, $j); 2405# The section of the map that represents the range. 2406my $piece_r = substr $map, $starti, $endi-$starti; 2407my $piece_n = ""; # the new line numbers, if the text is copied. 2408if($cmd eq 't') { 2409return 0 if lineLimit $offset; 2410for($j=0; $j<length($piece_r); $j+=$lnwidth) { 2411push @text, 2412$text[substr($piece_r, $j, $lnwidth1)]; 2413$piece_n .= sprintf $lnformat, $#text; 2414} 2415substr($map, $desti, 0) = $piece_n; 2416} elsif($dest < $startRange) { 2417substr($map, $starti, $endi-$starti) = ""; 2418substr($map, $desti, 0) = $piece_r; 2419} else { 2420substr($map, $desti, 0) = $piece_r; 2421substr($map, $starti, $endi-$starti) = ""; 2422} 2423if($fmode&$nlmode) { 2424$fmode &= ~$nlmode if $dest > $dol; 2425$fmode &= ~$nlmode if $endRange == $dol and $cmd eq 'm'; 2426} 2427# Now for the labels 2428my ($lowcut, $highcut, $p2len); 2429if($dest <= $startRange) { 2430$lowcut = $dest; 2431$highcut = $endr1; 2432$p2len = $startRange - $dest; 2433} else { 2434$lowcut = $startRange; 2435$highcut = $dest; 2436$p2len = $dest - $endr1; 2437} 2438foreach $i (0..25) { 2439my $ln = substr($labels, $i*$lnwidth, $lnwidth); # line number 2440next if $ln eq $lnspace or $ln < $lowcut; 2441if($ln >= $highcut) { 2442$ln += $offset if $cmd eq 't'; 2443} elsif($ln >= $startRange and $ln <= $endRange) { 2444$ln += ($dest < $startRange ? -$p2len : $p2len) if $cmd eq 'm'; 2445$ln += $offset if $cmd eq 't' and $dest < $startRange; 2446} elsif($dest < $startRange) { 2447$ln += $offset; 2448} else { 2449$ln -= $offset if $cmd eq 'm'; 2450} 2451substr($labels, $i*$lnwidth, $lnwidth) = sprintf $lnformat, $ln; 2452} # loop over labels 2453$dol += $offset if $cmd eq 't'; 2454$dot = $endRange; 2455$dot += ($dest < $startRange ? -$p2len : $p2len) if $cmd eq 'm'; 2456$dot = $dest + $offset - 1 if $cmd eq 't'; 2457$fmode |= $changemode|$firstopmode; 2458$ubackup = 1; 2459return 1; 2460} # moveCopy 2461 2462# Delete a block of text. 2463# Pass the range to delete. 2464sub delText($$) 2465{ 2466my ($sr, $er) = @_; # local start and end range 2467my ($i, $j); 2468$fmode &= ~$nlmode if $er == $dol; 2469$j = $er - $sr + 1; 2470substr($map, $sr*$lnwidth, $j*$lnwidth) = ""; 2471# Move the labels. 2472foreach $i (0..25) { 2473my $ln = substr($labels, $i*$lnwidth, $lnwidth); # line number 2474next if $ln eq $lnspace or $ln < $sr; 2475substr($labels, $i*$lnwidth, $lnwidth) = 2476($ln <= $er ? $lnspace : (sprintf $lnformat, $ln - $j)); 2477} # loop over labels 2478$dol -= $j; 2479$dot = $sr; 2480--$dot if $dot > $dol; 2481$fmode |= $changemode|$firstopmode; 2482$ubackup = 1; 2483return 1; 2484} # delText 2485 2486# Delete files from a directory as you delete lines. 2487# It actually moves them to your trash bin. 2488sub delFiles() 2489{ 2490$dw or $errorMsg = "directories are readonly, type dw to enable directory writes", return 0; 2491$dw == 2 or length $rbin or 2492$errorMsg = "could not create .trash under your home directory, to hold the deleted files", return 0; 2493my $ln = $startRange; 2494my $cnt = $endRange - $startRange + 1; 2495while($cnt--) { 2496my $f = fetchLine($ln, 0); 2497if($dw == 2 or dirSuffix($ln) =~ /^@/) { 2498unlink "$dirname/$f" or 2499$errorMsg = "could not remove $f, $!", return 0; 2500} else { 2501rename "$dirname/$f", "$rbin/$f" or 2502$errorMsg = "Could not move $f to the trash bin, $!, set dx mode to actually remove the file", return 0; 2503} 2504delText($ln, $ln); 2505substr($labels, $dirSufStart + 2*$ln, 2) = ""; 2506} 2507return 1; 2508} # delFiles 2509 2510# Join lines from startRange to endRange. 2511sub joinText() 2512{ 2513$errorMsg = "cannot join one line", return 0 if $startRange == $endRange; 2514return 0 if lineLimit 1; 2515my ($i, $line); 2516$line = ""; 2517foreach $i ($startRange..$endRange) { 2518$line .= ' ' if $cmd eq 'J' and $i > $startRange; 2519$line .= fetchLine($i, 0); 2520} 2521push @text, $line; 2522substr($map, $startRange*$lnwidth, $lnwidth) = sprintf $lnformat, $#text; 2523delText($startRange+1, $endRange); 2524$dot = $startRange; 2525return 1; 2526} # joinText 2527 2528# Substitute text on the lines in $startRange through $endRange. 2529# We could be changing the text in an input field. 2530# If so, we'll call infReplace(). 2531# Also, we might be indirectory mode, whence we must rename the file. 2532sub substituteText($) 2533{ 2534my $line = shift; 2535my $whichlink = ""; 2536$whichlink = $1 if $line =~ s/^(\d+)//; 2537length $line or 2538$errorMsg = "no regular expression after $icmd", return -1; 2539if($fmode&$dirmode) { 2540$dw or $errorMsg = "directories are readonly, type dw to enable directory writes", return -1; 2541} 2542my ($i, $j, $exp, $rhs, $qrhs, $lastSubst, @pieces, $blmode); 2543 2544if($line ne "bl") { 2545$blmode = 0; 2546@pieces = regexpCheck($line, 1); 2547return -1 if $#pieces < 0; 2548$exp = $pieces[0]; 2549$line = $pieces[1]; 2550length $line or $errorMsg = "missing delimiter", return -1; 2551@pieces = regexpCheck($line, 0); 2552return -1 if $#pieces < 0; 2553$rhs = $pieces[0]; 2554$line = $pieces[1]; 2555} else { $blmode = 1, $lspace = 3; } 2556 2557my $gflag = ""; 2558my $nflag = 0; 2559my $iflag = ""; 2560$iflag = "i" if $caseInsensitive; 2561$subprint = 1; # default is to print the last line substituted 2562$lastSubst = 0; 2563 2564if(! $blmode) { 2565if(length $line) { 2566$subprint = 0; 2567# necessarily starts with the delimiter 2568substr($line, 0, 1) = ""; 2569while(length $line) { 2570$gflag = 'g', next if $line =~ s/^g//; 2571$subprint = 2, next if $line =~ s/^p//; 2572$iflag = 'i', next if $line =~ s/^i//; 2573if($line =~ s/^(\d+)//) { 2574! $nflag or $errorMsg = "multiple numbers after the third delimiter", return -1; 2575$nflag = $1; 2576$nflag > 0 and $nflag <= 999 or 2577$errorMsg = "numeric suffix out of range, please use [1-999]", return -1; 2578next; 2579} # number 2580$errorMsg = "unexpected substitution suffix after the third delimiter"; 2581return -1; 2582} # loop gathering suffix flags 2583! $gflag or ! $nflag or 2584$errorMsg = "cannot use both a numeric suffix and the `g' suffix simultaneously", return -1; 2585# s/x/y/1 is very inefficient. 2586$nflag = 0 if $nflag == 1; 2587} # closing delimiter 2588 2589$qrhs = $rhs; # quote-fixed right hand side 2590if($rhs =~ /^[ul]c$/) { 2591$qrhs = "$qrhs \$&"; 2592$iflag .= 'e' if !$nflag; 2593} elsif($rhs eq "ue") { 2594$qrhs = "unescape \$&"; 2595$iflag .= 'e' if !$nflag; 2596} elsif($rhs eq "mc") { 2597$qrhs = "mixCase \$&"; 2598$iflag .= 'e' if !$nflag; 2599} else { 2600if($nflag) { 2601$qrhs =~ s/"/\\"/g; 2602$qrhs = '"'.$qrhs.'"'; 2603} 2604} 2605 2606# I don't understand it, but $&[x] means something to perl. 2607# So when I replace j with &[x], becomeing $&[x], it blows up. 2608# Thus I escape open brackets and braces in the rhs. 2609# Hopefully you won't escape them on the command line - you have no reason to. 2610# If you do they'll be doubly escaped, and that's bad. 2611$qrhs =~ s/([\[{])/\\$1/g; # } 2612} else { 2613$subprint = 0; 2614} # blmode or not 2615 2616# Substitute the input fields first. 2617if($cmd eq 'I') { 2618my $yesdot = 0; 2619my $foundFields = 0; 2620foreach $i ($startRange..$endRange) { 2621my $rc = infIndex($i, $whichlink); 2622next unless $rc; 2623$foundFields = 1; 2624$rc > 0 or $dot = $i, $inglob = 0, return -1; 2625my $newinf = $inf; 2626if(!$nflag) { 2627eval '$rc = $newinf =~ ' . 2628"s/$exp/$qrhs/$iflag$gflag; "; 2629} else { 2630$j = 0; 2631eval '$newinf =~ ' . 2632"s/$exp/++\$j == $nflag ? $qrhs : \$&/ge$iflag; "; 2633$rc = ($j >= $nflag); 2634} 2635next unless $rc; 2636$dot = $i; 2637infReplace($newinf) or return -1; 2638$yesdot = $dot; 2639} # loop over lines 2640if(! $yesdot) { 2641if(!$inglob) { 2642$errorMsg = "no match" if $foundFields; 2643} 2644return 0; 2645} 2646dispLine($yesdot) if $subprint == 2 or ! $inglob and $subprint == 1; 2647return 1; 2648} # input fields 2649 2650# Not an input field, just text, so replace it. 2651# Once again, use the eval construct. 2652# This time we might be substituting across an entire range. 2653@pieces = (); 2654$errorMsg = ""; 2655eval ' 2656for($i=$startRange; $i<=$endRange; ++$i) { 2657my $temp = fetchLine($i, 0);' . 2658($blmode ? 'my $subst = breakLine(\$temp);' : 2659(!$nflag ? 2660'my $subst = $temp =~ ' . 2661"s/$exp/$qrhs/o$iflag$gflag; " 2662: 2663'my $subst = 0; 2664my $k = 0; 2665$temp =~ ' . 2666"s/$exp/++\$k == $nflag ? $qrhs : \$&/oge$iflag; " . 2667'$subst = ($k >= $nflag); ' 2668)) . 2669'next unless $subst; 2670if($fmode&$dirmode) { 2671if($temp =~ m,[/\n],) { 2672$errorMsg = "cannot embed slash or newline in a directory name"; 2673$inglob = 0; 2674last; 2675} 2676my $dest = "$dirname/$temp"; 2677my $src = fetchLine($i, 0); 2678$src = "$dirname/$src"; 2679if($src ne $dest) { 2680if(-e $dest or -l $dest) { 2681$errorMsg = "destination file already exists"; 2682$inglob = 0; 2683last; 2684} 2685rename $src, $dest or 2686$errorMsg = "cannot move file to $temp", $inglob = 0, last; 2687} # source and dest are different 2688} # directory 2689@pieces = split "\n", $temp, -1; 2690@pieces = ("") if $temp eq ""; 2691last if lineLimit $#pieces+1; 2692$j = $#text; 2693push @text, @pieces; 2694@pieces = (); 2695substr($map, $i*$lnwidth, $lnwidth) = sprintf $lnformat, ++$j; 2696if($j < $#text) { 2697my $newpiece = ""; 2698$newpiece .= sprintf $lnformat, $j while ++$j <= $#text; 2699addToMap($newpiece, $i); 2700$j = length($newpiece) / $lnwidth; 2701$endRange += $j; 2702$i += $j; 2703} 2704dispLine($i) if $subprint == 2; 2705$lastSubst = $i; 2706$fmode |= $changemode|$firstopmode; 2707$ubackup = 1; 2708last if $intFlag; 2709} 2710'; # eval string 2711return 0 if length $errorMsg; 2712if(! $lastSubst) { 2713$errorMsg = ($blmode ? "no change" : "no match") if ! $inglob; 2714return 0; 2715} 2716$dot = $lastSubst; 2717dispLine($dot) if $subprint == 1 and ! $inglob; 2718if($intFlag and ! $inglob) { 2719$errorMsg = $intMsg, return 0; 2720} 2721return 1; 2722} # substituteText 2723 2724# Follow a hyperlink to another web page. 2725sub hyperlink($) 2726{ 2727my $whichlink = shift; 2728$cmd = 'b'; 2729$errorMsg = "cannot use the g$whichlink command in directory mode", return 0 if $fmode&$dirmode; 2730$startRange == $endRange or 2731$errorMsg = "go command does not expect a range", return 0; 2732 2733my $h; # hyperlink tag 2734my @links = (); # links on this line 2735my @bref = (); # baseref values 2736my ($j, $line, $href); 2737 2738if($fmode&$browsemode) { 2739$line = fetchLine $endRange, 0; 2740while($line =~ /\x80([\x85-\x8f]+){/g) { 2741$j = revealNumber $1; 2742$h = $$btags[$j]; 2743$href = $$h{href}; 2744$errorMsg = "hyperlink found without a url?? internal error", return 0 unless defined $href; 2745push @links, $href; 2746push @bref, $$h{bref}; 2747} # loop 2748} # browse mode 2749 2750if($#links < 0) { 2751$line = fetchLine $endRange, 1; 2752stripWhite \$line; 2753$line =~ s/[\s"']+/ /g; 2754if(length $line) { 2755while($line =~ /([^ ]+)/g) { 2756$href = $1; 2757$href =~ s/^[^\w]+//; 2758$href =~ s/[^\w]+$//; 2759if(is_url $href) { 2760push @links, $href; 2761} else { 2762$href =~ s/^mailto://i; 2763push @links, "mailto:$href" if $href =~ /^[\w.,-]+@[\w,-]+\.[\w,.-]+$/; 2764} 2765} 2766} # loop over words 2767} # looking for url in text mode 2768 2769$j = $#links + 1; 2770$j or $errorMsg = "no links present", return 0; 2771length $whichlink or $j == 1 or 2772$errorMsg = "multiple links, please use g [1,$j]", return 0; 2773$whichlink = 1 if ! length $whichlink; 2774if($whichlink == 0 or $whichlink > $j) { 2775$errorMsg = $j > 1 ? 2776"invalid link, please use g [1,$j]" : 2777"this line only has one link"; 2778return 0; 2779} 2780--$whichlink; 2781$href = $links[$whichlink]; 2782if($href =~ s/^mailto://i) { 2783$cmd = 'e'; 2784return 1, "\x80mail\x80$href"; 2785} # mailto 2786$href =~ /^javascript:/i and 2787$errorMsg = "sorry, this link calls a javascript function", return 0; 2788return 1, $href if $href =~ /^#/; 2789$line = resolveUrl(($#bref >= 0 ? $bref[$whichlink] : ""), $href); 2790print "* $line\n"; 2791return 1, $line; 2792} # hyperlink 2793 2794# Follow an internal link to a section of the document. 2795sub findSection($) 2796{ 2797my $section = shift; 2798foreach my $i (1..$dol) { 2799my $t = fetchLine $i, 0; 2800while($t =~ /\x80([\x85-\x8f]+)\*/g) { 2801my $j = revealNumber $1; 2802my $h = $$btags[$j]; 2803return $i if $$h{name} eq $section; 2804} 2805} 2806return 0; 2807} # findSection 2808 2809# Return the number of unbalanced punctuation marks at the start and end of the line. 2810sub unbalanced($$$) 2811{ 2812my ($c, $d, $ln) = @_; 2813my $curline = fetchLine($ln, 1); 2814# Escape these characters, so we know they are literal. 2815$c = "\\$c"; 2816$d = "\\$d"; 2817while($curline =~ s/$c[^$c$d]*$d//) { ; } 2818my $forward = $curline =~ s/$c//g; 2819$forward = 0 if $forward eq ""; 2820my $backward = $curline =~ s/$d//g; 2821$backward = 0 if $backward eq ""; 2822return $backward, $forward; 2823} # unbalanced 2824 2825# Find the line that balances the unbalanced punctuation. 2826sub balanceLine($) 2827{ 2828my $line = shift; 2829my ($c, $d); # balancing characters 2830my $openlist = "{([<`"; 2831my $closelist = "})]>'"; 2832my $alllist = "{}()[]<>`'"; 2833my $level = 0; 2834my ($i, $direction, $forward, $backward); 2835 2836if(length $line) { 2837$line =~ /^[\{\}\(\)\[\]<>`']$/ or 2838$errorMsg = "you must specify exactly one of $alllist after the B command", return 0; 2839$c = $line; 2840if(index($openlist, $c) >= 0) { 2841$d = substr $closelist, index($openlist, $c), 1; 2842$direction = 1; 2843} else { 2844$d = $c; 2845$c = substr $openlist, index($closelist, $d), 1; 2846$direction = -1; 2847} 2848($backward, $forward) = unbalanced($c, $d, $endRange); 2849if($direction > 0) { 2850($level = $forward) or 2851$errorMsg = "line does not contain an open $c", return 0; 2852} else { 2853($level = $backward) or 2854$errorMsg = "line does not contain an open $d", return 0; 2855} 2856} else { # character specified by the user or not? 2857# Look for anything unbalanced, probably a brace. 2858foreach $i (0..2) { 2859$c = substr $openlist, $i, 1; 2860$d = substr $closelist, $i, 1; 2861($backward, $forward) = unbalanced($c, $d, $endRange); 2862! $backward or ! $forward or 2863$errorMsg = "both $c and $d are unbalanced on this line, try B$c or B$d", return 0; 2864($level = $backward + $forward) or next; 2865$direction = 1; 2866$direction = -1 if $backward; 2867last; 2868} 2869$level or 2870$errorMsg = "line does not contain an unbalanced brace, parenthesis, or bracket", return 0; 2871} # explicit character passed in, or look for one 2872 2873my $selected = ($direction > 0) ? $c : $d; 2874 2875# Now search for the balancing line. 2876$i = $endRange; 2877while(($i += $direction) > 0 and $i <= $dol) { 2878($backward, $forward) = unbalanced($c, $d, $i); 2879if($direction > 0 and $backward >= $level or 2880$direction < 0 and $forward >= $level) { 2881$dot = $i; 2882dispLine($dot); 2883return 1; 2884} 2885$level += ($forward-$backward) * $direction; 2886} # loop over lines 2887 2888$errorMsg = "cannot find the line that balances $selected"; 2889return 0; 2890} # balanceLine 2891 2892# Apply a regular expression to each line, and then execute 2893# a command for each matching, or nonmatching, line. 2894# This is the global feature, g/re/p, which gives us the word grep. 2895sub doGlobal($) 2896{ 2897my $line = shift; 2898my ($i, $j, $exp, @pieces); 2899 2900length $line or 2901$errorMsg = "no regular expression after $icmd", return 0; 2902@pieces = regexpCheck($line, 1); 2903return 0 if $#pieces < 0; 2904$exp = $pieces[0]; 2905$line = $pieces[1]; 2906length $line or 2907$errorMsg = "missing delimiter", return 0; 2908$line =~ s/^.(i?)\s*//; 2909my $iflag = $1; 2910$iflag = "i" if $caseInsensitive; 2911 2912# Clean up any previous stars. 2913substr($map, $_*$lnwidth+$lnwidth1, 1) = ' ' foreach (1.. $dol); 2914 2915# Find the lines that match the pattern. 2916my $gcnt = 0; # global count 2917eval ' 2918for($i=$startRange, $j=$i*$lnwidth+$lnwidth1; $i<=$endRange; ++$i, $j+=$lnwidth) { 2919substr($map, $j, 1) = "*", ++$gcnt if 2920fetchLine($i, 1)' . 2921($cmd eq 'g' ? ' =~ ' : ' !~ ') . 2922"/$exp/o$iflag; }"; 2923$gcnt or $errorMsg = 2924($cmd eq 'g' ? "no lines match the g pattern" : "all lines match the v pattern"), 2925return 0; 2926 2927# Now apply $line to every line with a * 2928$inglob = 1; 2929$errorMsg = ""; 2930$line = 'p' if ! length $line; 2931my $origdot = $dot; 2932my $yesdot = 0; 2933my $nodot = 0; 2934my $stars = 1; 2935global:while($gcnt and $stars) { 2936$stars = 0; 2937for($i=1; $i<=$dol; ++$i) { 2938last global if $intFlag; 2939next unless substr($map, $i*$lnwidth+$lnwidth1, 1) eq '*'; 2940$stars = 1,--$gcnt; 2941substr($map, $i*$lnwidth+$lnwidth1, 1) = ' '; 2942$dot = $i; # ready to run the command 2943if(evaluate($line)) { 2944$yesdot = $dot; 2945--$i if $ubackup; # try this line again, in case we deleted or moved it 2946} else { 2947# Subcommand might turn global flag off. 2948$nodot = $dot, $yesdot = 0, last global if ! $inglob; 2949} 2950} 2951} 2952$inglob = 0; 2953# yesdot could be 0, even upon success, if all lines are deleted via g/re/d 2954if($yesdot or ! $dol) { 2955$dot = $yesdot; 2956dispLine($dot) if ($cmd eq 's' or $cmd eq 'I') and $subprint == 1; 2957} elsif($nodot) { 2958$dot = $nodot; 2959} else { 2960$dot = $origdot; 2961$errorMsg = "none of the marked lines were successfully modified" if $errorMsg eq ""; 2962} 2963$errorMsg = $intMsg if $errorMsg eq "" and $intFlag; 2964return ! length $errorMsg; 2965} # doGlobal 2966 2967# Reveal the links to other web pages, or the email links. 2968sub showLinks() 2969{ 2970my ($i, $j, $h, $href, $line); 2971my $addrtext = ""; 2972if($fmode&$browsemode) { 2973$line = fetchLine $endRange, 0; 2974while($line =~ /\x80([\x85-\x8f]+){(.*?)}/g) { 2975$j = revealNumber $1; 2976$i = $2; 2977$h = $$btags[$j]; 2978$href = $$h{href}; 2979$href = "" unless defined $href; 2980if($href =~ s/^mailto://i) { 2981$addrtext .= "$i:$href\n"; 2982} else { 2983$href = resolveUrl($$h{bref}, $href); 2984$addrtext .= "<A HREF=$href>\n$i\n</A>\n"; 2985} 2986} # loop 2987} # browse mode 2988if(! length $addrtext) { 2989length $fname or $errorMsg = "no file name", return 0; 2990if(is_url($fname)) { 2991$href = $fname; 2992$href =~ s/\.browse$//; 2993$j = $href; 2994$j =~ s,^https?://,,i; 2995$j =~ s,.*/,,; 2996$addrtext = "<A HREF=$href>\n$j\n</A>\n"; 2997} else { 2998$addrtext = $fname."\n"; 2999} 3000} 3001$addrtext =~ s/\n$//; 3002$j = $#text; 3003push @text, split "\n", $addrtext, -1; 3004$#text = $j, return 0 if lineLimit 0; 3005$h = cxPack(); 3006cxReset($context, 0) or return 0; 3007$$h{backup} = $backup if defined $backup; 3008$backup = $h; 3009print((length($addrtext)+1)."\n"); 3010$dot = $dol = $#text - $j; 3011my $newpiece = $lnspace; 3012$newpiece .= sprintf($lnformat, $j) while ++$j <= $#text; 3013$map = $newpiece; 3014return 1; 3015} # showLinks 3016 3017# All other editors let you stack and undo hundreds of operations. 3018# If I'm writing a new editor, why don't I do that? 3019# I don't know; guess I don't have the time. 3020# And in my 20 years experience, I have rarely felt the need 3021# to undo several operations. 3022# I'm usually undoing the last typo, and that's it. 3023# So I allow you to undo the last operation, only. 3024# Get ready for a possible undo command. 3025sub readyUndo() 3026{ 3027return if $fmode & $dirmode; 3028$savedot = $dot, $savedol = $dol; 3029$savemap = $map, $savelabels = $labels; 3030} # readyUndo 3031 3032sub goUndo() 3033{ 3034# swap, so we can undo our undo. I do this alot. 3035my $temp; 3036$temp = $ dot, $dot = $lastdot, $lastdot = $temp; 3037$temp = $ dol, $dol = $lastdol, $lastdol = $temp; 3038$temp = $ map, $map = $lastmap, $lastmap = $temp; 3039$temp = $ labels, $labels = $lastlabels, $lastlabels = $temp; 3040} # goUndo 3041 3042# Replace labels with their lines in shell escapes. 3043sub expandLabeledLine($) 3044{ 3045my$x = shift; 3046my $n = ord($x) - ord('a'); 3047my $ln = substr $labels, $n*$lnwidth, $lnwidth; 3048$ln ne $lnspace or 3049$errorMsg = "label $x not set", return ""; 3050return fetchLine($ln, 1); 3051} # expandLabeledLine 3052 3053# Run a shell escape 3054sub shellEscape($) 3055{ 3056my $line = shift; 3057# Expand 'a through 'z labels 3058$errorMsg = ""; 3059$line =~ s/\B'([a-z])\b/expandLabeledLine($1)/ge; 3060return 0 if length $errorMsg; 3061$line =~ s/'_/$fname/g; 3062$line =~ s/'\./fetchLine($dot,1)/ge; 3063if($doslike) { 3064# Just run system and hope for the best. 3065system $line; 3066} else { 3067# Unix has a concept of shells. 3068my $shell = $ENV{SHELL}; 3069$shell = "/bin/sh" if ! defined $shell; 3070if(length $line) { 3071system $shell, "-c", $line; 3072} else { 3073system $shell; 3074} 3075} # dos or unix 3076print "ok\n"; 3077return 1; 3078} # shellEscape 3079 3080# Implement various two letter commands. 3081# Most of these set and clear modes. 3082sub twoLetter($) 3083{ 3084my $line = shift; 3085my ($i, $j); 3086 3087if($line eq "qt") { exit 0; } 3088 3089if($line =~ s/^cd\s+// or $line =~ s/^cd$//) { 3090$cmd = 'e'; # so error messages are printed 3091if(length $line) { 3092my $temppath = `pwd`; 3093chomp $temppath; 3094if($line eq "-") { 3095$errorMsg = "you have no previous directory", return 0 unless defined $oldpath; 3096chdir $oldpath or $errorMsg = "cannot change to previous directory $oldpath", return 0; 3097} else { 3098$line = envFile($line); 3099return 0 if length $errorMsg; 3100chdir $line or $errorMsg = "invalid directory", return 0; 3101} 3102$oldpath = $temppath; 3103} 3104print `pwd`; 3105return 1; 3106} 3107 3108if($line eq "rf") { 3109$cmd = 'e'; 3110if($fmode & $browsemode) { 3111$cmd = 'b'; 3112$fname =~ s/.browse$//; 3113} 3114length $fname or $errorMsg = "no file name", return 0; 3115$nostack = 1; 3116return -1, "$cmd $fname"; 3117} 3118 3119if($line eq "et") { 3120$cmd = 'e'; 3121$fmode&$browsemode or 3122$errorMsg = $nobrowse, return 0; 3123foreach $i (1..$dol) { 3124$text[substr($map, $i*$lnwidth, $lnwidth1)] = fetchLine($i,1); 3125} 3126$fmode &= ~($browsemode|$firstopmode|$changemode); 3127$btags = []; # don't need those any more. 3128print "editing as pure text\n" if $helpall; 3129return 1; 3130} 3131 3132if($line eq "ub") { 3133$fmode&$browsemode or 3134$errorMsg = $nobrowse, return 0; 3135dropEmptyBuffers(); 3136# Backing out. 3137$map = $$btags[0]{map1}; 3138$fname = $$btags[0]{fname}; 3139$fmode = $$btags[0]{fmode}; 3140$labels = $$btags[0]{labels}; 3141$dot = $$btags[0]{dot}; 3142$dol = $$btags[0]{dol1}; 3143apparentSize(); 3144return 1; 3145} # reverse browse 3146 3147if($line eq "f/" or $line eq "w/") { 3148$i = $fname; 3149$i =~ s,.*/,, or 3150$errorMsg = "filename does not contain a slash", return 0; 3151print "$i\n" if $helpall; 3152substr($line, 1, 1) = " $i"; 3153return -1, $line; 3154} 3155 3156if($line =~ /^f[dkt]$/) { 3157$fmode&$browsemode or 3158$errorMsg = $nobrowse, return 0; 3159my $key = "title"; 3160$key = "keywords" if $line eq "fk"; 3161$key = "description" if $line eq "fd"; 3162my $val = $$btags[0]{$key}; 3163if(defined $val) { 3164print "$val\n"; 3165} else { 3166print "no $key\n"; 3167} 3168return 1; 3169} 3170 3171if($line =~ /^sm(\d*)$/) { 3172 $cmd = 'e'; 3173$smMail = $1; 3174$altattach = 0; 3175$j = sendMailCurrent(); 3176$j and print "ok\n"; 3177return $j; 3178} 3179 3180# simple commands 3181if($line eq "sg") { $global_lhs_rhs = 1; print "substitutions global\n" if $helpall; return 1; } 3182if($line eq "sl") { $global_lhs_rhs = 0; print "substitutions local\n" if $helpall; return 1; } 3183if($line eq "ci") { $caseInsensitive = 1; print "case insensitive\n" if $helpall; return 1; } 3184if($line eq "cs") { $caseInsensitive = 0; print "case sensitive\n" if $helpall; return 1; } 3185if($line eq "dr") { $dw = 0; print "directories readonly\n" if $helpall; return 1; } 3186if($line eq "dw") { $dw = 1; print "directories writable\n" if $helpall; return 1; } 3187if($line eq "dx") { $dw = 2; print "directories writable with delete\n" if $helpall; return 1; } 3188if($line eq "dp") { $delprint ^= 1; print ($delprint ? "delete print\n" : "delete quiet\n"); return 1; } 3189if($line eq "rh") { $reroute ^= 1; print ($reroute ? "redirect html\n" : "do not redirect html\n"); return 1; } 3190if($line eq "pm") { $passive ^= 1; print ($passive ? "passive ftp\n" : "active ftp\n"); return 1; } 3191if($line eq "ph") { $pdf_convert ^= 1; print ($pdf_convert ? "pdf to html conversion\n" : "pdf raw\n"); return 1; } 3192if($line eq "vs") { $ssl_verify ^= 1; print ($ssl_verify ? "verify ssl connections\n" : "do not verify ssl connections (less secure)\n"); return 1; } 3193if($line eq "ac") { $allowCookies ^= 1; print ($allowCookies ? "accept cookies\n" : "reject cookies\n"); return 1; } 3194if($line eq "sr") { $allowReferer ^= 1; print ($allowReferer ? "send refering web page\n" : "don't send refering web page\n"); return 1; } 3195if($line =~ s/^db *//) { 3196if($line =~ /^\d$/) { 3197$debug = $line, return 1; 3198} else { 3199$errorMsg = "please set debug level, 0 through 7", return 0; 3200} 3201} 3202if($line =~ s/^ua *//) { 3203if($line =~ /^\d+$/) { 3204$errorMsg = "Agent number $line is not defined", return 0 if ! defined$agents[$line]; 3205$agent = $agents[$line], return 1; 3206} else { 3207$errorMsg = "please set user agent, 0 through ".$#agents, return 0; 3208} 3209} # ua number 3210if($line eq "ff") { $fetchFrames ^= 1; print ($fetchFrames ? "fetch frames\n" : "do not fetch frames\n"); return 1; } 3211if($line eq "tn") { $textAreaCR ^= 1; print ($textAreaCR ? "dos newlines on text areas\n" : "unix newlines on text areas\n"); return 1; } 3212if($line eq "eo") { $endmarks = 0; print "end markers off\n" if $helpall; return 1; } 3213if($line eq "el") { $endmarks = 1; print "end markers list\n" if $helpall; return 1; } 3214if($line eq "ep") { $endmarks = 2; print "end markers on\n" if $helpall; return 1; } 3215return -1,"^".length($1) if $line =~ /^(\^+)$/; 3216return stripChild() if $line eq "ws"; 3217return unstripChild() if $line eq "us"; 3218 3219return -1, $line; # no change 3220} # twoLetter 3221 3222# Evaluate the entered command. 3223# This is indirectly recursive, as in g/z/ s/x/y/ 3224# Pass the command line, and return success or failure. 3225sub evaluate($) 3226{ 3227my $line = shift; 3228my ($i, $j, @pieces, $h, $href); 3229my $postspace = 0; 3230my $postBrowse; 3231my $nsuf = -1; # numeric suffix 3232my $cx; # context specified -- always $nsuf - 1 3233my $section = ""; # section within a document 3234my $post = ""; # for post cgi method 3235$nostack = 0; # suppress stacking of edit sessions 3236 3237$referer = ""; 3238$referer = $fname if $allowReferer; 3239$referer =~ s/\.browse$//; 3240 3241$cmd = ""; 3242# We'll allow whitespace at the start of an entered command. 3243$line =~ s/^\s*//; 3244# Watch for successive q commands. 3245$lastq = $lastqq, $lastqq = -1; 3246 3247if(!$inglob) { 3248# We'll allow comments in an edbrowse script 3249return 1 if $line =~ /^#/; 3250 3251return shellEscape $line if $line =~ s/^!\s*//; 3252 3253# Web express shortcuts 3254if($line =~ s/^@ *//) { 3255if(! length $line) { 3256my @shortList = (); 3257foreach $i (sort keys %shortcut) { 3258$j = $i; 3259my ($desc, $sort); 3260defined ($desc = $shortcut{$i}{desc}) and 3261$j .= " = $desc"; 3262$j = "|$j"; 3263defined ($sort = $shortcut{$i}{sort}) and 3264$j = "$sort$j"; 3265$j .= "\n"; 3266push @shortList, $j; 3267} # loop over shortcuts 3268foreach (sort @shortList) { 3269s/^.*?\|//; 3270print $_; 3271} 3272return 1; 3273} 3274$cmd = '@'; 3275($j, $line, $postBrowse) = webExpress($line); 3276return 0 unless $j; 3277$line =~ s%^%b http://%; 3278if($line =~ /\*/) { 3279$post = $line; 3280$post =~ s/.*\*/*/; 3281$line =~ s/\*.*//; 3282} 3283} 3284 3285# Predefined command sets. 3286if($line =~ s/^< *//) { 3287if(!length $line) { 3288foreach $i (sort keys %commandList) { 3289print "$i\n"; 3290} 3291return 1; 3292} 3293$i = $commandList{$line}; 3294defined $i or $errorMsg = "command set $line is not recognized", return 0; 3295return evaluateSequence($i, $commandCheck{$line}); 3296} # command set 3297 3298# Two letter commands. 3299($j, $line) = twoLetter($line); 3300return $j if $j >= 0; 3301} # not in global 3302 3303$startRange = $endRange = $dot; # default, if no range given 3304$line = '+' if ! length $line; 3305$line = ($dol ? 1 : 0) . $line if substr($line, 0, 1) eq ','; 3306if($line =~ /^j/i) { 3307$endRange = $dot + 1; 3308$errorMsg = "line number too large", return "" if $endRange > $dol; 3309} elsif(substr($line, 0, 1) eq '=') { 3310$startRange = $endRange = $dol; 3311} elsif($line =~ /^[wgv]/ and $line !~ /^g\s*\d*$/) { 3312$startRange = 1, $endRange = $dol; 3313$startRange = 0 if ! $dol; 3314} elsif($line =~ s/^;//) { 3315$endRange = $dol; 3316} else { 3317@pieces = getRangePart($line); 3318$inglob = 0, return 0 if $#pieces < 0; 3319$startRange = $endRange = $pieces[0]; 3320$line = $pieces[1]; 3321if($line =~ s/^,//) { 3322$endRange = $dol; # new default 3323if($line =~ /^[-'.\$+\d\/?]/) { 3324@pieces = getRangePart($line); 3325$inglob = 0, return 0 if $#pieces < 0; 3326$endRange = $pieces[0]; 3327$line = $pieces[1]; 3328} # second address 3329} # comma present 3330} # end standard range processing 3331 3332# lc lower case, uc upper case 3333$line =~ s:^([lmu]c|ue)$:s/.*/$1/:; 3334if($line eq "bl") { # break the line 3335dirBrowseCheck("break line") or return 0; 3336$line = "sbl"; 3337} 3338 3339$cmd = substr($line, 0, 1); 3340if(length $cmd) { $line = substr($line, 1); } else { $cmd = 'p'; } 3341$icmd = $cmd; 3342$startRange <= $endRange or 3343$errorMsg = "bad range", return 0; 3344index($valid_cmd, $cmd) >= 0 or 3345$errorMsg = "unknown command $cmd", $inglob = 0, return 0; 3346 3347# Change some of the command codes, depending on context 3348$cmd = 'I' if $cmd eq 'i' and $line =~ /^[$valid_delim\d<*]/o; 3349$cmd = 'I' if $cmd eq 's' and $fmode&$browsemode; 3350$cmd = 's' if $cmd eq 'S'; 3351my $writeMode = ">"; 3352if($cmd eq "w" and substr($line, 0, 1) eq "+") { 3353$writeMode = ">>"; 3354$line =~ s/^.//; 3355} 3356 3357!($fmode&$dirmode) or index($dir_cmd, $cmd) >= 0 or 3358$errorMsg = "$icmd $nixdir", $inglob = 0, return 0; 3359!($fmode&$browsemode) or index($browse_cmd, $cmd) >= 0 or 3360$errorMsg = "$icmd $nixbrowse", $inglob = 0, return 0; 3361$startRange > 0 or index($zero_cmd, $cmd) >= 0 or 3362$errorMsg = "zero line number", return 0; 3363$postspace = 1 if $line =~ s/^\s+//; 3364if(index($spaceplus_cmd, $cmd) >= 0 and 3365! $postspace and length $line and 3366$line !~ /^\d+$/) { 3367$errorMsg = "no space after command"; 3368return 0; 3369} 3370 3371# env variable and wild card expansion 3372if(index("brewf", $cmd) >= 0 and length $line) { 3373$line = envFile($line); 3374return 0 if length $errorMsg; 3375} 3376 3377if($cmd eq 'B') { 3378return balanceLine($line); 3379} 3380 3381if($cmd eq 'z') { 3382$startRange = $endRange + 1; 3383$endRange = $startRange; 3384$startRange <= $dol or 3385$errorMsg = "line number too large", return 0; 3386$cmd = 'p'; 3387$line = $last_z if ! length $line; 3388if($line =~ /^(\d+)\s*$/) { 3389$last_z = $1; 3390$last_z = 1 if $last_z == 0; 3391$endRange += $last_z - 1; 3392$endRange = $dol if $endRange > $dol; 3393} else { 3394$errorMsg = "z command should be followed by a number", return 0; 3395} 3396$line = ""; 3397} 3398 3399# move/copy destination, the third address 3400if($cmd eq 'm' or $cmd eq 't') { 3401length $line or 3402$errorMsg = "no move/copy destination", $inglob = 0, return 0; 3403$line =~ /^[-'.\$+\d\/?]/ or 3404$errorMsg = "invalid move/copy destination", $inglob = 0, return 0; 3405@pieces = getRangePart($line); 3406$inglob = 0, return 0 if $#pieces < 0; 3407$dest = $pieces[0]; 3408$line = $pieces[1]; 3409$line =~ s/^\s*//; 3410} # move copy destination 3411if($cmd eq 'a') { 3412($line eq "+") ? ($line = "") : ($linePending = undef); 3413} else { 3414$linePending = undef; 3415} 3416! length $line or index($nofollow_cmd, $cmd) < 0 or 3417$errorMsg = "unexpected text after the $icmd command", $inglob = 0, return 0; 3418 3419# We don't need trailing whitespace, except for substitute or global substitute. 3420index("sgvI", $cmd) >= 0 or 3421$line =~ s/\s*$//; 3422 3423! $inglob or 3424index($global_cmd, $cmd) >= 0 or 3425$errorMsg = "the $icmd command cannot be applied globally", $inglob = 0, return 0; 3426 3427if($cmd eq 'h') { 3428$errorMsg = "no errors" if ! length $errorMsg; 3429print $errorMsg,"\n"; 3430return 1; 3431} 3432 3433if($cmd eq 'H') { 3434$helpall ^= 1; 3435print "help messages on\n" if $helpall; 3436return 1; 3437} # H 3438 3439if(index("lpn", $cmd) >= 0) { 3440foreach $i ($startRange..$endRange) { 3441dispLine($i); 3442$dot = $i; 3443last if $intFlag; 3444} 3445return 1; 3446} 3447 3448if($cmd eq '=') { 3449print $endRange,"\n"; 3450return 1; 3451} 3452 3453if($cmd eq 'u') { 3454$fmode&$firstopmode or 3455$errorMsg = "nothing to undo", return 0; 3456goUndo(); 3457return 1; 3458} # u 3459 3460if($cmd eq 'k') { 3461$line =~ /^[a-z]$/ or 3462$errorMsg = "please enter k[a-z]", return 0; 3463$startRange == $endRange or 3464$errorMsg = "cannot label an entire range", return 0; 3465substr($labels, (ord($line) - ord('a'))*$lnwidth, $lnwidth) = 3466sprintf $lnformat, $endRange; 3467return 1; 3468} 3469 3470$nsuf = $line if $line =~ /^\d+$/ and ! $postspace; 3471$cx = $nsuf - 1; 3472 3473if($cmd eq 'f') { 3474if($nsuf >= 0) { 3475(cxCompare($cx) and cxActive($cx)) or return 0; 3476$j = $fname[$cx]; 3477print(length($j) ? $j : "no file"); 3478print " [binary]" if $fmode[$cx]&$binmode; 3479print "\n"; 3480return 1; 3481} 3482if(length $line) { 3483$errorMsg = "cannot change the name of a directory", return 0 if $fmode&$dirmode; 3484$fname = $line; 3485} else { 3486print(length($fname) ? $fname : "no file"); 3487print " [binary]" if $fmode&$binmode; 3488print "\n"; 3489} 3490return 1; 3491} # f 3492 3493if($cmd eq 'q') { 3494$nsuf < 0 or (cxCompare($cx) and cxActive($cx)) or return 0; 3495if($nsuf < 0) { 3496$cx = $context; 3497$errorMsg = "unexpected text after the $icmd command", return 0 if length $line; 3498} 3499cxReset($cx, 1) or return 0; 3500return 1 if $cx != $context; 3501# look around for another active session 3502while(1) { 3503$cx = 0 if ++$cx > $#factive; 3504exit 0 if $cx == $context; 3505next if ! defined $factive[$cx]; 3506cxSwitch($cx, 1); 3507return 1; 3508} 3509} # q 3510 3511if($cmd eq 'w') { 3512if($nsuf >= 0) { 3513$writeMode eq ">" or 3514$errorMsg = "sorry, append to buffer not yet implemented", return 0; 3515return writeContext($cx) 3516} 3517$line = $fname if ! length $line; 3518if($fmode&$dirmode and $line eq $fname) { 3519$errorMsg = "cannot write to the directory; files are modified as you go"; 3520return 0; 3521} 3522return writeFile($writeMode, $line) if length $line; 3523$errorMsg = "no file specified"; 3524return 0; 3525} # w 3526 3527# goto a file in a directory 3528if($fmode&$dirmode and $cmd eq 'g' and ! length $line) { 3529$cmd = 'e'; 3530$line = $dirname . '/' . fetchLine($endRange, 0); 3531} 3532 3533if($cmd eq 'e') { 3534return (cxCompare($cx) and cxSwitch($cx, 1)) if $nsuf >= 0; 3535if(!length $line) { 3536$j = $context + 1; 3537print "session $j\n"; 3538return 1; 3539} 3540} # e 3541 3542if($cmd eq 'g' and $line =~ /^\d*$/) { 3543($j, $line) = hyperlink($line); 3544return 0 unless $j; 3545# Go on to browse the file. 3546} # goto link 3547 3548if($cmd eq '^') { 3549! length $line or $nsuf >= 0 or 3550$errorMsg = "unexpected text after the ^ command", return 0; 3551$nsuf = 1 if $nsuf < 0; 3552while($nsuf) { 3553$errorMsg = "no previous text", return 0 if ! defined $backup; 3554cxReset($context, 2) or return 0; 3555$h = $backup; 3556$backup = $$h{backup}; 3557cxUnpack($h); 3558--$nsuf; 3559} 3560# Should this print be inside or outside the loop? 3561if($dot) { dispLine($dot); } else { print "empty file\n"; } 3562return 1; 3563} # ^ 3564 3565if($cmd eq 'A') { 3566return showLinks(); 3567} # A 3568 3569if($icmd eq 's' or $icmd eq 'S') { 3570# A few shorthand notations. 3571if($line =~ /^([,.;:!?)"-])(\d?)$/) { 3572my $suffix = $2; 3573$line = "$1 +"; 3574# We have to escape the question mark and period 3575$line =~ s/^([?.])/\\$1/; 3576$line = "/$line/$1\\n"; 3577$line .= "/$suffix" if length $suffix; 3578} 3579} # original command was s 3580 3581readyUndo if ! $inglob; 3582 3583if($cmd eq 'g' or $cmd eq 'v') { 3584return doGlobal($line); 3585} # global 3586 3587if($cmd eq 'I') { 3588$fmode&$browsemode or $errorMsg = $nobrowse, $inglob = 0, return 0; 3589 3590if($line =~ /^\d*\?/) { # status 3591$inglob and $errorMsg = $inoglobal, $inglob = 0, return 0; 3592$startRange == $endRange or $errorMsg = $inorange, return 0; 3593infIndex($endRange, $line) > 0 or return 0; 3594infStatus($line); 3595return 1; 3596} # get info on input field 3597 3598if($line =~ /^\d*([=<])/) { 3599my $asg = $1; 3600$subprint = 1; 3601my $yesdot = 0; 3602my $t = $line; 3603$t =~ s/^\d*[=<]//; 3604if($asg eq '<') { 3605if($t =~ /^\d+$/) { 3606my $cx = $t-1; 3607cxCompare($cx) and cxActive($cx) or $inglob = 0, return 0; 3608my $dolcx = $dol[$cx]; 3609$dolcx == 1 or $errorMsg = "session $t should contain exactly one line", $inglob = 0, return 0; 3610$t = fetchLineContext(1, 1, $cx); 3611} else { 3612$errorMsg = ""; 3613$t = envFile $t; 3614length($errorMsg) and $inglob = 0, return 0; 3615open FH, $t or $errorMsg = "cannot open $t, $!", $inglob = 0, return 0; 3616$t = <FH>; 3617defined $t or $errorMsg = "empty file", $inglob = 0, return 0; 3618if(defined <FH>) { 3619close FH; 3620$errorMsg = "file contains more than one line"; 3621$inglob = 0; 3622return 0; 3623} 3624close FH; 3625$t =~ s/[\r\n]+$//; 3626} 3627} # I<file 3628foreach $i ($startRange..$endRange) { 3629my $rc = infIndex($i, $line); 3630next unless $rc; 3631$dot = $i; 3632$rc > 0 and infReplace($t) or $inglob = 0, return 0; 3633$yesdot = $dot; 3634} # loop over lines 3635if($yesdot) { 3636dispLine($yesdot) if ! $inglob; 3637return 1; 3638} 3639$errorMsg = "no input fields present" if ! $inglob; 3640return 0; 3641} # i= 3642 3643if($line =~ /^\d*\*$/) { 3644$inglob and $errorMsg = $inoglobal, $inglob = 0, return 0; 3645$startRange == $endRange or $errorMsg = $inorange, return 0; 3646infIndex($endRange, $line) > 0 or return 0; 3647($j, $line, $post) = infPush(); 3648# return code of -1 means there's more to do. 3649return $j unless $j < 0; 3650} elsif( $line !~ m&^\d*[$valid_delim]&o) { 3651$errorMsg = "unknown input field directive, please use I? or I= or I/text/replacement/"; 3652return 0; 3653} 3654} # input field 3655 3656# Pull section indicator off of a url. 3657$section = $1 if $cmd eq 'b' and $line =~ s/(#.*)//; 3658 3659if(($cmd eq 'b' or $cmd eq 'e') and length $line) { 3660$h = undef; 3661$h = cxPack() if $dol and ! $nostack; 3662cxReset($context, 0) or return 0; 3663$startRange = $endRange = 0; 3664$changeFname = ""; 3665if($line =~ /^\x80mail\x80(.*)$/) { # special code for sendmail link 3666$href = $1; 3667my $subj = urlSubject(\$href); 3668$subj = "Comments" unless length $subj; 3669if(lineLimit 2) { 3670$i = 0; 3671} else { 3672$i = 1; 3673push @text, "To: $href"; 3674$map .= sprintf($lnformat, $#text); 3675push @text, "Subject: $subj"; 3676$map .= sprintf($lnformat, $#text); 3677$dot = $dol = 2; 3678print "SendMail link. Compose your mail, type sm to send, then ^ to get back.\n"; 3679apparentSize(); 3680} 3681} else { 3682$fname = $line; 3683$i = readFile($fname, $post); 3684$fmode &= ~($changemode|$firstopmode); 3685} 3686$filesize = -1, cxUnpack($h), return 0 if !$i and ! $dol and is_url($fname); 3687if(defined $h) { 3688$$h{backup} = $backup if defined $backup; 3689$backup = $h; 3690} 3691return 0 if ! $i; 3692$fname = $changeFname if length $changeFname; 3693$cmd = 'e' if $fmode&$binmode or ! $dol; 3694return 1 if $cmd eq 'e'; 3695} 3696 3697if($cmd eq 'b') { 3698if(! ($fmode&$browsemode)) { 3699readyUndo(); 3700print("$filesize\n"), $filesize = -1 if $filesize >= 0; 3701render() or return 0; 3702if(defined $postBrowse) { 3703$$btags[0]{pb} = $postBrowse; 3704evaluateSequence($postBrowse, 0); 3705if($$btags[0]{dol2} > $dol) { 3706$fmode &= ~($changemode|$firstopmode); 3707apparentSize(); 3708} 3709} 3710} else { 3711$errorMsg = "already browsing", return 0 if ! length $section; 3712} 3713return 1 if ! length $section; 3714$section =~ s/^#//; 3715$j = findSection($section); 3716$errorMsg = "cannot locate section #$section", return 0 unless $j; 3717$dot = $j; 3718dispLine($dot); 3719return 1; 3720} # b 3721 3722if($cmd eq 'm' or $cmd eq 't') { 3723return moveCopy(); 3724} 3725 3726if($cmd eq 'i') { 3727$cmd = 'a'; 3728--$startRange, --$endRange; 3729} 3730 3731if($cmd eq 'c') { 3732delText($startRange, $endRange) or return 0; 3733$endRange = --$startRange; 3734$cmd = 'a'; 3735} 3736 3737if($cmd eq 'a') { 3738return readLines(); 3739} 3740 3741if($cmd eq 'd') { 3742$i = ($endRange == $dol); 3743if($fmode & $dirmode) { 3744$j = delFiles(); 3745} else { 3746$j = delText($startRange, $endRange); 3747} 3748$inglob = 0 if ! $j; 3749if($j and $delprint and ! $inglob) { 3750$i ? print "end of file\n" : dispLine($dot); 3751} 3752return $j; 3753} # d 3754 3755if($cmd eq 'j' or $cmd eq 'J') { 3756return joinText(); 3757} # j 3758 3759if($cmd eq 'r') { 3760return readContext($cx) if $nsuf >= 0; 3761return readFile($line, "") if length $line; 3762$errorMsg = "no file specified"; 3763return 0; 3764} # r 3765 3766if($cmd eq 's' or $cmd eq 'I') { 3767$j = substituteText($line); 3768$inglob = $j = 0 if $j < 0; 3769return $j; 3770} # substitute 3771 3772$errorMsg = "command $icmd not yet implemented"; 3773$inglob = 0; 3774return 0; 3775} # evaluate 3776 3777sub evaluateSequence($$) 3778{ 3779my $commands = shift; 3780my $check = shift; 3781foreach my $go (@$commands) { 3782$inglob = 0; 3783$intFlag = 0; 3784$filesize = -1; 3785my $rc = evaluate($go); 3786print "$filesize\n" if $filesize >= 0; 3787$rc or ! $check or 3788return 0; 3789} 3790return 1; 3791} # evaluateSequence 3792 3793# Hash to map html tags onto their English descriptions. 3794# For instance, P maps to "paragraph". 3795# Most of the tags, such as FONT, map to nothing, 3796# whence they are thrown away. 3797# The first two characters are not part of the description. 3798# It forms a number that describes the nestability of the tag. 3799# Bit 1 means the tag should be nested, like parentheses. 3800# In fact all the bit1 tags should nest amongst eachother, unlike 3801# <UL> <TABLE> </UL> </TABLE> (nesting error). 3802# Bit 2 means a tag may appear inside itself, like nested lists. 3803# Bit 4 means the tag implies a paragraph break. 3804# Bit 8 means we retain attributes on the positive tag. 3805# bit 16 means to close an open anchor *before* applying this tag 3806%tagdesc = ( 3807sub => "11a subscript", 3808font => " 3a font", 3809center => " 3centered text", 3810sup => "11a superscript", 3811title => "17the title", 3812head => "17the html header information", 3813body => "27the html body", 3814bgsound => "24background music", 3815meta => " 8a meta tag", 3816base => " 8base reference for relative URLs", 3817img => " 8an image", 3818br => " 0a line break", 3819p => "20a paragraph", 3820blockquote => "20a quoted paragraph", 3821div => "20a divided section", 3822h => "21a header", 3823dt => "20a term", 3824dd => "20a definition", 3825hr => "16a horizontal line", 3826ul => "23a bullet list", 3827ol => "23a numbered list", 3828dl => "23a definition list", 3829li => "16a list item", 3830form => "25a form", 3831input => "24an input item", 3832a => "25an anchor", 3833frame => "28a frame", 3834map => "28An image map", 3835area => "24an image map area", 3836# I've seen tables nested inside tables -- I don't know why! 3837table => "31a table", 3838tr => "19a table row", 3839td => "19a table entry", 3840th => "19a table heading", 3841pre => " 5a preformatted section", 3842xmp => " 5a preformatted section", 3843address => " 5a preformatted section", 3844script => " 1a script", 3845style => " 1a style block", 3846noframes => " 1noframe section", 3847select => "25an option list", 3848textarea => "25an input text area", 3849option => "24a select option", 3850# The following tags convey formatting information that is eventually 3851# discarded, but I'll track them for a while, 3852# just to verify nestability. 3853em => " 1a block of emphasized text", 3854strong => " 1a block of emphasized text", 3855b => " 1a block of bold text", 3856i => " 1a block of italicized text", 3857code => " 1a block of sample code", 3858samp => " 1a block of sample code", 3859); 3860 3861# We encode tags in a @tag attribute=value attribute=value ...@ format, 3862# though of course we don't use the @ sign. 3863# We use \x80, which should not appear in international text. 3864# I simply hard code it - it makes things simpler. 3865 3866# Support routine, to encode a tag. 3867# Run from within a global substitute. 3868# Pas the name of the tag, slash, and tag arguments 3869sub processTag($$$) 3870{ 3871my ($tag, $slash, $attributes) = @_; 3872my $nlcount = $attributes =~ y/\n/\n/; # newline count 3873my $doat = 0; # do attributes 3874$tag = lc $tag; 3875my $desc = $tagdesc{$tag}; 3876if(defined $desc) { 3877$doat = (substr($desc, 0, 2) & 8); 3878} else { 3879$tag = "z"; 3880} 3881# Do we need to gather up the attributes? 3882if(!$doat or $slash eq "/") { 3883# Guess not, just return the tag. 3884return "" if $tag eq "z" and ! $nlcount; 3885return "\x80$tag$slash$nlcount\x80"; 3886} 3887# Process each whitespace separated chunk, taking quotes into account. 3888# note that name="foo"size="1" is suppose to be two separate tags; 3889# God help us! 3890# Borrow a global variable, even though this may not be an input tag. 3891$itag = {tag => $tag}; 3892push @$btags, $itag; 3893$attributes =~ s/( # replace the entire matched text 3894\w+ # attribute name 3895(?>\s*=\s* # as in name=value 3896(?> # a sequence of choices 3897[^\s"']+ # regular printable characters 3898| 3899"[^"]*" # double quoted string 3900| 3901'[^']*' # single quoted string 3902) # one of three formats 3903)? # =value 3904)/processAttr($1)/xsge; 3905# Capture description and keywords. 3906if($tag eq "meta") { 3907my $val = $$itag{name}; 3908if(defined $val) { 3909$val = lc $val; 3910if($val eq "description" or $val eq "keywords") { 3911my $content = $$itag{content}; 3912if(defined $content) { 3913stripWhite \$content; 3914$$btags[0]{$val} = $content if length $content; 3915} # content 3916} # description or keywords 3917} # name= 3918pop @$btags; 3919return "" unless $nlcount; 3920return "\x80z$nlcount\x80"; 3921} # meta tag 3922my $tagnum = $#$btags; 3923return "\x80$tag$nlcount,$tagnum\x80"; 3924} # processTag 3925 3926# Support routine, to crack attribute=value. 3927sub processAttr($) 3928{ 3929my $line = shift; 3930# Get rid of spaces around first equals. 3931$line =~ s/^([^=\s]*)\s*=\s*/$1=/; 3932# Get rid of the quotes. 3933$line =~ s/("[^"]*"|'[^']*')/substr($1,1,-1)/sge; 3934my $attr = lc $line; 3935$attr =~ s/\s*=.*//s; 3936return "" unless $attr =~ /^\w+$/; 3937$line =~ s/^[^=]*=//s 3938or $line = ""; 3939$line =~ s/&([a-zA-Z]+|#\d+);/metaChar($1)/ge; 3940$$itag{$attr} = $line; 3941return ""; 3942} # processAttr 3943 3944# Support routine, to encode a bang tag. 3945# Run from within a global substitute. 3946sub processBangtag($) 3947{ 3948my $item = shift; 3949if($item eq "'" or $item eq '"') { 3950return (length $bangtag ? " " : $item); 3951} 3952if(substr($item, 0, 1) eq '<') { 3953return "" if length $bangtag; 3954return $item if $item eq "<"; 3955$bangtag = substr $item, 1; 3956return "<z "; 3957} 3958return $item unless length $bangtag; 3959# dashes at the front require dashes at the end. 3960# But (apparently) they don't have to be the same number of dashes. 3961# I really don't understand this syntax at all! 3962# It is suppose to follow the rules in 3963# http://www.htmlhelp.com/reference/wilbur/misc/comment.html 3964# but real web pages hardly ever follow these rules! 3965substr($item, -1) = ""; # don't need that last > 3966my $l = length($bangtag) - 1; 3967$l &= ~1; # back down to an even number 3968return " " if $l and ! length $item; # lone > inside a comment 3969$bangtag = ""; 3970return ">"; 3971} # processBangtag 3972 3973# Turn <>'" in javascript into spaces, as we did above. 3974sub processScript($) 3975{ 3976my $item = shift; 3977if(length($item) < 5) { 3978return ($inscript ? " " : $item); 3979} 3980# now $item is <script or </script 3981# Try to guard against Java code that looks like 3982# document_write("<script bla bla bla>\n"; 3983# There's a lot of this going around. 3984$prequote = 0; 3985$prequote = 1 if $item =~ s/^\( *['"]//; 3986return ' ' if $inscript and $prequote; 3987if(substr($item, 1, 1) eq '/') { 3988--$inscript if $inscript; 3989} else { 3990++$inscript; 3991} 3992return $item; 3993} # processScript 3994 3995sub backOverSpaces($) 3996{ 3997my $trunc = shift; 3998my $j = length($refbuf) - 1; 3999--$j while $j >= 0 and substr($refbuf, $j, 1) =~ /[ \t]/; 4000++$j; 4001substr($refbuf, $j) = "" if $trunc; 4002return $j; 4003} # backOverSpaces 4004 4005# Recompute space value, after the buffer has been cropped. 4006# 0 = word, 1 = spaces, 2 = newline, 3 = paragraph. 4007sub computeSpace() 4008{ 4009return 3 if ! length $refbuf; 4010my $last = substr $refbuf, -1; 4011return 0 if $last !~ /\s/; 4012return 1 if $last ne "\n"; 4013return 2 if substr($refbuf, -2) ne "\n\n"; 4014return 3; 4015} # computeSpace 4016 4017# Here are the common keywords for mail header lines. 4018# These are in alphabetical order, so you can stick more in as you find them. 4019# The more words we have, the more accurate the test. 4020# Value = 1 means it might be just a "NextPart" mime header, 4021# rather than a full-blown email header. 4022# Value = 2 means it could be part of an English form. 4023# Value = 4 means it's almost certainly a line in a mail header. 4024%mhWords = ( 4025"action" => 2, 4026"arrival-date" => 4, 4027"content-transfer-encoding" => 1, 4028"content-type" => 1, 4029"date" => 2, 4030"delivered-to" => 4, 4031"errors-to" => 4, 4032"final-recipient" => 4, 4033"from" => 2, 4034"importance" => 4, 4035"last-attempt-date" => 4, 4036"list-id" => 4, 4037"mailing-list" => 4, 4038"message-id" => 4, 4039"mime-version" => 4, 4040"precedence" => 4, 4041"received" => 4, 4042"remote-mta" => 4, 4043"reply-to" => 4, 4044"reporting-mta" => 4, 4045"return-path" => 4, 4046"sender" => 4, 4047"status" => 2, 4048"subject" => 4, 4049"to" => 2, 4050"x-beenthere" => 4, 4051"x-loop" => 4, 4052"x-mailer" => 4, 4053"x-mailman-version" => 4, 4054"x-mimeole" => 4, 4055"x-ms-tnef-correlator" => 4, 4056"x-msmail-priority" => 4, 4057"x-priority" => 4, 4058"x-uidl" => 4, 4059); 4060 4061# Get a filename from the user. 4062sub getFileName($$) 4063{ 4064my $startName = shift; 4065my $isnew = shift; 4066input: { 4067print "Filename: "; 4068print "[$startName] " if defined $startName; 4069my $line = <STDIN>; 4070exit 0 unless defined $line; 4071stripWhite \$line; 4072if($line eq "") { 4073redo input if ! defined $startName; 4074$line = $startName; 4075} else { 4076$startName = undef; 4077$line = envLine $line; 4078print("$errorMsg\n"), redo input if length $errorMsg; 4079} # blank line 4080if($isnew and -e $line) { 4081print "Sorry, file $line already exists.\n"; 4082$startName = undef; 4083redo input; 4084} 4085return $line; 4086} 4087} # getFileName 4088 4089# Get a character from the tty, raw mode. 4090# For some reason hitting ^c in this routine doesn't leave the tty 4091# screwed up. I don't know why not. 4092sub userChar 4093{ 4094my $choices = shift; 4095input: { 4096# Too bad there isn't a perl in-built for this. 4097# I don't know how to do this in Windows. Help anybody? 4098system "stty", "-icanon", "-echo"; 4099my $c = getc; 4100system "stty", "icanon", "echo"; 4101if(defined $choices and index($choices, $c) < 0) { 4102STDOUT->autoflush(1); 4103print "\a\b"; 4104STDOUT->autoflush(0); 4105redo input; 4106} 4107return $c; 4108} 4109} # userChar 4110 4111# Encode html page or mail message. 4112# No args, the html is stored in @text, as indicated by $map. 4113sub render() 4114{ 4115$dol or $errorMsg = "empty file", return 0; 4116$errorMsg = "binary file", return 0 if $fmode&$binmode; 4117$errorMsg = "cannot render a directory", return 0 if $fmode&$dirmode; 4118 4119my ($i, $j, $k, $rc); 4120my $type = ""; 4121$btags[$context] = $btags = []; 4122$$btags[0] = {tag => "special", fw => {} }; 4123 4124# If it starts with html, head, or comment, we'll call it html. 4125my $tbuf = fetchLine 1, 0; 4126if($tbuf =~ /^\s*<(?:!-|html|head|meta)/i) { 4127$type = "html"; 4128} 4129 4130if(! length $type) { 4131# Check for mail header. 4132# There might be html tags inside the mail message, so we need to 4133# look for mail headers first. 4134# This is a very simple test - hopefully not too simple. 4135# The first 20 non-indented lines have to look like mail header lines, 4136# with at least half the keywords recognized. 4137$j = $k = 0; 4138for $i (1..$dol) { 4139my $line = fetchLine $i, 0; 4140last unless length $line; 4141next if $line =~ /^[ \t]/; # indented 4142++$j; 4143next unless $line =~ /^([\w-]+):/; 4144my $word = lc $1; 4145my $v = $mhWords{$word}; 4146++$k if $v; 4147if($k >= 4 and $k*2 >= $j) { 4148$type = "mail"; 4149last; 4150} 4151last if $j > 20; 4152} 4153} 4154 4155if($type ne "mail") { 4156# Put the lines together into one long string. 4157# This is necessary to check for, and render, html. 4158$tbuf .= "\n"; 4159$tbuf .= fetchLine($_, 0) . "\n" foreach (2..$dol); 4160} 4161 4162if(! length $type) { 4163# Count the simple html tags, we need at least two per kilabyte. 4164$i = length $tbuf; 4165$j = $tbuf =~ s/(<\/?[a-zA-Z]{1,7}\d?[>\s])/$1/g; 4166$j = 0 if $j eq ""; 4167$type = "html" if $j * 500 >= $i; 4168} 4169 4170if(! length $type) { 4171$errorMsg = "this doesn't look like browsable text"; 4172return 0; 4173} 4174 4175$badHtml = 0; 4176$badHtml = 1 if is_url($fname); 4177$rc = renderMail(\$tbuf) if $type eq "mail"; 4178$rc = renderHtml(\$tbuf) if $type eq "html"; 4179return 0 unless $rc; 4180 4181pushRenderedText(\$tbuf) or return 0; 4182if($type eq "mail") { 4183$fmode &= ~$browsemode; # so I can run the next command 4184evaluate(",bl"); 4185$errorMsg = ""; 4186$dot = $dol; 4187$fmode &= ~$changemode; 4188$fmode |= $browsemode; 4189} 4190apparentSize(); 4191$tbuf = undef; 4192 4193if($type eq "mail" and $nat) { 4194print "$nat attachments.\n"; 4195$j = 0; 4196foreach $curPart (@mimeParts) { 4197next unless $$curPart{isattach}; 4198++$j; 4199print "Attachment $j\n"; 4200my $filename = getFileName($$curPart{filename}, 1); 4201next if $filename eq "x"; 4202if($filename eq "e") { 4203print "session " . (cxCreate(\$$curPart{data}, $$curPart{filename})+1) . "\n"; 4204next; 4205} 4206if(open FH, ">$filename") { 4207binmode FH, ':raw' if $doslike; 4208print FH $$curPart{data} 4209or dieq "Cannot write to attachment file $filename, $!."; 4210close FH; 4211} else { 4212print "Cannot create attachment file $filename.\n"; 4213} 4214} # loop over attachments 4215print "attachments complete.\n"; 4216} # attachments present 4217 4218return 1; 4219} # render 4220 4221# Pass the reformatted text, without its last newline. 4222sub pushRenderedText($) 4223{ 4224my $tbuf = shift; 4225 4226# Replace common nonascii symbols 4227# I don't know what this pair of bytes is for! 4228$$tbuf =~ s/\xe2\x81//g; 4229 4230# Transliterate alternate forms of quote, apostrophe, etc. 4231# We replace escape too, cuz it shouldn't be there anyways, and it messes up 4232# some terminals, and some adapters. 4233# Warning!! Don't change anything in the range \x80-\x8f. 4234# These codes are for internal use, and mus carry through. 4235$$tbuf =~ y/\x1b\x95\x99\x9c\x9d\x92\x93\x94\xa0\xad\x96\x97/_*'`''`' \55\55\55/; 4236 4237# Sometimes the bullet list indicator is falsely separated from the subsequent text. 4238$$tbuf =~ s/\n\n\*\n\n/\n\n* /g; 4239 4240# Turn nonascii math symbols into our encoded versions of math symbols, 4241# to be handled like Greek letters etc, in a consistent manner, 4242# by the next block of code. 4243$$tbuf =~ s/\xb0/\x82176#/; # degrees 4244$$tbuf =~ s/\xbc/\x82188#/; # 1 fourth 4245$$tbuf =~ s/\xbd/\x82189#/; # 1 half 4246$$tbuf =~ s/\xbe/\x82190#/; # 3 fourths 4247$$tbuf =~ s/\xd7/\x82215#/; # times 4248$$tbuf =~ s/\xf7/\x82247#/; # divided by 4249 4250if($$tbuf =~ /\x82\d+#/) { # we have codes to expand. 4251# These symbols are going to become words - 4252# put spaces on either side, if the neighbors are also words. 4253$$tbuf =~ s/#\x82/# \x82/g; 4254$$tbuf =~ s/([a-zA-Z\d])(\x82\d+#)/$1 $2/g; 4255$$tbuf =~ s/(\x82\d+#)([a-zA-Z\d])/$1 $2/g; 4256$$tbuf =~ s/\x82(\d+)#/$symbolWord{$1}/ge; 4257} 4258 4259# Now push into lines, for the editor. 4260my $j = $#text; 4261if(length $$tbuf) { 4262push @text, split "\n", $$tbuf, -1; 4263} else { 4264push @text, ""; 4265} 4266$#text = $j, return 0 if lineLimit 0; 4267 4268$$btags[0]{map1} = $map; 4269$$btags[0]{dot} = $dot; 4270$$btags[0]{dol1} = $dol; 4271$dot = $dol = $#text - $j; 4272$$btags[0]{dol2} = $dol; 4273$map = $lnspace; 4274$map .= sprintf($lnformat, $j) while ++$j <= $#text; 4275$$btags[0]{map2} = $map; 4276$fmode &= ~$firstopmode; 4277$$btags[0]{fname} = $fname; 4278$$btags[0]{fmode} = $fmode; 4279$$btags[0]{labels} = $labels; 4280$fmode &= $changemode; # only the change bit retains its significance 4281$fmode |= $browsemode; 4282$labels = $lnspace x 26; 4283$fname .= ".browse" if length $fname; 4284return 1; 4285} # pushRenderedText 4286 4287# Pass in the text to be rendered, by reference. 4288# The text is *replaced* with the rendered text. 4289sub renderHtml($) 4290{ 4291my $tbuf = shift; 4292my ($i, $j, $ofs1, $ofs2, $h); # variables 4293 4294$baseref = $fname; 4295 4296# Ok, here's a real kludge. 4297# The utility that converts pdf to html, 4298# access.adobe.com/simple_form.html, has a few quirks. 4299# One of the common problems in the translation is 4300# the following meaningless string, that appears over and over again. 4301# I'm removing it here. 4302$$tbuf =~ s/Had\strouble\sresolving\sdest\snear\sword\s(<[\w_;:\/'"().,-]+>\s)?action\stype\sis\sGoToR?//g; 4303 4304# I don't expect any overstrikes, but just in case ... 4305$$tbuf =~ s/[^<>"'&\n]\10//g; 4306# Get rid of any other backspaces. 4307$$tbuf =~ y/\10/ /; 4308 4309# Make sure there aren't any \x80 characters to begin with. 4310$$tbuf =~ y/\x80/\x81/; 4311 4312# As far as I can tell, href=// means href=http:// 4313# Is this documented anywhere?? 4314$$tbuf =~ s,\bhref=(["']?)//\b,HREF=$1http://,ig; 4315 4316# Find the simple window javascript functions 4317$refbuf = ""; 4318$lineno = $colno = 1; 4319$lspace = 3; 4320javaFunctions($tbuf); 4321 4322# Before we do the standard tags, get rid of the <!-- .. --> tags. 4323# I turn them into <z ... > tags, 4324# which will be disposed of later, along with all the 4325# other unrecognized tags. 4326# This is not a perfect implementation. 4327# It will glom onto the <! inside <A HREF="xyz<!stuff">, 4328# and it shouldn't; but niehter should you be writing such a perverse string! 4329$$tbuf =~ s/<!-*>//g; 4330$bangtag = ""; 4331$$tbuf =~ s/(['"]|<(!-*)?|-*>)/processBangtag($1)/ge; 4332print "comments stripped\n" if $debug >= 6; 4333 4334$errorMsg = $intMsg, return 0 if $intFlag; 4335 4336# A good web page encloses its javascript in comments <!-- ... -->, 4337# But some don't, and the (sometimes quoted) < > characters 4338# really mess us up. Let's try to strip the javascript, 4339# or any other script for that matter. 4340$inscript = 0; 4341$$tbuf =~ s/((?>(\( *['"])?<(\/?script[^>]*>)?|[>"']))/processScript($1)/gei; 4342print "javascript stripped\n" if $debug >= 6; 4343 4344$errorMsg = $intMsg, return 0 if $intFlag; 4345 4346# I'm about to crack html tags with one regexp, 4347# and that would be entirely doable, if people and web tools didn't 4348# generate crappy html. 4349# The biggest problem is unbalanced quotes, whence the open quote 4350# swallows the rest of the document in one tag. 4351# I'm goint to *try*, emphasis on try, to develop a few heuristics 4352# that will detect some of the common misquotings. 4353# This stuff should be written in C, a complex procedural algorithm. 4354# But I don't have the time or inclination to translate this mess into C, 4355# and perl is not the write language to write an algorithm like that. 4356# I've seen examples of all of these syntactical nightmares on the web, 4357# and others that I can't possibly code around. 4358# Only one quote in the tag; get rid of it. Tag is on one line. 4359$$tbuf =~ s/<(\/?[a-zA-Z][^<>'"]*)['"]([^<>'"]*)>/<$1$2>/g; 4360# Two quotes before the last >, but not ="">, which would be ok. 4361$$tbuf =~ s/([^= <>])"">/$1">/g; 4362$$tbuf =~ s/([^= <>])''>/$1'>/g; 4363# Missing quote before the last > "word> 4364# It's usually the last > where things screw up. 4365$$tbuf =~ s/["'](\w+)>/$1>/g; 4366#   is suppose to have a semi after it - it often doesn't. 4367$$tbuf =~ s/ $/ /gi; 4368$$tbuf =~ s/ ([^;])/ $1/gi; 4369# Well that's all I can manage right now. 4370 4371# Encode <font face=symbol> number characters. 4372# This is kludgy as hell, but I want to be able to read my own math pages. 4373$$tbuf =~ s/<font +face=['"]?symbol['"]?> *([a-zA-Z]|&#\d+;) *<\/font>/metaSymbol($1)/gei; 4374 4375# Now let's encode the tags. 4376# Thanks to perl, we can do it in one regexp. 4377$$tbuf =~ s/< # start the tag 4378(\/?) # leading slash turns off the tag 4379([a-zA-Z]+) # name of the tag 4380( # remember the attributes 4381(?> # fix each subexpression as you find it 4382[^>"']+ # unquoted stuff inside the tag 4383| 4384"[^"]*" # stuff in double quotes 4385| 4386'[^']*' # stuff in single quotes 4387)* # as many of these chunks as you need 4388) # return the set in $3 4389> # close the html tag 4390/processTag($2, $1, $3)/xsge; 4391print "tags encoded\n" if $debug >= 6; 4392 4393$errorMsg = $intMsg, return 0 if $intFlag; 4394 4395# Now we can crunch the meta chars without fear. 4396$$tbuf =~ s/&([a-zA-Z]+|#\d+);/metaChar($1)/ge; 4397print "meta chars translated\n" if $debug >= 6; 4398 4399$onloadSubmit = 0; 4400$longcut = $lperiod = $lcomma = $lright = $lany = 0; 4401 4402my @olcount = (); # Where are we in each nested number list? 4403my @dlcount = (); # definition lists 4404my $tagnest = "."; # Stack the nestable html tags, such as <LI> </LI> 4405my $tagLock = 0; # other tags are locked out, semantically, until this one is done 4406my $tagStart; # location of the tag currently under lock 4407# Locking tags are currently: title, select, textarea 4408my $inhref = 0; # in anchor reference 4409my $intitle = 0; 4410my $inselect = 0; 4411my $inta = 0; # text area 4412my $optStart; # start location of option 4413my $opt; # hash of options 4414my $optCount; # count of options 4415my $optSel; # options selected 4416my $optSize; # size of longest option 4417my $lastopt; # last option, waiting for next <option> 4418my $premode = 0; # preformatted mode 4419my $hrefTag; 4420my $hrefFile = ""; 4421$inscript = 0; 4422my $intable = 0; # in table 4423my $intabhead = 0; # in table header 4424my $intabrow = 0; # in table row 4425my $inform = 0; # in form 4426 4427# Global substitute is mighty powerful, but at this point 4428# we really need to proceed token by token. 4429# Going by chunks is better than shifting each character. 4430# Extract a contiguous sequence of non-whitespace characters, 4431# or whitespace, or a tag. 4432reformat: 4433while($$tbuf =~ /(\s+|\x80[\w\/,]+\x80|[^\s\x80]+)/gs) { 4434$errorMsg = $intMsg, return 0 if $intFlag; 4435my $chunk = $1; 4436 4437# Should we ignore line breaks in table headers? 4438$chunk = ' ' if ($intabhead|$inhref) and $chunk =~ /^\x80br\/?0/; 4439 4440if($chunk =~ /^\s/) { # whitespace 4441$j = $chunk =~ y/\n/\n/; # count newlines 4442$lineno += $j; 4443next reformat if $inscript; 4444if(!$premode or $tagLock) { 4445next reformat if $lspace; 4446$chunk = " "; 4447$chunk = "\n" if $j and substr($refbuf, -4) =~ / [a-zA-Z]\.$/; 4448appendWhiteSpace($chunk, !($inhref + $tagLock)); 4449# Switch { and space, it looks prettier. 4450# Hopefully this will never happen accept at the beginning of a link. 4451$inhref and $lspace == 1 and $refbuf =~ s/(\x80[\x85-\x8f]+{) $/ $1/; 4452next reformat; 4453} # not preformatted 4454 4455# Formfeed is a paragraph break. 4456$j = 2 if $chunk =~ s/\f/\n\n/g; 4457$colno = 1 if $j; 4458# Keep the whitespace after nl, it's preformatted. 4459$chunk =~ s/.*\n//s; 4460# Note that we make no effort to track colno or lperiod etc in preformat mode. 4461if($lspace == 3) { 4462backOverSpaces(1); 4463$j = 0; 4464} 4465if($j == 2) { 4466backOverSpaces(1); 4467$chunk = "\n\n".$chunk if $lspace < 2; 4468$chunk = "\n".$chunk if $lspace == 2; 4469$lspace = 3; 4470$j = 0; 4471} 4472if(!$j) { 4473$refbuf .= $chunk; 4474next reformat; 4475} 4476# Now j = 1 and lspace < 3 4477backOverSpaces(1); 4478$refbuf .= "\n$chunk"; 4479$lspace = 1 if ! $lspace; 4480++$lspace; 4481next reformat; 4482} # whitespace 4483 4484if(substr($chunk, 0, 1) ne "\x80") { 4485next reformat if $inscript; 4486$chunk =~ y/{}/[]/ if $inhref; 4487$inhref = 2 if $inhref; 4488appendPrintable($chunk); 4489next reformat; 4490} # token 4491 4492# It's a tag 4493my ($tag, $slash, $nlcount, $attrnum) = 4494$chunk =~ /^.([a-z]+)(\/?)(\d+)(?:,(\d+))?.$/; 4495# Unless we hear otherwise, the tag is assumed to contribute no visible 4496# characters to the finished document. 4497$chunk = ""; 4498 4499my $desc = $tagdesc{$tag}; 4500$desc = " 0an unknown construct" if ! defined $desc; 4501my $nest = substr $desc, 0, 2; 4502$chunk = "\n\n" if $nest & 4; 4503substr($desc, 0, 2) = ""; 4504# Equivalent tags, as far as we're concerned 4505$tag = "script" if $tag eq "style"; 4506$tag = "script" if $tag eq "noframes"; 4507$tag = "pre" if $tag eq "xmp"; 4508$tag = "pre" if $tag eq "address"; 4509$tag = "frame" if $tag eq "iframe"; 4510my $tag1 = ".$tag."; 4511my $tagplus = "$tag$slash"; 4512$lineno += $nlcount, next reformat if $inscript and $tagplus ne "script/"; 4513$attrnum = 0 if ! defined $attrnum; 4514# A hidden version of the attribute number, to embed in the text. 4515$attrhidden = hideNumber $attrnum; 4516$h = undef; 4517if($attrnum) { 4518$h = $$btags[$attrnum]; 4519$$h{lineno} = $lineno; # source line number 4520} 4521my $openattr = 0; 4522my $openattrhidden; 4523my $openTag; 4524my $closeAnchor = 0; 4525$closeAnchor = 1 if $inhref and ($tag eq "a" or $tag eq "area" or $tag eq "frame" or $tag eq "input"); 4526$closeAnchor = 1 if $inhref == 2 and $nest&16; 4527 4528# Make sure we open and close things in order. 4529if($nest&1) { 4530if(!$slash) { 4531errorConvert("$desc begins in the middle of $desc") 4532if index($tagnest, $tag1) >= 0 and !($nest&2); 4533$tagnest = ".$tag.$attrnum" . $tagnest; 4534push @olcount, 0 if $tag eq "ol"; 4535push @dlcount, 0 if $tag eq "dl"; 4536} else { 4537$j = index $tagnest, $tag1; 4538if($j < 0) { 4539errorConvert("an unexpected closure of $desc, which was never opened"); 4540} else { 4541if($j > 0) { 4542my $opendesc = substr $tagnest, 1; 4543$opendesc =~ s/\..*//; 4544$opendesc = $tagdesc{$opendesc}; 4545substr($opendesc, 0, 2) = ""; 4546errorConvert("$desc is closed inside $opendesc"); 4547} # bad nesting 4548++$j; 4549substr($tagnest, $j, length($tag)+1) = ""; 4550$openattr = substr $tagnest, $j; 4551$openattr =~ s/\..*//; 4552substr($tagnest, $j, length($openattr)+1) = ""; 4553$openattrhidden = hideNumber $openattr; 4554if($openattr) { # record the offset of </tag> 4555$ofs2 = backOverSpaces(0); 4556$openTag = $$btags[$openattr]; 4557# Tweak offset for the } on the anchor 4558++$ofs2 if $closeAnchor and $inhref == 2; 4559$ofs2 = $tagStart if $tagLock; 4560$$openTag{ofs2} = $ofs2; 4561} 4562pop @olcount if $tag eq "ol"; 4563pop @dlcount if $tag eq "dl"; 4564} # was this construct open or not 4565} # /tag 4566} # nestable tag 4567 4568# retain the start and end of any tag worthy of attributes 4569if($attrnum) { 4570$ofs1 = backOverSpaces(0); 4571$ofs1 = $tagStart if $tagLock; 4572$$h{ofs1} = $ofs1; 4573} 4574 4575switch: { 4576 4577if($closeAnchor) { 4578if($inhref == 1) { # no text in the hyperlink 4579if($refbuf =~ s/( *\x80[\x85-\x8f]+{[\s|]*)$//s) { 4580$j = $1; 4581$colno -= $j =~ y/ {/ {/; 4582} else { 4583warn "couldn't strip off the open anchor at line $lineno <" . 4584substr($refbuf, -10) . ">."; 4585} 4586$$hrefTag{tag} = "z"; # trash the anchor 4587} else { 4588$refbuf .= "}"; 4589$refbuf =~ s/([ \n])}$/}$1/; 4590++$colno; 4591$j = $$hrefTag{href}; 4592my $onc = $$hrefTag{onclick}; 4593if($j =~ /^javascript/i or $onc) { 4594# Let the onclick take precedence. 4595$j = $onc if defined $onc and length $onc; 4596# See if this is a javascript function we can recognize and circumvent. 4597$i = javaWindow $j; 4598if($$hrefTag{form} and $i eq "submit") { 4599# I'll assume this is a check and submit function. 4600# If it only validates fields, we're ok. 4601# If it reformats the data, we're screwed! 4602$i = $$hrefTag{ofs1}; 4603$inf = substr $refbuf, $i; 4604$inf =~ s/{/</; 4605$inf =~ s/ +$//; 4606$inf =~ s/}$/ js\x80\x8f>/; 4607$$hrefTag{$ofs2} += 5; 4608if($$inform{action}) { 4609my $actscheme = is_url $$inform{action}; 4610$actscheme = is_url $baseref unless $actscheme; 4611if($actscheme eq "https") { 4612$inf =~ s/ js/& secure/; 4613$$hrefTag{$ofs2} += 7; 4614} 4615if($$inform{action} =~ /^mailto:/i) { 4616$inf =~ s/ js/& mailform/; 4617$$hrefTag{$ofs2} += 9; 4618} 4619} 4620substr($refbuf, $i) = $inf; 4621# change it to an input field 4622$$hrefTag{tag} = "input"; 4623$$hrefTag{type} = "submit"; 4624$$hrefTag{value} = "submit"; 4625$$inform{nnh}++; # another non hidden field 4626$$inform{nif}++; 4627$$inform{lnh} = $h; # last non hidden field 4628} 4629# Is this just opening a new window, then calling the link? 4630elsif(length $i and $i ne "submit") { 4631# Ok, I'll assume it's a new window with hyperlink 4632$$hrefTag{href} = $i; 4633} else { 4634print "unknown javascript ref $j\n" if $debug >= 3; 4635} 4636} 4637} 4638$lspace = computeSpace(); 4639$inhref = 0; 4640last switch if $tagplus eq "a/"; 4641} # close the open anchor 4642 4643if($tagplus eq "sup") { 4644$refbuf .= '^'; 4645last switch; 4646} # sup 4647 4648if($tagplus eq "sup/" and defined $openTag) { 4649$ofs1 = $$openTag{ofs1}; 4650++$ofs1; # skip past ^ 4651$j = substr $refbuf, $ofs1; 4652stripWhite \$j; 4653last switch unless length $j; 4654if($j =~ /^th|st|rd|nd$/i and 4655substr($refbuf, $ofs1-2) =~ /\d/) { 4656--$ofs1; 4657substr($refbuf, $ofs1, 1) = ""; 4658last switch; 4659} 4660last switch if $j =~ /^(\d+|\*)$/; 4661if(not $allsub) { 4662last switch if $j =~ /^[a-zA-Z](?:\d{1,2})?$/; 4663} 4664(substr $refbuf, $ofs1) = "($j)"; 4665last switch; 4666} # sup/ 4667 4668if($tagplus eq "sub/" and defined $openTag) { 4669$ofs1 = $$openTag{ofs1}; 4670$j = substr $refbuf, $ofs1; 4671stripWhite \$j; 4672last switch unless length $j; 4673if(not $allsub) { 4674last switch if $j =~ /^\d{1,2}$/; 4675} 4676(substr $refbuf, $ofs1) = "[$j]"; 4677last switch; 4678} # sub/ 4679 4680if($tagplus eq "title" and ! $tagLock and ! $intitle) { 4681$tagStart = length $refbuf; 4682$tagLock = $intitle = 1; 4683last switch; 4684} # title 4685 4686if($tagplus eq "title/" and $intitle) { 4687$i = substr $refbuf, $tagStart; 4688substr($refbuf, $tagStart) = ""; 4689$lspace = computeSpace(); 4690$longcut = 0; 4691$colno = 1; 4692if(! defined $$btags[0]{title}) { 4693stripWhite \$i; 4694$$btags[0]{title} = $i if length $i; 4695} 4696$tagLock = 0; 4697$intitle = 0; 4698last switch; 4699} # title/ 4700 4701if($tagplus eq "li") { 4702$i = index $tagnest, ".ol."; 4703$j = index $tagnest, ".ul."; 4704if($i >= 0) { 4705if($j >= 0 and $j < $i) { 4706$chunk = "\n* "; 4707} else { 4708$j = ++$olcount[$#olcount]; 4709$chunk = "\n$j. "; 4710} 4711} elsif($j >= 0) { 4712$chunk = "\n* "; 4713} else { 4714$chunk = "\n"; 4715errorConvert("$desc appears outside of a list context"); 4716} 4717last switch; 4718} # li 4719 4720if($tagplus eq "dt" or $tagplus eq "dd") { 4721if(($i = $#dlcount) >= 0) { 4722$j = ($tag eq "dd" ? 1 : 0); 4723errorConvert("improper term->definition sequence") if $j != $dlcount[$i]; 4724$dlcount[$i] ^= 1; 4725} else { 4726errorConvert("$desc is not contained in a definition list"); 4727} 4728last switch; 4729} # dt or dd 4730 4731# The only thing good about an image is its alt description. 4732if($tagplus eq "img") { 4733$hrefFile = "" unless $inhref; 4734$j = deriveAlt($h, $hrefFile); 4735$j = "?" if $inhref and length($j) == 0; 4736if(length $j) { 4737$refbuf .= $j; 4738$inhref = 2 if $inhref; 4739$lspace = 0; 4740} 4741last switch; 4742} # image 4743 4744if($tagplus eq "body") { 4745my $onl = $$h{onload}; # popup 4746$onl = $$h{onunload} unless $onl; # popunder 4747next unless $onl; 4748if($onl =~ /submit[.\w]* *\(/i) { 4749$onloadSubmit = 1; 4750last switch; 4751} 4752$j = javaWindow $onl; 4753if(length $j and $j ne "submit") { 4754createHyperLink($h, $j, "onload"); 4755$chunk = ""; 4756last switch; 4757} # open another window 4758} # body 4759 4760if($tagplus eq "bgsound") { 4761my $j = $$h{src}; 4762if(defined $j and length $j) { 4763# Someday we'll let you play this right from edbrowse, spawning playmidi 4764# or mpg123 or whatever. For now I'll let you grab the file yourself. 4765# Maybe that's better anyways. 4766createHyperLink($h, $j, "Background music"); 4767$chunk = "\n"; 4768} 4769last switch; 4770} # background music 4771 4772if($tag eq "base") { 4773$href = $$h{href}; 4774$baseref = urlDecode $href if $href; 4775next reformat; 4776} # base tag 4777 4778if($tagplus eq "a") { 4779if(defined $$h{name}) { 4780$refbuf .= "\x80$attrhidden*"; 4781} # name= 4782if(defined($hrefFile = $$h{href})) { 4783$$h{form} = $inform; 4784$inhref = 1; 4785$hrefTag = $h; 4786$$h{bref} = $baseref; 4787# We preserve $lspace, despite pushing visible characters. 4788$refbuf .= "\x80$attrhidden".'{'; 4789++$colno; 4790} # href= 4791last switch; 4792} # a 4793 4794if($tagplus eq "area") { 4795my ($alt, $href); 4796if(defined($href = $$h{href})) { 4797$j = javaWindow($href); 4798$href = $j if length $j and $j ne "submit"; 4799$alt = deriveAlt($h, ""); 4800$alt = $foundFunc if length $foundFunc and not defined $$h{alt}; 4801$alt = "area" unless length $alt; 4802createHyperLink($h, $href, $alt); 4803} # hyperlink 4804} # area 4805 4806if($tagplus eq "frame") { 4807my $name = $$h{name}; 4808my $src = $$h{src}; 4809if(defined $src) { 4810$name = "" if ! defined $name; 4811stripWhite \$name; 4812stripWhite \$src; 4813if(length $src) { 4814$$h{ofs1} = backOverSpaces(1); 4815$name = "???" if ! length $name; 4816$name =~ y/{}\n/[] / if $inhref; 4817$refbuf .= "frame "; 4818$colno += 6; 4819createHyperLink($h, $src, $name); 4820}} # frame becomes hyperlink 4821last switch; 4822} # frame 4823 4824$premode = 1, last switch if $tagplus eq "pre"; 4825$premode = 0, last switch if $tagplus eq "pre/"; 4826$inscript = 1, last switch if $tagplus eq "script"; 4827$inscript = 0, last switch if $tagplus eq "script/"; 4828$intable++, last switch if $tagplus eq "table"; 4829$intable--, last switch if $tagplus eq "table/" and $intable; 4830if($tag eq "br") { 4831$chunk = "\n"; 4832$chunk = "\n\n" if $lspace >= 2; 4833last switch; 4834} 4835$chunk = "\n--------------------------------------------------------------------------------\n\n", last switch if $tagplus eq "hr"; 4836 4837if($tag eq "tr") { 4838errorConvert("$desc not inside a table") if ! $intable; 4839$slash ? do { --$intabrow if $intabrow } : ++$intabrow; 4840$chunk = "\n"; 4841$intabhead = 0; 4842} # tr 4843 4844if($tag eq "td" or $tag eq "th") { 4845errorConvert("$desc not inside a table row") if ! $intabrow; 4846$intabhead = 0; 4847$intabhead = 1 - length $slash if $tag eq "th"; 4848if($slash) { 4849substr($refbuf, -1) = "" if $lspace == 1; 4850$refbuf .= "|"; 4851$lspace = 1; 4852} 4853last switch; 4854} # td or th 4855 4856if($tagplus eq "form" and ! ($inform + $tagLock)) { 4857$inform = $h; 4858$$h{bref} = $baseref; 4859$j = lc $$h{method}; 4860$j = "get" if ! defined $j or ! length $j; # default 4861if($j ne "post" and $j ne "get") { 4862errorConvert("form method $j not supported"); 4863$j = "get"; 4864} 4865$$h{method} = $j; 4866$$h{nnh} = 0; # number of non hidden fields 4867$$h{nif} = 0; # number of input fields 4868last switch; 4869} # form 4870 4871if($tagplus eq "form/" and $inform) { 4872# Handle the case with only one visible input field. 4873if($onloadSubmit or 4874($$inform{action} or $$inform{onchange}) and 4875$$inform{nnh} <= 1 and $$inform{nif} and ( 4876$$inform{nnh} == 0 or 4877($h = $$inform{lnh}) and 4878$$h{type} ne "submit")) { 4879$refbuf .= " " if $lspace == 0; 4880$itag = {tag => "input", 4881type => "submit", form => $inform, 4882size => 2, value => "Go"}; 4883push @$btags, $itag; 4884$j = hideNumber $#$btags; 4885$refbuf .= "\x80$j<Go\x80\x8f>"; 4886$lspace = 0; 4887$onloadSubmit = 0; 4888} # submit button created out of thin air 4889$inform = 0; 4890last switch; 4891} # form/ 4892 4893my $noform = "$desc is not inside a form"; 4894if($tagplus eq "select" and ! $tagLock) { 4895errorConvert($noform) if ! $inform; 4896$inselect = $h; 4897$$inform{onchange} = 1 if $inform and $$h{onchange}; 4898$tagLock = 1; 4899$tagStart = length $refbuf; 4900$optCount = $optSel = $optSize = 0; 4901$lastopt = undef; 4902$$h{opt} = $opt = {}; 4903last switch; 4904} # select 4905 4906if(($tagplus eq "select/" or $tagplus eq "option") and $inselect) { 4907if(defined $lastopt) { 4908$j = substr $refbuf, $optStart; 4909stripWhite \$j; 4910if(length $j) { 4911$lastopt =~ s/NoOptValue$/$j/; 4912$$opt{$j} = $lastopt; 4913if($optCount < 999) { 4914++$optCount; 4915} else { 4916errorConvert("too many options, limit 999"); 4917} 4918++$optSel if substr($lastopt, 3, 1) eq '+'; 4919$j = length $j; 4920$optSize = $j if $j > $optSize; 4921}} 4922 4923if($tagplus eq "select/") { 4924$inselect = 0; 4925$tagLock = 0; 4926substr($refbuf, $tagStart) = ""; 4927$lspace = computeSpace(); 4928$colno = 1; 4929$optCount or errorConvert("no options in the select statement"); 4930my $mult = 0; # multiple select 4931$mult = 1 if defined $$openTag{multiple}; 4932my $mse = 0; # multiple select error 4933$optSel <= 1 or $mult or 4934$mse = 1, errorConvert("multiple options preselected"); 4935$$inform{nnh}++; # another non hidden field 4936$$inform{nif}++; 4937$$inform{lnh} = $openTag; # last non hidden field 4938# Display selected item(s) 4939$refbuf .= "\x80$openattrhidden<"; 4940my $buflen = length $refbuf; 4941$i = 0; 4942foreach (%{$opt}) { 4943$i ^= 1; 4944$j = $_, next if $i; 4945if($mult and $j =~ /,/) { 4946errorConvert("sorry, option string cannot contain a comma"); 4947$$opt{$j} = ""; # can't delete from hash 4948next; 4949} 4950substr($_, 3, 1) = '-' if $mse; 4951next unless substr($_, 3, 1) eq '+'; 4952$refbuf .= ',' unless substr($refbuf, -1) eq '<'; 4953$refbuf .= $j; 4954} 4955# This is really an input tag. 4956$$openTag{tag} = "input"; 4957$$openTag{type} = "select"; 4958$$openTag{size} = ($mult ? 0 : $optSize); 4959$$openTag{value} = substr $refbuf, $buflen; 4960$$openTag{form} = $inform if $inform; 4961$refbuf .= "\x80\x8f>"; 4962$lspace = 0; 4963$$openTag{ofs2} = length $refbuf; 4964last switch; 4965}} # select/ 4966 4967if($tagplus eq "option") { 4968if(! $inselect) { 4969errorConvert("$desc is not inside a select statement") 4970} else { 4971$lastopt = $$h{value}; 4972$lastopt = "NoOptValue" unless defined $lastopt; 4973$lastopt = (defined $$h{selected} ? "+" : "-") . $lastopt; 4974$lastopt = sprintf("%03d", $optCount) . $lastopt; 4975$optStart = length $refbuf; 4976$$h{tag} = "z"; 4977} # in select or not 4978last switch; 4979} # option 4980 4981if($tagplus eq "textarea" and ! $tagLock) { 4982errorConvert($noform) if ! $inform; 4983$inta = $h; 4984$tagLock = 1; 4985$tagStart = length $refbuf; 4986last switch; 4987} # textarea 4988 4989if($tagplus eq "textarea/" and $inta) { 4990# Gather up the original, unformatted text. 4991$i = ""; 4992foreach $j ($$inta{lineno}..$lineno) { 4993 $i .= fetchLine($j, 0); 4994$i .= "\n"; 4995} 4996# Strip off textarea tags. 4997# I'm not using the s suffix, textarea tags should not cover multiple lines. 4998$i =~ s/^.*<textarea[^<>]*>\n?//i; 4999$i =~ s/<\/textarea.*\n$//i; 5000$i .= "\n" if length $i and substr($i, -1) ne "\n"; 5001$inta = 0; 5002$tagLock = 0; 5003substr($refbuf, $tagStart) = ""; 5004my $cx = cxCreate(\$i, ""); 5005$colno = 1; 5006$$openTag{cx} = $cx; 5007++$cx; 5008$$inform{nnh}++; # another non hidden field 5009$$inform{nif}++; 5010$$inform{lnh} = $openTag; # last non hidden field 5011$refbuf .= "\x80$openattrhidden<buffer $cx\x80\x8f>"; 5012$lspace = 0; 5013# This is really an input tag. 5014$$openTag{tag} = "input"; 5015$$openTag{ofs1} = $tagStart; 5016$$openTag{ofs2} = length($refbuf); 5017$$openTag{type} = "area"; 5018$$openTag{form} = $inform if $inform; 5019$j = $$openTag{rows}; 5020$j = 0 if ! defined $j; 5021$j = 0 unless $j =~ /^\d+$/; 5022$$openTag{rows} = $j; 5023$j = $$openTag{cols}; 5024$j = 0 if ! defined $j; 5025$j = 0 unless $j =~ /^\d+$/; 5026$$openTag{cols} = $j; 5027last switch; 5028} # textarea/ 5029 5030if($tagplus eq "input") { 5031errorConvert($noform) if ! $inform; 5032$i = lc $$h{type}; 5033$i = "text" unless defined $i and length $i; 5034$i = "text" if $i eq "password"; 5035# I should verify that the input is a number, 5036# but I'm too busy right now to implement that. 5037$i = "text" if $i eq "number"; 5038# Be on the lookout for new, advanced types. 5039index(".text.checkbox.radio.submit.image.button.reset.hidden.", ".$i.") >= 0 or 5040errorConvert("unknown input type $i"); 5041$j = $$h{value}; 5042$j = "" unless defined $j; 5043$$h{saveval} = $j; 5044if($i eq "radio" or $i eq "checkbox") { 5045$j = (defined $$h{checked} ? '+' : '-'); 5046} 5047if($i eq "image") { 5048$i = "submit"; 5049$$h{image} = 1; 5050length $j or $j = deriveAlt($h, "submit"); 5051} # submit button is represented by an icon 5052if($i ne "hidden") { 5053# I don't think there should be newlines in the value field. 5054$j =~ y/\n/ /; 5055} 5056if($i eq "button") { 5057# Hopefully we can turn this into a hyperlink. 5058# If not, it's no use to us. 5059my $onc = $$h{onclick}; 5060my $page = javaWindow $onc; 5061$j = "button" unless length $j; # alt=button 5062if(not length $page and 5063$onc =~ /self\.location *= *['"]([\w._\/:,=@&?+-]+)["'] *(\+?)/i) { 5064$page = "$1$2"; 5065} 5066$i = $page, $page = "" if $page eq "submit"; 5067if(length $page) { 5068createHyperLink($h, $page, $j); 5069last switch; 5070} 5071} # button 5072$$h{type} = $i; 5073$$h{value} = $j; 5074$$inform{nif}++; 5075if($i ne "hidden") { 5076$$inform{nnh}++; # another non hidden field 5077$$inform{lnh} = $h; # last non hidden field 5078$refbuf .= "\x80$attrhidden<$j"; 5079if($i eq "submit") { 5080$refbuf .= " js" if $$inform{onsubmit} or $$h{onclick}; 5081if($$inform{action}) { 5082my $actscheme = is_url $$inform{action}; 5083$actscheme = is_url $baseref unless $actscheme; 5084$refbuf .= " secure" if $actscheme eq "https"; 5085$refbuf .= " mailform" if $$inform{action} =~ /^mailto:/i; 5086} 5087} 5088$refbuf .= "\x80\x8f>"; 5089$lspace = 0; 5090$j = $$h{maxlength}; 5091$j = 0 unless defined $j and $j =~ /^\d+$/; 5092$j = 1 if $i eq "checkbox" or $i eq "radio"; 5093$$h{size} = $j; 5094} 5095if($inform and ! $tagLock) { 5096$$h{form} = $inform; 5097$$h{ofs2} = length $refbuf; 5098} 5099last switch; 5100} # input 5101 5102} # switch on $tag 5103 5104$lineno += $nlcount; 5105next reformat unless length $chunk; 5106 5107# Apparently the tag has forced a line break or paragraph break. 5108# I've decided to honor this, even in preformat mode, 5109# because that's what lynx does. 5110$colno = 1; 5111$longcut = $lperiod = $lcomma = $lright = $lany = 0; 5112backOverSpaces(1); 5113 5114# Get rid of a previous line of pipes. 5115# This is usually a table or table row of images -- of no use to me. 5116if($intable and $lspace < 2) { 5117$j = length($refbuf) - 1; 5118while($j >= 0 and substr($refbuf, $j, 1) =~ /[|\s]/) { 5119last if $j > 0 and substr($refbuf, $j-1, 2) eq "\n\n"; 5120--$j; 5121} 5122++$j; 5123if($j < length $refbuf) { 5124substr($refbuf, $j) = ""; 5125$lspace = computeSpace(); 5126$colno = 1; 5127} 5128} # end of line tag inside a table 5129 5130if($chunk eq "\n\n") { 5131next reformat if $lspace == 3; 5132$chunk = "\n" if $lspace == 2; 5133$lspace = 3; 5134$refbuf .= $chunk; 5135next reformat; 5136} # tag paragraph 5137 5138# It's a line break. 5139substr($chunk, 0, 1) = "" if $lspace > 1; 5140$lspace = 2 if $lspace < 2; 5141next reformat unless length $chunk; 5142$refbuf .= $chunk; 5143$chunk =~ s/^\n//; 5144next reformat unless length $chunk; 5145# It's either a list item indicator or a horizontal line 5146$inhref = 2 if $inhref; 5147if($chunk =~ /^--/) { 5148# Again I'm following the lynx convention. 5149# hr implies a line break before, and aparagraph break after. 5150$lspace = 3; 5151} else { 5152$colno += length $chunk; 5153$lspace = 1; 5154} 5155} # loop over tokens in the buffer 5156 5157$$tbuf = undef; 5158print "tags rendered\n" if $debug >= 6; 5159 5160if(length($tagnest) > 1 and $tagnest ne ".body.0.") { 5161my $opendesc = substr $tagnest, 1; 5162$opendesc =~ s/\..*//; 5163$opendesc = $tagdesc{$opendesc}; 5164substr($opendesc, 0, 2) = ""; 5165--$lineno; 5166errorConvert("$opendesc is not closed at EOF"); 5167} 5168 5169$errorMsg = $intMsg, return 0 if $intFlag; 5170 5171$refbuf =~ s/\s+$//; # don't need trailing blank lines 5172 5173# In order to fit all the links on one screen, many web sites place 5174# several links on a line. Sometimes they are separated 5175# by whitespace, sometimes commas, sometimes hyphens. 5176# Sometimes they are arranged in a table, and thanks to the 5177# table rendering software in this program, they will be pipe separated. 5178# In any case, there is no advantage in having multiple 5179# links on a line, and it's downright inconvenient when you want to use 5180# the g or A command. We introduce line breaks between links. 5181# We use alphanum [punctuation] right brace to locate the end of a link. 5182# We use { optional '" alphanum for the start of a link. 5183# These aren't guaranteed to be right, but they probably are most of the time. 5184# Let's start with link space link or link separater link 5185$refbuf =~ s/} ?[-,|]? ?(\x80[\x85-\x8f]+{['"]?\w)/}\n$1/g; 5186# Separating punctuation at the end of the line. 5187$refbuf =~ s/^({[^{}]+} ?),$/$1/mg; 5188# Delimiter at the start of line, before the first link. 5189$refbuf =~ s/\n[-,|] ?(\x80[\x85-\x8f]+{['"]?\w)/\n$1/g; 5190# word delimiting punctuation space link. 5191$refbuf =~ s/([a-zA-Z]{2,}[-:|]) (\x80[\x85-\x8f]+{['"]?\w)/$1\n$2/g; 5192# Link terminating punctuation words 5193$refbuf =~ s/(\w['"!]?}) ?[-|:] ?(\w\w)/$1\n$2/g; 5194print "links rearranged\n" if $debug >= 6; 5195 5196if(! $badHtml) { 5197# Verify internal links. 5198intlink: 5199foreach $h (@$btags) { 5200$tag = $$h{tag}; 5201next unless $tag eq "a"; 5202$j = $$h{href}; 5203next if ! defined $j; 5204next unless $j =~ s/^#//; 5205$refbuf .= ""; # reset match position. Isn't there a better way?? 5206while($refbuf =~ /\x80([\x85-\x8f]+)\*/g) { 5207$i = revealNumber $1; 5208next intlink if $$btags[$i]{name} eq $j; 5209} 5210$lineno = $$h{lineno}; 5211errorConvert("internal link #$j not found"); 5212last; 5213} # loop 5214print "internal links verified\n" if $debug >= 6; 5215} 5216 5217# Find the uncalled javascript functions. 5218my $fw = $$btags[0]{fw}; # pointer to function window hash 5219my $orphans = 0; 5220foreach $i (keys %$fw) { 5221$j = $$fw{$i}; 5222next unless $j =~ s/^\*//; 5223$orphans = 1, $refbuf .= "\n" unless $orphans; 5224print "orphan java function $i\n" if $debug >= 3; 5225$itag = {tag => "a", href => $j, bref => $baseref}; 5226push @$btags, $itag; 5227my $hn = hideNumber $#$btags; 5228$refbuf .= "\n jf: $i\x80$hn" . "{$j}"; 5229# I don't think we need to mess with ofs1 and ofs2? 5230$$itag{ofs2} = length $refbuf; 5231} 5232 5233$$tbuf = $refbuf; # replace 5234return 1; 5235} # renderHtml 5236 5237# Report the first html syntax error. 5238# $lineno tracks the line number, where text is being processed. 5239sub errorConvert($) 5240{ 5241$badHtml and return; 5242my $msg = shift; 5243# Look at the following print statement, and you'll see the little things 5244# I try to anticipate when I write software for the blind. 5245# The first physical line of output is for the sighted user, or the 5246# new blind user -- but the experienced blind user doesn't need to read it. 5247# He can read the last line of output, one keystroke in my adaptive software, 5248# and hear exactly what he want to know. 5249print "The html text contains syntax errors. The first one is at line\n$lineno: $msg.\n"; 5250# Put the bad line number in label e. 5251substr($labels, 4*$lnwidth, $lnwidth) = 5252sprintf $lnformat, $lineno; 5253$badHtml = 1; 5254} # errorConvert 5255 5256# Strip redundent stuff off the start and end of a web page, 5257# relative to its parent. 5258sub stripChild() 5259{ 5260$fmode&$browsemode or $errorMsg = $nobrowse, return 0; 5261defined $backup or $errorMsg = "no previous web page", return 0; 5262my $p_fmode = $$backup{fmode}; 5263$p_fmode&$browsemode or $errorMsg = "no previous web page", return 0; 5264# Parent and child file names should come from the same server. 5265my $p_fname = $$backup{fname}; 5266my $c_fname = $fname; 5267is_url($p_fname) and is_url($c_fname) or $errorMsg = "web pages do not come from web servers", return 0; 5268$p_fname =~ s,^https?://,,i; 5269$c_fname =~ s,^https?://,,i; 5270$p_fname =~ s,/.*,,; 5271$c_fname =~ s,/.*,,; 5272$p_fname =~ s/\.browse$//; 5273$c_fname =~ s/\.browse$//; 5274$p_fname eq $c_fname or $errorMsg = "parent web page comes from a different server", return 0; 5275$$btags[0]{dol2} == $dol or $errorMsg = "web page already stripped or modified", return 0; 5276my $p_dol = $$backup{btags}[0]{dol2}; 5277my $c_dol = $dol; 5278if($p_dol > 10 and $c_dol > 10) { 5279my $pb = $$backup{btags}[0]{pb}; 5280if(defined $pb) { 5281evaluateSequence($pb, 0); 5282if($$btags[0]{dol2} > $dol) { 5283$fmode &= ~($changemode|$firstopmode); 5284apparentSize(); 5285$$btags[0]{pb} = $pb; 5286return 1; 5287} # successful post browse from the parent page 5288} # attempting post browse from the parent page 5289my $p_map = $$backup{btags}[0]{map2}; 5290my $c_map = $map; 5291my $start = 1; 5292my $oneout = 0; 5293while($start <= $p_dol and $start <= $c_dol) { 5294if(!sameChildLine(\$p_map, $start, \$c_map, $start)) { 5295last if $oneout; 5296$oneout = $start; 5297} 5298++$start; 5299} 5300$start = $oneout if $oneout and $start < $oneout + 5; 5301my $delcount = --$start; 5302my $p_end = $p_dol; 5303my $c_end = $c_dol; 5304while($p_end > $start and $c_end > $start) { 5305last unless sameChildLine(\$p_map, $p_end, \$c_map, $c_end); 5306++$delcount; 5307--$p_end, --$c_end; 5308} 5309if($delcount == $dol) { 5310my $ln = substr($map, $lnwidth, $lnwidth1); 5311$text[$ln] = "This web page contains no new information - you've seen it all before."; 5312print "71\n"; 5313$dol = $dot = 1; 5314$labels = $lnspace x 26; 5315$fmode &= ~$firstopmode; 5316return 1; 5317} 5318if($delcount > 5) { 5319++$c_end; 5320delText($c_end, $dol) if $c_end <= $dol; 5321delText(1, $start) if $start; 5322$labels = $lnspace x 26; 5323$fmode &= ~($changemode|$firstopmode); 5324apparentSize(); 5325return 1; 5326} 5327} 5328$errorMsg = "nothing to strip"; 5329return 0; 5330} # stripChild 5331 5332sub sameChildLine($$$$) 5333{ 5334my ($m1, $l1, $m2, $l2) = @_; 5335my $t1 = $text[substr($$m1, $l1*$lnwidth, $lnwidth1)]; 5336my $t2 = $text[substr($$m2, $l2*$lnwidth, $lnwidth1)]; 5337removeHiddenNumbers \$t1; 5338removeHiddenNumbers \$t2; 5339$t1 =~ y/a-zA-Z0-9//cd; 5340$t2 =~ y/a-zA-Z0-9//cd; 5341return ($t1 eq $t2); 5342} # sameChildLine 5343 5344sub unstripChild() 5345{ 5346$fmode&$browsemode or 5347$errorMsg = $nobrowse, return 0; 5348my $dol2 = $$btags[0]{dol2}; 5349$dol2 > $dol or $errorMsg = "nothing stripped from this web page", return 0; 5350# Backing out. 5351$map = $$btags[0]{map2}; 5352$fmode &= ~$firstopmode; 5353$labels = $lnspace x 26; 5354$dot = 1; 5355$dol = $dol2; 5356apparentSize(); 5357return 1; 5358} # unstripChild 5359 5360# Returns the index of the input field to be modified. 5361# Sets $inf to the text of that field. 5362# Sets $itag, $isize, and the other globals that establish an input field. 5363# Returns 0 for no input fields on the line, -1 for some other error. 5364sub infIndex($$) 5365{ 5366my ($ln, $line) = @_; 5367my ($i, $j, $idx); 5368my @fields = (); 5369my @fieldtext = (); 5370# Here's some machinery to remember the index if there's only one 5371# input field of the desired type. 5372my $holdInput = 0; 5373my $t = fetchLine $ln, 0; 5374# Bug in perl mandates the use of the no-op (?=) below. 5375# You'll see this other places in the code too. 5376# This bug was fixed in September 2001, patch 12120. 5377while($t =~ /\x80([\x85-\x8f]+)<(.*?)(?=)\x80\x8f>/g) { 5378$j = revealNumber $1; 5379$i = $2; 5380push @fields, $j; 5381push @fieldtext, $i; 5382$itag = $$btags[$j]; 5383$itype = $$itag{type}; 5384next if $itype eq "area"; 5385if($line =~ /^\d*\*/) { 5386if($itype eq "submit" or $itype eq "reset") { 5387$holdInput = -1 if $holdInput > 0; 5388$holdInput = $#fields+1 if $holdInput == 0; 5389} 5390} else { 5391if($itype ne "submit" and $itype ne "reset") { 5392$holdInput = -1 if $holdInput > 0; 5393$holdInput = $#fields+1 if $holdInput == 0; 5394} 5395} 5396} 5397$j = $#fields + 1; 5398if(!$j) { 5399$errorMsg = "no input fields present" if ! $inglob; 5400return 0; 5401} 5402$idx = -1; 5403$idx = $1 if $line =~ /^(\d+)/; 5404$idx = $holdInput if $holdInput > 0 and $idx < 0; 5405$idx >= 0 or $j == 1 or 5406$errorMsg = "multiple input fields, please use $icmd [1,$j]", return -1; 5407$idx = 1 if $idx < 0; 5408if($idx == 0 or $idx > $j) { 5409$errorMsg = $j > 1 ? 5410"invalid field, please use $icmd [1,$j]" : 5411"line only has one input field"; 5412return -1; 5413} 5414$j = $fields[$idx-1]; 5415$inf = $fieldtext[$idx-1]; 5416$ifield = $idx; 5417$itagnum = $j; 5418$itag = $$btags[$j]; 5419$iline = $ln; 5420$itype = $$itag{type}; 5421$isize = $$itag{size}; 5422$irows = $$itag{rows}; 5423$icols = $$itag{cols}; 5424$iwrap = $$itag{wrap}; 5425$iwrap = "" if ! defined $iwrap; 5426$iwrap = lc $iwrap; 5427$iopt = $$itag{opt}; 5428return $idx; 5429} # infIndex 5430 5431# Get status on an input field, including its options. 5432sub infStatus($) 5433{ 5434my $line = shift; 5435$line =~ s/^\d*\?//; 5436$line = lc $line; 5437print $itype; 5438print "[$isize]" if $isize; 5439if($itype eq "area" and $irows and $icols) { 5440print "[${irows}x$icols"; 5441print " recommended" if $iwrap eq "virtual"; 5442print "]"; 5443} 5444print " many" if defined $$itag{multiple}; 5445print " <$inf>"; 5446my $name = $$itag{name}; 5447print " [$name]" if defined $name and length $name; 5448print "\n"; 5449return unless $itype eq "select"; 5450 5451# Display the options in a pick list. 5452# If a string is given, display only those options containing the string. 5453my $i = 0; 5454my @pieces = (); 5455my $j; 5456foreach my $v (%{$iopt}) { 5457$i ^= 1; 5458$j = $v, next if $i; 5459$_ = $v; 5460next unless s/^(...)[-+]//; 5461next if length $line and index(lc $j, $line) < 0; 5462push @pieces, "$1$j\n"; 5463} 5464if($#pieces < 0) { 5465print(length($line) ? "No options contain the string \"$line\"\n" : 5466"No options found\n"); 5467return; 5468} 5469foreach (sort @pieces) { 5470print((substr($_, 0, 3) + 1) . ": " . substr($_, 3)); 5471last if $intFlag; 5472} 5473} # infStatus 5474 5475# Replace an input field with new text. 5476sub infReplace($) 5477{ 5478my $newtext = shift; 5479my ($i, $j, $k, $t); 5480 5481# Sanity checks on the input. 5482$itype ne "submit" and $itype ne "reset" or 5483$errorMsg = "field is a $itype button, use * to push the button", return 0; 5484$itype ne "area" or 5485$errorMsg = "field is a text area, you must edit it from another session", return 0; 5486not defined $$itag{readonly} or 5487$errorMsg = "readonly field", return 0; 5488$newtext =~ /\n/ and 5489$errorMsg = "input field cannot contain a newline character", return 0; 5490return 0 if lineLimit 2; 5491 5492if($ifield) { 5493my $newlen = length $newtext; 5494! $isize or $newlen <= $isize or 5495$errorMsg = "input field too long, limit $isize", return 0; 5496 5497if($itype eq "checkbox" or $itype eq "radio") { 5498$newtext eq "+" or $newtext eq "-" or 5499$errorMsg = "field requires + (active) or - (inactive)", return 0; 5500$itype eq "checkbox" or $newtext eq '+' or $inf eq '-' or 5501$errorMsg = "at least one radio button must be set", return 0; 5502} 5503} # not from reset button 5504 5505if($itype eq "select") { 5506my @opts = $newtext; 5507@opts = split(',', $newtext) if defined $$itag{multiple}; 5508$newtext = ""; 5509option: 5510foreach my $newopt (@opts) { 5511$newtext .= "," if length $newtext; 5512$j = $$iopt{$newopt}; 5513# If you type in the option exactly, that's grand. 5514$newtext .= $newopt, next if defined $j and length $j; 5515# Maybe it's a menu number. 5516if($newopt =~ /^\d+$/) { 5517$j = sprintf("%03d", $newopt-1); 5518# reverse hash lookup. 5519my $revcnt = 0; 5520my $revkey; 5521foreach (%{$iopt}) { 5522$revcnt ^= 1; 5523$revkey = $_, next if $revcnt; 5524next unless substr($_, 0, 3) eq $j; 5525$newtext .= $revkey; 5526next option; 5527} 5528} else { # menu number conversion 5529# See if this text is a piece of one and only one option. 5530# Or if it is exactly one and only one option. 5531$j = lc $newopt; 5532my $matchCount = 0; 5533my $matchLevel = 0; 5534my $bestopt = ""; 5535foreach $k (keys %{$iopt}) { 5536my $klow = lc $k; # k lower case 5537next unless index($klow, $j) >= 0; 5538if($j eq $klow) { 5539$matchCount = 0, $matchLevel = 2 if $matchLevel < 2; 5540++$matchCount; 5541$bestopt = $k; 5542} else { 5543next if $matchLevel == 2; 5544$matchCount = 0, $matchLevel = 1 unless $matchLevel; 5545++$matchCount; 5546$bestopt = $k; 5547} 5548} 5549$newtext .= $bestopt, next option if $matchCount == 1; 5550$errorMsg = "$j matches more than one entry in the list", return 0 if $matchCount > 1; 5551} 5552$errorMsg = "$newopt is not an option, type i$ifield? for the list"; 5553return 0; 5554} # loop over options in the new list 5555} # select 5556 5557# Definitely making a change. 5558$fmode |= $firstopmode; 5559$ubackup = 1; 5560$dot = $iline; 5561 5562return 1 if $newtext eq $inf; # no change 5563 5564# Find and replace the text. 5565$t = fetchLine $iline, 0; 5566my $itaghidden = hideNumber $itagnum; 5567$t =~ s/\x80$itaghidden<.*?(?=)\x80\x8f>/\x80$itaghidden<$newtext\x80\x8f>/; 5568push @text, $t; 5569substr($map, $iline*$lnwidth, $lnwidth) = 5570sprintf $lnformat, $#text; 5571 5572if($itype eq "radio") { # find and undo the other radio button 5573my $radioname = $$itag{name}; 5574if(defined $radioname and length $radioname) { 5575my $form = $$itag{form}; 5576lineloop: 5577foreach $k (1..$dol) { 5578$t = fetchLine $k, 0; 5579while($t =~ /\x80([\x85-\x8f]+)<\+\x80\x8f>/g) { 5580$jh = $1; 5581$j = revealNumber $1; 5582next if $j == $itagnum; # already changed this one 5583my $h = $$btags[$j]; 5584next unless $$h{form} eq $form; 5585# Input field is part of our form. 5586next unless $$h{type} eq "radio"; 5587my $name = $$h{name}; 5588next unless defined $name and $name eq $radioname; 5589# It's another radio button in our set. 5590$t =~ s/\x80$jh<\+\x80\x8f>/\x80$jh<-\x80\x8f>/; 5591push @text, $t; 5592substr($map, $k*$lnwidth, $lnwidth) = 5593sprintf $lnformat, $#text; 5594last lineloop; 5595} # loop over input fields on this line 5596} # loop over lines 5597} # radio button has a name 5598} # radio 5599 5600return 1; 5601} # infReplace 5602 5603# Push the submit or reset button. 5604sub infPush() 5605{ 5606my $button = $itag; 5607my $buttontype = $itype; 5608$buttontype eq "submit" or $buttontype eq "reset" or 5609$errorMsg = "this is not a submit or reset button", return 0; 5610$cmd = 'b'; # this has become a browse command 5611my $formh = $$itag{form}; 5612defined $formh or 5613$errorMsg = "field is not part of a form", return 0; 5614my $buttonvalue = $inf; 5615$buttonvalue =~ s/ secure$//; 5616$buttonvalue =~ s/ mailform$//; 5617$buttonvalue =~ s/ js$//; 5618my $domail = 0; # sendmail link 5619 5620my $bref = $$formh{bref}; 5621my $action = $$formh{action}; 5622if(! defined $action or ! length $action) { 5623# If no form program is specified, the default is the current url. 5624$action = $bref; 5625$action =~ s/\?.*//; 5626} 5627$domail = 1 if $action =~ s/^mailto://i; 5628# We should check for $form{encoding}. 5629 5630my ($name, $val, $i, $j, $cx, $h, @pieces); 5631my $post = ""; 5632my $origdot = $dot; 5633 5634# Loop over all tags, keeping those in the input form. 5635$itagnum = -1; 5636foreach $h (@$btags) { 5637++$itagnum; 5638next unless $$h{tag} eq "input"; 5639# Overwrite the global input variables, so infReplace will work properly. 5640# $itagnum is already set. 5641$itag = $h; 5642$itype = $$h{type}; 5643$j = $$h{form}; 5644next unless defined $j and $j eq $formh; 5645# Input field is part of our form. 5646$iopt = $$h{opt}; 5647$isize = $$h{size}; 5648 5649if($itag eq $button and $itype eq "submit") { 5650$name = $$button{name}; 5651if(defined $name and length $name) { 5652if($domail) { 5653$post .= "\n" if length $post; 5654} else { 5655$post .= '&' if length $post; 5656$name = urlEncode $name; 5657} 5658if($$button{image}) { 5659$post .= $domail ? 5660"$name.x=\n0\n\n$name.y=\n0\n" : 5661"$name.x=0&$name.y=0"; 5662} else { 5663if(defined $buttonvalue and length $buttonvalue) { 5664if($domail) { 5665$post .= "$name=\n$buttonvalue\n"; 5666} else { 5667$buttonvalue = urlEncode $buttonvalue; 5668$post .= "$name=$buttonvalue"; 5669} 5670} else { 5671$post .= $domail ? 5672"$name=\nSubmit\n" : 5673"$name=Submit"; 5674} 5675} 5676} 5677} # submit button 5678 5679next if $itype eq "reset" or $itype eq "submit"; 5680 5681if($itype eq "hidden") { 5682$inf = $$h{value}; 5683$iline = $ifield = 0; 5684} else { 5685# Establish the line number, field number, and field value. 5686# This is crude and inefficient, but it doesn't happen very often. 5687findField: 5688for($iline=1; $iline<=$dol; ++$iline) { 5689$j = fetchLine $iline, 0; 5690$ifield = 0; 5691while($j =~ /\x80([\x85-\x8f]+)<(.*?)(?=)\x80\x8f>/g) { 5692$i = revealNumber $1; 5693$inf = $2; 5694++$ifield; 5695last findField if $i == $itagnum; 5696} 5697} 5698$iline <= $dol or $errorMsg = "input field $itagnum is lost", return 0; 5699} 5700 5701if($buttontype eq "submit") { 5702if($itype eq "area") { 5703$cx = $$h{cx}; 5704$val = ""; 5705if(defined $factive[$cx] and $dol[$cx]) { 5706# Send all the lines of text in the secondary buffer. 5707for(my $ln=1; $ln<=$dol[$cx]; ++$ln) { 5708$val .= fetchLineContext($ln, 1, $cx); 5709next if $ln == $dol[$cx]; 5710$val .= ($textAreaCR ? $eol : "\n"); 5711} 5712} 5713} else { # text area or field 5714$val = $inf; 5715if($itype eq "radio" or $itype eq "checkbox") { 5716next if $val eq '-'; 5717$val = $$h{saveval}; 5718# I thought it had to say "on"; now I'm not sure. 5719$val = "on" if $itype eq "checkbox" and ! length $val; 5720} # radio 5721} # text area or input field 5722# Turn option descriptions into option codes for transmission 5723if($itype eq "select") { 5724@pieces = $val; 5725@pieces = split ',', $val if defined $$h{multiple}; 5726$val = ""; 5727foreach (@pieces) { 5728$val .= "," if length $val; 5729my $code = $$iopt{$_}; 5730if(defined $code) { 5731$code = substr($code, 4); 5732} else { 5733$code = $_; 5734} 5735$val .= $code; 5736} # loop over options 5737} # select 5738 5739$name = $$h{name}; 5740defined $name or $name = ""; 5741if(! $domail) { 5742# Encode punctuation marks for http transmition 5743$name = urlEncode($name); 5744$name .= '='; 5745$val = urlEncode($val); 5746} 5747if($itype eq "select" and defined $$h{multiple}) { 5748# This is kludgy as hell. 5749# comma has been turned into %2C 5750@pieces = split '%2C', $val; 5751foreach $val (@pieces) { 5752$post .= ($domail ? "\n" : '&') if length $post; 5753$post .= $name; 5754$post .= "\n" if $domail and length $name; 5755$post .= $val; 5756$post .= "\n" if $domail and length $val; 5757} 5758} else { 5759$post .= ($domail ? "\n" : '&') if length $post; 5760$post .= $name; 5761$post .= "\n" if $domail and length $name; 5762$post .= $val; 5763$post .= "\n" if $domail and length $val; 5764} 5765 5766} else { # submit or reset 5767 5768next if $itype eq "hidden"; 5769if($itype eq "area") { 5770$cx = $$h{cx}; 5771cxReset($cx, 2); 5772$factive[$cx] = 1; 5773} else { 5774$ifield = 0; # zero skips some of the field checks in infReplace 5775$val = $$h{value}; 5776infReplace($val); 5777} # field or text area 5778 5779} # submit or reset 5780} # loop over tags 5781 5782$dot = $origdot, return 1 if $buttontype eq "reset"; 5783print "submit: $post\n" if $debug >= 2; 5784 5785length $action or 5786$errorMsg = "form does not specify a program to run", return 0; 5787 5788if($domail) { 5789my $subj = urlSubject(\$action); 5790$subj = "html form" unless length $subj; 5791$post = "Subject: $subj\n\n$post"; 5792print "$action\n"; 5793my @tolist = ($action); 5794my @atlist = (); 5795$mailToSend = "form"; 5796$altattach = 0; 5797$whichMail = $localMail; 5798sendMail(\@tolist, \$post, \@atlist) or return 0; 5799print "Form has been mailed, watch for a reply.\n"; 5800return 1; 5801} # sendmail 5802 5803$line = resolveUrl($bref, $action); 5804print "* $line\n"; 5805$post = ($$formh{method} eq "get" ? '?' : '*') . $post; 5806return -1, $line, $post; 5807} # infPush 5808 5809sub renderMail($) 5810{ 5811my $tbuf = shift; 5812$badenc = $bad64 = 0; 5813$fhLevel = 0; 5814$nat = 0; # number of attachments 5815@mimeParts = (); 5816 5817# Copy lines into @msg. 5818# The original cleanMail routine was built upon @msg, 5819# And when I folded it into edbrowse, I was too lazy to change it. 5820@msg = (); 5821push @msg, fetchLine($_, 0) foreach (1..$dol); 5822 5823findHeaders(0, $#msg); 5824deGreater(); 5825nullForwarding(); 5826--$#msg while $#msg >= 0 and 5827$msg[$#msg] !~ /[a-zA-Z0-9]/; 5828 5829# Last chance to interrupt a browse operation 5830$errorMsg = $intMsg, return 0 if $intFlag; 5831 5832$$tbuf = ""; 5833$$tbuf .= "$_\n" foreach (@msg); 5834chomp $$tbuf if length $$tbuf; 5835$$tbuf =~ y/\x92\x93\x94\xa0\xad/'`' -/; 5836return 1; 5837} # renderMail 5838 5839# Insert this text line between mail headers. 5840$mailBreak = "{NextMailHeader}"; 5841# Insert this text line between mime headers. 5842$mimeBreak = "{NextMimeSection}, Level"; 5843# Max lines in a "to unsubscribe" trailer? 5844$unsHorizon = 7; 5845 5846# Hash the annoying commercials. 5847%annoy = (); 5848if(length $annoyFile) { 5849open FH, $annoyFile 5850or dieq "Cannot open file of annoying commercials $annoyFile."; 5851while(<FH>) { 5852stripWhite \$_; 5853$annoy{lc $_} = "" if length $_; 5854} 5855close FH; 5856} # annoy 5857 5858# Today timestamp, so old "junk" subjects can expire. 5859$junkToday = int time / (60*60*24); 5860$junkHorizon = 14; 5861$oldSubjects = 0; 5862%junkSubjects = (); 5863 5864# Now load the junk subjects, which we aren't interested in reading. 5865if(length $junkFile) { 5866open FH, $junkFile 5867or dieq "Cannot open file of junk subjects $junkFile."; 5868while(<FH>) { 5869s/\n$//; # don't need nl 5870($jtime = $_) =~ s/:.*//; 5871($jsubject = $_) =~ s/^\d+:\s*(.*)\s*$/$1/; 5872if($jsubject =~ /^`/) { 5873$junkSubjects{$jsubject} = $junkToday; 5874} else { 5875$oldSubjects = 1, next if $jtime < $junkToday - $junkHorizon; 5876$junkSubjects{$jsubject} = $jtime; 5877} 5878} 5879close FH; 5880} # junkFile 5881 5882# Add a subject to the junk list. 5883# This updates the junk file. 5884sub markSubject ($) 5885{ 5886my $s = shift; 5887die "No subject to junk." if $s eq ""; 5888$junkSubjects{$s} = $junkToday; 5889if($oldSubjects) { 5890open FH, ">$junkFile" 5891or dieq "Cannot rewrite file of junk subjects $junkFile."; 5892$iskey = 0; 5893foreach (%junkSubjects) { 5894($iskey ^= 1) ? 5895($savekey = $_) : 5896print FH "$_:$savekey\n"; 5897} 5898$oldSubjects = 0; 5899} else { 5900open FH, ">>$junkFile" 5901or dieq "Cannot add to file of junk subjects $junkFile."; 5902print FH "$junkToday:$s\n"; 5903} 5904close FH; 5905} # markSubject 5906 5907# Build an array for base64 decoding. 5908{ 5909my ($j, $c); 5910$c = 'A', $j = 0; 5911$b64_map[ord $c] = $j, ++$c, ++$j until $j == 26; 5912$c = 'a'; 5913$b64_map[ord $c] = $j, ++$c, ++$j until $j == 52; 5914$c = '0'; 5915$b64_map[ord $c] = $j, ++$c, ++$j until $j == 62; 5916$b64_map[ord '+'] = $j++; 5917$b64_map[ord '/'] = $j++; 5918} 5919 5920# The following routine decodes the Quoted-Printable mime standard. 5921# If one line ends in an equals sign, it must be joined to the next. 5922# All other =xx sequences become the 8-bit value defined by hex xx. 5923# Pass the start and end offsets -- what do you want to dequote? 5924# Also pass the boundary, if any, and stop there. 5925sub qp_lowlevel($$$) 5926{ 5927my ($start, $end, $boundary) = @_; 5928print "qp $start-$end<$boundary\n" if $debug >= 6; 5929return if $end < $start; 5930 5931my $tbuf = ""; 5932foreach my $i ($start..$end) { 5933if(length $boundary) { 5934my $line = $msg[$i]; 5935$line =~ s/^[ \t>]*-*//; 5936$line =~ s/-*$//; 5937$end = $i-1, last if $line eq $boundary; 5938} 5939$msg[$i] =~ s/[ \t]+$//; 5940$tbuf .= $msg[$i]."\n"; 5941} 5942chomp $tbuf; 5943print "qp ends at $end, length " . length($tbuf) . "\n" if $debug >= 6; 5944 5945# Now undo quoted-printable encoding. 5946# Use global substitutions on the concatenated texts, it's faster. 5947my $join = 5948$tbuf =~ s/=\n//g; 5949$join = 0 unless $join; 5950print "qp joins $join lines\n" if $debug >= 6; 5951$tbuf =~ s/=([0-9a-fA-F]{2})/chr hex "$1"/ge; 5952# Split the text back into lines, and back into @msg. 5953$end -= $join; 5954if(length $tbuf) { 5955@msg[$start.. $end] = split("\n", $tbuf, $end-$start+1); 5956} else { # split problem 5957$msg[$start] = ""; # probably was "" already 5958} 5959# Fill the empty spaces with blank lines 5960$msg[++$end] = '' while $join--; 5961} # qp_lowlevel 5962 5963# If the message includes any lines with leading > signs, 5964# break paragraphs whenever the nesting level changes. 5965# A change in the number of > symbols indicates a different speaker, 5966# hence a new paragraph. 5967# But watch out! 5968# Some mail intermediaries cut long lines, 5969# leaving a dangling fragment without a > nesting level. 5970# This fragment does not represent a new paragraph; 5971# it is part of the previous sentence. 5972# But watch watch out! 5973# Some people deliberately interject short comments, as in: 5974# 5975# > I really think the Tigers are the hotest baseball team ever, really great, 5976# Horse feathers! 5977# > a team to look up to. 5978# 5979# There is virtually no way to distinguish between the two cases. 5980# Most of the time a short fragment in the midst of indented text 5981# actually belongs to the previous line, 5982# so I treat it as such and hope for the best. 5983sub deGreater() 5984{ 5985my (@nestlev, @newmsg, $i, $j, $state, $temp); 5986my $lastsubject = ""; 5987 5988# Push some blank lines, to avoid eof conditions. 5989push @msg, '', '', ''; 5990 5991# Establish the nest level of each line. 5992foreach (@msg) { 5993$temp = $_; 5994# Count > signs. 5995$temp =~ s/^([ \t>]*).*/$1/; 5996$j = $temp =~ y/>/>/; 5997push @nestlev, $j; 5998} 5999 6000my $lastlev = 0; 6001my $newlev; 6002for($i=0; $i<=$#msg; ++$i) { 6003$newlev = $nestlev[$i]; 6004 6005# Let's get right at the tricky part, a drop in level. 6006# It's a fragment if the next line, or line after, 6007# has the same nest level as the previous line. 6008if($newlev < $lastlev and 6009(($nextlev = $nestlev[$i+1]) == $lastlev or 6010$nextlev < $lastlev && $nestlev[$i+2] == $lastlev)) { 6011$temp = $msg[$i]; 6012$temp =~ s/^[ \t>]*//; 6013if($j = length $temp) { 6014if($nextlev == $lastlev) { 6015$newmsg[$#newmsg] .= " $temp"; 6016next; 6017} 6018# It must be that the line after next has the previous nest level. 6019my $temp2 = $msg[$i+1]; 6020$temp2 =~ s/^[ \t>]*//; 6021if($j = length $temp2) { 6022$newmsg[$#newmsg] .= " $temp $temp2"; 6023++$i; 6024next; 6025} # next line is nonempty 6026} # this line is nonempty 6027 6028$newlev = $lastlev if $j == 0 and $nextlev == $lastlev; 6029} # bracketed between larger nest levels 6030 6031if($msg[$i] =~ /^$mailBreak/o) { 6032$newlev = $ lastlev = 0; 6033push @newmsg, $mailBreak; 6034my ($subject, $from, $date, $reply); 6035$temp = $msg[$i]; 6036($subject = $temp) =~ s/\n.*//s if $temp =~ s/.*\nSubject: //s; 6037($from = $temp) =~ s/\n.*//s if $temp =~ s/.*\nFrom: //s; 6038($date = $temp) =~ s/\n.*//s if $temp =~ s/.*\nDate: //s; 6039($reply = $temp) =~ s/\n.*//s if $temp =~ s/.*\nReply-to: //s; 6040if(defined $subject or defined $from or defined $date or defined $reply) { 6041if($#newmsg == 0) { # Read the first header differently. 6042push @newmsg, "Subject: $subject" if defined $subject; 6043push @newmsg, "From $from" if defined $from; 6044} else { 6045$temp = "Message"; 6046if(defined $from) { 6047$temp .= " from $from,"; 6048if(defined $subject) { 6049$temp .= ( 6050$subject eq $lastsubject ? 6051" same subject." : 6052" with subject, $subject."); 6053} else { 6054$temp .= " with no subject."; 6055} # subject or not 6056} else { 6057if(defined $subject) { 6058$temp .= ( 6059$subject eq $lastsubject ? 6060" with the same subject." : 6061" with subject, $subject."); 6062} else { 6063$temp .= " with no subject."; 6064} # subject or not 6065} # from line or not 6066push @newmsg, $temp; 6067} # top header or internal 6068push @newmsg, "Mail sent $date" if defined $date; 6069push @newmsg, "Reply to $reply" if defined $reply; 6070push @newmsg, "" if $#newmsg; 6071} 6072$subject = "" if ! defined $subject; 6073$lastsubject = $subject; 6074next; 6075} # mail header 6076 6077if($newlev != $lastlev) { 6078push @newmsg, "", "Indent $newlev."; 6079} # change in level 6080 6081# Strip off leading > 6082$temp = $msg[$i]; 6083$temp =~ s/^[ \t]*>[ \t>]*//; 6084push @newmsg, $temp; 6085$lastlev = $newlev; 6086} # loop over lines 6087 6088# Push a mime separater on, to make the unsubscribe test work. 6089push @newmsg, "$mimeBreak 1"; 6090 6091# Now put the lines back into @msg, compressing blank lines. 6092# Also, Try to remove any "unsubscribe" trailers. 6093$#msg = -1; 6094my $unslast = -1; 6095my $unscount = 0; 6096my $unstest; 6097$state = 1; 6098$j = 0; 6099 6100foreach my $line (@newmsg) { 6101# Check for "to unsubscribe" 6102if($line =~ /^ *to unsubscribe/i) { 6103$unslast = $j if $unslast < 0 or $unscount > $unsHorizon; 6104$unscount = 0; 6105} # unsubscribe line 6106 6107# Check for mime/mail separater. 6108$unstest = 0; 6109$temp = lc $line; 6110$temp =~ s/\s+$//; 6111if($line =~ /^$mimeBreak \d/o or 6112$line eq $mailBreak or 6113defined $annoy{$temp} or 6114$temp =~ /^-+\s*original message\s*-+\s*$/) { 6115$line = ""; # no need to read that 6116$unstest = 1; 6117} 6118 6119$unstest = 1 if $line =~ /^$mailBreak/o; 6120 6121if($line =~ /^Indent \d/) { 6122$unstest = 1; 6123if($j > 0 and $msg[$j-1] =~ /^Indent \d/) { 6124--$j, --$#msg; 6125$unstest = 0; 6126} # sequential indents 6127} # indent line 6128 6129if($unstest and $unslast >= 0 and $unscount <= $unsHorizon) { 6130# Remove unsubscribe section 6131$j = $unslast - 1; 6132$unslast = -1; 6133--$j while $j >= 0 and 6134$msg[$j] !~ /[a-zA-Z0-9]/; 6135$#msg = $j; 6136++$j; 6137$state = ($j == 0); 6138next; 6139} # crunching unsubscribe 6140 6141if(length $line) { 6142++$unscount if $line =~ /[a-zA-Z0-9]/; 6143$msg[$j++] = $line; 6144$state = 0; 6145$state = 1 if $line =~ /^Indent \d/; 6146} elsif(! $state) { 6147$msg[$j++] = $line; 6148$state = 1; 6149} 6150} # loop over lines 6151 6152--$j; 6153--$j if $j >= 0 and $state; 6154$#msg = $j; 6155} # deGreater 6156 6157# No need to read vacuous forwardings. 6158sub nullForwarding() 6159{ 6160my $lf = -1; # last forwarding 6161my $j = 0; 6162foreach my $line (@msg) { 6163if($line =~ /^Message/) { 6164$j = $lf if $lf >= 0 and $j - $lf <= 4; 6165$lf = $j; 6166} 6167$msg[$j++] = $line; 6168} # loop over lines 6169--$j; 6170$#msg = $j; 6171} # nullForwarding 6172 6173# Decide whether a line, and eventually a paragraph, is an email header. 6174# Realize that these headers might be pasted in almost anywhere. 6175# They don't always appear at the top of the message, 6176# or even the top of a mime section. 6177# They may even be indented, or prepended with leading greater than signs, 6178# if a mail message is manually forwarded, or pasted inside a larger 6179# mail message. 6180# We willhowever assume that a header block, once begun, 6181# continues until we reach a blank line. 6182# If you've manually pasted a header and body together, sorry, 6183# but the body is going to get thrown away. 6184# This routine is recursive, so make sure the appropriate variables are auto. 6185# Pass the start and end offsets -- a sub-message inside the entire message. 6186sub findHeaders($$) ; 6187sub findHeaders($$) 6188{ 6189my ($start, $end) = @_; 6190++$fhLevel; 6191my $startLine = -1; 6192my $boundaryCut = ""; 6193my ($i, $j, $temp, $line, $state); 6194my ($reply, $from, $subject, $date); 6195my ($boundary, $content, $encoding, $encfile); 6196my $expand64 = 0; 6197 6198$line = $msg[$start]; 6199if($line =~ s/^$mailBreak.*\nboundary=//so) { 6200$line =~ s/\n$//; 6201$boundaryCut = $line; 6202} 6203 6204print "findheaders$fhLevel $start-$end<$boundaryCut\n" if $debug >= 6; 6205 6206foreach $i ($start..$end) { 6207$line = $msg[$i]; 6208 6209# Strip away whitespace and leading greater than signs. 6210$line =~ s/^[ \t>]+//; 6211$line =~ s/\s+$//; 6212 6213# Are we expanding binary data? 6214if($expand64) { 6215$expand64 = 0 if $line eq ""; 6216if(length $boundaryCut and $expand64) { 6217$temp = $line; 6218$temp =~ s/^-+//; 6219$temp =~ s/-+$//; 6220$expand64 = 0 if $temp eq $boundaryCut; 6221} 6222if($expand64) { 6223my ($c, $leftover, $rem); 6224# We don't really need the padding equals to run the algorithm properly. 6225# Sometimes it ends in =9 I don't know what that means! 6226$line =~ s/=+9*$//; 6227if($line =~ y;+/a-zA-Z0-9;;cd && !$bad64) { 6228warn "Invalid base64 encoding at line $i"; 6229$bad64 = 1; 6230} 6231for($j=0; $j < length $line; ++$j) { 6232$c = $b64_map[ord substr($line,$j,1)]; 6233$rem = $j & 3; 6234if($rem == 0) { 6235$leftover = $c<<2; 6236} elsif($rem == 1) { 6237$$curPart{data} .= chr($leftover | ($c>>4)); 6238$leftover = ($c & 0xf) <<4; 6239} elsif($rem == 2) { 6240$$curPart{data} .= chr($leftover | ($c>>2)); 6241$leftover = ($c & 3) <<6; 6242} else { 6243$$curPart{data} .= chr($leftover | $c); 6244} 6245} 6246$msg[$i] = ""; 6247next; 6248} 6249} 6250 6251# Look for mailKeyWord: 6252# We check for a header until we have established a boundary, 6253# and then we only crack the header at the start of each section. 6254if(($startLine >= 0 or # inside a header 6255! length $boundaryCut or # no boundary yet 6256$msg[$i-1] eq "$mimeBreak $fhLevel") # top of the mime section 6257and 6258$line =~ /^\$?[a-zA-Z][\w-]*:/) { # keyword: 6259if($startLine < 0) { 6260$startLine = $i; 6261$state = 0; 6262$reply = $from = $subject = $date = ""; 6263$boundary = $content = $encoding = $encfile = ""; 6264} 6265($headKey = $line) =~ s/:.*//; 6266$headKey = lc $headKey; 6267($headVal = $line) =~ s/^[^:]+:\s*//; 6268my $headKeyType = $mhWords{$headKey}; 6269$state |= $headKeyType if defined $headKeyType; 6270if($headVal ne "") { 6271$from = $headVal if $headKey eq "from"; 6272$reply = $headVal if $headKey eq "reply-to"; 6273$subject = $headVal if $headKey eq "subject"; 6274$date = $headVal if $headKey eq "date"; 6275$date = $headVal if $headKey eq "sent"; 6276if($headKey eq "content-transfer-encoding") { 6277$encoding = lc $headVal; 6278} 6279if($headKey eq "content-type") { 6280$content = lc $headVal; 6281$content =~ s/;.*//; 6282} 6283} # something after keyword: 6284} # keyword: mail/mime header line 6285 6286if($startLine >= 0) { 6287# boundary= is a special attribute within a mail header 6288$temp = $line; 6289if($temp =~ s/.*boundary *= *//i) { 6290($temp =~ s/^"//) ? 6291($temp =~ s/".*//) : 6292($temp =~ s/,.*//); 6293$boundary = $temp; 6294$boundary =~ s/^-+//; 6295$boundary =~ s/-+$//; 6296$boundaryCut = $boundary if length $boundary and ! length $boundaryCut; 6297} # boundary keyword detected 6298# filename is similarly set. 6299$temp = $line; 6300if($temp =~ s/.*(?:file)?name *= *//i) { 6301($temp =~ s/^"//) ? 6302($temp =~ s/".*//) : 6303($temp =~ s/,.*//); 6304$encfile = $temp; 6305} 6306 6307} else { 6308 6309next if ! length $boundaryCut; 6310# Strip away leading and trailing hyphens -- helps us look for boundary 6311$line =~ s/^-+//; 6312$line =~ s/-+$//; 6313next if $line ne $boundaryCut; 6314$msg[$i] = "$mimeBreak $fhLevel"; 6315next; 6316} # body 6317 6318# Now we know we're inside a mail header. 6319next if length $line; 6320 6321# We've got a blank line -- that ends the header. 6322# But it's not really a header if we've just got English keywords. 6323if($state&5) { 6324 6325if(length $boundary) { 6326# Skip the preamble. 6327foreach $j ($i+1..$#msg) { 6328$temp = $msg[$j]; 6329$temp =~ s/^-+//; 6330$temp =~ s/-+$//; 6331last if $temp eq $boundary; 6332$msg[$j] = ""; 6333} 6334} 6335 6336# Handle the various encodings. 6337$encoding = "" if length $boundary and $startLine == $start; 6338$encoding = "" if $encoding eq "8bit" or $encoding eq "7bit" or $encoding eq "binary"; 6339if($encoding eq "quoted-printable") { 6340qp_lowlevel($i+1, $end, $boundaryCut); 6341$encoding = ""; 6342} 6343if($encoding eq "base64") { # binary attachment 6344$expand64 = 1; 6345$curPart = { data => "", filename => $encfile, isattach => 1}; 6346push @mimeParts, $curPart; 6347++$nat; 6348$encoding = ""; 6349} 6350if($encoding and !$badenc) { 6351warn "Unknown encoding at line $i $encoding"; 6352$badenc = 1; 6353} 6354 6355$j = $startLine; 6356if($state & 4 or length $boundary) { 6357# Process from/reply lines. 6358$reply = $from if ! length $reply; 6359$from = $reply if ! length $from; 6360$from =~ s/".*// if $from =~ s/^"//; 6361$from =~ s/\s*<.*>.*$//; 6362$reply =~ s/^.*<(.*)>.*$/$1/; 6363$reply = "" 6364if length $reply and ( 6365$reply =~ /[\s<>]/ or $reply !~ /\w@\w/ or $reply !~ /\w\.\w/); 6366# Strip away re: and fwd: 6367while($subject =~ s/^(re|fd|fwd)[,:]\s*//i) { } 6368 6369$mailSubject = $subject, 6370$mailFrom = $from, 6371$mailReply = $reply, 6372$mailDate = $date 6373if $startLine == 0; # top of the message 6374 6375# Consolodate the header. 6376$line = "$mailBreak\n"; 6377$line .= "Subject: $subject\n" if length $subject; 6378$line .= "From: $from\n" if length $from; 6379$line .= "Date: $date\n" if length $date; 6380$line .= "Reply-to: $reply\n" if length $reply; 6381$line .= "boundary=$boundary\n" if length $boundary; 6382$msg[$j++] = $line; 6383} # mail header 6384$msg[$j++] = "" while $j <= $i; 6385 6386# Decode html, if specified in the header. 6387# Or turn it into an attachment, if anything other than plain text. 6388if(length $content and ! $expand64) { 6389if($content eq "text/html" or length $encfile) { 6390mailHtml($i+1, $end, $startLine-1, $boundaryCut, $encfile); 6391$content = "text/plain"; 6392} 6393} 6394 6395} # mail or mime header 6396 6397$startLine = -1; 6398} # loop over lines in the message 6399 6400if(length $boundaryCut) { 6401# Still more work to do. 6402# Reprocess each section. 6403$boundary = "$mimeBreak $fhLevel"; 6404$j = -1; 6405foreach $i ($start..$end) { 6406next unless $msg[$i] eq $boundary; 6407findHeaders($j+1, $i-1) if $j >= 0; 6408$j = $i; 6409} # loop over lines 6410} # bounhdary encountered 6411 6412--$fhLevel; 6413} # findHeaders 6414 6415# process an html mime section within a mail message. 6416sub mailHtml($$$$$) 6417{ 6418my ($start, $end, $breakLine, $boundary, $filename) = @_; 6419return if $end < $start; # should never happen 6420my ($i, $line); 6421 6422my $tbuf = ""; 6423 6424foreach $i ($start..$end) { 6425$line = $msg[$i]; 6426$line =~ s/^[ \t>]*//; 6427 6428# boundary may end this section. 6429if(length $boundary) { 6430my $temp = $line; 6431$temp =~ s/^-+//; 6432$temp =~ s/-+$//; 6433$end = $i-1, last if $temp eq $boundary; 6434} 6435 6436$tbuf .= "$line\n"; 6437$msg[$i] = ""; 6438} # loop over lines 6439 6440if(length $filename) { # present as attachment 6441$curPart = { data => $tbuf, filename => $filename, isattach => 1}; 6442push @mimeParts, $curPart; 6443++$nat; 6444return; 6445} 6446 6447my $cx = cxCreate(\$tbuf, $filename); 6448my $precx = $context; 6449cxSwitch $cx, 0; 6450readyUndo(); 6451# $tbuf still holds the html attachment 6452$badHtml = 1; 6453renderHtml(\$tbuf) and 6454pushRenderedText(\$tbuf); 6455cxSwitch $precx, 0; 6456 6457++$cx; 6458print "switch to session $cx for the html version of this mail\n" unless $ismc; 6459} # mailHtml 6460 6461# Connect to the mail server. 6462sub pop3connect($$) 6463{ 6464my $remote = shift; 6465my $port = shift; 6466my $iaddr = inet_aton($remote) or 6467$errorMsg = "cannot locate the mail server $remote", return 0; 6468my $paddr = sockaddr_in($port, $iaddr); 6469my $proto = getprotobyname('tcp'); 6470socket(SERVER_FH, PF_INET, SOCK_STREAM, $proto) or 6471$errorMsg = "Cannot establish TCP socket", return 0; 6472connect(SERVER_FH, $paddr) or 6473$errorMsg = "Cannot connect to mail server $remote", return 0; 6474SERVER_FH->autoflush(1); 6475return 1; 6476} # pop3connect 6477 6478# Put and get lines from the mail server. 6479sub serverPutLine ($) 6480{ 6481my $line = shift; 6482if($debug >= 7) { 6483my $t = $line; 6484$t =~ s/\r\n/\n/g; 6485print "$t\n"; 6486} 6487print SERVER_FH $line.$eol or 6488$errorMsg = "Could not write to the mail socket", return 0; 6489return 1; 6490} # serverPutLine 6491 6492sub serverGetLine() 6493{ 6494defined($serverLine = <SERVER_FH>) 6495or $errorMsg = "could not read from the mail socket", return 0; 6496# strip trailing newline indicator(s) 6497$serverLine =~ s/[\r\n]+$//; 6498print "< $serverLine\n" if $debug >= 7; 6499return 1; 6500} # serverGetLine 6501 6502sub serverClose($) 6503{ 6504my $scheme = shift; 6505# Should we make $scheme global instead of passing it around? 6506if($scheme =~ /(smtp|pop3)/i) { 6507serverPutLine("quit"); 6508} elsif ($scheme =~ /ftp/i) { 6509serverPutLine "abor${eol}quit"; 6510# Nope, abor is not a typo. 6511my @disposeOf = <SERVER_FH>; 6512close FDFH if defined FDFH; 6513close FLFH if defined FLFH; 6514} 6515sleep 2; 6516close SERVER_FH; 6517} # serverClose 6518 6519# This subroutine was taken from MIME::Base64 by Gisle Aas. 6520sub encodeBase64($$$) 6521{ 6522my($in, $eol, $out) = @_; 6523my $inl = length $$in; 6524# uuencode is pretty close 6525$$out = pack 'u', $$in; 6526# get rid of first and last char 6527$$out =~ s/^.//; 6528chop $$out; 6529# Get rid of newlines inside 6530$$out =~ s/\n.//g; 6531# Over to base 64 char set 6532 $$out =~ tr|` -_|AA-Za-z0-9+/|; 6533 # fix padding at the end 6534 my $padding = (3 - $inl%3) % 3; 6535 $$out =~ s/.{$padding}$/'=' x $padding/e if $padding; 6536 # break encoded string into lines of no more than 76 characters each 6537 if (length $eol) { 6538 $$out =~ s/(.{1,72})/$1$eol/g; 6539 } 6540} # encodeBase64 6541 6542# Read the file into memory, mime encode it, 6543# and return the type of encoding and the encoded data. 6544# Last three parameters are result parameters. 6545sub encodeAttachment($$$$$) 6546{ 6547my ($atfile, $isMail, $res_enc, $res_type, $res_data) = @_; 6548my ($subline, $buffer, $fsize, $rsize); 6549 6550if(!$isMail) { 6551if($atfile =~ /^\d+$/) { # edbrowse session 6552my $cx = $atfile - 1; 6553$buffer = ""; 6554for(my $ln=1; $ln<=$dol[$cx]; ++$ln) { 6555$buffer .= fetchLineContext($ln, 1, $cx); 6556$buffer .= "\n" if $ln < $dol[$cx]; 6557} 6558$fsize = $rsize = length $buffer; 6559} else { 6560open FH, $atfile or 6561$errorMsg = "cannot open attachment file $atfile,$!", return 0; 6562binmode FH, ':raw' if $doslike; 6563$fsize = (stat(FH))[7]; 6564$rsize = 0; 6565$buffer = ""; 6566$rsize = sysread(FH, $buffer, $fsize) if $fsize; 6567close FH; 6568$rsize == $fsize or 6569$errorMsg = "cannot read the contents of $atfile,$!", return 0; 6570} 6571} else { 6572$buffer = $$atfile; 6573# We just made a copy of the mail to send; hope it wasn't too big. 6574$atfile = $mailToSend; 6575$fsize = $rsize = length $buffer; 6576$buffer =~ s/^\s*subject\s*:\s*/Subject: /i or 6577$errorMsg = "$atfile does not begin with a line `Subject: subject of your mail'", return 0; 6578$buffer =~ s/\r\n/\n/g; 6579$buffer .= "\n" if substr($buffer, -1) ne "\n"; 6580$buffer .= ':'; # temporary 6581# Extra blank line after subject. 6582$buffer =~ s/^(.*\n)(.)/$1\n$2/; 6583$buffer =~ /^(.*)\n/; 6584$subline = $1; 6585substr($buffer, -1) = ""; # get rid of : 6586length $subline < 90 or 6587$errorMsg = "subject line too long, limit 80 characters", return 0; 6588} # primary mail message 6589 6590my $newbuf = ""; 6591my ($c, $col, $j, $ctype, $enc); 6592 6593# Count nonascii characters. 6594my $nacount = $buffer =~ y/\x80-\xff/\x80-\xff/; 6595# Count null characters. 6596my $nullcount = $buffer =~ y/\0/\0/; 6597$nacount += $nullcount; 6598 6599if($nacount*5 > $fsize and $fsize > 20) { 6600! $isMail or 6601$errorMsg = "cannot mail the binary file $atfile - perhaps this should be an attachment?", return 0; 6602 6603encodeBase64(\$buffer, "\n", \$newbuf); 6604 6605$ctype = "application/octet-stream"; 6606$ctype = "application/PostScript" if $atfile =~ /\.ps$/i; 6607$ctype = "image/jpeg" if $atfile =~ /\.jpeg$/i; 6608$ctype = "image/gif" if $atfile =~ /\.gif$/i; 6609$ctype = "audio/basic" if $atfile =~ /\.wav$/i; 6610$ctype = "video/mpeg" if $atfile =~ /\.mpeg$/i; 6611$enc = "base64"; 6612$$res_type = $ctype; 6613$$res_enc = $enc; 6614$$res_data = $newbuf; 6615return 1; 6616} # base 64 encode 6617 6618# Use the filename of the edbrowse session to determine type. 6619if($atfile =~ /^\d+$/) { 6620$atfile = $fname[$atfile-1]; 6621} 6622$ctype = "text/plain"; 6623$ctype = "text/html" if $atfile =~ /\.(htm|html|shtml|asp)$/i; 6624$ctype = "text/richtext" if $atfile =~ /\.rtf$/i; 6625 6626# Switch to unix newlines - we'll switch back to dos later. 6627$buffer =~ s/\r\n/\n/g; 6628$fsize = length $buffer; 6629 6630if($nacount*20 < $fsize) { 6631# Looks like it's almost all ascii, but we still have to switch to qp 6632# if the lines are too long. 6633$col = 0; 6634for($j =0; $j < $fsize; ++$j) { 6635$c = substr $buffer, $j, 1; 6636$col = 0, next if $c eq "\n"; 6637++$col; 6638$nacount = $fsize, last if $col > 500 or $col > 120 and ! $isMail; 6639} 6640} 6641 6642if($nullcount or $nacount*20 >= $fsize) { 6643$buffer =~ s/([^\t\n-<>-~])/sprintf("=%02X", ord $1)/ge; 6644$buffer =~ s/ $/=20/m; 6645$buffer =~ s/\t$/=09/m; 6646# Cut long lines, preferably after a space, but wherever we can. 6647$fsize = length $buffer; 6648my $spaceCol = 0; 6649$col = 0; 6650for($j =0; $j < $fsize; ++$j) { 6651$c = substr $buffer, $j, 1; 6652$newbuf .= $c; 6653if($c eq "\n") { # new line, column 0 6654$spaceCol = $col = 0; 6655next; 6656} 6657++$col; 6658if($c eq " " || $c eq "\t") { 6659$spaceCol = length $newbuf; 6660} 6661next if $col < 72; 6662# Don't break an = triplet. 6663next if $c eq '='; 6664next if substr($newbuf, -2, 1) eq '='; 6665# If we're near the end, don't worry about it. 6666next if $j == $fsize - 1; 6667# If newline's coming up anyways, don't force another one. 6668$c = substr $buffer, $j+1, 1; 6669next if $c eq "\n"; 6670# Ok, it's a long line, we need to cut it. 6671$spaceCol = length $newbuf if ! $spaceCol; 6672substr($newbuf, $spaceCol, 0) = "=\n"; 6673$spaceCol += 2; 6674$col = length($newbuf) - $spaceCol; 6675$spaceCol = 0; 6676} 6677 6678if($isMail) { 6679# Don't qp the subject. 6680$newbuf =~ s/^.*/$subline/; 6681} 6682 6683$enc = "quoted-printable"; 6684$$res_type = $ctype; 6685$$res_enc = $enc; 6686$$res_data = $newbuf; 6687return 1; 6688} 6689 6690# Almost all ascii, short lines, no problems. 6691$enc = ($nacount ? "8bit" : "7bit"); 6692$$res_type = $ctype; 6693$$res_enc = $enc; 6694$$res_data = $buffer; 6695return 1; 6696} # encodeAttachment 6697# Don't forget to turn lf into crlf before you send this on to smtp. 6698 6699# Send mail to the smtp server. 6700# sendMail(recipients, mailtext, attachments) 6701# Everything passed by reference. 6702sub sendMail($$$) 6703{ 6704my ($tolist, $main, $atlist) = @_; 6705length $outmailserver or 6706$errorMsg = "No mail server specified - check your $home/.ebprc file", return 0; 6707 6708my $proto = 'smtp'; 6709my $reply = $replyAddress[$whichMail]; 6710$altattach == 0 or $altattach == $#$atlist+1 or 6711$errorMsg = 'either none or all of the attachments must be declared "alternative"', return 0; 6712 6713# Read and/or refresh the address book. 6714if(length $addressFile and -e $addressFile) { 6715my $newtime = (stat($addressFile))[9]; 6716if($newtime > $adbooktime) { 6717%adbook = (); 6718$adbooktime = $newtime; 6719my ($alias, $email); 6720open FH, $addressFile or 6721$errorMsg = "Cannot open address book $addressFile.", return 0; 6722while(<FH>) { 6723s/\n$//; # don't need nl 6724next if /^\s*#/; # comment line 6725next if /^\s*$/; # blank line 6726($alias = $_) =~ s/:.*//; 6727($email = $_) =~ s/^[^:]*:([^:]*).*/$1/; 6728$adbook{$alias} = $email; 6729} 6730close FH; 6731} 6732} 6733 6734# Resolve recipients against address book. 6735foreach my $who (@$tolist) { 6736next if $who =~ /@/; 6737my $real = $adbook{$who}; 6738if(defined $real and length $real) { 6739# Remember that $who is a by reference variable, being in the for loop. 6740$who = $real; 6741next; 6742} 6743length $addressFile or 6744$errorMsg = "No address book specified - check your $home/.ebprc file", return 0; 6745$errorMsg = "alias $who not found in your address book"; 6746return 0; 6747} 6748 6749# Verify attachments are readable. 6750foreach my $f (@$atlist) { 6751if($f =~ /^\d+$/) { 6752my $cx = $f - 1; 6753cxCompare($cx) or return 0; 6754defined $factive[$cx] and $dol[$cx] or 6755$errorMsg = "session $f is empty - cannot atach", return 0; 6756} else { 6757-r $f or 6758$errorMsg = "cannot access attachment $f", return 0; 6759} 6760} 6761 6762my $mustmime = $#$atlist + 1; 6763my ($sendEnc, $sendType, $sendData); 6764encodeAttachment($main, 1, \$sendEnc, \$sendType, \$sendData) or return 0; 6765$mustmime = 1 if $sendEnc =~ /^q/; 6766 6767# Boundary, for sending attachments. 6768my $sendBound = rand; 6769$sendBound =~ s/^0./nextpart-domail/; 6770 6771# Looks good - let's get going. 6772pop3connect($outmailserver, 25) or return 0; 6773 6774normal: { 6775serverGetLine() or last normal; 6776while($serverLine =~ /^220-/) { 6777serverGetLine() or last normal; 6778} 6779$serverLine =~ /^220 / or 6780$errorMsg = "Unexpected prompt <$serverLine> at the start of the sendmail session", last normal; 6781 6782serverPutLine "helo $smtplogin" or last normal; 6783serverGetLine() or last normal; 6784$serverLine =~ /^250 / or 6785$errorMsg = "The mail server doesn't recognize $smtplogin", last normal; 6786 6787serverPutLine "mail from: $reply" or last normal; 6788serverGetLine() or last normal; 6789$serverLine =~ /^250 / or 6790$errorMsg = "mail server rejected $reply <$serverLine>", last normal; 6791 6792my $reclist = ""; # list of recipients 6793my $reccount = 0; # count recipients 6794foreach my $f (@$tolist) { 6795$f = "\"$f\"" if $f =~ /[^\w,.@=_-]/; 6796$reclist .= ", " if $reccount; 6797++$reccount; 6798$reclist .= $f; 6799serverPutLine "rcpt to: $f" or last normal; 6800serverGetLine() or last normal; 6801$serverLine =~ /^250 / or 6802$errorMsg = "mail server rejected $f <$serverLine>", last normal; 6803} # loop over recipients 6804 6805serverPutLine "data" or last normal; 6806serverGetLine() or last normal; 6807$serverLine =~ /^354 / or 6808$errorMsg = "The mail server is not ready to accept email data <$serverLine>", last normal; 6809serverPutLine "To: $reclist$eol" . 6810"From: $myname <$reply>$eol" . 6811"Reply-To: $myname <$reply>$eol" . 6812"Date: " . mailTimeString() . $eol . 6813"Mime-Version: 1.0" or last normal; 6814 6815# dot alone tells smtp we're done. 6816# Make sure there isn't a dot line in the middle of the mail. 6817$sendData =~ s/^\.$/ ./gm; 6818# serverPutLine() routine already adds the last newline. 6819substr($sendData, -1) = "" if substr($sendData, -1) eq "\n"; 6820# smtp requires crlf. 6821$sendData =~ s/\n/\r\n/g; 6822 6823if(! $mustmime) { 6824serverPutLine "Content-Type: $sendType$eol" . 6825"Content-Transfer-Encoding: $sendEnc" or last normal; 6826} else { 6827$sendData =~ s/^(.*\r\n)// or 6828$errorMsg = "could not pull subject line out of sendData", last normal; 6829my $subline = $1; 6830serverPutLine $subline . 6831"Content-Type: multipart/" . 6832($altattach ? "alternative" : "mixed") . 6833"; boundary=$sendBound$eol" . 6834"Content-Transfer-Encoding: 7bit$eol" . 6835$eol . 6836"This message is in MIME format. Since your mail reader does not understand$eol" . 6837"this format, some or all of this message may not be legible.$eol" . 6838$eol . 6839"--$sendBound$eol" . 6840"Content-Type: $sendType$eol" . 6841"Content-Transfer-Encoding: $sendEnc" or last normal; 6842} 6843serverPutLine $sendData or last normal; 6844 6845if($mustmime) { 6846foreach my $f (@$atlist) { 6847encodeAttachment($f, 0, \$sendEnc, \$sendType, \$sendData) or last normal; 6848serverPutLine "$eol--$sendBound$eol" . 6849"Content-Type: $sendType" . 6850# If the filename has a quote in it, forget it. 6851# Also, suppress filename if this is an alternative presentation. 6852# Also, suppress filename if you pulled it out of an edbrowse session. 6853(($altattach or $f =~ /"/ or $f =~ /^\d+$/) ? 6854"" : "; name=\"$f\"") . $eol . 6855"Content-Transfer-Encoding: $sendEnc$eol" or last normal; 6856 6857$sendData =~ s/^\.$/ ./gm; 6858substr($sendData, -1) = "" if substr($sendData, -1) eq "\n"; 6859$sendData =~ s/\n/\r\n/g; 6860serverPutLine $sendData or last normal; 6861} # loop over attachments 6862# Last boundary. 6863serverPutLine "$eol--$sendBound--" or last normal; 6864} # mime parts 6865 6866serverPutLine "." or last normal; 6867serverGetLine() or last normal; 6868$serverLine =~ /message (accepted|received)/i or 6869$serverLine =~ /^250/ or 6870$errorMsg = "Could not send mail message <$serverLine>", last normal; 6871serverClose($proto); 6872return 1; 6873} # normal processing 6874 6875close SERVER_FH; 6876return 0; # failed 6877} # sendMail 6878 6879# Send the current session as outgoing mail. 6880sub sendMailCurrent() 6881{ 6882dirBrowseCheck("send mail") or return 0; 6883$fmode&$binmode and $errorMsg = "cannot mail a binary file - should this be an attachment?", return 0; 6884$dol or $errorMsg = "cannot mail an empty file", return 0; 6885$whichMail = $localMail; 6886 6887# Gather recipients and attachments, until we reach subject: 6888my @tolist = (); 6889my @atlist = (); 6890my ($ln, $t); 6891my $subject = 0; 6892for($ln=1; $ln<=$dol; ++$ln) { 6893$t = fetchLine $ln, 0; 6894$t =~ s/^reply[ -]to:* /to:/i; 6895$t =~ s/^mailto:/to:/i; 6896push(@tolist, $1), next if $t =~ /^to\s*:\s*(.*?)[ \t]*$/i; 6897if($t =~ /^(attach|alt)\s*:\s*(.*?)[ \t]*$/i) { 6898$altattach++ if lc($1) eq "alt"; 6899push(@atlist, $2); 6900next; 6901} 6902$whichMail = $1, next if $t =~ /^account\s*:\s*(\d+)[ \t]*$/i; 6903$subject = 1 if $t =~ /^subject\s*:/i; 6904last; 6905} 6906$whichMail = $smMail if length $smMail; 6907$subject or $errorMsg = "line $ln, should begin with to: attach: or subject:", return 0; 6908$#tolist >= 0 or $errorMsg = "no recipients specified - place `To: emailAddress' at the top of your file", return 0; 6909$whichMail <= $#inmailserver or $errorMsg = "account $whichMail is out of range", return 0; 6910 6911my $tbuf = ""; 6912$tbuf .= fetchLine($_, 0) . "\n" foreach ($ln..$dol); 6913$mailToSend = "buffer"; 6914return sendMail(\@tolist, \$tbuf, \@atlist); 6915} # sendMailCurrent 6916 6917 6918# runtime code starts here. 6919# Think of this code as being inside main(){} 6920 6921if($doslike) { 6922# Buffered I/O messes me up when this runs on NT, over telnet. 6923STDOUT->autoflush(1); 6924# The shell doesn't expand wild cards, let's do it here. 6925my @arglist = (); 6926push @arglist, glob($_) foreach (@ARGV); 6927@ARGV=@arglist; 6928} 6929 6930if($#ARGV >= 0 and $ARGV[0] eq "-v") { 6931print "$version\n"; 6932exit 0; 6933} 6934 6935# debug option 6936if($#ARGV >= 0 and $ARGV[0] =~ /^-d(\d*)$/) { 6937$debug = (length $1 ? $1 : 4); 6938shift @ARGV; 6939} 6940 6941# error exit option 6942if($#ARGV >= 0 and $ARGV[0] eq '-e') { 6943$errorExit = 1; 6944shift @ARGV; 6945} 6946 6947# -m is a special flag; run as a mail client. 6948if($#ARGV >= 0 and $ARGV[0] =~ /^-(u?)m(\d+)$/) { 6949$ismc = 1; # running as a mail client 6950my $unformat = length $1; 6951my $account = $2; 6952shift @ARGV; 6953$#inmailserver >= 0 or 6954dieq "there are no mail accounts in your .ebprc config file."; 6955$account <= $#inmailserver or 6956dieq "account designator $account is out of range."; 6957$whichMail = $account; 6958my @atfiles = (); 6959my $mailBuf = ""; 6960 6961if($#ARGV == 0 and $ARGV[0] eq "-Zap") { 6962$zapmail = 1; 6963shift @ARGV; 6964} 6965 6966while($#ARGV>= 0) { 6967my $arg = pop @ARGV; 6968if($arg =~ s/^([-+])//) { 6969++$altattach if $1 eq '-'; 6970open FH, $arg or 6971dieq "cannot access attachment $arg."; 6972close FH; 6973unshift @atfiles, $arg; 6974} else { 6975$mailToSend = $arg; 6976open FH, $mailToSend 6977or dieq "Cannot access send file $mailToSend."; 6978dieq "Send file $mailToSend has zero size." if -z FH; 6979binmode FH, ':raw' if $doslike; 6980my $fsize = (stat(FH))[7]; 6981my $rsize = sysread(FH, $mailBuf, $fsize); 6982close FH; 6983$rsize == $fsize or 6984dieq "cannot read the contents of $mailToSend,$!"; 6985last; 6986} 6987} # loop looking for files to transmit 6988 6989if(length $mailToSend or $#atfiles >= 0) { 6990# Mail client is in send mode. 6991length $mailToSend or 6992dieq "all arguments are attachments - you must include a plain send file."; 6993$#ARGV >= 0 or dieq "No recipients specified."; 6994sendMail(\@ARGV, \$mailBuf, \@atfiles) or dieq $errorMsg; 6995exit 0; 6996} # send mail 6997 6998# Move to the mail directory. 6999length $mailDir or dieq "mailbox directory not specified in your .ebprc file."; 7000chdir $mailDir or dieq "Cannot change directory to $mailDir."; 7001 7002# Now fetch the mail and process it, 7003# and ask the user what to do with it. 7004# Begin with the pop3 login/password sequence. 7005my $proto = "pop3"; 7006pop3connect($inmailserver[$whichMail], 110) or dieq $errorMsg; 7007serverGetLine(); 7008$serverLine =~ /^\+OK / 7009or dieq "Unexpected pop3 introduction <$serverLine>."; 7010my $login = $pop3login[$whichMail]; 7011my $password = $pop3password[$whichMail]; 7012serverPutLine("user $login"); 7013serverGetLine(); 7014# perhaps we require a password? 7015if($password) { 7016serverPutLine("pass $password"); 7017serverGetLine(); 7018} # sending password 7019$serverLine =~ /^\+OK/ 7020or dieq "Could not complete the pop3 login/password sequence <$serverLine>."; 7021 7022# determine number of messages 7023serverPutLine("stat"); 7024serverGetLine(); 7025$serverLine =~ /^\+OK / 7026or dieq "Could not obtain status information on your mailbox <$serverLine>."; 7027my $nmsgs = substr($serverLine, 4); 7028$nmsgs =~ s/ .*//; 7029 7030if(!$nmsgs) { 7031print "No mail\n"; 7032serverClose($proto); 7033exit 0; 7034} 7035 7036my $mailHuge = "Mail message consumes more than a million lines; you won't be able to use this client."; 7037print "$nmsgs messages\n"; 7038if($zapmail) { 7039$nmsgs = 300 if $nmsgs > 300; 7040} 7041 7042# Iterate over messages. 7043foreach my $m (1..$nmsgs) { 7044my ($filename, $j, $curpart, $rendered); 7045# Is this mail automatically going somewhere else? 7046my $redirect = ""; 7047my $delFlag = 0; 7048 7049if($zapmail) { 7050$delFlag = 1; 7051} else { 7052 7053# Clear out the editor before we read in the next message. 7054foreach $j (0..$#factive) { 7055cxReset $j, 1; 7056} 7057$context = 0; # probably not necessary 7058$factive[0] = 1; # mail goes into session 0 7059$#text = 1; 7060$text[0] = ""; 7061$text[1] = "--------------------------------------------------------------------------------"; 7062 7063# retrieve the entire mth message from the server. 7064serverPutLine("retr $m"); 7065my $exact_msg = ""; # an exact copy of the email 7066# Throw first line away, it's from the pop3 server, not part of the mail. 7067serverGetLine(); 7068$j = 1; 7069serverGetLine(); 7070while($serverLine ne ".") { 7071$exact_msg .= "$serverLine\n"; 7072lineLimit 1 and dieq $mailHuge; 7073push @text, $serverLine; 7074++$j; 7075$map .= sprintf($lnformat, $j); 7076serverGetLine(); 7077} 7078$dot = $dol = $j-1; 7079 7080if(not $unformat) { 7081# Browse the mail message for display. 7082$btags[0] = $btags = []; 7083$$btags[0] = {tag => "special", fw => {} }; 7084$badHtml = 1; 7085$mailSubject = $mailFrom = $mailReply = $mailDate = ""; 7086renderMail(\$rendered) and pushRenderedText(\$rendered) or 7087dieq $errorMsg; 7088$rendered = undef; # don't need it any more 7089 7090# Break the lines in the buffer. 7091$fmode &= ~$browsemode; # so I can run the next command 7092evaluate(",bl"); 7093$errorMsg = ""; 7094$dot = $dol; 7095$fmode |= $browsemode; 7096 7097# Let user know about attachments. 7098my $unat = 0; # unnamed attachments 7099my $exat = 0; # attachment already exists 7100if($nat) { 7101print "$nat attachments.\n"; 7102$j = 0; 7103foreach $curPart (@mimeParts) { 7104next unless $$curPart{isattach}; 7105++$j; 7106$filename = $$curPart{filename}; 7107++$unat, next unless length $filename; 7108print "$j = $filename"; 7109if(-e $filename) { 7110print " exists"; 7111$exat = 1; 7112} 7113print "\n"; 7114} 7115} 7116 7117# Paste on the html segments. 7118foreach $j (1..$#factive) { 7119next unless $factive[$j]; 7120next unless $dol[$j]; 7121$map .= sprintf($lnformat, 0) if $dol; 7122if($dol > 4) { 7123$map .= sprintf($lnformat, 1); 7124$map .= sprintf($lnformat, 0); 7125} 7126$map .= substr($map[$j], $lnwidth); 7127$dot = $dol = length($map)/$lnwidth - 1; 7128} 7129foreach my $t (@text) { 7130removeHiddenNumbers \$t; 7131} 7132 7133# See if the mail is redirected. 7134if(length $mailReply and $#fromSource >= 0) { 7135my $lowReply = lc $mailReply; 7136foreach my $j (0..$#fromSource) { 7137next unless index($lowReply, $fromSource[$j]) >= 0; 7138$redirect = $fromDest[$j]; 7139last; 7140} 7141} 7142 7143# I'm not going to redirect mail if there are unamed or existing attachments. 7144$redirect = "" if $redirect ne "x" and $unat + $exat; 7145} # formatting the mail message 7146 7147my $dispLine = 1; 7148if(length $redirect) { 7149$delFlag = 1; 7150# Replace % date/time fields. 7151if($redirect =~ /%[ymdhns]{2,}/) { 7152my ($ss, $nn, $hh, $dd, $mm, $yy) = localtime time; 7153$mm++; 7154$yy += 1900; 7155$redirect =~ s/%yyyy/sprintf "%4d", $yy/ge; 7156$redirect =~ s/%yy/sprintf "%02d", $yy%100/ge; 7157$redirect =~ s/%mm/sprintf "%02d", $mm/ge; 7158$redirect =~ s/%dd/sprintf "%02d", $dd/ge; 7159$redirect =~ s/%hh/sprintf "%02d", $hh/ge; 7160$redirect =~ s/%nn/sprintf "%02d", $nn/ge; 7161$redirect =~ s/%ss/sprintf "%02d", $ss/ge; 7162} 7163print "$mailReply > $redirect\n"; 7164} 7165 7166# display the next page of mail and get an input character. 7167dispInput: { 7168if(! $delFlag) { 7169print("skipped\n"), $delFlag = 1, last if ! $unformat and length $mailSubject and defined $junkSubjects{$mailSubject}; 7170foreach $j (keys %junkSubjects) { 7171next unless $j =~ /^`/; 7172my $trash = $j; 7173$trash =~ s/^`//; 7174next unless index($exact_msg, $trash) >= 0; 7175print("trash\n"), $delFlag = 1, last dispInput; 7176} 7177if($dispLine <= $dol) { 7178foreach $j (1..20) { 7179last if $dispLine > $dol; 7180my $line = fetchLine $dispLine, 0; 7181# Don't print date and return address, but they will be recorded, 7182# if you save the file. 7183next if $line =~ /^Mail sent /; 7184next if $line =~ /^Reply to /; 7185print "$line\n"; 7186} continue { ++$dispLine; } 7187} # display next page 7188} # not being deleted 7189 7190getkey: { 7191my $key; 7192if($delFlag) { 7193last if $redirect eq "x"; 7194$key = 'w'; 7195} else { 7196# Interactive prompt depends on whether there is more text or not. 7197STDOUT->autoflush(1); 7198print ($dispLine > $dol ? "? " : "* "); 7199STDOUT->autoflush(0); 7200 7201$key = userChar("qx? nwkuJdA"); 7202print "\b\b"; 7203 7204exit 0 if $key eq 'x'; 7205print("quit\n"), serverClose($proto), exit 0 if $key eq 'q'; 7206print("next\n"), last dispInput if $key eq 'n'; 7207print("delete\n"), $delFlag = 1, last dispInput if $key eq 'd'; 7208 7209if($key eq ' ') { 7210print "End of message\n" if $dispLine > $dol; 7211redo dispInput; 7212} 7213 7214if($key eq '?') { 7215print "?\tprint this help message. 7216q\tquit this program. 7217x\texit without changing anything on the mail server. 7218space\tread more of this mail message. 7219n\tmove on to the next mail message. 7220A\tadd the sender to your address book. 7221d\tdelete this message. 7222J\tjunk this subject, and delete any mail with this subject. 7223w\twrite this message to a file and delete it. 7224k\tkeep this message in a file, but don't delete it. 7225u\twrite this message unformatted to a file, and delete it.\n"; 7226redo; 7227} 7228 7229if($key eq 'J') { 7230print "No subject to junk\n", redo if $mailSubject eq ""; 7231print "No junkfile specified in your .ebprc file\n", redo unless length $junkFile; 7232print "junk\n"; 7233markSubject($mailSubject); 7234$delFlag = 1; 7235last dispInput; 7236} # J 7237 7238if($key eq 'A') { 7239print "No addressbook specified in your .ebprc file\n", redo unless length $addressFile; 7240print "Cannot establish sender's name and/or email address.", redo unless length $mailFrom and length $mailReply; 7241open FH, ">>$addressFile" 7242or dieq "Cannot append to $addressFile."; 7243$_ = lc $mailFrom; 7244s/\s/./g; 7245print "$_:$mailReply\n"; 7246print FH "$_:$mailReply\n"; 7247close FH; 7248redo; 7249} # A 7250} # delFlag or not 7251 7252# At this point we're saving the mail somewhere. 7253$delFlag = 1 if $key ne 'k'; 7254 7255if(length $redirect) { 7256$filename = $redirect; 7257} else { 7258$filename = getFileName(undef, 0); 7259} 7260if($filename ne "x") { 7261my $append = (-e $filename); 7262open FH, ">>$filename" 7263or dieq "Cannot create mail file $filename."; # should not happen 7264my $fsize = 0; 7265if($key eq 'u'or $unformat) { 7266print FH $exact_msg 7267or dieq "Cannot write to mail file $filename."; 7268$fsize = length $exact_msg; 7269} else { 7270foreach $j (1..$dol) { 7271my $line = fetchLine $j, 0; 7272print FH "$line\n" 7273or dieq "Cannot write to mail file $filename."; 7274$fsize += length($line) + 1; 7275} 7276} 7277close FH; 7278print "mail saved, $fsize bytes"; 7279print " appended" if $append; 7280print "\n"; 7281} 7282 7283if($key ne 'u' and $redirect ne 'x') { 7284# Ask the user about any attachments. 7285$j = 0; 7286foreach $curPart (@mimeParts) { 7287next unless $$curPart{isattach}; 7288++$j; 7289$filename = $$curPart{filename}; 7290if(length $redirect) { 7291print "attach $filename\n"; 7292} else { 7293print "Attachment $j "; 7294$filename = getFileName($filename, 1); 7295next if $filename eq "x"; 7296} 7297open FH, ">$filename" 7298or dieq "Cannot create attachment file $filename."; 7299binmode FH, ':raw' if $doslike; 7300print FH $$curPart{data} 7301or dieq "Cannot write to attachment file $filename."; 7302close FH; 7303} # loop over attachments 7304} # key other than 'u' 7305 7306} # input key 7307} # display and input 7308} # interactive or zap 7309 7310if($delFlag) { # Delete the message. 7311# Remember, it isn't really gone until you quit the session. 7312# So if you didn't want to delete, type x to exit abruptly, 7313# then fetch your mail again. 7314serverPutLine("dele $m"); 7315serverGetLine() or 7316dieq "Sorry, you took too long; mail server hung up."; 7317$serverLine =~ /^\+OK/ 7318or dieq "Unable to delete message <$serverLine>."; 7319} # Del 7320 7321} # loop over messages 7322 7323print "$nmsgs\n" if $zapmail; 7324 7325serverClose($proto); # that's all folks! 7326exit 0; 7327} # end mail client 7328 7329# Initial set of commands. 7330if($commandList{init}) { 7331evaluateSequence($commandList{init}, $commandCheck{init}); 7332} 7333 7334# Process the command line arguments. 7335foreach my $cx (0..$#ARGV) { 7336my $file = $ARGV[$cx]; 7337cxSwitch($cx, 0) if $cx; 7338$changeFname = ""; 7339my $rc = readFile($file, ""); 7340print "$filesize\n"; 7341$rc or print $errorMsg,"\n"; 7342$fname = $file; 7343$fname = $changeFname if length $changeFname; 7344$fmode &= ~($changemode|$firstopmode); 7345if($rc and $filesize and is_url($fname)) { 7346# Go ahead and browse it. 7347$inglob = $intFlag = 0; 7348$filesize = -1; 7349$rc = evaluate("b"); 7350print "$filesize\n" if $filesize >= 0; 7351$rc or print "$errorMsg\n"; 7352} # open of url 7353} # loop over args on the command line 7354cxSwitch(0, 0) if $context; 7355print "edbrowse ready\n" if ! length $fname; 7356 7357# get user commands. 7358while(1) { 7359my $line = readLine(); 7360my $saveLine = $line; 7361$inglob = 0; 7362$intFlag = 0; 7363$filesize = -1; 7364my $rc = evaluate($line); 7365print "$filesize\n" if $filesize >= 0; 7366if(!$rc) { 7367print ((($helpall or $cmd =~ /[$showerror_cmd]/o) ? $errorMsg : "?"), "\n"); 7368exit 1 if $errorExit; 7369} 7370$linePending = $saveLine; 7371if($ubackup) { 7372$lastdot = $savedot, $lastdol = $savedol; 7373$lastmap = $savemap, $lastlabels = $savelabels; 7374$ubackup = 0; 7375} 7376} # infinite loop 7377 7378#********************************************************************* 7379# The following code is written and maintained by Chris Brannon, 7380# cbrannon@wilnet1.com 7381# It manages secure http and ftp connections. 7382#********************************************************************* 7383 7384sub do_ssl($$$$) 7385{ 7386# Do the SSL thing. This takes four arguments: server, port, message, 7387# and buffer reference. 7388# <message> is a scalar containing http headers. <buffer reference> is 7389# a reference to a scalar. We tack each chunk of received data onto that 7390# scalar. Thusly, we don't have to return a variable containing twenty 7391# MB of data. 7392# I borrow heavily from Karl's plain http connection code. 7393unless(eval { require Net::SSLeay }) { 7394$errorMsg = "you must have the Net::SSLeay module and OpenSSL toolkit to speak https", return 0; 7395} 7396# Should I error-check these values? I don't know. Probably. 7397my $server = shift; 7398my $port = shift; 7399my $message = shift; 7400my $bufref = shift; 7401my $iaddr = inet_aton($server) or 7402$errorMsg = "Cannot identify $server on the network", return 0; 7403my $paddr = sockaddr_in($port, $iaddr); 7404my $proto = getprotobyname('tcp'); 7405socket(FH, PF_INET, SOCK_STREAM, $proto) or 7406$errorMsg = "cannot allocate a socket", return 0; 7407connect(FH, $paddr) or 7408$errorMsg = "cannot connect to $server on $port: $!", return 0; 7409Net::SSLeay::load_error_strings(); 7410Net::SSLeay::SSLeay_add_ssl_algorithms(); 7411Net::SSLeay::randomize(); 7412$ctx = Net::SSLeay::CTX_new(); 7413Net::SSLeay::CTX_set_options($ctx, &Net::SSLeay::OP_ALL); 7414if($ssl_verify) { 7415Net::SSLeay::CTX_load_verify_locations($ctx, $ebcerts, '') or 7416$errorMsg = "Error opening certificate file $ebcerts: $!", return 0; 7417Net::SSLeay::CTX_set_verify($ctx, &Net::SSLeay::VERIFY_PEER, 0); 7418} 7419# Should the user be warned somehow when SSL certificate verification has 7420# been turned off? Accepting unverifiable certificates can be a security 7421# risk. But some servers, like https://listman.redhat.com can't be verified 7422# with my certificate bundle. So I make verification the default, but optional. 7423$ssl = Net::SSLeay::new($ctx); 7424Net::SSLeay::set_fd($ssl, fileno(FH)) or 7425$errorMsg = Net::SSLeay::ERR_error_string(Net::SSLeay::ERR_get_error()), return 0; 7426if(Net::SSLeay::connect($ssl) == -1) { 7427$errorMsg = Net::SSLeay::ERR_error_string(Net::SSLeay::ERR_get_error()); 7428return 0; 7429} 7430Net::SSLeay::ssl_write_all($ssl, $message) or 7431$errorMsg = &Net::SSLeay::ERR_error_string(&Net::SSLeay::ERR_get_error()), return 0; 7432my ($chunk, $filesize, $rsize, $last_fk, $fk); 7433$fk = $last_fk = 0; 7434STDOUT->autoflush(1) if ! $doslike; 7435while($chunk = Net::SSLeay::ssl_read_all($ssl, 100000)) { 7436$$bufref .= $chunk; # how cute! 7437$rsize = length($chunk); # Is this computationally expensive?? 7438$filesize += $rsize; 7439last if $rsize == 0; 7440$fk = int($filesize / 100000); 7441if($fk > $last_fk) { 7442print "."; 7443$last_fk = $fk; 7444} 7445last if($filesize >= $maxfile); 7446} 7447close(FH); 7448print "\n" if $last_fk > $fk; 7449STDOUT->autoflush(0) if ! $doslike; 7450$filesize <= $maxfile or 7451$errorMsg = "file is too large, limit 40MB", return 0; 7452defined $rsize or 7453$errorMsg = "error reading data from the socket", return 0; 7454# There's no way to distinguish between a read error and reading a zero 7455# length file. I guess that's ok. 7456if(defined($filesize)) { 7457return $filesize; 7458} else { 7459return 0; 7460} 7461} # do_ssl 7462 7463sub ftp_connect($$$$) 7464{ 7465my($host, $port, $path, $bufref) = @_; 7466my $proto = 'ftp'; 7467my ($tempbuf, @disposeOf); 7468my $filesize = 0; 7469my $login = "anonymous"; 7470my $password = 'some-user@edbrowse.net'; 7471my $dataOpen = ( 7472$passive ? \&pasvOpen : \&ftpListen); 7473if($host =~ s/^([^:@]*):([^:@]*)@//) { 7474$login = $1, $password = $2; 7475} 7476# Do an ftp connect, prompting for username & password. 7477my $iaddr = inet_aton($host) or 7478$errorMsg = "cannot identify $host on the network", return 0; 7479my $paddr = sockaddr_in($port, $iaddr); 7480socket(SERVER_FH, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or 7481$errorMsg = "cannot allocate a socket", return 0; 7482connect(SERVER_FH, $paddr) or 7483$errorMsg = "cannot connect to $host", return 0; 7484SERVER_FH->autoflush(1); 7485STDOUT->autoflush(1) if !$doslike; 7486serverGetLine or serverClose($proto), return 0; 7487serverClose($proto), return 0 if ftpError($serverLine, 220, "server sent \"$serverLine\" while attempting login"); 7488serverPutLine "user $login"; 7489do { 7490serverGetLine or serverClose($proto), return 0; 7491} 7492while($serverLine =~ /^220/); 7493serverClose($proto), return 0 if ftpError($serverLine, 331, "invalid username: server sent $serverLine"); 7494serverPutLine "pass $password"; 7495serverGetLine or serverClose($proto), return 0; 7496serverClose($proto), return 0 if ftpError($serverLine, 230, "bad password: server sent $serverLine"); 7497my $wmsg = ""; # welcome message 7498if($serverLine =~ s/^\s*230\s*?-\s*//) { 7499# We got a welcome message. 7500$wmsg = "$serverLine\n"; 7501while(serverGetLine) { 7502last if $serverLine =~ /^\s*230\s*?[^-]/; 7503$serverLine =~ s/^\s*230\s*?-\s*//; 7504$wmsg .= "$serverLine\n"; 7505} 7506} 7507$wmsg = "" unless $path eq "/"; # trash the welcome message, we're going somewhere else 7508serverPutLine "CWD $path"; 7509serverGetLine or serverClose($proto), return 0; 7510if($serverLine =~ /^\s*250\s*/) { 7511if($serverLine =~ s/^\s*250\s*?-\s*//) { 7512# Its a directory-specific greeting. 7513$wmsg = "$serverLine\n"; 7514while(serverGetLine) { 7515last if $serverLine =~ /^\s*250\s*?[^-]/; 7516$serverLine =~ s/^\s*250\s*?-\s*//; 7517$wmsg .= "$serverLine\n"; 7518} 7519} 7520serverPutLine "type a"; 7521serverGetLine or serverClose($proto), return 0; 7522serverClose($proto), return 0 if ftpError($serverLine, 200, "ASCII transfers not supported by server: received \"$serverLine\""); 7523&$dataOpen or 7524serverClose($proto), return 0; 7525serverPutLine "list"; 7526serverGetLine or serverClose($proto), return 0; 7527serverClose($proto), return 0 if ftpError($serverLine, "150", "error retrieving directory listing"); 7528$tempbuf = ""; 7529ftpRead(\$tempbuf) or 7530serverClose($proto), return 0; 7531ParseList: { 7532serverPutLine "syst"; 7533serverGetLine or serverClose($proto), return 0; 7534if($serverLine =~ /unix/i) { 7535# Good. Let's try to htmlize this file listing. 7536my $base_filename = "ftp://$host"; 7537$base_filename .= ":$port" if $port != 21; 7538$base_filename .= $path; 7539$base_filename .= '/' if $base_filename !~ m,/$,; 7540# Yah, I know. That looks disgusting. 7541textUnmeta(\$wmsg); 7542$$bufref = "http/1.0 200 ok$eol$eol<html><head><title>Directory Listing</title>\n</head>\n<body>\n$wmsg<ul>\n"; 7543my @lines = split("$eol", $tempbuf); 7544shift(@lines); # Ditch the "total: xxx" line from Unix ls 7545foreach $line (@lines) { 7546# Extract the filename and length from ls -l format 7547my @listItems = split /\s+/, $line; 7548my $mode = $listItems[0]; 7549my $extracted = $listItems[$#listItems]; 7550my $extlen = $listItems[$#listItems-4]; 7551$extlen = "/" if $mode =~ /^d/; 7552$$bufref .= "<li><a href=\"$base_filename$extracted\">$extracted</a> $extlen\n"; 7553} 7554$$bufref .= "</ul>\n</body>\n</html>\n"; 7555$$bufref =~ s/<ul>\n<\/ul>/This ftp directory is empty./; 7556$filesize = length($$bufref); 7557} else { 7558$$bufref = $tempbuf; # Oh well... 7559} 7560serverPutLine "quit"; 7561@disposeOf = <SERVER_FH>; 7562close SERVER_FH; 7563return 0 if !$filesize; 7564return $filesize; 7565} # ParseList 7566} else { 7567# Try to retr. If unable, the path was bogus. 7568serverPutLine "type i"; 7569serverGetLine or serverClose($proto), return 0; 7570serverClose($proto), return 0 if ftpError($serverLine, 200, "binary transfers unsupported by server: received \"$serverLine\""); 7571&$dataOpen or 7572serverClose($proto), return 0; 7573serverPutLine "retr $path"; 7574serverGetLine or serverClose($proto), return 0; 7575serverClose($proto), return 0 if ftpError($serverLine, "150", 7576"the path you specified in this URL is neither a filename nor a directory"); 7577# Let's read our data. 7578$filesize = ftpRead($bufref); 7579serverClose($proto); 7580# The problem is, the ftp server will get an extraneous abor command when 7581# we close connection. I only want these sent after an error condition, to 7582# abort a transfer. 7583return 0 if !$filesize; 7584return $filesize; 7585} 7586} # ftp_connect 7587 7588sub ftpRead($) 7589{ 7590# I don't like the fact that this subroutine returns 0 on error. Seems wrong. 7591my $bufref = shift; 7592my $rsize = 0; 7593my $filesize = 0; 7594my $last_fk = 0; 7595my $chunk; 7596my $proto = 'ftp'; 7597if(!$passive) { 7598my $check = ''; 7599vec($check, fileno(FLFH), 1) = 1; 7600select($check, undef, undef, 10) or 7601$errorMsg = "ftp data connection timed out", $filesize = 0, goto Cleanup; 7602socket(FDFH, PF_INET, SOCK_STREAM, getprotobyname('TCP')) or 7603$errorMsg = "unable to allocate a socket", $filesize = 0, goto Cleanup; 7604accept(FDFH, FLFH); 7605shutdown(FDFH, 1); 7606} 7607while(defined($rsize = sysread(FDFH, $chunk, 100000))) { 7608print "sockread $rsize\n" if $debug >= 5; 7609$$bufref .= $chunk; 7610$filesize += $rsize; 7611last if $rsize == 0; 7612my $fk = int($filesize / 100000); 7613if($fk > $last_fk) { 7614print "."; 7615$last_fk = $fk; 7616} 7617last if $filesize >= $maxfile; 7618} 7619my $line; 7620serverGetLine or return 0; 7621close FDFH; 7622close FLFH; 7623# ignore it; it should read 226 transfer complete 7624print "\n" if $last_fk; 7625defined($rsize) or 7626$errorMsg = "error reading data from the socket", $filesize = 0, goto Cleanup; 7627$filesize <= $maxfile or 7628$errorMsg = "file to large: 4-1M limit", $filesize = 0, goto Cleanup; 7629$filesize > 0 or 7630$errorMsg = "empty file", $filesize = 0, goto Cleanup; 7631Cleanup: { 7632close FDFH if defined FDFH; 7633close FLFH if defined FLFH; 7634return $filesize; 7635} 7636} # ftpRead 7637 7638sub ftpError($$$) 7639{ 7640# This subroutine matches an ftp response against an status code. The code can 7641# be specified as a regexp. So, 25[0-9] as the status code will let us match 7642# any of the 25X status codes. 7643# It returns 1 on error. This subroutine used to do cleanup, but 7644# I'm leaving this job to the main ftp subroutine. 7645my ($input, $statcode, $errmsg) = @_; 7646$errorMsg = $errmsg, return 1 if($input !~ /^\s*$statcode/); 7647return 0; 7648} # ftpError 7649 7650sub pasvOpen() 7651{ 7652my ($line, $packed_ftpaddr, $ipaddr, $port); 7653serverPutLine "pasv"; 7654serverGetLine or return 0; 7655return 0 if ftpError($serverLine, '227', "server doesn't support passive mode: received \"$serverLine\""); 7656if($serverLine =~ /([0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+)/) { 7657$packed_ftpaddr = pack("C6", split(',', $1)); 7658} else { 7659$errorMsg = "cannot make ftp data connection: server sent \"$serverLine\""; 7660return 0; 7661} 7662$ipaddr = substr($packed_ftpaddr, 0, 4); 7663$port = unpack("n", substr($packed_ftpaddr, 4, 2)); 7664# The address for ftp data connections is written this way: 7665# 127,0,0,1,100,100 7666# We turn those decimal notations into a packed string of unsigned chars.. 7667# The first four characters are the IP address in network byte order. They 7668# are fed directly to sockaddr_in. The last two are unpacked as an 7669# unsigned short in NBO. The new decimal representation is fed to sockaddr_in. 7670my $saddr = sockaddr_in($port, $ipaddr); 7671socket(FDFH, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or 7672$errorMsg = "cannot allocate a socket", return 0; 7673connect(FDFH, $saddr) or 7674$errorMsg = "cannot open ftp data connection", return 0; 7675shutdown(FDFH, 1); # Hmm. My server hangs if this doesn't happen... 7676return 1; 7677} # pasvOpen 7678 7679sub ftpListen { 7680my $ctladdr = (sockaddr_in(getsockname(SERVER_FH)))[1]; 7681$errorMsg = "unable to obtain address of control connection; cannot initiate data connection", 7682return 0 if !$ctladdr; 7683my $port = int(rand(64510) + 1025); 7684socket(FLFH, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or 7685$errorMsg = "unable to allocate a socket", return 0; 7686my $saddr = sockaddr_in($port, $ctladdr); 7687bind(FLFH, $saddr) or 7688$errorMsg = "unable to bind socket: port $port $!", return 0; 7689listen(FLFH, 1) or 7690$errorMsg = "unable to listen on ftp data socket", return 0; 7691serverPutLine sprintf("port %d,%d,%d,%d,%d,%d", unpack('C4', $ctladdr), $port >> 8, $port & 255); 7692serverGetLine or return 0; 7693return 0 if ftpError($serverLine, '200', "ftp server does not support port command, received \"$serverLine\""); 7694shutdown(FLFH, 1); 7695return 1; 7696} # ftpListen 7697 7698 7699 7700# Cookie support 7701sub setCookies($$) 7702{ 7703# We only support Netscape-style cookies presently. The newer style will 7704# be supported eventually. It offers some functionality that Netscape's 7705# doesn't. 7706my $cookie = shift; 7707print "incoming cookie: $cookie\n" if $debug >= 4; 7708$cookie =~ s/^Set-Cookie:\s+//i; 7709stripWhite \$cookie; 7710return unless length $cookie; 7711my $url_desc = shift; 7712my @cook_array = split(';', $cookie); 7713# We should have the cookie into its component parts. 7714my($name, $value, $path, $domain, $expires, $secure); 7715($name, $value) = split('=', shift(@cook_array), 2); 7716$value = "" unless defined $value; 7717my $crumb; 7718while($crumb = shift(@cook_array)) { 7719stripWhite \$crumb; 7720$crumb = "secure=" if $crumb =~ /^secure$/i; 7721if($crumb =~ s/^domain=//i) { 7722# Do some work on $crumb to protect us from general maliciousness/idiocy. 7723my $workingserver = $$url_desc{SERVER}; 7724next unless $$url_desc{SERVER} =~ /\Q$crumb\E$/i; 7725my $l = length $crumb; 7726next if length($workingserver) > $l and substr($crumb, 0, 1) ne '.' and substr($workingserver, -$l-1, 1) ne '.'; 7727# We simply won't use a bogus domain attribute. We ignore it, and the domain 7728# eventually is set to the default. 7729# In other words, we don't want somebody sending us a cookie for all of .com. 7730my $numfields = $crumb =~ y/././; 7731++$numfields unless substr($crumb, 0, 1) eq '.'; 7732if($crumb =~ /\.(net|com|gov|edu|mil|org|int|tv|bus)$/i) { 7733# One nasty regexp, oh well. Domain attributes from these domains may 7734# have a minimum of two fields. 7735next if $numfields < 2; 7736} else { 7737# Everyone else needs three fields. 7738next if $numfields < 3; 7739} 7740$domain = $crumb; 7741} elsif($crumb =~ s/^path=//i) { 7742$path = $crumb; 7743} elsif($crumb =~ s/^expires=?\s*//i) { 7744# Squeeze a time_t out of the date string, hopefully! If not, then "-1" 7745# is used as the date, so the cookie will expire on quit. 7746$expires = cookieDate($crumb); 7747} elsif($crumb =~ s/^max-age=?\s*//i) { 7748if($crumb =~ /^\d+$/ and not defined $expires) { 7749$expires = time() + $crumb; 7750} 7751} elsif($crumb =~ s/^secure=//i) { 7752# SSL-only cookie. 7753$secure = 1; 7754} else { 7755print STDERR "Error processing cookie with element $crumb\n"; # debugging statement 7756} 7757} 7758$domain = $$url_desc{SERVER} if !defined $domain; 7759# Here's what it should be, according to the standard. 7760# $path = $$url_desc{PATH} if !defined $path; 7761# Here's what some sites require, such as http://tdzk.net 7762# This is apparently what Explorer does. 7763# Oh well, who the hell needs standards; 7764# when you're a monopoly you set the standards. 7765$path = "/" if !defined $path; 7766$expires = -1 if !defined $expires; 7767$secure = 0 if !defined $secure; # For secure cookies, it will have been set to 1 7768# Put the cookie into the master cookie jar. 7769print "into jar: $domain $path $expires $name $value\n" if $debug >= 4; 7770$cookies{$domain}{$path}{$name} = 7771{value => $value, expires => $expires, secure => $secure}; 7772# If a server sends two cookies of the same path and name, with different values, 7773# the former will be quashed by the latter. This is proper behavior. 7774if($expires != -1) { # Persistent cookie. 7775my $chmodFlag = 0; 7776$chmodFlag = 1 unless -f $ebcooks; 7777# Now, append to the cookie file. 7778# I learned the format for Netscape's cookie file from lynx's source. Thank you, lynx team. 7779if(!open(COOKFILE, ">>$ebcooks")) { 7780warn "unable to open cookie jar for append: $!"; 7781} else { 7782chmod 0600, $ebcooks if $chmodFlag; 7783print COOKFILE join("\t", $domain, 'FALSE', $path, 7784$secure ? 'TRUE' : 'FALSE', $expires, $name, $value) . "\n"; 7785# A note. Lynx defines a field, "what". I don't know what its used 7786# for. But all the Netscape cookie files I've seen have it set to "FALSE". 7787# so will we. 7788# Maybe its proprietary to Netscape's browser. 7789close COOKFILE; 7790} 7791} 7792} # setCookies 7793 7794sub fetchCookies($) 7795{ 7796my $url_desc = shift; 7797my $cur_scheme = $$url_desc{SCHEME}; 7798my $cur_domain = $$url_desc{SERVER}; 7799my $cur_path = $$url_desc{PATH}; 7800my ($domainm, $pathm, $cookiem); # The 'm' at the end stands for 'match' 7801my @sendable = (); # Sendable cookie strings. 7802foreach $domainm (keys(%cookies)) { 7803next unless $cur_domain =~ /\Q$domainm\E$/i; 7804my $l = length $domainm; 7805next if length($cur_domain) > $l and substr($domainm, 0, 1) ne '.' and substr($cur_domain, -$l-1, 1) ne '.'; 7806foreach $pathm (keys(%{$cookies{$domainm}})) { 7807next unless $cur_path =~ /^\Q$pathm\E/; 7808foreach $cookiem (keys(%{$cookies{$domainm}{$pathm}})) { 7809my $deref = $cookies{$domainm}{$pathm}{$cookiem}; 7810# $deref is a simple hash reference, containing the description of one cookie. 7811# We can do the rest of our matching painlessly, without dereferencing 7812# the whole nasty data structure every time. 7813next if $$deref{secure} and ($cur_scheme !~ /https/); 7814my $j = join('=', $cookiem, $$deref{value}); 7815$j =~ s/=$//; 7816push @sendable, $j; 7817print "outgoing cookie: $domainm $pathm $j\n" if $debug >= 4; 7818} 7819} 7820} 7821return "" if $#sendable < 0; # no cookies 7822my $outgoing = 'Cookie: ' . join("; ", @sendable); 7823# Lynx prepends a cookie2: directive. 7824# I don't know what it means or what it's for. Here it is. 7825return "Cookie2: \$Version=1$eol$outgoing$eol"; 7826} # fetchCookies 7827 7828sub cookieDate($) 7829{ 7830# This might become a general http date decoder, if we ever find 7831# places where dates are useful. 7832my $datestring = shift; 7833stripWhite \$datestring; 7834if($datestring =~ /^[a-z]{3,9},\s+(\d\d)[- ]([a-z]{3})[- ](\d\d(?:\d\d)?)\s+(\d\d):(\d\d):(\d\d)\s+GMT/i) { 7835my ($day, $mon, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6); 7836if(($year < 100) and ($year > 0)) { 7837# two digit. 7838if($year >= 70) { 7839$year += 1900; 7840} else { $year += 2000; } 7841} 7842$mon = $monhash{lc($mon)} - 1; 7843# We should probably range-check all the fields, 7844# but year is definitely necessary. 7845$year = 2035 if $year > 2035; 7846$year = 1970 if $year < 1970; 7847my $time = timegm($sec, $min, $hour, $day, $mon, $year); 7848return $time; 7849} else { 7850return -1; 7851} 7852} # cookyDate 7853 7854sub fillJar() 7855{ 7856# Initialize the cookie jar. 7857my $writeFlag = 0; # Write revised cookie file? 7858open(COOKFILE, "+<$ebcooks") or return; 7859my $inline; 7860my $nowtime = time; 7861while($inline = <COOKFILE>) { 7862chomp $inline; 7863my ($domain, $what, $path, $secure, $expires, $name, $value) = split("\t", $inline); 7864$writeFlag = 1 if exists $cookies{$domain}{$path}{$name}; 7865if($expires > $nowtime) { 7866$cookies{$domain}{$path}{$name} = 7867{value => $value, secure => $secure eq "TRUE" ? 1 : 0, expires => $expires} 7868} else { 7869$writeFlag = 1; 7870} # cookies expired. 7871} # loop reading 7872if($writeFlag) { 7873seek COOKFILE, 0, 0; 7874truncate COOKFILE, 0; 7875my ($odomain, $opath, $ocook); # o for out 7876foreach $odomain (keys(%cookies)) { 7877foreach $opath (keys(%{$cookies{$odomain}})) { 7878foreach $ocook (keys(%{$cookies{$odomain}{$opath}})) { 7879my %deref = %{$cookies{$odomain}{$opath}{$ocook}}; 7880print COOKFILE join("\t", $odomain, 'FALSE', $opath, 7881$deref{secure} ? "TRUE" : "FALSE", $deref{expires}, $ocook, 7882$deref{value}), "\n" if $deref{expires} > $nowtime; 7883} 7884} 7885} 7886} # rewrite file 7887close COOKFILE; 7888} # fillJar 7889 7890#********************************************************************* 7891# Web Express features. For more on Web Express, visit 7892# http://www.webexpresstech.com/WebXP/WebExpressTutorial.html 7893#********************************************************************* 7894 7895sub webExpress($) 7896{ 7897my $line = shift; 7898stripWhite \$line; 7899$line =~ s/\s+/ /g; 7900my $code = $line; 7901$code =~ s/ .*//; 7902$line =~ s/.*? //; 7903defined $shortcut{$code} or 7904$errorMsg = "shortcut $code is not recognized", return 0; 7905my $newurl = $shortcut{$code}{url}; 7906 7907# Step through $line and extract options, indicated by - 7908# This isn't implemented yet. 7909 7910# Done with options, what remains is the search argument. 7911my $arg = urlEncode $line; 7912length $arg or 7913$errorMsg = "shortcut is given no search argument", return 0; 7914 7915# Put the argument into the url. 7916$newurl =~ s/\$1/$arg/; 7917 7918return 1, $newurl, $shortcut{$code}{after}; 7919} # webExpress 7920 7921 7922# return "x" if an error is encountered 7923sub parseWWWAuth($$) 7924{ 7925my ($authline, $url_desc) = @_; 7926my ($qop_auth, $qop_authint) = (0, 1); # this would be an enum in C 7927my ($username, $pass); 7928 7929# parse the authorization request line 7930my @challenges = (); 7931my ($attribname, $value); 7932stripWhite(\$authline); 7933$authline =~ s/^WWW-Authenticate:\s*//i; 7934while($authline =~ s/^\s*([^\s]+)\s+//) { 7935my %challenge = (authscheme => $1); 7936while($authline =~ s/^([^=]+)=//) { 7937$attribname = lc($1); 7938if($authline =~ s/^"//) { 7939# value of attribute is a quoted string. 7940$authline =~ s/^([^"]+)"((,\s*)|$)//; 7941$value = $1; 7942} else { 7943$authline =~ s/^([^,]+)((,\s*)|$)//; 7944$value = $1; 7945} 7946$challenge{$attribname} = $value; 7947} 7948if($challenge{authscheme} =~ /^digest/i && defined($challenge{qop})) { 7949my ($q, $newq) = undef; 7950my @qop = split(/\s*,\s*/, $challenge{qop}); 7951foreach $q (@qop) { 7952$newq = $qop_authint, last if $q =~ /^auth-int$/i; 7953} 7954if(!defined($newq)) { 7955foreach $q (@qop) { 7956$newq = $qop_auth, last if $q =~ /^auth$/i; 7957} 7958} 7959$errorMsg = "Server sent a bad qop value in digest authentication", return "x" unless defined $newq; 7960$challenge{qop} = $newq; 7961} 7962push(@challenges, {%challenge}); 7963} 7964my ($c, $used_challenge) = undef; 7965# Server may have sent multiple challenges with multiple auth schemes. 7966# Spec says that we use the strongest scheme supported by the server. 7967foreach $c (@challenges) { 7968$used_challenge = $c, last if $$c{authscheme} =~ /^Digest$/i; 7969} 7970if(!defined($used_challenge)) { 7971foreach $c (@challenges) { 7972$used_challenge = $c if($$c{authscheme} =~ /Basic/); 7973} 7974} 7975$errorMsg = "no usable challenges were found", return "x" unless defined $used_challenge; 7976if($$used_challenge{authscheme} =~ /Basic/i) { 7977($username, $pass) = getUserPass($$url_desc{SERVER} . "\x01" . $$url_desc{PORT} . "\x01" . $$used_challenge{realm}); 7978return "x" if $username eq "x"; 7979my $do64x = "$username:$pass"; 7980my $do64y = ""; 7981encodeBase64(\$do64x, "", \$do64y); 7982return "Authorization: Basic $do64y$eol"; 7983} 7984else { # Not Basic, must be Digest. 7985unless(eval { require Digest::MD5 }) { 7986$errorMsg = "You need to download the Digest::MD5 module from CPAN to do digest authentication.", return "x"; 7987} 7988$errorMsg = "Unsupported algorithm for digest authentication", return "x" if(defined($$used_challenge{algorithm}) && $$used_challenge{algorithm} !~ /^md5$/i); 7989$errorMsg = "unable to perform digest authentication", return "x" if(!defined($$used_challenge{realm}) 7990|| !defined($$used_challenge{nonce})); 7991($username, $pass) = getUserPass($$url_desc{SERVER} . "\x01" . $$url_desc{PORT} . "\x01" . $$used_challenge{realm}); 7992return "x" if $username eq "x"; 7993srand(time()); 7994my $nc = "00000001"; 7995my $cnonce = sprintf("%08x%08x", int(rand(0xffffffff)), int(rand(0xffffffff))); 7996# pseudorandoms are fine here. The cnonce is used to thwart chosen plaintext 7997# attacks when checking integrity of message content. Probably not much 7998# of a threat for MD5. Maybe it will be someday, and when it is, I'll 7999# dream up a better way to create a random cnonce. 8000my ($a1, $a2); 8001$a1 = "$username:$$used_challenge{realm}:$pass"; 8002if($$used_challenge{qop} == $qop_authint) { 8003$a2 = $$url_desc{method} . ':' . $$url_desc{PATH} . Digest::MD5::md5_hex($$url_desc{content}); 8004} else { 8005$a2 = $$url_desc{method} . ':' . $$url_desc{PATH}; 8006} 8007my $response; 8008if(defined($$used_challenge{qop})) { 8009$response = Digest::MD5::md5_hex(Digest::MD5::md5_hex($a1) . ':' . $$used_challenge{nonce} . ':' . 8010 $nc . ':' . $cnonce . ':' . 8011($$used_challenge{qop} == $qop_auth ? "auth" : "auth-int") . ':' . Digest::MD5::md5_hex($a2)) ; 8012} else { 8013$response = Digest::MD5::md5_hex(Digest::MD5::md5_hex($a1) . ':' . $$used_challenge{nonce} . ':' . Digest::MD5::md5_hex($a2)) ; 8014} 8015my $out = "Authorization: Digest username=\"$username\", realm=\"$$used_challenge{realm}\", " . 8016"nonce=\"$$used_challenge{nonce}\", uri=\"$$url_desc{PATH}\", response=\"$response\""; 8017$out .= ", opaque=\"$$used_challenge{opaque}\"" if defined($$used_challenge{opaque}); 8018$out .= ", algorithm=\"$$used_challenge{algorithm}\"" if defined($$used_challenge{algorithm}); 8019if(defined($$used_challenge{qop})) { 8020$out .= ", qop="; 8021$out .= "\"auth\"" if $$used_challenge{qop} == $qop_auth; 8022$out .= "\"auth-int\"" if $$used_challenge{qop} == $qop_authint; 8023$out .= ", nc=$nc, cnonce=\"$cnonce\""; 8024} 8025$out .= "$eol"; 8026return $out; 8027} 8028} # parseWWWAuth 8029 8030sub getUserPass($) 8031{ 8032my $request = shift; 8033my $abort = "login password sequence aborted"; 8034if(! $authAttempt and defined $authHist{$request}) { 8035return split ":", $authHist{$request}; 8036} 8037my ($server, $port, $realm) = split(":", $request); 8038print "Server $server requests authentication for $realm. (type x to abort)\n"; 8039print "Username: "; 8040my $username = <STDIN>; 8041chomp $username; 8042$errorMsg = $abort, return ("x","x") if $username eq "x"; 8043print "Password: "; 8044my $pass = <STDIN>; 8045chomp $pass; 8046$errorMsg = $abort, return ("x","x") if $pass eq "x"; 8047$authHist{$request} = "$username:$pass"; 8048return ($username, $pass); 8049} # getUserPass 8050 8051 8052