1# http.tcl --
2#
3#	Client-side HTTP for GET, POST, and HEAD commands. These routines can
4#	be used in untrusted code that uses the Safesock security policy.
5#	These procedures use a callback interface to avoid using vwait, which
6#	is not defined in the safe base.
7#
8# See the file "license.terms" for information on usage and redistribution of
9# this file, and for a DISCLAIMER OF ALL WARRANTIES.
10
11package require Tcl 8.4
12# Keep this in sync with pkgIndex.tcl and with the install directories in
13# Makefiles
14package provide http 2.7.13
15
16namespace eval http {
17    # Allow resourcing to not clobber existing data
18
19    variable http
20    if {![info exists http]} {
21	array set http {
22	    -accept */*
23	    -proxyhost {}
24	    -proxyport {}
25	    -proxyfilter http::ProxyRequired
26	    -urlencoding utf-8
27	}
28	set http(-useragent) "Tcl http client package [package provide http]"
29    }
30
31    proc init {} {
32	# Set up the map for quoting chars. RFC3986 Section 2.3 say percent
33	# encode all except: "... percent-encoded octets in the ranges of
34	# ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period
35	# (%2E), underscore (%5F), or tilde (%7E) should not be created by URI
36	# producers ..."
37	for {set i 0} {$i <= 256} {incr i} {
38	    set c [format %c $i]
39	    if {![string match {[-._~a-zA-Z0-9]} $c]} {
40		set map($c) %[format %.2X $i]
41	    }
42	}
43	# These are handled specially
44	set map(\n) %0D%0A
45	variable formMap [array get map]
46
47	# Create a map for HTTP/1.1 open sockets
48	variable socketmap
49	if {[info exists socketmap]} {
50	    # Close but don't remove open sockets on re-init
51	    foreach {url sock} [array get socketmap] {
52		catch {close $sock}
53	    }
54	}
55	array set socketmap {}
56    }
57    init
58
59    variable urlTypes
60    if {![info exists urlTypes]} {
61	set urlTypes(http) [list 80 ::socket]
62    }
63
64    variable encodings [string tolower [encoding names]]
65    # This can be changed, but iso8859-1 is the RFC standard.
66    variable defaultCharset
67    if {![info exists defaultCharset]} {
68	set defaultCharset "iso8859-1"
69    }
70
71    # Force RFC 3986 strictness in geturl url verification?
72    variable strict
73    if {![info exists strict]} {
74	set strict 1
75    }
76
77    # Let user control default keepalive for compatibility
78    variable defaultKeepalive
79    if {![info exists defaultKeepalive]} {
80	set defaultKeepalive 0
81    }
82
83    namespace export geturl config reset wait formatQuery register unregister
84    # Useful, but not exported: data size status code
85}
86
87# http::Log --
88#
89#	Debugging output -- define this to observe HTTP/1.1 socket usage.
90#	Should echo any args received.
91#
92# Arguments:
93#     msg	Message to output
94#
95proc http::Log {args} {}
96
97# http::register --
98#
99#     See documentation for details.
100#
101# Arguments:
102#     proto	URL protocol prefix, e.g. https
103#     port	Default port for protocol
104#     command	Command to use to create socket
105# Results:
106#     list of port and command that was registered.
107
108proc http::register {proto port command} {
109    variable urlTypes
110    set urlTypes([string tolower $proto]) [list $port $command]
111}
112
113# http::unregister --
114#
115#     Unregisters URL protocol handler
116#
117# Arguments:
118#     proto	URL protocol prefix, e.g. https
119# Results:
120#     list of port and command that was unregistered.
121
122proc http::unregister {proto} {
123    variable urlTypes
124    set lower [string tolower $proto]
125    if {![info exists urlTypes($lower)]} {
126	return -code error "unsupported url type \"$proto\""
127    }
128    set old $urlTypes($lower)
129    unset urlTypes($lower)
130    return $old
131}
132
133# http::config --
134#
135#	See documentation for details.
136#
137# Arguments:
138#	args		Options parsed by the procedure.
139# Results:
140#        TODO
141
142proc http::config {args} {
143    variable http
144    set options [lsort [array names http -*]]
145    set usage [join $options ", "]
146    if {[llength $args] == 0} {
147	set result {}
148	foreach name $options {
149	    lappend result $name $http($name)
150	}
151	return $result
152    }
153    set options [string map {- ""} $options]
154    set pat ^-(?:[join $options |])$
155    if {[llength $args] == 1} {
156	set flag [lindex $args 0]
157	if {![regexp -- $pat $flag]} {
158	    return -code error "Unknown option $flag, must be: $usage"
159	}
160	return $http($flag)
161    } else {
162	foreach {flag value} $args {
163	    if {![regexp -- $pat $flag]} {
164		return -code error "Unknown option $flag, must be: $usage"
165	    }
166	    set http($flag) $value
167	}
168    }
169}
170
171# http::Finish --
172#
173#	Clean up the socket and eval close time callbacks
174#
175# Arguments:
176#	token	    Connection token.
177#	errormsg    (optional) If set, forces status to error.
178#       skipCB      (optional) If set, don't call the -command callback. This
179#		    is useful when geturl wants to throw an exception instead
180#		    of calling the callback. That way, the same error isn't
181#		    reported to two places.
182#
183# Side Effects:
184#        Closes the socket
185
186proc http::Finish {token {errormsg ""} {skipCB 0}} {
187    variable $token
188    upvar 0 $token state
189    global errorInfo errorCode
190    if {$errormsg ne ""} {
191	set state(error) [list $errormsg $errorInfo $errorCode]
192	set state(status) "error"
193    }
194    if {
195	($state(status) eq "timeout") || ($state(status) eq "error") ||
196	([info exists state(connection)] && ($state(connection) eq "close"))
197    } then {
198        CloseSocket $state(sock) $token
199    }
200    if {[info exists state(after)]} {
201	after cancel $state(after)
202    }
203    if {[info exists state(-command)] && !$skipCB
204	    && ![info exists state(done-command-cb)]} {
205	set state(done-command-cb) yes
206	if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} {
207	    set state(error) [list $err $errorInfo $errorCode]
208	    set state(status) error
209	}
210    }
211}
212
213# http::CloseSocket -
214#
215#	Close a socket and remove it from the persistent sockets table.  If
216#	possible an http token is included here but when we are called from a
217#	fileevent on remote closure we need to find the correct entry - hence
218#	the second section.
219
220proc ::http::CloseSocket {s {token {}}} {
221    variable socketmap
222    catch {fileevent $s readable {}}
223    set conn_id {}
224    if {$token ne ""} {
225        variable $token
226        upvar 0 $token state
227        if {[info exists state(socketinfo)]} {
228	    set conn_id $state(socketinfo)
229        }
230    } else {
231        set map [array get socketmap]
232        set ndx [lsearch -exact $map $s]
233        if {$ndx != -1} {
234	    incr ndx -1
235	    set conn_id [lindex $map $ndx]
236        }
237    }
238    if {$conn_id eq {} || ![info exists socketmap($conn_id)]} {
239        Log "Closing socket $s (no connection info)"
240        if {[catch {close $s} err]} {
241	    Log "Error: $err"
242	}
243    } else {
244	if {[info exists socketmap($conn_id)]} {
245	    Log "Closing connection $conn_id (sock $socketmap($conn_id))"
246	    if {[catch {close $socketmap($conn_id)} err]} {
247		Log "Error: $err"
248	    }
249	    unset socketmap($conn_id)
250	} else {
251	    Log "Cannot close connection $conn_id - no socket in socket map"
252	}
253    }
254}
255
256# http::reset --
257#
258#	See documentation for details.
259#
260# Arguments:
261#	token	Connection token.
262#	why	Status info.
263#
264# Side Effects:
265#       See Finish
266
267proc http::reset {token {why reset}} {
268    variable $token
269    upvar 0 $token state
270    set state(status) $why
271    catch {fileevent $state(sock) readable {}}
272    catch {fileevent $state(sock) writable {}}
273    Finish $token
274    if {[info exists state(error)]} {
275	set errorlist $state(error)
276	unset state
277	eval ::error $errorlist
278    }
279}
280
281# http::geturl --
282#
283#	Establishes a connection to a remote url via http.
284#
285# Arguments:
286#       url		The http URL to goget.
287#       args		Option value pairs. Valid options include:
288#				-blocksize, -validate, -headers, -timeout
289# Results:
290#	Returns a token for this connection. This token is the name of an
291#	array that the caller should unset to garbage collect the state.
292
293proc http::geturl {url args} {
294    variable http
295    variable urlTypes
296    variable defaultCharset
297    variable defaultKeepalive
298    variable strict
299
300    # Initialize the state variable, an array. We'll return the name of this
301    # array as the token for the transaction.
302
303    if {![info exists http(uid)]} {
304	set http(uid) 0
305    }
306    set token [namespace current]::[incr http(uid)]
307    variable $token
308    upvar 0 $token state
309    reset $token
310
311    # Process command options.
312
313    array set state {
314	-binary		false
315	-blocksize	8192
316	-queryblocksize 8192
317	-validate	0
318	-headers	{}
319	-timeout	0
320	-type		application/x-www-form-urlencoded
321	-queryprogress	{}
322	-protocol	1.1
323	binary		0
324	state		connecting
325	meta		{}
326	coding		{}
327	currentsize	0
328	totalsize	0
329	querylength	0
330	queryoffset	0
331	type		text/html
332	body		{}
333	status		""
334	http		""
335	connection	close
336    }
337    set state(-keepalive) $defaultKeepalive
338    set state(-strict) $strict
339    # These flags have their types verified [Bug 811170]
340    array set type {
341	-binary		boolean
342	-blocksize	integer
343	-queryblocksize integer
344	-strict		boolean
345	-timeout	integer
346	-validate	boolean
347    }
348    set state(charset)	$defaultCharset
349    set options {
350	-binary -blocksize -channel -command -handler -headers -keepalive
351	-method -myaddr -progress -protocol -query -queryblocksize
352	-querychannel -queryprogress -strict -timeout -type -validate
353    }
354    set usage [join [lsort $options] ", "]
355    set options [string map {- ""} $options]
356    set pat ^-(?:[join $options |])$
357    foreach {flag value} $args {
358	if {[regexp -- $pat $flag]} {
359	    # Validate numbers
360	    if {
361		[info exists type($flag)] &&
362		![string is $type($flag) -strict $value]
363	    } then {
364		unset $token
365		return -code error \
366		    "Bad value for $flag ($value), must be $type($flag)"
367	    }
368	    set state($flag) $value
369	} else {
370	    unset $token
371	    return -code error "Unknown option $flag, can be: $usage"
372	}
373    }
374
375    # Make sure -query and -querychannel aren't both specified
376
377    set isQueryChannel [info exists state(-querychannel)]
378    set isQuery [info exists state(-query)]
379    if {$isQuery && $isQueryChannel} {
380	unset $token
381	return -code error "Can't combine -query and -querychannel options!"
382    }
383
384    # Validate URL, determine the server host and port, and check proxy case
385    # Recognize user:pass@host URLs also, although we do not do anything with
386    # that info yet.
387
388    # URLs have basically four parts.
389    # First, before the colon, is the protocol scheme (e.g. http)
390    # Second, for HTTP-like protocols, is the authority
391    #	The authority is preceded by // and lasts up to (but not including)
392    #	the following / or ? and it identifies up to four parts, of which
393    #	only one, the host, is required (if an authority is present at all).
394    #	All other parts of the authority (user name, password, port number)
395    #	are optional.
396    # Third is the resource name, which is split into two parts at a ?
397    #	The first part (from the single "/" up to "?") is the path, and the
398    #	second part (from that "?" up to "#") is the query. *HOWEVER*, we do
399    #	not need to separate them; we send the whole lot to the server.
400    #	Both, path and query are allowed to be missing, including their
401    #	delimiting character.
402    # Fourth is the fragment identifier, which is everything after the first
403    #	"#" in the URL. The fragment identifier MUST NOT be sent to the server
404    #	and indeed, we don't bother to validate it (it could be an error to
405    #	pass it in here, but it's cheap to strip).
406    #
407    # An example of a URL that has all the parts:
408    #
409    #     http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
410    #
411    # The "http" is the protocol, the user is "jschmoe", the password is
412    # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
413    # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
414    #
415    # Note that the RE actually combines the user and password parts, as
416    # recommended in RFC 3986. Indeed, that RFC states that putting passwords
417    # in URLs is a Really Bad Idea, something with which I would agree utterly.
418    # Also note that we do not currently support IPv6 addresses.
419    #
420    # From a validation perspective, we need to ensure that the parts of the
421    # URL that are going to the server are correctly encoded.  This is only
422    # done if $state(-strict) is true (inherited from $::http::strict).
423
424    set URLmatcher {(?x)		# this is _expanded_ syntax
425	^
426	(?: (\w+) : ) ?			# <protocol scheme>
427	(?: //
428	    (?:
429		(
430		    [^@/\#?]+		# <userinfo part of authority>
431		) @
432	    )?
433	    ( [^/:\#?]+ )		# <host part of authority>
434	    (?: : (\d+) )?		# <port part of authority>
435	)?
436	( [/\?] [^\#]*)?		# <path> (including query)
437	(?: \# (.*) )?			# <fragment>
438	$
439    }
440
441    # Phase one: parse
442    if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} {
443	unset $token
444	return -code error "Unsupported URL: $url"
445    }
446    # Phase two: validate
447    if {$host eq ""} {
448	# Caller has to provide a host name; we do not have a "default host"
449	# that would enable us to handle relative URLs.
450	unset $token
451	return -code error "Missing host part: $url"
452	# Note that we don't check the hostname for validity here; if it's
453	# invalid, we'll simply fail to resolve it later on.
454    }
455    if {$port ne "" && $port > 65535} {
456	unset $token
457	return -code error "Invalid port number: $port"
458    }
459    # The user identification and resource identification parts of the URL can
460    # have encoded characters in them; take care!
461    if {$user ne ""} {
462	# Check for validity according to RFC 3986, Appendix A
463	set validityRE {(?xi)
464	    ^
465	    (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+
466	    $
467	}
468	if {$state(-strict) && ![regexp -- $validityRE $user]} {
469	    unset $token
470	    # Provide a better error message in this error case
471	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} {
472		return -code error \
473			"Illegal encoding character usage \"$bad\" in URL user"
474	    }
475	    return -code error "Illegal characters in URL user"
476	}
477    }
478    if {$srvurl ne ""} {
479	# RFC 3986 allows empty paths (not even a /), but servers
480	# return 400 if the path in the HTTP request doesn't start
481	# with / , so add it here if needed.
482	if {[string index $srvurl 0] ne "/"} {
483	    set srvurl /$srvurl
484	}
485	# Check for validity according to RFC 3986, Appendix A
486	set validityRE {(?xi)
487	    ^
488	    # Path part (already must start with / character)
489	    (?:	      [-\w.~!$&'()*+,;=:@/]  | %[0-9a-f][0-9a-f] )*
490	    # Query part (optional, permits ? characters)
491	    (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )?
492	    $
493	}
494	if {$state(-strict) && ![regexp -- $validityRE $srvurl]} {
495	    unset $token
496	    # Provide a better error message in this error case
497	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
498		return -code error \
499		    "Illegal encoding character usage \"$bad\" in URL path"
500	    }
501	    return -code error "Illegal characters in URL path"
502	}
503    } else {
504	set srvurl /
505    }
506    if {$proto eq ""} {
507	set proto http
508    }
509    set lower [string tolower $proto]
510    if {![info exists urlTypes($lower)]} {
511	unset $token
512	return -code error "Unsupported URL type \"$proto\""
513    }
514    set defport [lindex $urlTypes($lower) 0]
515    set defcmd [lindex $urlTypes($lower) 1]
516
517    if {$port eq ""} {
518	set port $defport
519    }
520    if {![catch {$http(-proxyfilter) $host} proxy]} {
521	set phost [lindex $proxy 0]
522	set pport [lindex $proxy 1]
523    }
524
525    # OK, now reassemble into a full URL
526    set url ${proto}://
527    if {$user ne ""} {
528	append url $user
529	append url @
530    }
531    append url $host
532    if {$port != $defport} {
533	append url : $port
534    }
535    append url $srvurl
536    # Don't append the fragment!
537    set state(url) $url
538
539    # If a timeout is specified we set up the after event and arrange for an
540    # asynchronous socket connection.
541
542    set sockopts [list -async]
543    if {$state(-timeout) > 0} {
544	set state(after) [after $state(-timeout) \
545		[list http::reset $token timeout]]
546    }
547
548    # If we are using the proxy, we must pass in the full URL that includes
549    # the server name.
550
551    if {[info exists phost] && ($phost ne "")} {
552	set srvurl $url
553	set targetAddr [list $phost $pport]
554    } else {
555	set targetAddr [list $host $port]
556    }
557    # Proxy connections aren't shared among different hosts.
558    set state(socketinfo) $host:$port
559
560    # See if we are supposed to use a previously opened channel.
561    if {$state(-keepalive)} {
562	variable socketmap
563	if {[info exists socketmap($state(socketinfo))]} {
564	    if {[catch {fconfigure $socketmap($state(socketinfo))}]} {
565		Log "WARNING: socket for $state(socketinfo) was closed"
566		unset socketmap($state(socketinfo))
567	    } else {
568		set sock $socketmap($state(socketinfo))
569		Log "reusing socket $sock for $state(socketinfo)"
570		catch {fileevent $sock writable {}}
571		catch {fileevent $sock readable {}}
572	    }
573	}
574	# don't automatically close this connection socket
575	set state(connection) {}
576    }
577    if {![info exists sock]} {
578	# Pass -myaddr directly to the socket command
579	if {[info exists state(-myaddr)]} {
580	    lappend sockopts -myaddr $state(-myaddr)
581	}
582        if {[catch {eval $defcmd $sockopts $targetAddr} sock]} {
583	    # something went wrong while trying to establish the connection.
584	    # Clean up after events and such, but DON'T call the command
585	    # callback (if available) because we're going to throw an
586	    # exception from here instead.
587
588	    set state(sock) $sock
589	    Finish $token "" 1
590	    cleanup $token
591	    return -code error $sock
592        }
593    }
594    set state(sock) $sock
595    Log "Using $sock for $state(socketinfo)" \
596        [expr {$state(-keepalive)?"keepalive":""}]
597    if {$state(-keepalive)} {
598        set socketmap($state(socketinfo)) $sock
599    }
600
601    if {![info exists phost]} {
602	set phost ""
603    }
604    fileevent $sock writable [list http::Connect $token $proto $phost $srvurl]
605
606    # Wait for the connection to complete.
607    if {![info exists state(-command)]} {
608	# geturl does EVERYTHING asynchronously, so if the user
609	# calls it synchronously, we just do a wait here.
610	http::wait $token
611
612	if {![info exists state]} {
613	    # If we timed out then Finish has been called and the users
614	    # command callback may have cleaned up the token. If so we end up
615	    # here with nothing left to do.
616	    return $token
617	} elseif {$state(status) eq "error"} {
618	    # Something went wrong while trying to establish the connection.
619	    # Clean up after events and such, but DON'T call the command
620	    # callback (if available) because we're going to throw an
621	    # exception from here instead.
622	    set err [lindex $state(error) 0]
623	    cleanup $token
624	    return -code error $err
625	}
626    }
627
628    return $token
629}
630
631
632proc http::Connected { token proto phost srvurl} {
633    variable http
634    variable urlTypes
635
636    variable $token
637    upvar 0 $token state
638
639    # Set back the variables needed here
640    set sock $state(sock)
641    set isQueryChannel [info exists state(-querychannel)]
642    set isQuery [info exists state(-query)]
643    set host [lindex [split $state(socketinfo) :] 0]
644    set port [lindex [split $state(socketinfo) :] 1]
645
646    set lower [string tolower $proto]
647    set defport [lindex $urlTypes($lower) 0]
648
649    # Send data in cr-lf format, but accept any line terminators
650
651    fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize)
652
653    # The following is disallowed in safe interpreters, but the socket is
654    # already in non-blocking mode in that case.
655
656    catch {fconfigure $sock -blocking off}
657    set how GET
658    if {$isQuery} {
659	set state(querylength) [string length $state(-query)]
660	if {$state(querylength) > 0} {
661	    set how POST
662	    set contDone 0
663	} else {
664	    # There's no query data.
665	    unset state(-query)
666	    set isQuery 0
667	}
668    } elseif {$state(-validate)} {
669	set how HEAD
670    } elseif {$isQueryChannel} {
671	set how POST
672	# The query channel must be blocking for the async Write to
673	# work properly.
674	fconfigure $state(-querychannel) -blocking 1 -translation binary
675	set contDone 0
676    }
677    if {[info exists state(-method)] && $state(-method) ne ""} {
678	set how $state(-method)
679    }
680
681    if {[catch {
682	puts $sock "$how $srvurl HTTP/$state(-protocol)"
683	puts $sock "Accept: $http(-accept)"
684	array set hdrs $state(-headers)
685	if {[info exists hdrs(Host)]} {
686	    # Allow Host spoofing. [Bug 928154]
687	    puts $sock "Host: $hdrs(Host)"
688	} elseif {$port == $defport} {
689	    # Don't add port in this case, to handle broken servers. [Bug
690	    # #504508]
691	    puts $sock "Host: $host"
692	} else {
693	    puts $sock "Host: $host:$port"
694	}
695	unset hdrs
696	puts $sock "User-Agent: $http(-useragent)"
697        if {$state(-protocol) == 1.0 && $state(-keepalive)} {
698	    puts $sock "Connection: keep-alive"
699        }
700        if {$state(-protocol) > 1.0 && !$state(-keepalive)} {
701	    puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
702        }
703        if {[info exists phost] && ($phost ne "") && $state(-keepalive)} {
704	    puts $sock "Proxy-Connection: Keep-Alive"
705        }
706        set accept_encoding_seen 0
707	set content_type_seen 0
708	foreach {key value} $state(-headers) {
709	    if {[string equal -nocase $key "host"]} {
710		continue
711	    }
712	    if {[string equal -nocase $key "accept-encoding"]} {
713		set accept_encoding_seen 1
714	    }
715	    if {[string equal -nocase $key "content-type"]} {
716		set content_type_seen 1
717	    }
718	    set value [string map [list \n "" \r ""] $value]
719	    set key [string trim $key]
720	    if {[string equal -nocase $key "content-length"]} {
721		set contDone 1
722		set state(querylength) $value
723	    }
724	    if {[string length $key]} {
725		puts $sock "$key: $value"
726	    }
727	}
728	# Soft zlib dependency check - no package require
729        if {
730	    !$accept_encoding_seen &&
731	    ([package vsatisfies [package provide Tcl] 8.6]
732		|| [llength [package provide zlib]]) &&
733	    !([info exists state(-channel)] || [info exists state(-handler)])
734        } then {
735	    puts $sock "Accept-Encoding: gzip, identity, *;q=0.1"
736        }
737	if {$isQueryChannel && $state(querylength) == 0} {
738	    # Try to determine size of data in channel. If we cannot seek, the
739	    # surrounding catch will trap us
740
741	    set start [tell $state(-querychannel)]
742	    seek $state(-querychannel) 0 end
743	    set state(querylength) \
744		    [expr {[tell $state(-querychannel)] - $start}]
745	    seek $state(-querychannel) $start
746	}
747
748	# Flush the request header and set up the fileevent that will either
749	# push the POST data or read the response.
750	#
751	# fileevent note:
752	#
753	# It is possible to have both the read and write fileevents active at
754	# this point. The only scenario it seems to affect is a server that
755	# closes the connection without reading the POST data. (e.g., early
756	# versions TclHttpd in various error cases). Depending on the
757	# platform, the client may or may not be able to get the response from
758	# the server because of the error it will get trying to write the post
759	# data.  Having both fileevents active changes the timing and the
760	# behavior, but no two platforms (among Solaris, Linux, and NT) behave
761	# the same, and none behave all that well in any case. Servers should
762	# always read their POST data if they expect the client to read their
763	# response.
764
765	if {$isQuery || $isQueryChannel} {
766	    if {!$content_type_seen} {
767		puts $sock "Content-Type: $state(-type)"
768	    }
769	    if {!$contDone} {
770		puts $sock "Content-Length: $state(querylength)"
771	    }
772	    puts $sock ""
773	    fconfigure $sock -translation {auto binary}
774	    fileevent $sock writable [list http::Write $token]
775	} else {
776	    puts $sock ""
777	    flush $sock
778	    fileevent $sock readable [list http::Event $sock $token]
779	}
780
781    } err]} then {
782	# The socket probably was never connected, or the connection dropped
783	# later.
784
785	# if state(status) is error, it means someone's already called Finish
786	# to do the above-described clean up.
787	if {$state(status) ne "error"} {
788	    Finish $token $err
789	}
790    }
791
792}
793
794# Data access functions:
795# Data - the URL data
796# Status - the transaction status: ok, reset, eof, timeout
797# Code - the HTTP transaction code, e.g., 200
798# Size - the size of the URL data
799
800proc http::data {token} {
801    variable $token
802    upvar 0 $token state
803    return $state(body)
804}
805proc http::status {token} {
806    if {![info exists $token]} {
807	return "error"
808    }
809    variable $token
810    upvar 0 $token state
811    return $state(status)
812}
813proc http::code {token} {
814    variable $token
815    upvar 0 $token state
816    return $state(http)
817}
818proc http::ncode {token} {
819    variable $token
820    upvar 0 $token state
821    if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
822	return $numeric_code
823    } else {
824	return $state(http)
825    }
826}
827proc http::size {token} {
828    variable $token
829    upvar 0 $token state
830    return $state(currentsize)
831}
832proc http::meta {token} {
833    variable $token
834    upvar 0 $token state
835    return $state(meta)
836}
837proc http::error {token} {
838    variable $token
839    upvar 0 $token state
840    if {[info exists state(error)]} {
841	return $state(error)
842    }
843    return ""
844}
845
846# http::cleanup
847#
848#	Garbage collect the state associated with a transaction
849#
850# Arguments
851#	token	The token returned from http::geturl
852#
853# Side Effects
854#	unsets the state array
855
856proc http::cleanup {token} {
857    variable $token
858    upvar 0 $token state
859    if {[info exists state]} {
860	unset state
861    }
862}
863
864# http::Connect
865#
866#	This callback is made when an asyncronous connection completes.
867#
868# Arguments
869#	token	The token returned from http::geturl
870#
871# Side Effects
872#	Sets the status of the connection, which unblocks
873# 	the waiting geturl call
874
875proc http::Connect {token proto phost srvurl} {
876    variable $token
877    upvar 0 $token state
878    set err "due to unexpected EOF"
879    if {
880	[eof $state(sock)] ||
881	[set err [fconfigure $state(sock) -error]] ne ""
882    } then {
883	Finish $token "connect failed $err"
884    } else {
885	fileevent $state(sock) writable {}
886	::http::Connected $token $proto $phost $srvurl
887    }
888    return
889}
890
891# http::Write
892#
893#	Write POST query data to the socket
894#
895# Arguments
896#	token	The token for the connection
897#
898# Side Effects
899#	Write the socket and handle callbacks.
900
901proc http::Write {token} {
902    variable $token
903    upvar 0 $token state
904    set sock $state(sock)
905
906    # Output a block.  Tcl will buffer this if the socket blocks
907    set done 0
908    if {[catch {
909	# Catch I/O errors on dead sockets
910
911	if {[info exists state(-query)]} {
912	    # Chop up large query strings so queryprogress callback can give
913	    # smooth feedback.
914
915	    puts -nonewline $sock \
916		[string range $state(-query) $state(queryoffset) \
917		     [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
918	    incr state(queryoffset) $state(-queryblocksize)
919	    if {$state(queryoffset) >= $state(querylength)} {
920		set state(queryoffset) $state(querylength)
921		set done 1
922	    }
923	} else {
924	    # Copy blocks from the query channel
925
926	    set outStr [read $state(-querychannel) $state(-queryblocksize)]
927	    puts -nonewline $sock $outStr
928	    incr state(queryoffset) [string length $outStr]
929	    if {[eof $state(-querychannel)]} {
930		set done 1
931	    }
932	}
933    } err]} then {
934	# Do not call Finish here, but instead let the read half of the socket
935	# process whatever server reply there is to get.
936
937	set state(posterror) $err
938	set done 1
939    }
940    if {$done} {
941	catch {flush $sock}
942	fileevent $sock writable {}
943	fileevent $sock readable [list http::Event $sock $token]
944    }
945
946    # Callback to the client after we've completely handled everything.
947
948    if {[string length $state(-queryprogress)]} {
949	eval $state(-queryprogress) \
950	    [list $token $state(querylength) $state(queryoffset)]
951    }
952}
953
954# http::Event
955#
956#	Handle input on the socket
957#
958# Arguments
959#	sock	The socket receiving input.
960#	token	The token returned from http::geturl
961#
962# Side Effects
963#	Read the socket and handle callbacks.
964
965proc http::Event {sock token} {
966    variable $token
967    upvar 0 $token state
968
969    if {![info exists state]} {
970	Log "Event $sock with invalid token '$token' - remote close?"
971	if {![eof $sock]} {
972	    if {[set d [read $sock]] ne ""} {
973		Log "WARNING: additional data left on closed socket"
974	    }
975	}
976	CloseSocket $sock
977	return
978    }
979    if {$state(state) eq "connecting"} {
980	if {[catch {gets $sock state(http)} n]} {
981	    return [Finish $token $n]
982	} elseif {$n >= 0} {
983	    set state(state) "header"
984	}
985    } elseif {$state(state) eq "header"} {
986	if {[catch {gets $sock line} n]} {
987	    return [Finish $token $n]
988	} elseif {$n == 0} {
989	    # We have now read all headers
990	    # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3
991	    if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} {
992		return
993	    }
994
995	    set state(state) body
996
997	    # If doing a HEAD, then we won't get any body
998	    if {$state(-validate)} {
999		Eof $token
1000		return
1001	    }
1002
1003	    # For non-chunked transfer we may have no body - in this case we
1004	    # may get no further file event if the connection doesn't close
1005	    # and no more data is sent. We can tell and must finish up now -
1006	    # not later.
1007	    if {
1008		!(([info exists state(connection)]
1009			&& ($state(connection) eq "close"))
1010		    || [info exists state(transfer)])
1011		&& ($state(totalsize) == 0)
1012	    } then {
1013		Log "body size is 0 and no events likely - complete."
1014		Eof $token
1015		return
1016	    }
1017
1018	    # We have to use binary translation to count bytes properly.
1019	    fconfigure $sock -translation binary
1020
1021	    if {
1022		$state(-binary) || ![string match -nocase text* $state(type)]
1023	    } then {
1024		# Turn off conversions for non-text data
1025		set state(binary) 1
1026	    }
1027	    if {
1028		$state(binary) || [string match *gzip* $state(coding)] ||
1029		[string match *compress* $state(coding)]
1030	    } then {
1031		if {[info exists state(-channel)]} {
1032		    fconfigure $state(-channel) -translation binary
1033		}
1034	    }
1035	    if {
1036		[info exists state(-channel)] &&
1037		![info exists state(-handler)]
1038	    } then {
1039		# Initiate a sequence of background fcopies
1040		fileevent $sock readable {}
1041		CopyStart $sock $token
1042		return
1043	    }
1044	} elseif {$n > 0} {
1045	    # Process header lines
1046	    if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
1047		switch -- [string tolower $key] {
1048		    content-type {
1049			set state(type) [string trim [string tolower $value]]
1050			# grab the optional charset information
1051			if {[regexp -nocase \
1052				 {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
1053				 $state(type) -> cs]} {
1054			    set state(charset) [string map {{\"} \"} $cs]
1055			} else {
1056			    regexp -nocase {charset\s*=\s*(\S+?);?} \
1057				$state(type) -> state(charset)
1058			}
1059		    }
1060		    content-length {
1061			set state(totalsize) [string trim $value]
1062		    }
1063		    content-encoding {
1064			set state(coding) [string trim $value]
1065		    }
1066		    transfer-encoding {
1067			set state(transfer) \
1068			    [string trim [string tolower $value]]
1069		    }
1070		    proxy-connection -
1071		    connection {
1072			set state(connection) \
1073			    [string trim [string tolower $value]]
1074		    }
1075		}
1076		lappend state(meta) $key [string trim $value]
1077	    }
1078	}
1079    } else {
1080	# Now reading body
1081	if {[catch {
1082	    if {[info exists state(-handler)]} {
1083		set n [eval $state(-handler) [list $sock $token]]
1084	    } elseif {[info exists state(transfer_final)]} {
1085		set line [getTextLine $sock]
1086		set n [string length $line]
1087		if {$n > 0} {
1088		    Log "found $n bytes following final chunk"
1089		    append state(transfer_final) $line
1090		} else {
1091		    Log "final chunk part"
1092		    Eof $token
1093		}
1094	    } elseif {
1095		[info exists state(transfer)]
1096		&& $state(transfer) eq "chunked"
1097	    } then {
1098		set size 0
1099		set chunk [getTextLine $sock]
1100		set n [string length $chunk]
1101		if {[string trim $chunk] ne ""} {
1102		    scan $chunk %x size
1103		    if {$size != 0} {
1104			set bl [fconfigure $sock -blocking]
1105			fconfigure $sock -blocking 1
1106			set chunk [read $sock $size]
1107			fconfigure $sock -blocking $bl
1108			set n [string length $chunk]
1109			if {$n >= 0} {
1110			    append state(body) $chunk
1111			}
1112			if {$size != [string length $chunk]} {
1113			    Log "WARNING: mis-sized chunk:\
1114				was [string length $chunk], should be $size"
1115			}
1116			getTextLine $sock
1117		    } else {
1118			set state(transfer_final) {}
1119		    }
1120		}
1121	    } else {
1122		#Log "read non-chunk $state(currentsize) of $state(totalsize)"
1123		set block [read $sock $state(-blocksize)]
1124		set n [string length $block]
1125		if {$n >= 0} {
1126		    append state(body) $block
1127		}
1128	    }
1129	    if {[info exists state]} {
1130		if {$n >= 0} {
1131		    incr state(currentsize) $n
1132		}
1133		# If Content-Length - check for end of data.
1134		if {
1135		    ($state(totalsize) > 0)
1136		    && ($state(currentsize) >= $state(totalsize))
1137		} then {
1138		    Eof $token
1139		}
1140	    }
1141	} err]} then {
1142	    return [Finish $token $err]
1143	} else {
1144	    if {[info exists state(-progress)]} {
1145		eval $state(-progress) \
1146		    [list $token $state(totalsize) $state(currentsize)]
1147	    }
1148	}
1149    }
1150
1151    # catch as an Eof above may have closed the socket already
1152    if {![catch {eof $sock} eof] && $eof} {
1153	if {[info exists $token]} {
1154	    set state(connection) close
1155	    Eof $token
1156	} else {
1157	    # open connection closed on a token that has been cleaned up.
1158	    CloseSocket $sock
1159	}
1160	return
1161    }
1162}
1163
1164# http::getTextLine --
1165#
1166#	Get one line with the stream in blocking crlf mode
1167#
1168# Arguments
1169#	sock	The socket receiving input.
1170#
1171# Results:
1172#	The line of text, without trailing newline
1173
1174proc http::getTextLine {sock} {
1175    set tr [fconfigure $sock -translation]
1176    set bl [fconfigure $sock -blocking]
1177    fconfigure $sock -translation crlf -blocking 1
1178    set r [gets $sock]
1179    fconfigure $sock -translation $tr -blocking $bl
1180    return $r
1181}
1182
1183# http::CopyStart
1184#
1185#	Error handling wrapper around fcopy
1186#
1187# Arguments
1188#	sock	The socket to copy from
1189#	token	The token returned from http::geturl
1190#
1191# Side Effects
1192#	This closes the connection upon error
1193
1194proc http::CopyStart {sock token} {
1195    variable $token
1196    upvar 0 $token state
1197    if {[catch {
1198	fcopy $sock $state(-channel) -size $state(-blocksize) -command \
1199	    [list http::CopyDone $token]
1200    } err]} then {
1201	Finish $token $err
1202    }
1203}
1204
1205# http::CopyDone
1206#
1207#	fcopy completion callback
1208#
1209# Arguments
1210#	token	The token returned from http::geturl
1211#	count	The amount transfered
1212#
1213# Side Effects
1214#	Invokes callbacks
1215
1216proc http::CopyDone {token count {error {}}} {
1217    variable $token
1218    upvar 0 $token state
1219    set sock $state(sock)
1220    incr state(currentsize) $count
1221    if {[info exists state(-progress)]} {
1222	eval $state(-progress) \
1223	    [list $token $state(totalsize) $state(currentsize)]
1224    }
1225    # At this point the token may have been reset
1226    if {[string length $error]} {
1227	Finish $token $error
1228    } elseif {[catch {eof $sock} iseof] || $iseof} {
1229	Eof $token
1230    } else {
1231	CopyStart $sock $token
1232    }
1233}
1234
1235# http::Eof
1236#
1237#	Handle eof on the socket
1238#
1239# Arguments
1240#	token	The token returned from http::geturl
1241#
1242# Side Effects
1243#	Clean up the socket
1244
1245proc http::Eof {token {force 0}} {
1246    variable $token
1247    upvar 0 $token state
1248    if {$state(state) eq "header"} {
1249	# Premature eof
1250	set state(status) eof
1251    } else {
1252	set state(status) ok
1253    }
1254
1255    if {($state(coding) eq "gzip") && [string length $state(body)] > 0} {
1256        if {[catch {
1257	    if {[package vsatisfies [package present Tcl] 8.6]} {
1258		# The zlib integration into 8.6 includes proper gzip support
1259		set state(body) [zlib gunzip $state(body)]
1260	    } else {
1261		set state(body) [Gunzip $state(body)]
1262	    }
1263        } err]} then {
1264	    return [Finish $token $err]
1265        }
1266    }
1267
1268    if {!$state(binary)} {
1269        # If we are getting text, set the incoming channel's encoding
1270        # correctly.  iso8859-1 is the RFC default, but this could be any IANA
1271        # charset.  However, we only know how to convert what we have
1272        # encodings for.
1273
1274        set enc [CharsetToEncoding $state(charset)]
1275        if {$enc ne "binary"} {
1276	    set state(body) [encoding convertfrom $enc $state(body)]
1277        }
1278
1279        # Translate text line endings.
1280        set state(body) [string map {\r\n \n \r \n} $state(body)]
1281    }
1282
1283    Finish $token
1284}
1285
1286# http::wait --
1287#
1288#	See documentation for details.
1289#
1290# Arguments:
1291#	token	Connection token.
1292#
1293# Results:
1294#        The status after the wait.
1295
1296proc http::wait {token} {
1297    variable $token
1298    upvar 0 $token state
1299
1300    if {![info exists state(status)] || $state(status) eq ""} {
1301	# We must wait on the original variable name, not the upvar alias
1302	vwait ${token}(status)
1303    }
1304
1305    return [status $token]
1306}
1307
1308# http::formatQuery --
1309#
1310#	See documentation for details.  Call http::formatQuery with an even
1311#	number of arguments, where the first is a name, the second is a value,
1312#	the third is another name, and so on.
1313#
1314# Arguments:
1315#	args	A list of name-value pairs.
1316#
1317# Results:
1318#	TODO
1319
1320proc http::formatQuery {args} {
1321    set result ""
1322    set sep ""
1323    foreach i $args {
1324	append result $sep [mapReply $i]
1325	if {$sep eq "="} {
1326	    set sep &
1327	} else {
1328	    set sep =
1329	}
1330    }
1331    return $result
1332}
1333
1334# http::mapReply --
1335#
1336#	Do x-www-urlencoded character mapping
1337#
1338# Arguments:
1339#	string	The string the needs to be encoded
1340#
1341# Results:
1342#       The encoded string
1343
1344proc http::mapReply {string} {
1345    variable http
1346    variable formMap
1347
1348    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
1349    # a pre-computed map and [string map] to do the conversion (much faster
1350    # than [regsub]/[subst]). [Bug 1020491]
1351
1352    if {$http(-urlencoding) ne ""} {
1353	set string [encoding convertto $http(-urlencoding) $string]
1354	return [string map $formMap $string]
1355    }
1356    set converted [string map $formMap $string]
1357    if {[string match "*\[\u0100-\uffff\]*" $converted]} {
1358	regexp {[\u0100-\uffff]} $converted badChar
1359	# Return this error message for maximum compatability... :^/
1360	return -code error \
1361	    "can't read \"formMap($badChar)\": no such element in array"
1362    }
1363    return $converted
1364}
1365
1366# http::ProxyRequired --
1367#	Default proxy filter.
1368#
1369# Arguments:
1370#	host	The destination host
1371#
1372# Results:
1373#       The current proxy settings
1374
1375proc http::ProxyRequired {host} {
1376    variable http
1377    if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
1378	if {
1379	    ![info exists http(-proxyport)] ||
1380	    ![string length $http(-proxyport)]
1381	} then {
1382	    set http(-proxyport) 8080
1383	}
1384	return [list $http(-proxyhost) $http(-proxyport)]
1385    }
1386}
1387
1388# http::CharsetToEncoding --
1389#
1390#	Tries to map a given IANA charset to a tcl encoding.  If no encoding
1391#	can be found, returns binary.
1392#
1393
1394proc http::CharsetToEncoding {charset} {
1395    variable encodings
1396
1397    set charset [string tolower $charset]
1398    if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} {
1399	set encoding "iso8859-$num"
1400    } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} {
1401	set encoding "iso2022-$ext"
1402    } elseif {[regexp {shift[-_]?js} $charset]} {
1403	set encoding "shiftjis"
1404    } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} {
1405	set encoding "cp$num"
1406    } elseif {$charset eq "us-ascii"} {
1407	set encoding "ascii"
1408    } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} {
1409	switch -- $num {
1410	    5 {set encoding "iso8859-9"}
1411	    1 - 2 - 3 {
1412		set encoding "iso8859-$num"
1413	    }
1414	}
1415    } else {
1416	# other charset, like euc-xx, utf-8,...  may directly map to encoding
1417	set encoding $charset
1418    }
1419    set idx [lsearch -exact $encodings $encoding]
1420    if {$idx >= 0} {
1421	return $encoding
1422    } else {
1423	return "binary"
1424    }
1425}
1426
1427# http::Gunzip --
1428#
1429#	Decompress data transmitted using the gzip transfer coding.
1430#
1431
1432# FIX ME: redo using zlib sinflate
1433proc http::Gunzip {data} {
1434    binary scan $data Scb5icc magic method flags time xfl os
1435    set pos 10
1436    if {$magic != 0x1f8b} {
1437        return -code error "invalid data: supplied data is not in gzip format"
1438    }
1439    if {$method != 8} {
1440        return -code error "invalid compression method"
1441    }
1442
1443    # lassign [split $flags ""] f_text f_crc f_extra f_name f_comment
1444    foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break
1445    set extra ""
1446    if {$f_extra} {
1447	binary scan $data @${pos}S xlen
1448        incr pos 2
1449        set extra [string range $data $pos $xlen]
1450        set pos [incr xlen]
1451    }
1452
1453    set name ""
1454    if {$f_name} {
1455        set ndx [string first \0 $data $pos]
1456        set name [string range $data $pos $ndx]
1457        set pos [incr ndx]
1458    }
1459
1460    set comment ""
1461    if {$f_comment} {
1462        set ndx [string first \0 $data $pos]
1463        set comment [string range $data $pos $ndx]
1464        set pos [incr ndx]
1465    }
1466
1467    set fcrc ""
1468    if {$f_crc} {
1469	set fcrc [string range $data $pos [incr pos]]
1470        incr pos
1471    }
1472
1473    binary scan [string range $data end-7 end] ii crc size
1474    set inflated [zlib inflate [string range $data $pos end-8]]
1475    set chk [zlib crc32 $inflated]
1476    if {($crc & 0xffffffff) != ($chk & 0xffffffff)} {
1477	return -code error "invalid data: checksum mismatch $crc != $chk"
1478    }
1479    return $inflated
1480}
1481
1482# Local variables:
1483# indent-tabs-mode: t
1484# End:
1485