1# http.tcl -- 2# 3# Client-side HTTP for GET, POST, and HEAD commands. These routines can 4# be used in untrusted code that uses the Safesock security policy. 5# These procedures use a callback interface to avoid using vwait, which 6# is not defined in the safe base. 7# 8# See the file "license.terms" for information on usage and redistribution of 9# this file, and for a DISCLAIMER OF ALL WARRANTIES. 10 11package require Tcl 8.4 12# Keep this in sync with pkgIndex.tcl and with the install directories in 13# Makefiles 14package provide http 2.7.13 15 16namespace eval http { 17 # Allow resourcing to not clobber existing data 18 19 variable http 20 if {![info exists http]} { 21 array set http { 22 -accept */* 23 -proxyhost {} 24 -proxyport {} 25 -proxyfilter http::ProxyRequired 26 -urlencoding utf-8 27 } 28 set http(-useragent) "Tcl http client package [package provide http]" 29 } 30 31 proc init {} { 32 # Set up the map for quoting chars. RFC3986 Section 2.3 say percent 33 # encode all except: "... percent-encoded octets in the ranges of 34 # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period 35 # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI 36 # producers ..." 37 for {set i 0} {$i <= 256} {incr i} { 38 set c [format %c $i] 39 if {![string match {[-._~a-zA-Z0-9]} $c]} { 40 set map($c) %[format %.2X $i] 41 } 42 } 43 # These are handled specially 44 set map(\n) %0D%0A 45 variable formMap [array get map] 46 47 # Create a map for HTTP/1.1 open sockets 48 variable socketmap 49 if {[info exists socketmap]} { 50 # Close but don't remove open sockets on re-init 51 foreach {url sock} [array get socketmap] { 52 catch {close $sock} 53 } 54 } 55 array set socketmap {} 56 } 57 init 58 59 variable urlTypes 60 if {![info exists urlTypes]} { 61 set urlTypes(http) [list 80 ::socket] 62 } 63 64 variable encodings [string tolower [encoding names]] 65 # This can be changed, but iso8859-1 is the RFC standard. 66 variable defaultCharset 67 if {![info exists defaultCharset]} { 68 set defaultCharset "iso8859-1" 69 } 70 71 # Force RFC 3986 strictness in geturl url verification? 72 variable strict 73 if {![info exists strict]} { 74 set strict 1 75 } 76 77 # Let user control default keepalive for compatibility 78 variable defaultKeepalive 79 if {![info exists defaultKeepalive]} { 80 set defaultKeepalive 0 81 } 82 83 namespace export geturl config reset wait formatQuery register unregister 84 # Useful, but not exported: data size status code 85} 86 87# http::Log -- 88# 89# Debugging output -- define this to observe HTTP/1.1 socket usage. 90# Should echo any args received. 91# 92# Arguments: 93# msg Message to output 94# 95proc http::Log {args} {} 96 97# http::register -- 98# 99# See documentation for details. 100# 101# Arguments: 102# proto URL protocol prefix, e.g. https 103# port Default port for protocol 104# command Command to use to create socket 105# Results: 106# list of port and command that was registered. 107 108proc http::register {proto port command} { 109 variable urlTypes 110 set urlTypes([string tolower $proto]) [list $port $command] 111} 112 113# http::unregister -- 114# 115# Unregisters URL protocol handler 116# 117# Arguments: 118# proto URL protocol prefix, e.g. https 119# Results: 120# list of port and command that was unregistered. 121 122proc http::unregister {proto} { 123 variable urlTypes 124 set lower [string tolower $proto] 125 if {![info exists urlTypes($lower)]} { 126 return -code error "unsupported url type \"$proto\"" 127 } 128 set old $urlTypes($lower) 129 unset urlTypes($lower) 130 return $old 131} 132 133# http::config -- 134# 135# See documentation for details. 136# 137# Arguments: 138# args Options parsed by the procedure. 139# Results: 140# TODO 141 142proc http::config {args} { 143 variable http 144 set options [lsort [array names http -*]] 145 set usage [join $options ", "] 146 if {[llength $args] == 0} { 147 set result {} 148 foreach name $options { 149 lappend result $name $http($name) 150 } 151 return $result 152 } 153 set options [string map {- ""} $options] 154 set pat ^-(?:[join $options |])$ 155 if {[llength $args] == 1} { 156 set flag [lindex $args 0] 157 if {![regexp -- $pat $flag]} { 158 return -code error "Unknown option $flag, must be: $usage" 159 } 160 return $http($flag) 161 } else { 162 foreach {flag value} $args { 163 if {![regexp -- $pat $flag]} { 164 return -code error "Unknown option $flag, must be: $usage" 165 } 166 set http($flag) $value 167 } 168 } 169} 170 171# http::Finish -- 172# 173# Clean up the socket and eval close time callbacks 174# 175# Arguments: 176# token Connection token. 177# errormsg (optional) If set, forces status to error. 178# skipCB (optional) If set, don't call the -command callback. This 179# is useful when geturl wants to throw an exception instead 180# of calling the callback. That way, the same error isn't 181# reported to two places. 182# 183# Side Effects: 184# Closes the socket 185 186proc http::Finish {token {errormsg ""} {skipCB 0}} { 187 variable $token 188 upvar 0 $token state 189 global errorInfo errorCode 190 if {$errormsg ne ""} { 191 set state(error) [list $errormsg $errorInfo $errorCode] 192 set state(status) "error" 193 } 194 if { 195 ($state(status) eq "timeout") || ($state(status) eq "error") || 196 ([info exists state(connection)] && ($state(connection) eq "close")) 197 } then { 198 CloseSocket $state(sock) $token 199 } 200 if {[info exists state(after)]} { 201 after cancel $state(after) 202 } 203 if {[info exists state(-command)] && !$skipCB 204 && ![info exists state(done-command-cb)]} { 205 set state(done-command-cb) yes 206 if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} { 207 set state(error) [list $err $errorInfo $errorCode] 208 set state(status) error 209 } 210 } 211} 212 213# http::CloseSocket - 214# 215# Close a socket and remove it from the persistent sockets table. If 216# possible an http token is included here but when we are called from a 217# fileevent on remote closure we need to find the correct entry - hence 218# the second section. 219 220proc ::http::CloseSocket {s {token {}}} { 221 variable socketmap 222 catch {fileevent $s readable {}} 223 set conn_id {} 224 if {$token ne ""} { 225 variable $token 226 upvar 0 $token state 227 if {[info exists state(socketinfo)]} { 228 set conn_id $state(socketinfo) 229 } 230 } else { 231 set map [array get socketmap] 232 set ndx [lsearch -exact $map $s] 233 if {$ndx != -1} { 234 incr ndx -1 235 set conn_id [lindex $map $ndx] 236 } 237 } 238 if {$conn_id eq {} || ![info exists socketmap($conn_id)]} { 239 Log "Closing socket $s (no connection info)" 240 if {[catch {close $s} err]} { 241 Log "Error: $err" 242 } 243 } else { 244 if {[info exists socketmap($conn_id)]} { 245 Log "Closing connection $conn_id (sock $socketmap($conn_id))" 246 if {[catch {close $socketmap($conn_id)} err]} { 247 Log "Error: $err" 248 } 249 unset socketmap($conn_id) 250 } else { 251 Log "Cannot close connection $conn_id - no socket in socket map" 252 } 253 } 254} 255 256# http::reset -- 257# 258# See documentation for details. 259# 260# Arguments: 261# token Connection token. 262# why Status info. 263# 264# Side Effects: 265# See Finish 266 267proc http::reset {token {why reset}} { 268 variable $token 269 upvar 0 $token state 270 set state(status) $why 271 catch {fileevent $state(sock) readable {}} 272 catch {fileevent $state(sock) writable {}} 273 Finish $token 274 if {[info exists state(error)]} { 275 set errorlist $state(error) 276 unset state 277 eval ::error $errorlist 278 } 279} 280 281# http::geturl -- 282# 283# Establishes a connection to a remote url via http. 284# 285# Arguments: 286# url The http URL to goget. 287# args Option value pairs. Valid options include: 288# -blocksize, -validate, -headers, -timeout 289# Results: 290# Returns a token for this connection. This token is the name of an 291# array that the caller should unset to garbage collect the state. 292 293proc http::geturl {url args} { 294 variable http 295 variable urlTypes 296 variable defaultCharset 297 variable defaultKeepalive 298 variable strict 299 300 # Initialize the state variable, an array. We'll return the name of this 301 # array as the token for the transaction. 302 303 if {![info exists http(uid)]} { 304 set http(uid) 0 305 } 306 set token [namespace current]::[incr http(uid)] 307 variable $token 308 upvar 0 $token state 309 reset $token 310 311 # Process command options. 312 313 array set state { 314 -binary false 315 -blocksize 8192 316 -queryblocksize 8192 317 -validate 0 318 -headers {} 319 -timeout 0 320 -type application/x-www-form-urlencoded 321 -queryprogress {} 322 -protocol 1.1 323 binary 0 324 state connecting 325 meta {} 326 coding {} 327 currentsize 0 328 totalsize 0 329 querylength 0 330 queryoffset 0 331 type text/html 332 body {} 333 status "" 334 http "" 335 connection close 336 } 337 set state(-keepalive) $defaultKeepalive 338 set state(-strict) $strict 339 # These flags have their types verified [Bug 811170] 340 array set type { 341 -binary boolean 342 -blocksize integer 343 -queryblocksize integer 344 -strict boolean 345 -timeout integer 346 -validate boolean 347 } 348 set state(charset) $defaultCharset 349 set options { 350 -binary -blocksize -channel -command -handler -headers -keepalive 351 -method -myaddr -progress -protocol -query -queryblocksize 352 -querychannel -queryprogress -strict -timeout -type -validate 353 } 354 set usage [join [lsort $options] ", "] 355 set options [string map {- ""} $options] 356 set pat ^-(?:[join $options |])$ 357 foreach {flag value} $args { 358 if {[regexp -- $pat $flag]} { 359 # Validate numbers 360 if { 361 [info exists type($flag)] && 362 ![string is $type($flag) -strict $value] 363 } then { 364 unset $token 365 return -code error \ 366 "Bad value for $flag ($value), must be $type($flag)" 367 } 368 set state($flag) $value 369 } else { 370 unset $token 371 return -code error "Unknown option $flag, can be: $usage" 372 } 373 } 374 375 # Make sure -query and -querychannel aren't both specified 376 377 set isQueryChannel [info exists state(-querychannel)] 378 set isQuery [info exists state(-query)] 379 if {$isQuery && $isQueryChannel} { 380 unset $token 381 return -code error "Can't combine -query and -querychannel options!" 382 } 383 384 # Validate URL, determine the server host and port, and check proxy case 385 # Recognize user:pass@host URLs also, although we do not do anything with 386 # that info yet. 387 388 # URLs have basically four parts. 389 # First, before the colon, is the protocol scheme (e.g. http) 390 # Second, for HTTP-like protocols, is the authority 391 # The authority is preceded by // and lasts up to (but not including) 392 # the following / or ? and it identifies up to four parts, of which 393 # only one, the host, is required (if an authority is present at all). 394 # All other parts of the authority (user name, password, port number) 395 # are optional. 396 # Third is the resource name, which is split into two parts at a ? 397 # The first part (from the single "/" up to "?") is the path, and the 398 # second part (from that "?" up to "#") is the query. *HOWEVER*, we do 399 # not need to separate them; we send the whole lot to the server. 400 # Both, path and query are allowed to be missing, including their 401 # delimiting character. 402 # Fourth is the fragment identifier, which is everything after the first 403 # "#" in the URL. The fragment identifier MUST NOT be sent to the server 404 # and indeed, we don't bother to validate it (it could be an error to 405 # pass it in here, but it's cheap to strip). 406 # 407 # An example of a URL that has all the parts: 408 # 409 # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes 410 # 411 # The "http" is the protocol, the user is "jschmoe", the password is 412 # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is 413 # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". 414 # 415 # Note that the RE actually combines the user and password parts, as 416 # recommended in RFC 3986. Indeed, that RFC states that putting passwords 417 # in URLs is a Really Bad Idea, something with which I would agree utterly. 418 # Also note that we do not currently support IPv6 addresses. 419 # 420 # From a validation perspective, we need to ensure that the parts of the 421 # URL that are going to the server are correctly encoded. This is only 422 # done if $state(-strict) is true (inherited from $::http::strict). 423 424 set URLmatcher {(?x) # this is _expanded_ syntax 425 ^ 426 (?: (\w+) : ) ? # <protocol scheme> 427 (?: // 428 (?: 429 ( 430 [^@/\#?]+ # <userinfo part of authority> 431 ) @ 432 )? 433 ( [^/:\#?]+ ) # <host part of authority> 434 (?: : (\d+) )? # <port part of authority> 435 )? 436 ( [/\?] [^\#]*)? # <path> (including query) 437 (?: \# (.*) )? # <fragment> 438 $ 439 } 440 441 # Phase one: parse 442 if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { 443 unset $token 444 return -code error "Unsupported URL: $url" 445 } 446 # Phase two: validate 447 if {$host eq ""} { 448 # Caller has to provide a host name; we do not have a "default host" 449 # that would enable us to handle relative URLs. 450 unset $token 451 return -code error "Missing host part: $url" 452 # Note that we don't check the hostname for validity here; if it's 453 # invalid, we'll simply fail to resolve it later on. 454 } 455 if {$port ne "" && $port > 65535} { 456 unset $token 457 return -code error "Invalid port number: $port" 458 } 459 # The user identification and resource identification parts of the URL can 460 # have encoded characters in them; take care! 461 if {$user ne ""} { 462 # Check for validity according to RFC 3986, Appendix A 463 set validityRE {(?xi) 464 ^ 465 (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ 466 $ 467 } 468 if {$state(-strict) && ![regexp -- $validityRE $user]} { 469 unset $token 470 # Provide a better error message in this error case 471 if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { 472 return -code error \ 473 "Illegal encoding character usage \"$bad\" in URL user" 474 } 475 return -code error "Illegal characters in URL user" 476 } 477 } 478 if {$srvurl ne ""} { 479 # RFC 3986 allows empty paths (not even a /), but servers 480 # return 400 if the path in the HTTP request doesn't start 481 # with / , so add it here if needed. 482 if {[string index $srvurl 0] ne "/"} { 483 set srvurl /$srvurl 484 } 485 # Check for validity according to RFC 3986, Appendix A 486 set validityRE {(?xi) 487 ^ 488 # Path part (already must start with / character) 489 (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* 490 # Query part (optional, permits ? characters) 491 (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? 492 $ 493 } 494 if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { 495 unset $token 496 # Provide a better error message in this error case 497 if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { 498 return -code error \ 499 "Illegal encoding character usage \"$bad\" in URL path" 500 } 501 return -code error "Illegal characters in URL path" 502 } 503 } else { 504 set srvurl / 505 } 506 if {$proto eq ""} { 507 set proto http 508 } 509 set lower [string tolower $proto] 510 if {![info exists urlTypes($lower)]} { 511 unset $token 512 return -code error "Unsupported URL type \"$proto\"" 513 } 514 set defport [lindex $urlTypes($lower) 0] 515 set defcmd [lindex $urlTypes($lower) 1] 516 517 if {$port eq ""} { 518 set port $defport 519 } 520 if {![catch {$http(-proxyfilter) $host} proxy]} { 521 set phost [lindex $proxy 0] 522 set pport [lindex $proxy 1] 523 } 524 525 # OK, now reassemble into a full URL 526 set url ${proto}:// 527 if {$user ne ""} { 528 append url $user 529 append url @ 530 } 531 append url $host 532 if {$port != $defport} { 533 append url : $port 534 } 535 append url $srvurl 536 # Don't append the fragment! 537 set state(url) $url 538 539 # If a timeout is specified we set up the after event and arrange for an 540 # asynchronous socket connection. 541 542 set sockopts [list -async] 543 if {$state(-timeout) > 0} { 544 set state(after) [after $state(-timeout) \ 545 [list http::reset $token timeout]] 546 } 547 548 # If we are using the proxy, we must pass in the full URL that includes 549 # the server name. 550 551 if {[info exists phost] && ($phost ne "")} { 552 set srvurl $url 553 set targetAddr [list $phost $pport] 554 } else { 555 set targetAddr [list $host $port] 556 } 557 # Proxy connections aren't shared among different hosts. 558 set state(socketinfo) $host:$port 559 560 # See if we are supposed to use a previously opened channel. 561 if {$state(-keepalive)} { 562 variable socketmap 563 if {[info exists socketmap($state(socketinfo))]} { 564 if {[catch {fconfigure $socketmap($state(socketinfo))}]} { 565 Log "WARNING: socket for $state(socketinfo) was closed" 566 unset socketmap($state(socketinfo)) 567 } else { 568 set sock $socketmap($state(socketinfo)) 569 Log "reusing socket $sock for $state(socketinfo)" 570 catch {fileevent $sock writable {}} 571 catch {fileevent $sock readable {}} 572 } 573 } 574 # don't automatically close this connection socket 575 set state(connection) {} 576 } 577 if {![info exists sock]} { 578 # Pass -myaddr directly to the socket command 579 if {[info exists state(-myaddr)]} { 580 lappend sockopts -myaddr $state(-myaddr) 581 } 582 if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { 583 # something went wrong while trying to establish the connection. 584 # Clean up after events and such, but DON'T call the command 585 # callback (if available) because we're going to throw an 586 # exception from here instead. 587 588 set state(sock) $sock 589 Finish $token "" 1 590 cleanup $token 591 return -code error $sock 592 } 593 } 594 set state(sock) $sock 595 Log "Using $sock for $state(socketinfo)" \ 596 [expr {$state(-keepalive)?"keepalive":""}] 597 if {$state(-keepalive)} { 598 set socketmap($state(socketinfo)) $sock 599 } 600 601 if {![info exists phost]} { 602 set phost "" 603 } 604 fileevent $sock writable [list http::Connect $token $proto $phost $srvurl] 605 606 # Wait for the connection to complete. 607 if {![info exists state(-command)]} { 608 # geturl does EVERYTHING asynchronously, so if the user 609 # calls it synchronously, we just do a wait here. 610 http::wait $token 611 612 if {![info exists state]} { 613 # If we timed out then Finish has been called and the users 614 # command callback may have cleaned up the token. If so we end up 615 # here with nothing left to do. 616 return $token 617 } elseif {$state(status) eq "error"} { 618 # Something went wrong while trying to establish the connection. 619 # Clean up after events and such, but DON'T call the command 620 # callback (if available) because we're going to throw an 621 # exception from here instead. 622 set err [lindex $state(error) 0] 623 cleanup $token 624 return -code error $err 625 } 626 } 627 628 return $token 629} 630 631 632proc http::Connected { token proto phost srvurl} { 633 variable http 634 variable urlTypes 635 636 variable $token 637 upvar 0 $token state 638 639 # Set back the variables needed here 640 set sock $state(sock) 641 set isQueryChannel [info exists state(-querychannel)] 642 set isQuery [info exists state(-query)] 643 set host [lindex [split $state(socketinfo) :] 0] 644 set port [lindex [split $state(socketinfo) :] 1] 645 646 set lower [string tolower $proto] 647 set defport [lindex $urlTypes($lower) 0] 648 649 # Send data in cr-lf format, but accept any line terminators 650 651 fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize) 652 653 # The following is disallowed in safe interpreters, but the socket is 654 # already in non-blocking mode in that case. 655 656 catch {fconfigure $sock -blocking off} 657 set how GET 658 if {$isQuery} { 659 set state(querylength) [string length $state(-query)] 660 if {$state(querylength) > 0} { 661 set how POST 662 set contDone 0 663 } else { 664 # There's no query data. 665 unset state(-query) 666 set isQuery 0 667 } 668 } elseif {$state(-validate)} { 669 set how HEAD 670 } elseif {$isQueryChannel} { 671 set how POST 672 # The query channel must be blocking for the async Write to 673 # work properly. 674 fconfigure $state(-querychannel) -blocking 1 -translation binary 675 set contDone 0 676 } 677 if {[info exists state(-method)] && $state(-method) ne ""} { 678 set how $state(-method) 679 } 680 681 if {[catch { 682 puts $sock "$how $srvurl HTTP/$state(-protocol)" 683 puts $sock "Accept: $http(-accept)" 684 array set hdrs $state(-headers) 685 if {[info exists hdrs(Host)]} { 686 # Allow Host spoofing. [Bug 928154] 687 puts $sock "Host: $hdrs(Host)" 688 } elseif {$port == $defport} { 689 # Don't add port in this case, to handle broken servers. [Bug 690 # #504508] 691 puts $sock "Host: $host" 692 } else { 693 puts $sock "Host: $host:$port" 694 } 695 unset hdrs 696 puts $sock "User-Agent: $http(-useragent)" 697 if {$state(-protocol) == 1.0 && $state(-keepalive)} { 698 puts $sock "Connection: keep-alive" 699 } 700 if {$state(-protocol) > 1.0 && !$state(-keepalive)} { 701 puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 702 } 703 if {[info exists phost] && ($phost ne "") && $state(-keepalive)} { 704 puts $sock "Proxy-Connection: Keep-Alive" 705 } 706 set accept_encoding_seen 0 707 set content_type_seen 0 708 foreach {key value} $state(-headers) { 709 if {[string equal -nocase $key "host"]} { 710 continue 711 } 712 if {[string equal -nocase $key "accept-encoding"]} { 713 set accept_encoding_seen 1 714 } 715 if {[string equal -nocase $key "content-type"]} { 716 set content_type_seen 1 717 } 718 set value [string map [list \n "" \r ""] $value] 719 set key [string trim $key] 720 if {[string equal -nocase $key "content-length"]} { 721 set contDone 1 722 set state(querylength) $value 723 } 724 if {[string length $key]} { 725 puts $sock "$key: $value" 726 } 727 } 728 # Soft zlib dependency check - no package require 729 if { 730 !$accept_encoding_seen && 731 ([package vsatisfies [package provide Tcl] 8.6] 732 || [llength [package provide zlib]]) && 733 !([info exists state(-channel)] || [info exists state(-handler)]) 734 } then { 735 puts $sock "Accept-Encoding: gzip, identity, *;q=0.1" 736 } 737 if {$isQueryChannel && $state(querylength) == 0} { 738 # Try to determine size of data in channel. If we cannot seek, the 739 # surrounding catch will trap us 740 741 set start [tell $state(-querychannel)] 742 seek $state(-querychannel) 0 end 743 set state(querylength) \ 744 [expr {[tell $state(-querychannel)] - $start}] 745 seek $state(-querychannel) $start 746 } 747 748 # Flush the request header and set up the fileevent that will either 749 # push the POST data or read the response. 750 # 751 # fileevent note: 752 # 753 # It is possible to have both the read and write fileevents active at 754 # this point. The only scenario it seems to affect is a server that 755 # closes the connection without reading the POST data. (e.g., early 756 # versions TclHttpd in various error cases). Depending on the 757 # platform, the client may or may not be able to get the response from 758 # the server because of the error it will get trying to write the post 759 # data. Having both fileevents active changes the timing and the 760 # behavior, but no two platforms (among Solaris, Linux, and NT) behave 761 # the same, and none behave all that well in any case. Servers should 762 # always read their POST data if they expect the client to read their 763 # response. 764 765 if {$isQuery || $isQueryChannel} { 766 if {!$content_type_seen} { 767 puts $sock "Content-Type: $state(-type)" 768 } 769 if {!$contDone} { 770 puts $sock "Content-Length: $state(querylength)" 771 } 772 puts $sock "" 773 fconfigure $sock -translation {auto binary} 774 fileevent $sock writable [list http::Write $token] 775 } else { 776 puts $sock "" 777 flush $sock 778 fileevent $sock readable [list http::Event $sock $token] 779 } 780 781 } err]} then { 782 # The socket probably was never connected, or the connection dropped 783 # later. 784 785 # if state(status) is error, it means someone's already called Finish 786 # to do the above-described clean up. 787 if {$state(status) ne "error"} { 788 Finish $token $err 789 } 790 } 791 792} 793 794# Data access functions: 795# Data - the URL data 796# Status - the transaction status: ok, reset, eof, timeout 797# Code - the HTTP transaction code, e.g., 200 798# Size - the size of the URL data 799 800proc http::data {token} { 801 variable $token 802 upvar 0 $token state 803 return $state(body) 804} 805proc http::status {token} { 806 if {![info exists $token]} { 807 return "error" 808 } 809 variable $token 810 upvar 0 $token state 811 return $state(status) 812} 813proc http::code {token} { 814 variable $token 815 upvar 0 $token state 816 return $state(http) 817} 818proc http::ncode {token} { 819 variable $token 820 upvar 0 $token state 821 if {[regexp {[0-9]{3}} $state(http) numeric_code]} { 822 return $numeric_code 823 } else { 824 return $state(http) 825 } 826} 827proc http::size {token} { 828 variable $token 829 upvar 0 $token state 830 return $state(currentsize) 831} 832proc http::meta {token} { 833 variable $token 834 upvar 0 $token state 835 return $state(meta) 836} 837proc http::error {token} { 838 variable $token 839 upvar 0 $token state 840 if {[info exists state(error)]} { 841 return $state(error) 842 } 843 return "" 844} 845 846# http::cleanup 847# 848# Garbage collect the state associated with a transaction 849# 850# Arguments 851# token The token returned from http::geturl 852# 853# Side Effects 854# unsets the state array 855 856proc http::cleanup {token} { 857 variable $token 858 upvar 0 $token state 859 if {[info exists state]} { 860 unset state 861 } 862} 863 864# http::Connect 865# 866# This callback is made when an asyncronous connection completes. 867# 868# Arguments 869# token The token returned from http::geturl 870# 871# Side Effects 872# Sets the status of the connection, which unblocks 873# the waiting geturl call 874 875proc http::Connect {token proto phost srvurl} { 876 variable $token 877 upvar 0 $token state 878 set err "due to unexpected EOF" 879 if { 880 [eof $state(sock)] || 881 [set err [fconfigure $state(sock) -error]] ne "" 882 } then { 883 Finish $token "connect failed $err" 884 } else { 885 fileevent $state(sock) writable {} 886 ::http::Connected $token $proto $phost $srvurl 887 } 888 return 889} 890 891# http::Write 892# 893# Write POST query data to the socket 894# 895# Arguments 896# token The token for the connection 897# 898# Side Effects 899# Write the socket and handle callbacks. 900 901proc http::Write {token} { 902 variable $token 903 upvar 0 $token state 904 set sock $state(sock) 905 906 # Output a block. Tcl will buffer this if the socket blocks 907 set done 0 908 if {[catch { 909 # Catch I/O errors on dead sockets 910 911 if {[info exists state(-query)]} { 912 # Chop up large query strings so queryprogress callback can give 913 # smooth feedback. 914 915 puts -nonewline $sock \ 916 [string range $state(-query) $state(queryoffset) \ 917 [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] 918 incr state(queryoffset) $state(-queryblocksize) 919 if {$state(queryoffset) >= $state(querylength)} { 920 set state(queryoffset) $state(querylength) 921 set done 1 922 } 923 } else { 924 # Copy blocks from the query channel 925 926 set outStr [read $state(-querychannel) $state(-queryblocksize)] 927 puts -nonewline $sock $outStr 928 incr state(queryoffset) [string length $outStr] 929 if {[eof $state(-querychannel)]} { 930 set done 1 931 } 932 } 933 } err]} then { 934 # Do not call Finish here, but instead let the read half of the socket 935 # process whatever server reply there is to get. 936 937 set state(posterror) $err 938 set done 1 939 } 940 if {$done} { 941 catch {flush $sock} 942 fileevent $sock writable {} 943 fileevent $sock readable [list http::Event $sock $token] 944 } 945 946 # Callback to the client after we've completely handled everything. 947 948 if {[string length $state(-queryprogress)]} { 949 eval $state(-queryprogress) \ 950 [list $token $state(querylength) $state(queryoffset)] 951 } 952} 953 954# http::Event 955# 956# Handle input on the socket 957# 958# Arguments 959# sock The socket receiving input. 960# token The token returned from http::geturl 961# 962# Side Effects 963# Read the socket and handle callbacks. 964 965proc http::Event {sock token} { 966 variable $token 967 upvar 0 $token state 968 969 if {![info exists state]} { 970 Log "Event $sock with invalid token '$token' - remote close?" 971 if {![eof $sock]} { 972 if {[set d [read $sock]] ne ""} { 973 Log "WARNING: additional data left on closed socket" 974 } 975 } 976 CloseSocket $sock 977 return 978 } 979 if {$state(state) eq "connecting"} { 980 if {[catch {gets $sock state(http)} n]} { 981 return [Finish $token $n] 982 } elseif {$n >= 0} { 983 set state(state) "header" 984 } 985 } elseif {$state(state) eq "header"} { 986 if {[catch {gets $sock line} n]} { 987 return [Finish $token $n] 988 } elseif {$n == 0} { 989 # We have now read all headers 990 # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 991 if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} { 992 return 993 } 994 995 set state(state) body 996 997 # If doing a HEAD, then we won't get any body 998 if {$state(-validate)} { 999 Eof $token 1000 return 1001 } 1002 1003 # For non-chunked transfer we may have no body - in this case we 1004 # may get no further file event if the connection doesn't close 1005 # and no more data is sent. We can tell and must finish up now - 1006 # not later. 1007 if { 1008 !(([info exists state(connection)] 1009 && ($state(connection) eq "close")) 1010 || [info exists state(transfer)]) 1011 && ($state(totalsize) == 0) 1012 } then { 1013 Log "body size is 0 and no events likely - complete." 1014 Eof $token 1015 return 1016 } 1017 1018 # We have to use binary translation to count bytes properly. 1019 fconfigure $sock -translation binary 1020 1021 if { 1022 $state(-binary) || ![string match -nocase text* $state(type)] 1023 } then { 1024 # Turn off conversions for non-text data 1025 set state(binary) 1 1026 } 1027 if { 1028 $state(binary) || [string match *gzip* $state(coding)] || 1029 [string match *compress* $state(coding)] 1030 } then { 1031 if {[info exists state(-channel)]} { 1032 fconfigure $state(-channel) -translation binary 1033 } 1034 } 1035 if { 1036 [info exists state(-channel)] && 1037 ![info exists state(-handler)] 1038 } then { 1039 # Initiate a sequence of background fcopies 1040 fileevent $sock readable {} 1041 CopyStart $sock $token 1042 return 1043 } 1044 } elseif {$n > 0} { 1045 # Process header lines 1046 if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { 1047 switch -- [string tolower $key] { 1048 content-type { 1049 set state(type) [string trim [string tolower $value]] 1050 # grab the optional charset information 1051 if {[regexp -nocase \ 1052 {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \ 1053 $state(type) -> cs]} { 1054 set state(charset) [string map {{\"} \"} $cs] 1055 } else { 1056 regexp -nocase {charset\s*=\s*(\S+?);?} \ 1057 $state(type) -> state(charset) 1058 } 1059 } 1060 content-length { 1061 set state(totalsize) [string trim $value] 1062 } 1063 content-encoding { 1064 set state(coding) [string trim $value] 1065 } 1066 transfer-encoding { 1067 set state(transfer) \ 1068 [string trim [string tolower $value]] 1069 } 1070 proxy-connection - 1071 connection { 1072 set state(connection) \ 1073 [string trim [string tolower $value]] 1074 } 1075 } 1076 lappend state(meta) $key [string trim $value] 1077 } 1078 } 1079 } else { 1080 # Now reading body 1081 if {[catch { 1082 if {[info exists state(-handler)]} { 1083 set n [eval $state(-handler) [list $sock $token]] 1084 } elseif {[info exists state(transfer_final)]} { 1085 set line [getTextLine $sock] 1086 set n [string length $line] 1087 if {$n > 0} { 1088 Log "found $n bytes following final chunk" 1089 append state(transfer_final) $line 1090 } else { 1091 Log "final chunk part" 1092 Eof $token 1093 } 1094 } elseif { 1095 [info exists state(transfer)] 1096 && $state(transfer) eq "chunked" 1097 } then { 1098 set size 0 1099 set chunk [getTextLine $sock] 1100 set n [string length $chunk] 1101 if {[string trim $chunk] ne ""} { 1102 scan $chunk %x size 1103 if {$size != 0} { 1104 set bl [fconfigure $sock -blocking] 1105 fconfigure $sock -blocking 1 1106 set chunk [read $sock $size] 1107 fconfigure $sock -blocking $bl 1108 set n [string length $chunk] 1109 if {$n >= 0} { 1110 append state(body) $chunk 1111 } 1112 if {$size != [string length $chunk]} { 1113 Log "WARNING: mis-sized chunk:\ 1114 was [string length $chunk], should be $size" 1115 } 1116 getTextLine $sock 1117 } else { 1118 set state(transfer_final) {} 1119 } 1120 } 1121 } else { 1122 #Log "read non-chunk $state(currentsize) of $state(totalsize)" 1123 set block [read $sock $state(-blocksize)] 1124 set n [string length $block] 1125 if {$n >= 0} { 1126 append state(body) $block 1127 } 1128 } 1129 if {[info exists state]} { 1130 if {$n >= 0} { 1131 incr state(currentsize) $n 1132 } 1133 # If Content-Length - check for end of data. 1134 if { 1135 ($state(totalsize) > 0) 1136 && ($state(currentsize) >= $state(totalsize)) 1137 } then { 1138 Eof $token 1139 } 1140 } 1141 } err]} then { 1142 return [Finish $token $err] 1143 } else { 1144 if {[info exists state(-progress)]} { 1145 eval $state(-progress) \ 1146 [list $token $state(totalsize) $state(currentsize)] 1147 } 1148 } 1149 } 1150 1151 # catch as an Eof above may have closed the socket already 1152 if {![catch {eof $sock} eof] && $eof} { 1153 if {[info exists $token]} { 1154 set state(connection) close 1155 Eof $token 1156 } else { 1157 # open connection closed on a token that has been cleaned up. 1158 CloseSocket $sock 1159 } 1160 return 1161 } 1162} 1163 1164# http::getTextLine -- 1165# 1166# Get one line with the stream in blocking crlf mode 1167# 1168# Arguments 1169# sock The socket receiving input. 1170# 1171# Results: 1172# The line of text, without trailing newline 1173 1174proc http::getTextLine {sock} { 1175 set tr [fconfigure $sock -translation] 1176 set bl [fconfigure $sock -blocking] 1177 fconfigure $sock -translation crlf -blocking 1 1178 set r [gets $sock] 1179 fconfigure $sock -translation $tr -blocking $bl 1180 return $r 1181} 1182 1183# http::CopyStart 1184# 1185# Error handling wrapper around fcopy 1186# 1187# Arguments 1188# sock The socket to copy from 1189# token The token returned from http::geturl 1190# 1191# Side Effects 1192# This closes the connection upon error 1193 1194proc http::CopyStart {sock token} { 1195 variable $token 1196 upvar 0 $token state 1197 if {[catch { 1198 fcopy $sock $state(-channel) -size $state(-blocksize) -command \ 1199 [list http::CopyDone $token] 1200 } err]} then { 1201 Finish $token $err 1202 } 1203} 1204 1205# http::CopyDone 1206# 1207# fcopy completion callback 1208# 1209# Arguments 1210# token The token returned from http::geturl 1211# count The amount transfered 1212# 1213# Side Effects 1214# Invokes callbacks 1215 1216proc http::CopyDone {token count {error {}}} { 1217 variable $token 1218 upvar 0 $token state 1219 set sock $state(sock) 1220 incr state(currentsize) $count 1221 if {[info exists state(-progress)]} { 1222 eval $state(-progress) \ 1223 [list $token $state(totalsize) $state(currentsize)] 1224 } 1225 # At this point the token may have been reset 1226 if {[string length $error]} { 1227 Finish $token $error 1228 } elseif {[catch {eof $sock} iseof] || $iseof} { 1229 Eof $token 1230 } else { 1231 CopyStart $sock $token 1232 } 1233} 1234 1235# http::Eof 1236# 1237# Handle eof on the socket 1238# 1239# Arguments 1240# token The token returned from http::geturl 1241# 1242# Side Effects 1243# Clean up the socket 1244 1245proc http::Eof {token {force 0}} { 1246 variable $token 1247 upvar 0 $token state 1248 if {$state(state) eq "header"} { 1249 # Premature eof 1250 set state(status) eof 1251 } else { 1252 set state(status) ok 1253 } 1254 1255 if {($state(coding) eq "gzip") && [string length $state(body)] > 0} { 1256 if {[catch { 1257 if {[package vsatisfies [package present Tcl] 8.6]} { 1258 # The zlib integration into 8.6 includes proper gzip support 1259 set state(body) [zlib gunzip $state(body)] 1260 } else { 1261 set state(body) [Gunzip $state(body)] 1262 } 1263 } err]} then { 1264 return [Finish $token $err] 1265 } 1266 } 1267 1268 if {!$state(binary)} { 1269 # If we are getting text, set the incoming channel's encoding 1270 # correctly. iso8859-1 is the RFC default, but this could be any IANA 1271 # charset. However, we only know how to convert what we have 1272 # encodings for. 1273 1274 set enc [CharsetToEncoding $state(charset)] 1275 if {$enc ne "binary"} { 1276 set state(body) [encoding convertfrom $enc $state(body)] 1277 } 1278 1279 # Translate text line endings. 1280 set state(body) [string map {\r\n \n \r \n} $state(body)] 1281 } 1282 1283 Finish $token 1284} 1285 1286# http::wait -- 1287# 1288# See documentation for details. 1289# 1290# Arguments: 1291# token Connection token. 1292# 1293# Results: 1294# The status after the wait. 1295 1296proc http::wait {token} { 1297 variable $token 1298 upvar 0 $token state 1299 1300 if {![info exists state(status)] || $state(status) eq ""} { 1301 # We must wait on the original variable name, not the upvar alias 1302 vwait ${token}(status) 1303 } 1304 1305 return [status $token] 1306} 1307 1308# http::formatQuery -- 1309# 1310# See documentation for details. Call http::formatQuery with an even 1311# number of arguments, where the first is a name, the second is a value, 1312# the third is another name, and so on. 1313# 1314# Arguments: 1315# args A list of name-value pairs. 1316# 1317# Results: 1318# TODO 1319 1320proc http::formatQuery {args} { 1321 set result "" 1322 set sep "" 1323 foreach i $args { 1324 append result $sep [mapReply $i] 1325 if {$sep eq "="} { 1326 set sep & 1327 } else { 1328 set sep = 1329 } 1330 } 1331 return $result 1332} 1333 1334# http::mapReply -- 1335# 1336# Do x-www-urlencoded character mapping 1337# 1338# Arguments: 1339# string The string the needs to be encoded 1340# 1341# Results: 1342# The encoded string 1343 1344proc http::mapReply {string} { 1345 variable http 1346 variable formMap 1347 1348 # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use 1349 # a pre-computed map and [string map] to do the conversion (much faster 1350 # than [regsub]/[subst]). [Bug 1020491] 1351 1352 if {$http(-urlencoding) ne ""} { 1353 set string [encoding convertto $http(-urlencoding) $string] 1354 return [string map $formMap $string] 1355 } 1356 set converted [string map $formMap $string] 1357 if {[string match "*\[\u0100-\uffff\]*" $converted]} { 1358 regexp {[\u0100-\uffff]} $converted badChar 1359 # Return this error message for maximum compatability... :^/ 1360 return -code error \ 1361 "can't read \"formMap($badChar)\": no such element in array" 1362 } 1363 return $converted 1364} 1365 1366# http::ProxyRequired -- 1367# Default proxy filter. 1368# 1369# Arguments: 1370# host The destination host 1371# 1372# Results: 1373# The current proxy settings 1374 1375proc http::ProxyRequired {host} { 1376 variable http 1377 if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { 1378 if { 1379 ![info exists http(-proxyport)] || 1380 ![string length $http(-proxyport)] 1381 } then { 1382 set http(-proxyport) 8080 1383 } 1384 return [list $http(-proxyhost) $http(-proxyport)] 1385 } 1386} 1387 1388# http::CharsetToEncoding -- 1389# 1390# Tries to map a given IANA charset to a tcl encoding. If no encoding 1391# can be found, returns binary. 1392# 1393 1394proc http::CharsetToEncoding {charset} { 1395 variable encodings 1396 1397 set charset [string tolower $charset] 1398 if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} { 1399 set encoding "iso8859-$num" 1400 } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { 1401 set encoding "iso2022-$ext" 1402 } elseif {[regexp {shift[-_]?js} $charset]} { 1403 set encoding "shiftjis" 1404 } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { 1405 set encoding "cp$num" 1406 } elseif {$charset eq "us-ascii"} { 1407 set encoding "ascii" 1408 } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} { 1409 switch -- $num { 1410 5 {set encoding "iso8859-9"} 1411 1 - 2 - 3 { 1412 set encoding "iso8859-$num" 1413 } 1414 } 1415 } else { 1416 # other charset, like euc-xx, utf-8,... may directly map to encoding 1417 set encoding $charset 1418 } 1419 set idx [lsearch -exact $encodings $encoding] 1420 if {$idx >= 0} { 1421 return $encoding 1422 } else { 1423 return "binary" 1424 } 1425} 1426 1427# http::Gunzip -- 1428# 1429# Decompress data transmitted using the gzip transfer coding. 1430# 1431 1432# FIX ME: redo using zlib sinflate 1433proc http::Gunzip {data} { 1434 binary scan $data Scb5icc magic method flags time xfl os 1435 set pos 10 1436 if {$magic != 0x1f8b} { 1437 return -code error "invalid data: supplied data is not in gzip format" 1438 } 1439 if {$method != 8} { 1440 return -code error "invalid compression method" 1441 } 1442 1443 # lassign [split $flags ""] f_text f_crc f_extra f_name f_comment 1444 foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break 1445 set extra "" 1446 if {$f_extra} { 1447 binary scan $data @${pos}S xlen 1448 incr pos 2 1449 set extra [string range $data $pos $xlen] 1450 set pos [incr xlen] 1451 } 1452 1453 set name "" 1454 if {$f_name} { 1455 set ndx [string first \0 $data $pos] 1456 set name [string range $data $pos $ndx] 1457 set pos [incr ndx] 1458 } 1459 1460 set comment "" 1461 if {$f_comment} { 1462 set ndx [string first \0 $data $pos] 1463 set comment [string range $data $pos $ndx] 1464 set pos [incr ndx] 1465 } 1466 1467 set fcrc "" 1468 if {$f_crc} { 1469 set fcrc [string range $data $pos [incr pos]] 1470 incr pos 1471 } 1472 1473 binary scan [string range $data end-7 end] ii crc size 1474 set inflated [zlib inflate [string range $data $pos end-8]] 1475 set chk [zlib crc32 $inflated] 1476 if {($crc & 0xffffffff) != ($chk & 0xffffffff)} { 1477 return -code error "invalid data: checksum mismatch $crc != $chk" 1478 } 1479 return $inflated 1480} 1481 1482# Local variables: 1483# indent-tabs-mode: t 1484# End: 1485