1 //
2 // HtHTTP.cc
3 //
4 // HtHTTP: Interface classes for HTTP messaging
5 //
6 // Including:
7 // 	 -  Generic class
8 // 	 -  Response message class
9 //
10 // Part of the ht://Dig package   <http://www.htdig.org/>
11 // Copyright (c) 1995-2004 The ht://Dig Group
12 // For copyright details, see the file COPYING in your distribution
13 // or the GNU Library General Public License (LGPL) version 2 or later
14 // <http://www.gnu.org/copyleft/lgpl.html>
15 //
16 // $Id: HtHTTP.cc,v 1.27 2004/05/28 13:15:23 lha Exp $
17 //
18 
19 #ifdef HAVE_CONFIG_H
20 #include "htconfig.h"
21 #endif /* HAVE_CONFIG_H */
22 
23 #include "lib.h"
24 #include "Transport.h"
25 #include "HtHTTP.h"
26 
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <ctype.h>
30 #include <stdio.h>      // for sscanf
31 
32 // for setw()
33 #ifdef HAVE_STD
34 #include <iomanip>
35 #ifdef HAVE_NAMESPACES
36 using namespace std;
37 #endif
38 #else
39 #include <iomanip.h>
40 #endif /* HAVE_STD */
41 
// Function-pointer type named SIGNAL_HANDLER.
// The variadic signature is the variant that is compiled in; the SIG_PF
// based alias is kept only as the disabled alternative.
#if 0
typedef SIG_PF SIGNAL_HANDLER;
#else
typedef void (*SIGNAL_HANDLER) (...);
#endif
47 
48    // User Agent
49    	 String HtHTTP::_user_agent = 0;
50 
51    // Stats information
52    	 int HtHTTP::_tot_seconds = 0;
53    	 int HtHTTP::_tot_requests = 0;
54    	 int HtHTTP::_tot_bytes = 0;
55 
56    // flag that manage the option of 'HEAD' before 'GET'
57          bool HtHTTP::_head_before_get = true;
58 
59    // Handler of the CanParse function
60 
61          int (* HtHTTP::CanBeParsed) (char *) = 0;
62 
63    // Cookies jar
64       	 HtCookieJar *HtHTTP::_cookie_jar = 0;  // Set to 0 by default
65 
66 ///////
67    //    HtHTTP_Response class
68    //
69    //    Response message sent by the remote HTTP server
70 ///////
71 
72 
73 // Construction
74 
HtHTTP_Response()75 HtHTTP_Response::HtHTTP_Response()
76 : _version(0),
77    _transfer_encoding(0),
78    _server(0),
79    _hdrconnection(0),
80    _content_language(0)
81 {
82 }
83 
84 
85 // Destruction
86 
~HtHTTP_Response()87 HtHTTP_Response::~HtHTTP_Response()
88 {
89 }
90 
91 
Reset()92 void HtHTTP_Response::Reset()
93 {
94 
95    // Call the base class method in order to reset
96    // the base class attributes
97 
98    Transport_Response::Reset();
99 
100    // Initialize the version, transfer-encoding, location and server strings
101    _version.trunc();
102    _transfer_encoding.trunc();
103    _hdrconnection.trunc();
104    _server.trunc();
105    _content_language.trunc();
106 
107 }
108 
109 
110 
111 
112 ///////
113    //    HtHTTP generic class
114    //
115    //
116 ///////
117 
118 
119 // Construction
120 
HtHTTP(Connection & connection)121 HtHTTP::HtHTTP(Connection& connection)
122 : Transport(&connection),
123    _Method(Method_GET), // Default Method Request
124    _bytes_read(0),
125    _accept_language(0),
126    _persistent_connection_allowed(true),
127    _persistent_connection_possible(false),
128    _send_cookies(true)
129 {
130 }
131 
132 // Destruction
133 
~HtHTTP()134 HtHTTP::~HtHTTP()
135 {
136 }
137 
138 
139 ///////
140    //    Manages the requesting process
141 ///////
142 
Request()143 Transport::DocStatus HtHTTP::Request()
144 {
145 
146    DocStatus result = Document_ok;
147 
148 ///////
149    //    We make a double request (HEAD and, maybe, GET)
150    //    Depending on the
151 ///////
152 
153    if (HeadBeforeGet() &&                 // Option value to true
154       _Method == Method_GET)              // Initial request method is GET
155    {
156 
157       if (debug>3)
158          cout << "  Making a HEAD call before the GET" << endl;
159 
160       _Method = Method_HEAD;
161 
162       result = HTTPRequest();
163 
164       _Method = Method_GET;
165    }
166 
167    if (result == Document_ok)
168       result = HTTPRequest();
169 
170    if(result == Document_no_header
171       && isPersistentConnectionAllowed())
172    {
173 
174       // Sometimes, the parsing phase of the header of the response
175       // that the server gives us back, fails and a <no header>
176       // error is raised. This happens with HTTP/1.1 persistent
177       // connections, usually because the previous response stream
178       // has not yet been flushed, so the buffer still contains
179       // data regarding the last document retrieved. That sucks alot!
180       // The only thing to do is to lose persistent connections benefits
181       // for this document, so close the connection and 'GET' it again.
182 
183       CloseConnection();      // Close a previous connection
184 
185       if (debug>0)
186          cout << "! Impossible to get the HTTP header line." << endl
187               << "  Connection closed. Try to get it again." << endl;
188 
189       result = HTTPRequest(); // Get the document again
190 
191    }
192 
193    return result;
194 }
195 
196 
197 ///////
   //    Sends an HTTP/1.1 request
199 ///////
200 
HTTPRequest()201 Transport::DocStatus HtHTTP::HTTPRequest()
202 {
203 
204    static Transport::DocStatus DocumentStatus;
205    bool ShouldTheBodyBeRead = true;
206 
207    SetBodyReadingController(&HtHTTP::ReadBody);
208 
209    // Reset the response
210    _response.Reset();
211 
212    // Flush the connection
213    FlushConnection();
214 
215    _bytes_read=0;
216 
217    if( debug > 4)
218    	 cout << "Try to get through to host "
219 	      << _url.host() << " (port " << _url.port() << ")" << endl;
220 
221    ConnectionStatus result;
222 
223    // Assign the timeout
224    AssignConnectionTimeOut();
225 
226    // Assign number of retries
227    AssignConnectionRetries();
228 
229    // Assign connection wait time
230    AssignConnectionWaitTime();
231 
232    // Start the timer
233    _start_time.SettoNow();
234 
235    result = EstablishConnection();
236 
237    if(result != Connection_ok && result != Connection_already_up)
238    {
239 
240       switch (result)
241       {
242          // Open failed
243 
244          case Connection_open_failed:
245             if (debug>1)
246                cout << "Unable to open the connection with host: "
247                   << _url.host() << " (port " << _url.port() << ")" << endl;
248 	    CloseConnection();
249             return FinishRequest(Document_no_connection);
250             break;
251 
252          // Server not reached
253          case Connection_no_server:
254             if (debug>1)
255                cout << "Unable to find the host: "
256                   << _url.host() << " (port " << _url.port() << ")" << endl;
257 	    CloseConnection();
258             return FinishRequest(Document_no_host);
259             break;
260 
261          // Port not reached
262          case Connection_no_port:
263             if (debug>1)
264                cout << "Unable to connect with the port " << _url.port()
265                   << " of the host: " << _url.host() << endl;
266 	    CloseConnection();
267             return FinishRequest(Document_no_port);
268             break;
269 
270          // Connection failed
271          case Connection_failed:
272             if (debug>1)
273                cout << "Unable to establish the connection with host: "
274                   << _url.host() << " (port " << _url.port() << ")" << endl;
275 	    CloseConnection();
276             return FinishRequest(Document_no_connection);
277             break;
278 
279          // Other reason
280          default:
281             if (debug>1)
282                cout << "connection failed with unexpected result: result = "
283                   << (int)result << ", "
284                   << _url.host() << " (port " << _url.port() << ")" << endl;
285 	    CloseConnection();
286             return FinishRequest(Document_other_error);
287             break;
288          }
289 
290    	 return FinishRequest(Document_other_error);
291 
292    }
293 
294    // Visual comments about the result of the connection
295    if (debug > 5)
296    	 switch(result)
297       {
298 	    case Connection_already_up:
299    	       cout << "Taking advantage of persistent connections" << endl;
300    	       break;
301 	    case Connection_ok:
302    	       cout << "New connection open successfully" << endl;
303 	       break;
304 	    default:
305 	       cout << "Unexptected value: " << (int)result << endl;
306 	       break;
307    }
308 
309    String command;
310 
311    switch(_Method)
312    {
313       case Method_GET:
314    	    command = "GET ";
315             break;
316       case Method_HEAD:
317             command = "HEAD ";
318             ShouldTheBodyBeRead = false;
319       break;
320    }
321 
322    // Set the request command
323 
324    SetRequestCommand(command);
325 
326    if (debug > 6)
327       cout << "Request\n" << command;
328 
329    // Writes the command
330    ConnectionWrite(command);
331 
332    // Parse the header
333    if (ParseHeader() == -1) // Connection down
334    {
335 	    // The connection probably fell down !?!
336    	 	 if ( debug > 4 )
337 	       cout << setw(5) << Transport::GetTotOpen() << " - "
338                   << "Connection fell down ... let's close it" << endl;
339 
340 	    CloseConnection();	// Let's close the connection which is down now
341 
342 	    // Return that the connection has fallen down during the request
343 	    return FinishRequest(Document_connection_down);
344    }
345 
346 
347    if (_response._status_code == -1)
348    {
349    	 // Unable to retrieve the status line
350 
351    	 if ( debug > 4 )
352 	    cout << "Unable to retrieve or parse the status line" << endl;
353 
354    	 return FinishRequest(Document_no_header);
355    }
356 
357 
358    if (debug > 3)
359    {
360 
361       cout << "Retrieving document " << _url.path() << " on host: "
362          << _url.host() << ":" << _url.port() << endl;
363 
364       cout << "Http version      : " << _response._version << endl;
365       cout << "Server            : " << _response._version << endl;
366       cout << "Status Code       : " << _response._status_code << endl;
367       cout << "Reason            : " << _response._reason_phrase << endl;
368 
369       if (_response.GetAccessTime())
370       cout << "Access Time       : " << _response.GetAccessTime()->GetRFC1123() << endl;
371 
372       if (_response.GetModificationTime())
373       cout << "Modification Time : " << _response.GetModificationTime()->GetRFC1123() << endl;
374 
375       cout << "Content-type      : " << _response.GetContentType() << endl;
376 
377       if (_response._transfer_encoding.length())
378       cout << "Transfer-encoding : " << _response._transfer_encoding << endl;
379 
380       if (_response._content_language.length())
381       cout << "Content-Language : " << _response._content_language << endl;
382 
383       if (_response._hdrconnection.length())
384       cout << "Connection        : " << _response._hdrconnection << endl;
385 
386    }
387 
388    // Check if persistent connection are possible
389    CheckPersistentConnection(_response);
390 
391    if (debug > 4)
392    	 cout << "Persistent connection: "
393 	    << (_persistent_connection_possible ? "would be accepted" : "not accepted")
394 	    << endl;
395 
396    DocumentStatus = GetDocumentStatus(_response);
397 
398    // We read the body only if the document has been found
399    if (DocumentStatus != Document_ok)
400    {
401       ShouldTheBodyBeRead=false;
402    }
403 
404    // For now a chunked response MUST BE retrieved
405    if (mystrncasecmp ((char*)_response._transfer_encoding, "chunked", 7) == 0)
406    {
407       // Change the controller of the body reading
408       SetBodyReadingController(&HtHTTP::ReadChunkedBody);
409    }
410 
411    // If "ShouldTheBodyBeRead" is set to true and
412    // If the document is parsable, we can read the body
413    // otherwise it is not worthwhile
414 
415    if (ShouldTheBodyBeRead)
416    {
417       if ( debug > 4 )
418          cout << "Reading the body of the response" << endl;
419 
420       // We use a int (HtHTTP::*)() function pointer
421       if ( (this->*_readbody)() == -1 )
422       {
423          // The connection probably fell down !?!
424          if ( debug > 4 )
425             cout  << setw(5) << Transport::GetTotOpen() << " - "
426                << "Connection fell down ... let's close it" << endl;
427 
428          CloseConnection();	// Let's close the connection which is down now
429 
430          // Return that the connection has fallen down during the request
431          return FinishRequest(Document_connection_down);
432       }
433 
434       if ( debug > 6 )
435          cout << "Contents:" << endl << _response.GetContents();
436 
437       // Check if the stream returned by the server has not been completely read
438 
439       if (_response._document_length != _response._content_length &&
440           _response._document_length == _max_document_size)
441       {
442          // Max document size reached
443 
444          if (debug > 4)
445             cout << "Max document size (" << GetRequestMaxDocumentSize()
446                << ") reached ";
447 
448          if (isPersistentConnectionUp())
449          {
450            // Only have to close persistent connection when we didn't read
451            // all the input. For now, we always read all chunked input...
452            if (mystrncasecmp ((char*)_response._transfer_encoding, "chunked", 7) != 0)
453            {
454             if (debug > 4)
455                cout << "- connection closed. ";
456 
457             CloseConnection();
458            }
459          }
460 
461          if (debug > 4)
462             cout << endl;
463       }
464 
465       // Make sure our content-length makes sense, if none given...
466       if (_response._content_length < _response._document_length)
467          _response._content_length = _response._document_length;
468 
469    }
470    else if ( debug > 4 )
471          cout << "Body not retrieved" << endl;
472 
473 
474    // Close the connection (if there's no persistent connection)
475 
476    if( ! isPersistentConnectionUp() )
477    {
478       if ( debug > 4 )
479          cout  << setw(5) << Transport::GetTotOpen() << " - "
480             << "Connection closed (No persistent connection)" << endl;
481 
482       CloseConnection();
483    }
484    else
485    {
486       // Persistent connection is active
487 
488       // If the document is not parsable and we asked for it with a 'GET'
489       // method, the stream's not been completely read.
490 
491       if (DocumentStatus == Document_not_parsable && _Method == Method_GET)
492       {
493          // We have to close the connection.
494          if ( debug > 4 )
495             cout << "Connection must be closed (stream not completely read)"
496                << endl;
497 
498          CloseConnection();
499 
500       }
501       else
502          if ( debug > 4 )
503             cout << "Connection stays up ... (Persistent connection)" << endl;
504    }
505 
506 
507    // Check the doc_status and return a value
508 
509    return FinishRequest(DocumentStatus);
510 
511 }
512 
513 
514 
EstablishConnection()515 HtHTTP::ConnectionStatus HtHTTP::EstablishConnection()
516 {
517 
518    int result;
519 
520    // Open the connection
521    result=OpenConnection();
522 
523    if (!result)
524    	 return Connection_open_failed; // Connection failed
525    else  if(debug > 4)
526          {
527             cout << setw(5) << Transport::GetTotOpen() << " - ";
528 
529    	    if (result == -1)
530 	       cout << "Connection already open. No need to re-open." << endl;
531 	    else
532 	       cout << "Open of the connection ok" << endl;
533          }
534 
535 
536    if(result==1) // New connection open
537    {
538 
539       // Assign the remote host to the connection
540       if ( !AssignConnectionServer() )
541       	 return Connection_no_server;
542 	 else if (debug > 4)
543 	       cout << "\tAssigned the remote host " << _url.host() << endl;
544 
545       // Assign the port of the remote host
546       if ( !AssignConnectionPort() )
547       	 return Connection_no_port;
548 	 else if (debug > 4)
549 	       cout << "\tAssigned the port " << _url.port() << endl;
550    }
551 
552    // Connect
553    if (! (result = Connect()))
554    	 return Connection_failed;
555    else if (result == -1) return Connection_already_up; // Persistent
556 	    else return Connection_ok; // New connection
557 
558 }
559 
560 
561 
562 // Set the string of the HTTP message request
563 
SetRequestCommand(String & cmd)564 void HtHTTP::SetRequestCommand(String &cmd)
565 {
566 
567    // Initialize it
568 
569    if (_useproxy) {
570 	cmd << _url.get() << " HTTP/1.1\r\n";
571    } else
572    cmd << _url.path() << " HTTP/1.1\r\n";
573 
574    // Insert the "virtual" host to which ask the document
575 
576    cmd << "Host: " << _url.host();
577    if (_url.port() != 0 && _url.port() != _url.DefaultPort())
578       cmd << ":" << _url.port();
579    cmd << "\r\n";
580 
581 
582    // Insert the User Agent
583 
584    if (_user_agent.length())
585       cmd << "User-Agent: " << _user_agent << "\r\n";
586 
587 
588    // Referer
589    if (_referer.get().length())
590      cmd << "Referer: " << _referer.get() << "\r\n";
591 
592    // Accept-Language
593    if (_accept_language.length())
594      cmd << "Accept-language: " << _accept_language << "\r\n";
595 
596    // Authentication
597    if (_credentials.length())
598      cmd << "Authorization: Basic " << _credentials << "\r\n";
599 
600    // Proxy Authentication
601    if (_useproxy && _proxy_credentials.length())
602      cmd << "Proxy-Authorization: Basic " << _proxy_credentials << "\r\n";
603 
604    // Accept-Encoding: waiting to handle the gzip and compress formats, we
605    // just send an empty header which, according to the HTTP 1/1 standard,
606    // should let the server know that we only accept the 'identity' case
607    // (no encoding of the document)
608    cmd << "Accept-Encoding: \r\n";
609 
610    // A date has been passed to check if the server one is newer than
611    // the one we already own.
612 
613    if(_modification_time && *_modification_time > 0)
614    {
615        _modification_time->ToGMTime();
616        cmd << "If-Modified-Since: " << _modification_time->GetRFC1123() << "\r\n";
617    }
618 
619 ///////
620    // 	 Cookies! Let's go eat them! ;-)
621 ///////
622 
623    // The method returns all the valid cookies and writes them
624    // directly into the request string, as a list of headers
625    if (_send_cookies && _cookie_jar)
626       _cookie_jar->SetHTTPRequest_CookiesString(_url, cmd);
627 
628 
629    // Let's close the command
630    cmd << "\r\n";
631 
632 }
633 
634 
635 
636 
637 //*****************************************************************************
638 // int HtHTTP::ParseHeader()
639 //   Parse the header of the document
640 //
ParseHeader()641 int HtHTTP::ParseHeader()
642 {
643     String	line = 0;
644     int		inHeader = 1;
645 
646     if (_response._modification_time)
647     {
648 	delete _response._modification_time;
649 	_response._modification_time=0;
650     }
651     while (inHeader)
652     {
653 
654       line.trunc();
655 
656       if(! _connection->Read_Line(line, "\n"))
657          return -1;  // Connection down
658 
659       _bytes_read+=line.length();
660       line.chop('\r');
661 
662       if (line.length() == 0)
663          inHeader = 0;
664       else
665       {
666          // Found a not-empty line
667 
668          if (debug > 2)
669             cout << "Header line: " << line << endl;
670 
671          // Status - Line check
672          char	*token = line.get();
673 
674          while (*token && !isspace(*token) && *token != ':')
675             ++token;
676 
677          while (*token && (isspace(*token) || *token == ':'))
678             ++token;
679 
680          if(!strncmp((char*)line, "HTTP/", 5))
681          {
682             // Here is the status-line
683 
684             // store the HTTP version returned by the server
685             _response._version = strtok(line, " ");
686 
687             // Store the status code
688             _response._status_code = atoi(strtok(0, " "));
689 
690             // Store the reason phrase
691             _response._reason_phrase = strtok(0, "\n");
692 
693          }
694          else if( ! mystrncasecmp((char*)line, "server:", 7))
695          {
696             // Server info
697 
698             // Set the server info
699             token = strtok(token, "\n\t");
700 
701             if (token && *token)
702                _response._server = token;
703 
704          }
705          else if( ! mystrncasecmp((char*)line, "last-modified:", 14))
706          {
707             // Modification date sent by the server
708 
709             // Set the response modification time
710             token = strtok(token, "\n\t");
711 
712             if (token && *token)
713                _response._modification_time = NewDate(token);
714 
715          }
716          else if( ! mystrncasecmp((char*)line, "date:", 5))
717          {
718             // Access date time sent by the server
719 
720             // Set the response access time
721             token = strtok(token, "\n\t");
722 
723             if (token && *token)
724                _response._access_time = NewDate(token);
725 
726          }
727          else if( ! mystrncasecmp((char*)line, "content-type:", 13))
728          {
729             // Content - type
730 
731             token = strtok(token, "\n\t");
732 
733             if (token && *token)
734                _response._content_type = token;
735 
736          }
737          else if( ! mystrncasecmp((char*)line, "content-length:", 15))
738          {
739             // Content - length
740 
741             token = strtok(token, "\n\t");
742 
743             if (token && *token)
744                _response._content_length = atoi(token);
745 
746          }
747          else if( ! mystrncasecmp((char*)line, "transfer-encoding:", 18))
748          {
749             // Transfer-encoding
750 
751             token = strtok(token, "\n\t");
752 
753             if (token && *token)
754                _response._transfer_encoding = token;
755 
756          }
757          else if( ! mystrncasecmp((char*)line, "location:", 9))
758          {
759             // Found a location directive - redirect in act
760 
761             token = strtok(token, "\n\t");
762 
763             if (token && *token)
764                _response._location = token;
765 
766          }
767          else if( ! mystrncasecmp((char*)line, "connection:", 11))
768          {
769             // Ooops ... found a Connection clause
770 
771             token = strtok(token, "\n\t");
772 
773             if (token && *token)
774                _response._hdrconnection = token;
775 
776          }
777          else if( ! mystrncasecmp((char*)line, "content-language:", 17))
778          {
779             // Found a content-language directive
780 
781             token = strtok(token, "\n\t");
782 
783             if (token && *token)
784                _response._content_language = token;
785 
786          }
787          else if( ! mystrncasecmp((char*)line, "set-cookie:", 11))
788          {
789  	    // Found a cookie
790 
791             // Are cookies enabled?
792 			if (_send_cookies && _cookie_jar)
793             {
794                token = strtok(token, "\n\t");
795 
796                if (token && *token)
797 	       {
798       	          // Insert the cookie into the jar
799       	          _cookie_jar->AddCookie(token, _url);
800 	       }
801             }
802 
803          }
804          else
805          {
806             // Discarded
807 
808             if (debug > 3)
809                cout << "Discarded header line: " << line << endl;
810          }
811       }
812     }
813 
814     if (_response._modification_time == 0)
815     {
816       if (debug > 3)
817          cout << "No modification time returned: assuming now" << endl;
818 
819          //Set the modification time
820 	_response._modification_time = new HtDateTime;
821         _response._modification_time->ToGMTime(); // Set to GM time
822 
823     }
824 
825     return 1;
826 
827 }
828 
829 
830 // Check for a document to be parsable
831 // It all depends on the content-type directive returned by the server
832 
isParsable(const char * content_type)833 bool HtHTTP::isParsable(const char *content_type)
834 {
835 
836    // Here I can decide what kind of document I can parse
837    // depending on the value of Transport:_default_parser_content_type
838    // and the rest are determined by the external_parser settings
839 
840    if( ! mystrncasecmp (_default_parser_content_type.get(), content_type,
841       _default_parser_content_type.length()) )
842        return true;
843 
844    // External function that checks if a document is parsable or not.
845    // CanBeParsed should point to a function that returns an int value,
846    // given a char * containing the content-type.
847 
848    if (CanBeParsed && (*CanBeParsed)( (char *) content_type) )
849       return true;
850 
851    return false;
852 
853 }
854 
855 
// Check whether a persistent connection is possible, based on
// the HTTP version of the response message
858 
CheckPersistentConnection(HtHTTP_Response & response)859 void HtHTTP::CheckPersistentConnection(HtHTTP_Response &response)
860 {
861 
862    const char *version = response.GetVersion();
863 
864    if( ! mystrncasecmp ("HTTP/1.1", version, 8))
865    {
866       const char *connection = response.GetConnectionInfo();
867 
868       if( ! mystrncasecmp ("close", connection, 5))
869          _persistent_connection_possible=false; // Server wants to close
870       else _persistent_connection_possible=true;
871 
872    }
873    else
874       _persistent_connection_possible=false;
875 
876 }
877 
878 
FinishRequest(HtHTTP::DocStatus ds)879 HtHTTP::DocStatus HtHTTP::FinishRequest (HtHTTP::DocStatus ds)
880 {
881 
882    int seconds;
883 
884    // Set the finish time
885    _end_time.SettoNow();
886 
887    // Let's add the number of seconds needed by the request
888    seconds=HtDateTime::GetDiff(_end_time, _start_time);
889 
890    _tot_seconds += seconds;
891    _tot_requests ++;
892    _tot_bytes += _bytes_read;
893 
894    if (debug > 2)
895       cout << "Request time: " << seconds << " secs" << endl;
896 
897    return ds;
898 
899 }
900 
901 
GetDocumentStatus(HtHTTP_Response & r)902 HtHTTP::DocStatus HtHTTP::GetDocumentStatus(HtHTTP_Response &r)
903 {
904 
905    // Let's give a look at the return status code
906 
907    HtHTTP::DocStatus returnStatus=Document_not_found;
908    int statuscode;
909 
910    statuscode=r.GetStatusCode();
911 
912    if(statuscode==200)
913    {
914 	    returnStatus = Document_ok;   // OK
915 
916    	    // Is it parsable?
917 
918    	    if (! isParsable ((const char*)r.GetContentType()) )
919    	 	    returnStatus=Document_not_parsable;
920    }
921    else if(statuscode > 200 && statuscode < 300)
922 	    returnStatus = Document_ok;      	   	 // Successful 2xx
923    else if(statuscode==304)
924 	    returnStatus = Document_not_changed;   	 // Not modified
925    else if(statuscode > 300 && statuscode < 400)
926 	    returnStatus = Document_redirect;      	 // Redirection 3xx
927    else if(statuscode==401)
928 	    returnStatus = Document_not_authorized;   // Unauthorized
929 
930    // Exit the function
931    return returnStatus;
932 
933 }
934 
SetCredentials(const String & s)935 void HtHTTP::SetCredentials (const String& s)
936 {
937    Transport::SetHTTPBasicAccessAuthorizationString(_credentials, s);
938 }
939 
940 
SetProxyCredentials(const String & s)941 void HtHTTP::SetProxyCredentials (const String& s)
942 {
943    Transport::SetHTTPBasicAccessAuthorizationString(_proxy_credentials, s);
944 }
945 
ReadBody()946 int HtHTTP::ReadBody()
947 {
948 
949     _response._contents = 0;	// Initialize the string
950 
951     char	docBuffer[8192];
952     int		bytesRead = 0;
953     int		bytesToGo = _response._content_length;
954 
955     if (bytesToGo < 0 || bytesToGo > _max_document_size)
956         bytesToGo = _max_document_size;
957 
958     while (bytesToGo > 0)
959     {
960         int len = bytesToGo< (int)sizeof(docBuffer) ? bytesToGo : (int)sizeof(docBuffer);
961         bytesRead = _connection->Read(docBuffer, len);
962         if (bytesRead <= 0)
963             break;
964 
965 	_response._contents.append(docBuffer, bytesRead);
966 
967 	bytesToGo -= bytesRead;
968 
969 	_bytes_read+=bytesRead;
970 
971     }
972 
973     // Set document length
974     _response._document_length = _response._contents.length();
975 
976    return bytesRead;
977 
978 }
979 
980 
ReadChunkedBody()981 int HtHTTP::ReadChunkedBody()
982 {
983    // Chunked Transfer decoding
984    // as shown in the RFC2616 (HTTP/1.1) - 19.4.6
985 
986 #define  BSIZE  8192
987 
988    int            length = 0;  // initialize the length
989    unsigned int   chunk_size;
990    String         ChunkHeader = 0;
991    char           buffer[BSIZE+1];
992    int		  chunk, rsize;
993 
994    _response._contents.trunc();	// Initialize the string
995 
996    // Read chunk-size and CRLF
997    if (!_connection->Read_Line(ChunkHeader, "\r\n"))
998       return -1;
999 
1000    sscanf ((char *)ChunkHeader, "%x", &chunk_size);
1001 
1002    if (debug>4)
1003       cout << "Initial chunk-size: " << chunk_size << endl;
1004 
1005    while (chunk_size > 0)
1006    {
1007       chunk = chunk_size;
1008 
1009       do {
1010 	if (chunk > BSIZE) {
1011 	  rsize = BSIZE;
1012 	  if (debug>4)
1013 	    cout << "Read chunk partial: left=" <<  chunk << endl;
1014 	} else {
1015 	  rsize = chunk;
1016 	}
1017 	chunk -= rsize;
1018 
1019 	// Read Chunk data
1020 	if (_connection->Read(buffer, rsize) == -1)
1021 	  return -1;
1022 
1023 	length+=rsize;
1024 
1025 	// Append the chunk-data to the contents of the response
1026         // ... but not more than _max_document_size...
1027         if (rsize > _max_document_size-_response._contents.length())
1028             rsize = _max_document_size-_response._contents.length();
1029 	buffer[rsize] = 0;
1030 	_response._contents.append(buffer, rsize);
1031 
1032       } while (chunk);
1033 
1034      //     if (_connection->Read(buffer, chunk_size) == -1)
1035      //       return -1;
1036 
1037       // Read CRLF - to be ignored
1038       if (!_connection->Read_Line(ChunkHeader, "\r\n"))
1039          return -1;
1040 
1041       // Read chunk-size and CRLF
1042       if (!_connection->Read_Line(ChunkHeader, "\r\n"))
1043          return -1;
1044 
1045       sscanf ((char *)ChunkHeader, "%x", &chunk_size);
1046 
1047       if (debug>4)
1048          cout << "Chunk-size: " << chunk_size << endl;
1049    }
1050 
1051    ChunkHeader = 0;
1052 
1053    // Ignoring next part of the body - the TRAILER
1054    // (it contains further headers - not implemented)
1055 
1056     // Set content length
1057    _response._content_length = length;
1058 
1059     // Set document length
1060     _response._document_length = _response._contents.length();
1061 
1062    return length;
1063 
1064 }
1065 
1066 
1067 ///////
1068    //    Show the statistics
1069 ///////
1070 
ShowStatistics(ostream & out)1071 ostream &HtHTTP::ShowStatistics (ostream &out)
1072 {
1073    Transport::ShowStatistics(out);  // call the base class method
1074 
1075    out << " HTTP Requests             : " << GetTotRequests() << endl;
1076    out << " HTTP KBytes requested     : " << (double)GetTotBytes()/1024 << endl;
1077    out << " HTTP Average request time : " << GetAverageRequestTime()
1078       << " secs" << endl;
1079 
1080    out << " HTTP Average speed        : " << GetAverageSpeed()/1024
1081       << " KBytes/secs" << endl;
1082 
1083    return out;
1084 }
1085