1 //
2 // HtHTTP.cc
3 //
4 // HtHTTP: Interface classes for HTTP messaging
5 //
6 // Including:
7 // - Generic class
8 // - Response message class
9 //
10 // Part of the ht://Dig package <http://www.htdig.org/>
11 // Copyright (c) 1995-2004 The ht://Dig Group
12 // For copyright details, see the file COPYING in your distribution
13 // or the GNU Library General Public License (LGPL) version 2 or later
14 // <http://www.gnu.org/copyleft/lgpl.html>
15 //
16 // $Id: HtHTTP.cc,v 1.27 2004/05/28 13:15:23 lha Exp $
17 //
18
19 #ifdef HAVE_CONFIG_H
20 #include "htconfig.h"
21 #endif /* HAVE_CONFIG_H */
22
23 #include "lib.h"
24 #include "Transport.h"
25 #include "HtHTTP.h"
26
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <ctype.h>
30 #include <stdio.h> // for sscanf
31
32 // for setw()
33 #ifdef HAVE_STD
34 #include <iomanip>
35 #ifdef HAVE_NAMESPACES
36 using namespace std;
37 #endif
38 #else
39 #include <iomanip.h>
40 #endif /* HAVE_STD */
41
// Signal handler pointer type.  The variadic form is the one compiled in;
// the SIG_PF based alternative is kept but disabled by the "#if 1".
#if 1
typedef void (*SIGNAL_HANDLER) (...);
#else
typedef SIG_PF SIGNAL_HANDLER;
#endif
47
48 // User Agent
49 String HtHTTP::_user_agent = 0;
50
51 // Stats information
52 int HtHTTP::_tot_seconds = 0;
53 int HtHTTP::_tot_requests = 0;
54 int HtHTTP::_tot_bytes = 0;
55
56 // flag that manage the option of 'HEAD' before 'GET'
57 bool HtHTTP::_head_before_get = true;
58
59 // Handler of the CanParse function
60
61 int (* HtHTTP::CanBeParsed) (char *) = 0;
62
63 // Cookies jar
64 HtCookieJar *HtHTTP::_cookie_jar = 0; // Set to 0 by default
65
66 ///////
67 // HtHTTP_Response class
68 //
69 // Response message sent by the remote HTTP server
70 ///////
71
72
73 // Construction
74
HtHTTP_Response()75 HtHTTP_Response::HtHTTP_Response()
76 : _version(0),
77 _transfer_encoding(0),
78 _server(0),
79 _hdrconnection(0),
80 _content_language(0)
81 {
82 }
83
84
85 // Destruction
86
~HtHTTP_Response()87 HtHTTP_Response::~HtHTTP_Response()
88 {
89 }
90
91
Reset()92 void HtHTTP_Response::Reset()
93 {
94
95 // Call the base class method in order to reset
96 // the base class attributes
97
98 Transport_Response::Reset();
99
100 // Initialize the version, transfer-encoding, location and server strings
101 _version.trunc();
102 _transfer_encoding.trunc();
103 _hdrconnection.trunc();
104 _server.trunc();
105 _content_language.trunc();
106
107 }
108
109
110
111
112 ///////
113 // HtHTTP generic class
114 //
115 //
116 ///////
117
118
119 // Construction
120
HtHTTP(Connection & connection)121 HtHTTP::HtHTTP(Connection& connection)
122 : Transport(&connection),
123 _Method(Method_GET), // Default Method Request
124 _bytes_read(0),
125 _accept_language(0),
126 _persistent_connection_allowed(true),
127 _persistent_connection_possible(false),
128 _send_cookies(true)
129 {
130 }
131
132 // Destruction
133
~HtHTTP()134 HtHTTP::~HtHTTP()
135 {
136 }
137
138
139 ///////
140 // Manages the requesting process
141 ///////
142
Request()143 Transport::DocStatus HtHTTP::Request()
144 {
145
146 DocStatus result = Document_ok;
147
148 ///////
149 // We make a double request (HEAD and, maybe, GET)
150 // Depending on the
151 ///////
152
153 if (HeadBeforeGet() && // Option value to true
154 _Method == Method_GET) // Initial request method is GET
155 {
156
157 if (debug>3)
158 cout << " Making a HEAD call before the GET" << endl;
159
160 _Method = Method_HEAD;
161
162 result = HTTPRequest();
163
164 _Method = Method_GET;
165 }
166
167 if (result == Document_ok)
168 result = HTTPRequest();
169
170 if(result == Document_no_header
171 && isPersistentConnectionAllowed())
172 {
173
174 // Sometimes, the parsing phase of the header of the response
175 // that the server gives us back, fails and a <no header>
176 // error is raised. This happens with HTTP/1.1 persistent
177 // connections, usually because the previous response stream
178 // has not yet been flushed, so the buffer still contains
179 // data regarding the last document retrieved. That sucks alot!
180 // The only thing to do is to lose persistent connections benefits
181 // for this document, so close the connection and 'GET' it again.
182
183 CloseConnection(); // Close a previous connection
184
185 if (debug>0)
186 cout << "! Impossible to get the HTTP header line." << endl
187 << " Connection closed. Try to get it again." << endl;
188
189 result = HTTPRequest(); // Get the document again
190
191 }
192
193 return result;
194 }
195
196
197 ///////
// Sends an HTTP/1.1 request
199 ///////
200
HTTPRequest()201 Transport::DocStatus HtHTTP::HTTPRequest()
202 {
203
204 static Transport::DocStatus DocumentStatus;
205 bool ShouldTheBodyBeRead = true;
206
207 SetBodyReadingController(&HtHTTP::ReadBody);
208
209 // Reset the response
210 _response.Reset();
211
212 // Flush the connection
213 FlushConnection();
214
215 _bytes_read=0;
216
217 if( debug > 4)
218 cout << "Try to get through to host "
219 << _url.host() << " (port " << _url.port() << ")" << endl;
220
221 ConnectionStatus result;
222
223 // Assign the timeout
224 AssignConnectionTimeOut();
225
226 // Assign number of retries
227 AssignConnectionRetries();
228
229 // Assign connection wait time
230 AssignConnectionWaitTime();
231
232 // Start the timer
233 _start_time.SettoNow();
234
235 result = EstablishConnection();
236
237 if(result != Connection_ok && result != Connection_already_up)
238 {
239
240 switch (result)
241 {
242 // Open failed
243
244 case Connection_open_failed:
245 if (debug>1)
246 cout << "Unable to open the connection with host: "
247 << _url.host() << " (port " << _url.port() << ")" << endl;
248 CloseConnection();
249 return FinishRequest(Document_no_connection);
250 break;
251
252 // Server not reached
253 case Connection_no_server:
254 if (debug>1)
255 cout << "Unable to find the host: "
256 << _url.host() << " (port " << _url.port() << ")" << endl;
257 CloseConnection();
258 return FinishRequest(Document_no_host);
259 break;
260
261 // Port not reached
262 case Connection_no_port:
263 if (debug>1)
264 cout << "Unable to connect with the port " << _url.port()
265 << " of the host: " << _url.host() << endl;
266 CloseConnection();
267 return FinishRequest(Document_no_port);
268 break;
269
270 // Connection failed
271 case Connection_failed:
272 if (debug>1)
273 cout << "Unable to establish the connection with host: "
274 << _url.host() << " (port " << _url.port() << ")" << endl;
275 CloseConnection();
276 return FinishRequest(Document_no_connection);
277 break;
278
279 // Other reason
280 default:
281 if (debug>1)
282 cout << "connection failed with unexpected result: result = "
283 << (int)result << ", "
284 << _url.host() << " (port " << _url.port() << ")" << endl;
285 CloseConnection();
286 return FinishRequest(Document_other_error);
287 break;
288 }
289
290 return FinishRequest(Document_other_error);
291
292 }
293
294 // Visual comments about the result of the connection
295 if (debug > 5)
296 switch(result)
297 {
298 case Connection_already_up:
299 cout << "Taking advantage of persistent connections" << endl;
300 break;
301 case Connection_ok:
302 cout << "New connection open successfully" << endl;
303 break;
304 default:
305 cout << "Unexptected value: " << (int)result << endl;
306 break;
307 }
308
309 String command;
310
311 switch(_Method)
312 {
313 case Method_GET:
314 command = "GET ";
315 break;
316 case Method_HEAD:
317 command = "HEAD ";
318 ShouldTheBodyBeRead = false;
319 break;
320 }
321
322 // Set the request command
323
324 SetRequestCommand(command);
325
326 if (debug > 6)
327 cout << "Request\n" << command;
328
329 // Writes the command
330 ConnectionWrite(command);
331
332 // Parse the header
333 if (ParseHeader() == -1) // Connection down
334 {
335 // The connection probably fell down !?!
336 if ( debug > 4 )
337 cout << setw(5) << Transport::GetTotOpen() << " - "
338 << "Connection fell down ... let's close it" << endl;
339
340 CloseConnection(); // Let's close the connection which is down now
341
342 // Return that the connection has fallen down during the request
343 return FinishRequest(Document_connection_down);
344 }
345
346
347 if (_response._status_code == -1)
348 {
349 // Unable to retrieve the status line
350
351 if ( debug > 4 )
352 cout << "Unable to retrieve or parse the status line" << endl;
353
354 return FinishRequest(Document_no_header);
355 }
356
357
358 if (debug > 3)
359 {
360
361 cout << "Retrieving document " << _url.path() << " on host: "
362 << _url.host() << ":" << _url.port() << endl;
363
364 cout << "Http version : " << _response._version << endl;
365 cout << "Server : " << _response._version << endl;
366 cout << "Status Code : " << _response._status_code << endl;
367 cout << "Reason : " << _response._reason_phrase << endl;
368
369 if (_response.GetAccessTime())
370 cout << "Access Time : " << _response.GetAccessTime()->GetRFC1123() << endl;
371
372 if (_response.GetModificationTime())
373 cout << "Modification Time : " << _response.GetModificationTime()->GetRFC1123() << endl;
374
375 cout << "Content-type : " << _response.GetContentType() << endl;
376
377 if (_response._transfer_encoding.length())
378 cout << "Transfer-encoding : " << _response._transfer_encoding << endl;
379
380 if (_response._content_language.length())
381 cout << "Content-Language : " << _response._content_language << endl;
382
383 if (_response._hdrconnection.length())
384 cout << "Connection : " << _response._hdrconnection << endl;
385
386 }
387
388 // Check if persistent connection are possible
389 CheckPersistentConnection(_response);
390
391 if (debug > 4)
392 cout << "Persistent connection: "
393 << (_persistent_connection_possible ? "would be accepted" : "not accepted")
394 << endl;
395
396 DocumentStatus = GetDocumentStatus(_response);
397
398 // We read the body only if the document has been found
399 if (DocumentStatus != Document_ok)
400 {
401 ShouldTheBodyBeRead=false;
402 }
403
404 // For now a chunked response MUST BE retrieved
405 if (mystrncasecmp ((char*)_response._transfer_encoding, "chunked", 7) == 0)
406 {
407 // Change the controller of the body reading
408 SetBodyReadingController(&HtHTTP::ReadChunkedBody);
409 }
410
411 // If "ShouldTheBodyBeRead" is set to true and
412 // If the document is parsable, we can read the body
413 // otherwise it is not worthwhile
414
415 if (ShouldTheBodyBeRead)
416 {
417 if ( debug > 4 )
418 cout << "Reading the body of the response" << endl;
419
420 // We use a int (HtHTTP::*)() function pointer
421 if ( (this->*_readbody)() == -1 )
422 {
423 // The connection probably fell down !?!
424 if ( debug > 4 )
425 cout << setw(5) << Transport::GetTotOpen() << " - "
426 << "Connection fell down ... let's close it" << endl;
427
428 CloseConnection(); // Let's close the connection which is down now
429
430 // Return that the connection has fallen down during the request
431 return FinishRequest(Document_connection_down);
432 }
433
434 if ( debug > 6 )
435 cout << "Contents:" << endl << _response.GetContents();
436
437 // Check if the stream returned by the server has not been completely read
438
439 if (_response._document_length != _response._content_length &&
440 _response._document_length == _max_document_size)
441 {
442 // Max document size reached
443
444 if (debug > 4)
445 cout << "Max document size (" << GetRequestMaxDocumentSize()
446 << ") reached ";
447
448 if (isPersistentConnectionUp())
449 {
450 // Only have to close persistent connection when we didn't read
451 // all the input. For now, we always read all chunked input...
452 if (mystrncasecmp ((char*)_response._transfer_encoding, "chunked", 7) != 0)
453 {
454 if (debug > 4)
455 cout << "- connection closed. ";
456
457 CloseConnection();
458 }
459 }
460
461 if (debug > 4)
462 cout << endl;
463 }
464
465 // Make sure our content-length makes sense, if none given...
466 if (_response._content_length < _response._document_length)
467 _response._content_length = _response._document_length;
468
469 }
470 else if ( debug > 4 )
471 cout << "Body not retrieved" << endl;
472
473
474 // Close the connection (if there's no persistent connection)
475
476 if( ! isPersistentConnectionUp() )
477 {
478 if ( debug > 4 )
479 cout << setw(5) << Transport::GetTotOpen() << " - "
480 << "Connection closed (No persistent connection)" << endl;
481
482 CloseConnection();
483 }
484 else
485 {
486 // Persistent connection is active
487
488 // If the document is not parsable and we asked for it with a 'GET'
489 // method, the stream's not been completely read.
490
491 if (DocumentStatus == Document_not_parsable && _Method == Method_GET)
492 {
493 // We have to close the connection.
494 if ( debug > 4 )
495 cout << "Connection must be closed (stream not completely read)"
496 << endl;
497
498 CloseConnection();
499
500 }
501 else
502 if ( debug > 4 )
503 cout << "Connection stays up ... (Persistent connection)" << endl;
504 }
505
506
507 // Check the doc_status and return a value
508
509 return FinishRequest(DocumentStatus);
510
511 }
512
513
514
EstablishConnection()515 HtHTTP::ConnectionStatus HtHTTP::EstablishConnection()
516 {
517
518 int result;
519
520 // Open the connection
521 result=OpenConnection();
522
523 if (!result)
524 return Connection_open_failed; // Connection failed
525 else if(debug > 4)
526 {
527 cout << setw(5) << Transport::GetTotOpen() << " - ";
528
529 if (result == -1)
530 cout << "Connection already open. No need to re-open." << endl;
531 else
532 cout << "Open of the connection ok" << endl;
533 }
534
535
536 if(result==1) // New connection open
537 {
538
539 // Assign the remote host to the connection
540 if ( !AssignConnectionServer() )
541 return Connection_no_server;
542 else if (debug > 4)
543 cout << "\tAssigned the remote host " << _url.host() << endl;
544
545 // Assign the port of the remote host
546 if ( !AssignConnectionPort() )
547 return Connection_no_port;
548 else if (debug > 4)
549 cout << "\tAssigned the port " << _url.port() << endl;
550 }
551
552 // Connect
553 if (! (result = Connect()))
554 return Connection_failed;
555 else if (result == -1) return Connection_already_up; // Persistent
556 else return Connection_ok; // New connection
557
558 }
559
560
561
562 // Set the string of the HTTP message request
563
SetRequestCommand(String & cmd)564 void HtHTTP::SetRequestCommand(String &cmd)
565 {
566
567 // Initialize it
568
569 if (_useproxy) {
570 cmd << _url.get() << " HTTP/1.1\r\n";
571 } else
572 cmd << _url.path() << " HTTP/1.1\r\n";
573
574 // Insert the "virtual" host to which ask the document
575
576 cmd << "Host: " << _url.host();
577 if (_url.port() != 0 && _url.port() != _url.DefaultPort())
578 cmd << ":" << _url.port();
579 cmd << "\r\n";
580
581
582 // Insert the User Agent
583
584 if (_user_agent.length())
585 cmd << "User-Agent: " << _user_agent << "\r\n";
586
587
588 // Referer
589 if (_referer.get().length())
590 cmd << "Referer: " << _referer.get() << "\r\n";
591
592 // Accept-Language
593 if (_accept_language.length())
594 cmd << "Accept-language: " << _accept_language << "\r\n";
595
596 // Authentication
597 if (_credentials.length())
598 cmd << "Authorization: Basic " << _credentials << "\r\n";
599
600 // Proxy Authentication
601 if (_useproxy && _proxy_credentials.length())
602 cmd << "Proxy-Authorization: Basic " << _proxy_credentials << "\r\n";
603
604 // Accept-Encoding: waiting to handle the gzip and compress formats, we
605 // just send an empty header which, according to the HTTP 1/1 standard,
606 // should let the server know that we only accept the 'identity' case
607 // (no encoding of the document)
608 cmd << "Accept-Encoding: \r\n";
609
610 // A date has been passed to check if the server one is newer than
611 // the one we already own.
612
613 if(_modification_time && *_modification_time > 0)
614 {
615 _modification_time->ToGMTime();
616 cmd << "If-Modified-Since: " << _modification_time->GetRFC1123() << "\r\n";
617 }
618
619 ///////
620 // Cookies! Let's go eat them! ;-)
621 ///////
622
623 // The method returns all the valid cookies and writes them
624 // directly into the request string, as a list of headers
625 if (_send_cookies && _cookie_jar)
626 _cookie_jar->SetHTTPRequest_CookiesString(_url, cmd);
627
628
629 // Let's close the command
630 cmd << "\r\n";
631
632 }
633
634
635
636
637 //*****************************************************************************
638 // int HtHTTP::ParseHeader()
639 // Parse the header of the document
640 //
ParseHeader()641 int HtHTTP::ParseHeader()
642 {
643 String line = 0;
644 int inHeader = 1;
645
646 if (_response._modification_time)
647 {
648 delete _response._modification_time;
649 _response._modification_time=0;
650 }
651 while (inHeader)
652 {
653
654 line.trunc();
655
656 if(! _connection->Read_Line(line, "\n"))
657 return -1; // Connection down
658
659 _bytes_read+=line.length();
660 line.chop('\r');
661
662 if (line.length() == 0)
663 inHeader = 0;
664 else
665 {
666 // Found a not-empty line
667
668 if (debug > 2)
669 cout << "Header line: " << line << endl;
670
671 // Status - Line check
672 char *token = line.get();
673
674 while (*token && !isspace(*token) && *token != ':')
675 ++token;
676
677 while (*token && (isspace(*token) || *token == ':'))
678 ++token;
679
680 if(!strncmp((char*)line, "HTTP/", 5))
681 {
682 // Here is the status-line
683
684 // store the HTTP version returned by the server
685 _response._version = strtok(line, " ");
686
687 // Store the status code
688 _response._status_code = atoi(strtok(0, " "));
689
690 // Store the reason phrase
691 _response._reason_phrase = strtok(0, "\n");
692
693 }
694 else if( ! mystrncasecmp((char*)line, "server:", 7))
695 {
696 // Server info
697
698 // Set the server info
699 token = strtok(token, "\n\t");
700
701 if (token && *token)
702 _response._server = token;
703
704 }
705 else if( ! mystrncasecmp((char*)line, "last-modified:", 14))
706 {
707 // Modification date sent by the server
708
709 // Set the response modification time
710 token = strtok(token, "\n\t");
711
712 if (token && *token)
713 _response._modification_time = NewDate(token);
714
715 }
716 else if( ! mystrncasecmp((char*)line, "date:", 5))
717 {
718 // Access date time sent by the server
719
720 // Set the response access time
721 token = strtok(token, "\n\t");
722
723 if (token && *token)
724 _response._access_time = NewDate(token);
725
726 }
727 else if( ! mystrncasecmp((char*)line, "content-type:", 13))
728 {
729 // Content - type
730
731 token = strtok(token, "\n\t");
732
733 if (token && *token)
734 _response._content_type = token;
735
736 }
737 else if( ! mystrncasecmp((char*)line, "content-length:", 15))
738 {
739 // Content - length
740
741 token = strtok(token, "\n\t");
742
743 if (token && *token)
744 _response._content_length = atoi(token);
745
746 }
747 else if( ! mystrncasecmp((char*)line, "transfer-encoding:", 18))
748 {
749 // Transfer-encoding
750
751 token = strtok(token, "\n\t");
752
753 if (token && *token)
754 _response._transfer_encoding = token;
755
756 }
757 else if( ! mystrncasecmp((char*)line, "location:", 9))
758 {
759 // Found a location directive - redirect in act
760
761 token = strtok(token, "\n\t");
762
763 if (token && *token)
764 _response._location = token;
765
766 }
767 else if( ! mystrncasecmp((char*)line, "connection:", 11))
768 {
769 // Ooops ... found a Connection clause
770
771 token = strtok(token, "\n\t");
772
773 if (token && *token)
774 _response._hdrconnection = token;
775
776 }
777 else if( ! mystrncasecmp((char*)line, "content-language:", 17))
778 {
779 // Found a content-language directive
780
781 token = strtok(token, "\n\t");
782
783 if (token && *token)
784 _response._content_language = token;
785
786 }
787 else if( ! mystrncasecmp((char*)line, "set-cookie:", 11))
788 {
789 // Found a cookie
790
791 // Are cookies enabled?
792 if (_send_cookies && _cookie_jar)
793 {
794 token = strtok(token, "\n\t");
795
796 if (token && *token)
797 {
798 // Insert the cookie into the jar
799 _cookie_jar->AddCookie(token, _url);
800 }
801 }
802
803 }
804 else
805 {
806 // Discarded
807
808 if (debug > 3)
809 cout << "Discarded header line: " << line << endl;
810 }
811 }
812 }
813
814 if (_response._modification_time == 0)
815 {
816 if (debug > 3)
817 cout << "No modification time returned: assuming now" << endl;
818
819 //Set the modification time
820 _response._modification_time = new HtDateTime;
821 _response._modification_time->ToGMTime(); // Set to GM time
822
823 }
824
825 return 1;
826
827 }
828
829
830 // Check for a document to be parsable
831 // It all depends on the content-type directive returned by the server
832
isParsable(const char * content_type)833 bool HtHTTP::isParsable(const char *content_type)
834 {
835
836 // Here I can decide what kind of document I can parse
837 // depending on the value of Transport:_default_parser_content_type
838 // and the rest are determined by the external_parser settings
839
840 if( ! mystrncasecmp (_default_parser_content_type.get(), content_type,
841 _default_parser_content_type.length()) )
842 return true;
843
844 // External function that checks if a document is parsable or not.
845 // CanBeParsed should point to a function that returns an int value,
846 // given a char * containing the content-type.
847
848 if (CanBeParsed && (*CanBeParsed)( (char *) content_type) )
849 return true;
850
851 return false;
852
853 }
854
855
// Check whether a persistent connection is possible,
// based on the HTTP version of the response message
858
CheckPersistentConnection(HtHTTP_Response & response)859 void HtHTTP::CheckPersistentConnection(HtHTTP_Response &response)
860 {
861
862 const char *version = response.GetVersion();
863
864 if( ! mystrncasecmp ("HTTP/1.1", version, 8))
865 {
866 const char *connection = response.GetConnectionInfo();
867
868 if( ! mystrncasecmp ("close", connection, 5))
869 _persistent_connection_possible=false; // Server wants to close
870 else _persistent_connection_possible=true;
871
872 }
873 else
874 _persistent_connection_possible=false;
875
876 }
877
878
FinishRequest(HtHTTP::DocStatus ds)879 HtHTTP::DocStatus HtHTTP::FinishRequest (HtHTTP::DocStatus ds)
880 {
881
882 int seconds;
883
884 // Set the finish time
885 _end_time.SettoNow();
886
887 // Let's add the number of seconds needed by the request
888 seconds=HtDateTime::GetDiff(_end_time, _start_time);
889
890 _tot_seconds += seconds;
891 _tot_requests ++;
892 _tot_bytes += _bytes_read;
893
894 if (debug > 2)
895 cout << "Request time: " << seconds << " secs" << endl;
896
897 return ds;
898
899 }
900
901
GetDocumentStatus(HtHTTP_Response & r)902 HtHTTP::DocStatus HtHTTP::GetDocumentStatus(HtHTTP_Response &r)
903 {
904
905 // Let's give a look at the return status code
906
907 HtHTTP::DocStatus returnStatus=Document_not_found;
908 int statuscode;
909
910 statuscode=r.GetStatusCode();
911
912 if(statuscode==200)
913 {
914 returnStatus = Document_ok; // OK
915
916 // Is it parsable?
917
918 if (! isParsable ((const char*)r.GetContentType()) )
919 returnStatus=Document_not_parsable;
920 }
921 else if(statuscode > 200 && statuscode < 300)
922 returnStatus = Document_ok; // Successful 2xx
923 else if(statuscode==304)
924 returnStatus = Document_not_changed; // Not modified
925 else if(statuscode > 300 && statuscode < 400)
926 returnStatus = Document_redirect; // Redirection 3xx
927 else if(statuscode==401)
928 returnStatus = Document_not_authorized; // Unauthorized
929
930 // Exit the function
931 return returnStatus;
932
933 }
934
SetCredentials(const String & s)935 void HtHTTP::SetCredentials (const String& s)
936 {
937 Transport::SetHTTPBasicAccessAuthorizationString(_credentials, s);
938 }
939
940
SetProxyCredentials(const String & s)941 void HtHTTP::SetProxyCredentials (const String& s)
942 {
943 Transport::SetHTTPBasicAccessAuthorizationString(_proxy_credentials, s);
944 }
945
ReadBody()946 int HtHTTP::ReadBody()
947 {
948
949 _response._contents = 0; // Initialize the string
950
951 char docBuffer[8192];
952 int bytesRead = 0;
953 int bytesToGo = _response._content_length;
954
955 if (bytesToGo < 0 || bytesToGo > _max_document_size)
956 bytesToGo = _max_document_size;
957
958 while (bytesToGo > 0)
959 {
960 int len = bytesToGo< (int)sizeof(docBuffer) ? bytesToGo : (int)sizeof(docBuffer);
961 bytesRead = _connection->Read(docBuffer, len);
962 if (bytesRead <= 0)
963 break;
964
965 _response._contents.append(docBuffer, bytesRead);
966
967 bytesToGo -= bytesRead;
968
969 _bytes_read+=bytesRead;
970
971 }
972
973 // Set document length
974 _response._document_length = _response._contents.length();
975
976 return bytesRead;
977
978 }
979
980
ReadChunkedBody()981 int HtHTTP::ReadChunkedBody()
982 {
983 // Chunked Transfer decoding
984 // as shown in the RFC2616 (HTTP/1.1) - 19.4.6
985
986 #define BSIZE 8192
987
988 int length = 0; // initialize the length
989 unsigned int chunk_size;
990 String ChunkHeader = 0;
991 char buffer[BSIZE+1];
992 int chunk, rsize;
993
994 _response._contents.trunc(); // Initialize the string
995
996 // Read chunk-size and CRLF
997 if (!_connection->Read_Line(ChunkHeader, "\r\n"))
998 return -1;
999
1000 sscanf ((char *)ChunkHeader, "%x", &chunk_size);
1001
1002 if (debug>4)
1003 cout << "Initial chunk-size: " << chunk_size << endl;
1004
1005 while (chunk_size > 0)
1006 {
1007 chunk = chunk_size;
1008
1009 do {
1010 if (chunk > BSIZE) {
1011 rsize = BSIZE;
1012 if (debug>4)
1013 cout << "Read chunk partial: left=" << chunk << endl;
1014 } else {
1015 rsize = chunk;
1016 }
1017 chunk -= rsize;
1018
1019 // Read Chunk data
1020 if (_connection->Read(buffer, rsize) == -1)
1021 return -1;
1022
1023 length+=rsize;
1024
1025 // Append the chunk-data to the contents of the response
1026 // ... but not more than _max_document_size...
1027 if (rsize > _max_document_size-_response._contents.length())
1028 rsize = _max_document_size-_response._contents.length();
1029 buffer[rsize] = 0;
1030 _response._contents.append(buffer, rsize);
1031
1032 } while (chunk);
1033
1034 // if (_connection->Read(buffer, chunk_size) == -1)
1035 // return -1;
1036
1037 // Read CRLF - to be ignored
1038 if (!_connection->Read_Line(ChunkHeader, "\r\n"))
1039 return -1;
1040
1041 // Read chunk-size and CRLF
1042 if (!_connection->Read_Line(ChunkHeader, "\r\n"))
1043 return -1;
1044
1045 sscanf ((char *)ChunkHeader, "%x", &chunk_size);
1046
1047 if (debug>4)
1048 cout << "Chunk-size: " << chunk_size << endl;
1049 }
1050
1051 ChunkHeader = 0;
1052
1053 // Ignoring next part of the body - the TRAILER
1054 // (it contains further headers - not implemented)
1055
1056 // Set content length
1057 _response._content_length = length;
1058
1059 // Set document length
1060 _response._document_length = _response._contents.length();
1061
1062 return length;
1063
1064 }
1065
1066
1067 ///////
1068 // Show the statistics
1069 ///////
1070
ShowStatistics(ostream & out)1071 ostream &HtHTTP::ShowStatistics (ostream &out)
1072 {
1073 Transport::ShowStatistics(out); // call the base class method
1074
1075 out << " HTTP Requests : " << GetTotRequests() << endl;
1076 out << " HTTP KBytes requested : " << (double)GetTotBytes()/1024 << endl;
1077 out << " HTTP Average request time : " << GetAverageRequestTime()
1078 << " secs" << endl;
1079
1080 out << " HTTP Average speed : " << GetAverageSpeed()/1024
1081 << " KBytes/secs" << endl;
1082
1083 return out;
1084 }
1085