1 // 2 // Document.h 3 // 4 // Document: This class holds everything there is to know about a document. 5 // The actual contents of the document may or may not be present at 6 // all times for memory conservation reasons. 7 // The document can be told to retrieve its contents. This is done 8 // with the Retrieve call. In case the retrieval causes a 9 // redirect, the link is followed, but this process is done 10 // only once (to prevent loops.) If the redirect didn't 11 // work, Document_not_found is returned. 12 // 13 // Part of the ht://Dig package <http://www.htdig.org/> 14 // Copyright (c) 1995-2004 The ht://Dig Group 15 // For copyright details, see the file COPYING in your distribution 16 // or the GNU Library General Public License (LGPL) version 2 or later 17 // <http://www.gnu.org/copyleft/lgpl.html> 18 // 19 // $Id: Document.h,v 1.19 2004/05/28 13:15:14 lha Exp $ 20 // 21 // 22 #ifndef _Document_h_ 23 #define _Document_h_ 24 25 #include "Parsable.h" 26 #include "Object.h" 27 #include "URL.h" 28 #include "htString.h" 29 #include "StringList.h" 30 #include "Transport.h" 31 #include "HtHTTP.h" 32 #include "HtFile.h" 33 #include "HtFTP.h" 34 #include "HtNNTP.h" 35 #include "ExternalTransport.h" 36 #include "Server.h" 37 38 39 class Connection; 40 41 42 class Document : public Object 43 { 44 public: 45 // 46 // Construction/Destruction 47 // 48 Document(char *url = 0, int max_size = 0); 49 ~Document(); 50 51 // 52 // Interface to the document. 53 // 54 void Reset(); Length()55 int Length() {return document_length;} ContentLength()56 int ContentLength() {return contentLength;} StoredLength()57 int StoredLength() {return contents.length();} Contents()58 char *Contents() {return contents;} Contents(char * s)59 void Contents(char *s) {contents = s; document_length = contents.length();} ContentType()60 char *ContentType() {return contentType.get();} 61 62 // 63 // In case the retrieval process went through a redirect process, 64 // the new url can be gotten using the following call 65 // Redirected()66 char *Redirected() {return redirected_to;} Url()67 URL *Url() {return url;} 68 void Url(const String &url); 69 void Referer(const String &url); ModTime()70 time_t ModTime() {return modtime.GetTime_t();} 71 72 Transport::DocStatus Retrieve(Server *server, HtDateTime date); 73 Transport::DocStatus RetrieveLocal(HtDateTime date, StringList *filenames); 74 75 // 76 // Return an appropriate parsable object for the document type. 77 // 78 Parsable *getParsable(); 79 80 // 81 // Set the username and password to be used in any requests 82 // setUsernamePassword(const String & credentials)83 void setUsernamePassword(const String& credentials) 84 { authorization = credentials;} 85 setProxyUsernamePassword(const String & credentials)86 void setProxyUsernamePassword(const String& credentials) 87 { proxy_authorization = credentials;} 88 GetHTTPHandler()89 HtHTTP *GetHTTPHandler() const { return HTTPConnect; } 90 91 private: 92 enum 93 { 94 Header_ok, 95 Header_not_found, 96 Header_not_changed, 97 Header_redirect, 98 Header_not_text, 99 Header_not_authorized 100 }; 101 102 URL *url; 103 URL *proxy; 104 URL *referer; 105 String contents; 106 String redirected_to; 107 String contentType; 108 String authorization; 109 String proxy_authorization; 110 int contentLength; 111 int document_length; 112 HtDateTime modtime; 113 int max_doc_size; 114 int num_retries; 115 116 int UseProxy(); 117 118 Transport *transportConnect; 119 HtHTTP *HTTPConnect; 120 HtHTTP *HTTPSConnect; 121 HtFile *FileConnect; 122 HtFTP *FTPConnect; 123 HtNNTP *NNTPConnect; 124 ExternalTransport *externalConnect; 125 126 127 /////// 128 // Tell us if we should retry to retrieve an URL depending on 129 // the first returned document status 130 /////// 131 132 int ShouldWeRetry(Transport::DocStatus DocumentStatus); 133 134 }; 135 136 #endif 137 138 139