1 //
2 // Document.h
3 //
4 // Document: This class holds everything there is to know about a document.
5 //           The actual contents of the document may or may not be present at
6 //           all times for memory conservation reasons.
7 //           The document can be told to retrieve its contents.  This is done
8 //           with the Retrieve call.  In case the retrieval causes a
9 //           redirect, the link is followed, but this process is done
10 //           only once (to prevent loops.) If the redirect didn't
11 //           work, Document_not_found is returned.
12 //
13 // Part of the ht://Dig package   <http://www.htdig.org/>
14 // Copyright (c) 1995-2004 The ht://Dig Group
15 // For copyright details, see the file COPYING in your distribution
16 // or the GNU Library General Public License (LGPL) version 2 or later
17 // <http://www.gnu.org/copyleft/lgpl.html>
18 //
19 // $Id: Document.h,v 1.19 2004/05/28 13:15:14 lha Exp $
20 //
21 //
22 #ifndef _Document_h_
23 #define _Document_h_
24 
25 #include "Parsable.h"
26 #include "Object.h"
27 #include "URL.h"
28 #include "htString.h"
29 #include "StringList.h"
30 #include "Transport.h"
31 #include "HtHTTP.h"
32 #include "HtFile.h"
33 #include "HtFTP.h"
34 #include "HtNNTP.h"
35 #include "ExternalTransport.h"
36 #include "Server.h"
37 
38 
39 class Connection;
40 
41 
42 class Document : public Object
43 {
44 public:
45     //
46     // Construction/Destruction
47     //
48     Document(char *url = 0, int max_size = 0);
49     ~Document();
50 
51     //
52     // Interface to the document.
53     //
54     void			Reset();
Length()55     int				Length()	  {return document_length;}
ContentLength()56     int				ContentLength()	  {return contentLength;}
StoredLength()57     int				StoredLength()	  {return contents.length();}
Contents()58     char			*Contents()	  {return contents;}
Contents(char * s)59     void			Contents(char *s) {contents = s; document_length = contents.length();}
ContentType()60     char			*ContentType()	  {return contentType.get();}
61 
62     //
63     // In case the retrieval process went through a redirect process,
64     // the new url can be gotten using the following call
65     //
Redirected()66     char			*Redirected()		{return redirected_to;}
Url()67     URL				*Url()			{return url;}
68     void			Url(const String &url);
69     void			Referer(const String &url);
ModTime()70     time_t			ModTime()		{return modtime.GetTime_t();}
71 
72     Transport::DocStatus	Retrieve(Server *server, HtDateTime date);
73     Transport::DocStatus	RetrieveLocal(HtDateTime date, StringList *filenames);
74 
75     //
76     // Return an appropriate parsable object for the document type.
77     //
78     Parsable			*getParsable();
79 
80     //
81     // Set the username and password to be used in any requests
82     //
setUsernamePassword(const String & credentials)83     void			setUsernamePassword(const String& credentials)
84                                           { authorization = credentials;}
85 
setProxyUsernamePassword(const String & credentials)86     void			setProxyUsernamePassword(const String& credentials)
87                                           { proxy_authorization = credentials;}
88 
GetHTTPHandler()89     HtHTTP *GetHTTPHandler() const { return HTTPConnect; }
90 
91 private:
92     enum
93     {
94 	Header_ok,
95 	Header_not_found,
96 	Header_not_changed,
97 	Header_redirect,
98 	Header_not_text,
99 	Header_not_authorized
100     };
101 
102     URL				*url;
103     URL				*proxy;
104     URL				*referer;
105     String			contents;
106     String			redirected_to;
107     String			contentType;
108     String			authorization;
109     String			proxy_authorization;
110     int				contentLength;
111     int				document_length;
112     HtDateTime			modtime;
113     int				max_doc_size;
114     int				num_retries;
115 
116     int				UseProxy();
117 
118     Transport			*transportConnect;
119     HtHTTP			*HTTPConnect;
120     HtHTTP			*HTTPSConnect;
121     HtFile			*FileConnect;
122     HtFTP                       *FTPConnect;
123     HtNNTP			*NNTPConnect;
124     ExternalTransport		*externalConnect;
125 
126 
127  ///////
128     //    Tell us if we should retry to retrieve an URL depending on
129     //    the first returned document status
130  ///////
131 
132    int ShouldWeRetry(Transport::DocStatus DocumentStatus);
133 
134 };
135 
136 #endif
137 
138 
139