1 //
2 // ExternalTransport.cc
3 //
4 // ExternalTransport: Allows external programs to retrieve given URLs with
5 // unknown protocols.
6 //
7 // Part of the ht://Dig package <http://www.htdig.org/>
8 // Copyright (c) 1995-2004 The ht://Dig Group
9 // For copyright details, see the file COPYING in your distribution
10 // or the GNU Library General Public License (LGPL) version 2 or later
11 // <http://www.gnu.org/copyleft/lgpl.html>
12 //
13 // $Id: ExternalTransport.cc,v 1.9 2004/05/28 13:15:14 lha Exp $
14 //
15
16 #ifdef HAVE_CONFIG_H
17 #include "htconfig.h"
18 #endif /* HAVE_CONFIG_H */
19
20 #include "ExternalTransport.h"
21 #include "htdig.h"
22 #include "QuotedStringList.h"
23 #include "URL.h"
24 #include "Dictionary.h"
25 #include "good_strtok.h"
26
27 #include <ctype.h>
28 #include <stdio.h>
29
30 #ifndef _MSC_VER /* _WIN32 */
31 #include <unistd.h>
32 #endif
33
34 #include <stdlib.h>
35 #ifdef HAVE_WAIT_H
36 #include <wait.h>
37 #elif HAVE_SYS_WAIT_H
38 #include <sys/wait.h>
39 #endif
40
41 #include "defaults.h"
42
43 static Dictionary *handlers = 0;
44 static Dictionary *toTypes = 0;
45 extern String configFile;
46
47 //*****************************************************************************
48 // ExternalTransport::ExternalTransport(char *protocol)
49 //
ExternalTransport(const String & protocol)50 ExternalTransport::ExternalTransport(const String &protocol)
51 {
52 if (canHandle(protocol))
53 {
54 _Handler = ((String *)handlers->Find(protocol))->get();
55 }
56 ExternalTransport::_Protocol = protocol;
57 _Response = new ExternalTransport_Response;
58 }
59
60
61 //*****************************************************************************
62 // ExternalTransport::~ExternalTransport()
63 //
~ExternalTransport()64 ExternalTransport::~ExternalTransport()
65 {
66 if (_Response)
67 {
68 delete _Response;
69 }
70 }
71
72
73 //*****************************************************************************
74 // int ExternalTransport::canHandle(const String &protocol)
75 //
76 int
canHandle(const String & protocol)77 ExternalTransport::canHandle(const String &protocol)
78 {
79 HtConfiguration* config= HtConfiguration::config();
80 if (!handlers)
81 {
82 handlers = new Dictionary();
83 toTypes = new Dictionary();
84
85 QuotedStringList qsl(config->Find("external_protocols"), " \t");
86 String from, to;
87 int i;
88 int sep;
89
90 for (i = 0; qsl[i]; i += 2)
91 {
92 from = qsl[i];
93 to = "";
94 sep = from.indexOf("->");
95 if (sep != -1)
96 {
97 to = from.sub(sep+2).get();
98 from = from.sub(0, sep).get();
99 }
100
101 // Recognise service specified as "https://" rather than "https"
102 sep = from.indexOf(":");
103 if (sep != -1)
104 from = from.sub(0, sep).get();
105
106 handlers->Add(from, new String(qsl[i + 1]));
107 toTypes->Add(from, new String(to));
108 }
109 }
110 return handlers->Exists(protocol);
111 }
112
113
114 //*****************************************************************************
115 // void ExternalTransport::SetConnection(URL *u)
116 //
SetConnection(URL * u)117 void ExternalTransport::SetConnection (URL *u)
118 {
119 // Grab the actual URL to pass to the handler
120 _URL = *u;
121
122 // OK, now call the parent method to make sure everything else is set up.
123 Transport::SetConnection (u->host(), u->port());
124 }
125
126
127 //*****************************************************************************
128 // DocStatus ExternalTransport::Request()
129 //
Request()130 Transport::DocStatus ExternalTransport::Request()
131 {
132 // NEAL - ENABLE/REWRITE THIS ASAP FOR WIN32
133 #ifndef _MSC_VER /* _WIN32 */
134 //
135 // Start the external handler, passing the protocol, URL and config file
136 // as command arguments
137 //
138 StringList hargs(_Handler);
139 char **handlargs = new char * [hargs.Count() + 5];
140 int argi;
141 for (argi = 0; argi < hargs.Count(); argi++)
142 handlargs[argi] = (char *)hargs[argi];
143 handlargs[argi++] = _Protocol.get();
144 handlargs[argi++] = (char *)_URL.get().get();
145 handlargs[argi++] = configFile.get();
146 handlargs[argi++] = 0;
147
148 int stdout_pipe[2];
149 int fork_result = -1;
150 int fork_try;
151
152 if (pipe(stdout_pipe) == -1)
153 {
154 if (debug)
155 cerr << "External transport error: Can't create pipe!" << endl;
156 delete [] handlargs;
157 return GetDocumentStatus(_Response);
158 }
159
160 for (fork_try = 4; --fork_try >= 0;)
161 {
162 fork_result = fork(); // Fork so we can execute in the child process
163 if (fork_result != -1)
164 break;
165 if (fork_try)
166 sleep(3);
167 }
168 if (fork_result == -1)
169 {
170 if (debug)
171 cerr << "Fork Failure in ExternalTransport" << endl;
172 delete [] handlargs;
173 return GetDocumentStatus(_Response);
174 }
175
176 if (fork_result == 0) // Child process
177 {
178 close(STDOUT_FILENO); // Close handle STDOUT to replace with pipe
179 dup(stdout_pipe[1]);
180 close(stdout_pipe[0]);
181 close(stdout_pipe[1]);
182 // not really necessary, and may pose Cygwin incompatibility...
183 //close(STDIN_FILENO); // Close STDIN to replace with null dev.
184 //open("/dev/null", O_RDONLY);
185
186 // Call External Transport Handler
187 execv(handlargs[0], handlargs);
188
189 exit(EXIT_FAILURE);
190 }
191
192 // Parent Process
193 delete [] handlargs;
194 close(stdout_pipe[1]); // Close STDOUT for writing
195 FILE *input = fdopen(stdout_pipe[0], "r");
196 if (input == NULL)
197 {
198 if (debug)
199 cerr << "Fdopen Failure in ExternalTransport" << endl;
200 return GetDocumentStatus(_Response);
201 }
202
203 // Set up a response for this request
204 _Response->Reset();
205 // We just accessed the document
206 _Response->_access_time = new HtDateTime();
207 _Response->_access_time->SettoNow();
208
209
210 // OK, now parse the stuff we got back from the handler...
211 String line;
212 char *token1;
213 int in_header = 1;
214
215 while (in_header && readLine(input, line))
216 {
217 line.chop('\r');
218 if (line.length() > 0 && debug > 2)
219 cout << "Header line: " << line << endl;
220 token1 = strtok(line, "\t");
221 if (token1 == NULL)
222 {
223 token1 = "";
224 in_header = 0;
225 break;
226 }
227
228 switch (*token1)
229 {
230 case 's': // status code
231 token1 = strtok(0, "\t");
232 if (token1 != NULL)
233 _Response->_status_code = atoi(token1);
234 else
235 cerr<< "External transport error: expected status code in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
236 break;
237
238 case 'r': // status reason
239 token1 = strtok(0, "\t");
240 if (token1 != NULL)
241 _Response->_reason_phrase = token1;
242 else
243 cerr<< "External transport error: expected status reason in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
244 break;
245
246 case 'm': // modification time
247 token1 = strtok(0, "\t");
248 if (token1 != NULL)
249 _Response->_modification_time= NewDate(token1); // Hopefully we can grok it...
250 else
251 cerr<< "External transport error: expected modification time in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
252 break;
253
254 case 't': // Content-Type
255 token1 = strtok(0, "\t");
256 if (token1 != NULL)
257 _Response->_content_type = token1;
258 else
259 cerr<< "External transport error: expected content-type in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
260 break;
261
262 case 'l': // Content-Length
263 token1 = strtok(0, "\t");
264 if (token1 != NULL)
265 _Response->_content_length = atoi(token1);
266 else
267 cerr<< "External transport error: expected content-length in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
268 break;
269
270 case 'u': // redirect target
271 token1 = strtok(0, "\t");
272 if (token1 != NULL)
273 _Response->_location = token1;
274 else
275 cerr<< "External transport error: expected URL in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
276 break;
277
278 default:
279 cerr<< "External transport error: unknown field in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
280 break;
281 }
282 }
283
284 // OK, now we read in the rest of the document as contents...
285 _Response->_contents = 0;
286 char docBuffer[8192];
287 int bytesRead;
288
289 while ((bytesRead = fread(docBuffer, 1, sizeof(docBuffer), input)) > 0)
290 {
291 if (debug > 2)
292 cout << "Read " << bytesRead << " from document\n";
293 if (_Response->_contents.length() + bytesRead > _max_document_size)
294 bytesRead = _max_document_size - _Response->_contents.length();
295 _Response->_contents.append(docBuffer, bytesRead);
296 if (_Response->_contents.length() >= _max_document_size)
297 break;
298 }
299 _Response->_document_length = _Response->_contents.length();
300 fclose(input);
301 // close(stdout_pipe[0]); // This is closed for us by the fclose()
302
303 int rpid, status;
304 while ((rpid = wait(&status)) != fork_result && rpid != -1)
305 ;
306
307 #endif
308
309 return GetDocumentStatus(_Response);
310 }
311
312
313 //*****************************************************************************
314 // private
315 // DocStatus ExternalTransport::GetDocumentStatus(ExternalTransport_Response *r)
316 //
GetDocumentStatus(ExternalTransport_Response * r)317 Transport::DocStatus ExternalTransport::GetDocumentStatus(ExternalTransport_Response *r)
318 {
319 // The default is 'not found' if we can't figure it out...
320 DocStatus returnStatus = Document_not_found;
321 int statuscode = r->GetStatusCode();
322
323 if (statuscode == 200)
324 {
325 returnStatus = Document_ok; // OK
326 // Is it parsable?
327 }
328
329 else if (statuscode > 200 && statuscode < 300)
330 returnStatus = Document_ok; // Successful 2xx
331 else if (statuscode == 304)
332 returnStatus = Document_not_changed; // Not modified
333 else if (statuscode > 300 && statuscode < 400)
334 returnStatus = Document_redirect; // Redirection 3xx
335 else if (statuscode == 401)
336 returnStatus = Document_not_authorized; // Unauthorized
337
338 return returnStatus;
339 }
340
341
342 //*****************************************************************************
343 // private
344 // int ExternalTransport::readLine(FILE *in, String &line)
345 //
346 int
readLine(FILE * in,String & line)347 ExternalTransport::readLine(FILE *in, String &line)
348 {
349 char buffer[2048];
350 int length;
351
352 line = 0;
353 while (fgets(buffer, sizeof(buffer), in))
354 {
355 length = strlen(buffer);
356 if (buffer[length - 1] == '\n')
357 {
358 //
359 // A full line has been read. Return it.
360 //
361 line << buffer;
362 line.chop('\n');
363 return 1;
364 }
365 else
366 {
367 //
368 // Only a partial line was read. Append it to the line
369 // and read some more.
370 //
371 line << buffer;
372 }
373 }
374 return line.length() > 0;
375 }
376
377