1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
7 /*
8 * Author: Wan-Teh Chang
9 *
10 * Given an HTTP URL, httpget uses the GET method to fetch the file.
11 * The fetched file is written to stdout by default, or can be
12 * saved in an output file.
13 *
14 * This is a single-threaded program.
15 */
16
17 #include "prio.h"
18 #include "prnetdb.h"
19 #include "prlog.h"
20 #include "prerror.h"
21 #include "prprf.h"
22 #include "prinit.h"
23
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h> /* for atoi */
27
/* Size of the copy buffer in FetchFile, and the largest single read
 * request issued by FastFetchFile. */
#define FCOPY_BUFFER_SIZE (16 * 1024)
/* Size of inputBuf, the buffer holding excess input for ReadLine. */
#define INPUT_BUFFER_SIZE 1024
/* Size of the buffer main uses for one line of the HTTP response. */
#define LINE_SIZE 512
/* Sizes of the buffers main passes to ParseURL for the host name,
 * the port number (as text), and the file path, respectively. */
#define HOST_SIZE 256
#define PORT_SIZE 32
#define PATH_SIZE 512
34
/*
 * A buffer for storing the excess input data for ReadLine.
 * The data in the buffer starts from (including) the element pointed to
 * by inputHead, and ends just before (not including) the element pointed
 * to by inputTail.  The buffer is empty if inputHead == inputTail.
 */

static char inputBuf[INPUT_BUFFER_SIZE];
/*
 * inputBufEnd points just past the end of inputBuf
 */
static char *inputBufEnd = inputBuf + sizeof(inputBuf);
static char *inputHead = inputBuf;
static char *inputTail = inputBuf;

/*
 * Set to PR_TRUE by ReadLine once PR_Read returns 0.  DrainInputBuffer
 * consults it to tell "buffer empty" apart from "buffer empty and no
 * more data will arrive".
 */
static PRBool endOfStream = PR_FALSE;
51
52 /*
53 * ReadLine --
54 *
55 * Read in a line of text, terminated by CRLF or LF, from fd into buf.
56 * The terminating CRLF or LF is included (always as '\n'). The text
57 * in buf is terminated by a null byte. The excess bytes are stored in
58 * inputBuf for use in the next ReadLine call or FetchFile call.
59 * Returns the number of bytes in buf. 0 means end of stream. Returns
60 * -1 if read fails.
61 */
62
ReadLine(PRFileDesc * fd,char * buf,PRUint32 bufSize)63 PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
64 {
65 char *dst = buf;
66 char *bufEnd = buf + bufSize; /* just past the end of buf */
67 PRBool lineFound = PR_FALSE;
68 char *crPtr = NULL; /* points to the CR ('\r') character */
69 PRInt32 nRead;
70
71 loop:
72 PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
73 && inputTail <= inputBufEnd);
74 while (lineFound == PR_FALSE && inputHead != inputTail
75 && dst < bufEnd - 1) {
76 if (*inputHead == '\r') {
77 crPtr = dst;
78 } else if (*inputHead == '\n') {
79 lineFound = PR_TRUE;
80 if (crPtr == dst - 1) {
81 dst--;
82 }
83 }
84 *(dst++) = *(inputHead++);
85 }
86 if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
87 *dst = '\0';
88 return dst - buf;
89 }
90
91 /*
92 * The input buffer should be empty now
93 */
94 PR_ASSERT(inputHead == inputTail);
95
96 nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
97 if (nRead == -1) {
98 *dst = '\0';
99 return -1;
100 } else if (nRead == 0) {
101 endOfStream = PR_TRUE;
102 *dst = '\0';
103 return dst - buf;
104 }
105 inputHead = inputBuf;
106 inputTail = inputBuf + nRead;
107 goto loop;
108 }
109
DrainInputBuffer(char * buf,PRUint32 bufSize)110 PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
111 {
112 PRInt32 nBytes = inputTail - inputHead;
113
114 if (nBytes == 0) {
115 if (endOfStream) {
116 return -1;
117 } else {
118 return 0;
119 }
120 }
121 if ((PRInt32) bufSize < nBytes) {
122 nBytes = bufSize;
123 }
124 memcpy(buf, inputHead, nBytes);
125 inputHead += nBytes;
126 return nBytes;
127 }
128
FetchFile(PRFileDesc * in,PRFileDesc * out)129 PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
130 {
131 char buf[FCOPY_BUFFER_SIZE];
132 PRInt32 nBytes;
133
134 while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
135 if (PR_Write(out, buf, nBytes) != nBytes) {
136 fprintf(stderr, "httpget: cannot write to file\n");
137 return PR_FAILURE;
138 }
139 }
140 if (nBytes < 0) {
141 /* Input buffer is empty and end of stream */
142 return PR_SUCCESS;
143 }
144 while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
145 if (PR_Write(out, buf, nBytes) != nBytes) {
146 fprintf(stderr, "httpget: cannot write to file\n");
147 return PR_FAILURE;
148 }
149 }
150 if (nBytes < 0) {
151 fprintf(stderr, "httpget: cannot read from socket\n");
152 return PR_FAILURE;
153 }
154 return PR_SUCCESS;
155 }
156
FastFetchFile(PRFileDesc * in,PRFileDesc * out,PRUint32 size)157 PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
158 {
159 PRInt32 nBytes;
160 PRFileMap *outfMap;
161 void *addr;
162 char *start;
163 PRUint32 rem;
164 PRUint32 bytesToRead;
165 PRStatus rv;
166 PRInt64 sz64;
167
168 LL_UI2L(sz64, size);
169 outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
170 PR_ASSERT(outfMap);
171 addr = PR_MemMap(outfMap, LL_ZERO, size);
172 if (addr == NULL) {
173 fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
174 PR_GetOSError());
175
176 PR_CloseFileMap(outfMap);
177 return PR_FAILURE;
178 }
179 start = (char *) addr;
180 rem = size;
181 while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
182 start += nBytes;
183 rem -= nBytes;
184 }
185 if (nBytes < 0) {
186 /* Input buffer is empty and end of stream */
187 return PR_SUCCESS;
188 }
189 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
190 while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
191 start += nBytes;
192 rem -= nBytes;
193 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
194 }
195 if (nBytes < 0) {
196 fprintf(stderr, "httpget: cannot read from socket\n");
197 return PR_FAILURE;
198 }
199 rv = PR_MemUnmap(addr, size);
200 PR_ASSERT(rv == PR_SUCCESS);
201 rv = PR_CloseFileMap(outfMap);
202 PR_ASSERT(rv == PR_SUCCESS);
203 return PR_SUCCESS;
204 }
205
ParseURL(char * url,char * host,PRUint32 hostSize,char * port,PRUint32 portSize,char * path,PRUint32 pathSize)206 PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
207 char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
208 {
209 char *start, *end;
210 char *dst;
211 char *hostEnd;
212 char *portEnd;
213 char *pathEnd;
214
215 if (strncmp(url, "http", 4)) {
216 fprintf(stderr, "httpget: the protocol must be http\n");
217 return PR_FAILURE;
218 }
219 if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
220 fprintf(stderr, "httpget: malformed URL: %s\n", url);
221 return PR_FAILURE;
222 }
223
224 start = end = url + 7;
225 dst = host;
226 hostEnd = host + hostSize;
227 while (*end && *end != ':' && *end != '/') {
228 if (dst == hostEnd - 1) {
229 fprintf(stderr, "httpget: host name too long\n");
230 return PR_FAILURE;
231 }
232 *(dst++) = *(end++);
233 }
234 *dst = '\0';
235
236 if (*end == '\0') {
237 PR_snprintf(port, portSize, "%d", 80);
238 PR_snprintf(path, pathSize, "%s", "/");
239 return PR_SUCCESS;
240 }
241
242 if (*end == ':') {
243 end++;
244 dst = port;
245 portEnd = port + portSize;
246 while (*end && *end != '/') {
247 if (dst == portEnd - 1) {
248 fprintf(stderr, "httpget: port number too long\n");
249 return PR_FAILURE;
250 }
251 *(dst++) = *(end++);
252 }
253 *dst = '\0';
254 if (*end == '\0') {
255 PR_snprintf(path, pathSize, "%s", "/");
256 return PR_SUCCESS;
257 }
258 } else {
259 PR_snprintf(port, portSize, "%d", 80);
260 }
261
262 dst = path;
263 pathEnd = path + pathSize;
264 while (*end) {
265 if (dst == pathEnd - 1) {
266 fprintf(stderr, "httpget: file pathname too long\n");
267 return PR_FAILURE;
268 }
269 *(dst++) = *(end++);
270 }
271 *dst = '\0';
272 return PR_SUCCESS;
273 }
274
/*
 * PrintUsage --
 *
 *     Write the three accepted command-line forms to stderr.
 */
void PrintUsage(void)
{
    static const char usageText[] =
        "usage: httpget url\n"
        " httpget -o outputfile url\n"
        " httpget url -o outputfile\n";

    fputs(usageText, stderr);
}
280
main(int argc,char ** argv)281 int main(int argc, char **argv)
282 {
283 PRHostEnt hostentry;
284 char buf[PR_NETDB_BUF_SIZE];
285 PRNetAddr addr;
286 PRFileDesc *socket = NULL, *file = NULL;
287 PRIntn cmdSize;
288 char host[HOST_SIZE];
289 char port[PORT_SIZE];
290 char path[PATH_SIZE];
291 char line[LINE_SIZE];
292 int exitStatus = 0;
293 PRBool endOfHeader = PR_FALSE;
294 char *url;
295 char *fileName = NULL;
296 PRUint32 fileSize;
297
298 if (argc != 2 && argc != 4) {
299 PrintUsage();
300 exit(1);
301 }
302
303 if (argc == 2) {
304 /*
305 * case 1: httpget url
306 */
307 url = argv[1];
308 } else {
309 if (strcmp(argv[1], "-o") == 0) {
310 /*
311 * case 2: httpget -o outputfile url
312 */
313 fileName = argv[2];
314 url = argv[3];
315 } else {
316 /*
317 * case 3: httpget url -o outputfile
318 */
319 url = argv[1];
320 if (strcmp(argv[2], "-o") != 0) {
321 PrintUsage();
322 exit(1);
323 }
324 fileName = argv[3];
325 }
326 }
327
328 if (ParseURL(url, host, sizeof(host), port, sizeof(port),
329 path, sizeof(path)) == PR_FAILURE) {
330 exit(1);
331 }
332
333 if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
334 == PR_FAILURE) {
335 fprintf(stderr, "httpget: unknown host name: %s\n", host);
336 exit(1);
337 }
338
339 addr.inet.family = PR_AF_INET;
340 addr.inet.port = PR_htons((short) atoi(port));
341 addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);
342
343 socket = PR_NewTCPSocket();
344 if (socket == NULL) {
345 fprintf(stderr, "httpget: cannot create new tcp socket\n");
346 exit(1);
347 }
348
349 if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
350 fprintf(stderr, "httpget: cannot connect to http server\n");
351 exitStatus = 1;
352 goto done;
353 }
354
355 if (fileName == NULL) {
356 file = PR_STDOUT;
357 } else {
358 file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
359 00777);
360 if (file == NULL) {
361 fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
362 fileName, PR_GetError(), PR_GetOSError());
363 exitStatus = 1;
364 goto done;
365 }
366 }
367
368 cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
369 PR_ASSERT(cmdSize == (PRIntn) strlen("GET HTTP/1.0\r\n\r\n")
370 + (PRIntn) strlen(path));
371 if (PR_Write(socket, buf, cmdSize) != cmdSize) {
372 fprintf(stderr, "httpget: cannot write to http server\n");
373 exitStatus = 1;
374 goto done;
375 }
376
377 if (ReadLine(socket, line, sizeof(line)) <= 0) {
378 fprintf(stderr, "httpget: cannot read line from http server\n");
379 exitStatus = 1;
380 goto done;
381 }
382
383 /* HTTP response: 200 == OK */
384 if (strstr(line, "200") == NULL) {
385 fprintf(stderr, "httpget: %s\n", line);
386 exitStatus = 1;
387 goto done;
388 }
389
390 while (ReadLine(socket, line, sizeof(line)) > 0) {
391 if (line[0] == '\n') {
392 endOfHeader = PR_TRUE;
393 break;
394 }
395 if (strncmp(line, "Content-Length", 14) == 0
396 || strncmp(line, "Content-length", 14) == 0) {
397 char *p = line + 14;
398
399 while (*p == ' ' || *p == '\t') {
400 p++;
401 }
402 if (*p != ':') {
403 continue;
404 }
405 p++;
406 while (*p == ' ' || *p == '\t') {
407 p++;
408 }
409 fileSize = 0;
410 while ('0' <= *p && *p <= '9') {
411 fileSize = 10 * fileSize + (*p - '0');
412 p++;
413 }
414 }
415 }
416 if (endOfHeader == PR_FALSE) {
417 fprintf(stderr, "httpget: cannot read line from http server\n");
418 exitStatus = 1;
419 goto done;
420 }
421
422 if (fileName == NULL || fileSize == 0) {
423 FetchFile(socket, file);
424 } else {
425 FastFetchFile(socket, file, fileSize);
426 }
427
428 done:
429 if (socket) {
430 PR_Close(socket);
431 }
432 if (file) {
433 PR_Close(file);
434 }
435 PR_Cleanup();
436 return exitStatus;
437 }
438