1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2008, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at http://curl.haxx.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * $Id: http_chunks.c,v 1.47 2008-10-24 01:27:00 yangtse Exp $
22  ***************************************************************************/
23 #include "setup.h"
24 
25 #ifndef CURL_DISABLE_HTTP
26 /* -- WIN32 approved -- */
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <ctype.h>
32 
33 #include "urldata.h" /* it includes http_chunks.h */
34 #include "sendf.h"   /* for the client write stuff */
35 
36 #include "content_encoding.h"
37 #include "http.h"
38 #include "memory.h"
39 #include "easyif.h" /* for Curl_convert_to_network prototype */
40 
41 #define _MPRINTF_REPLACE /* use our functions only */
42 #include <curl/mprintf.h>
43 
44 /* The last #include file should be: */
45 #include "memdebug.h"
46 
47 /*
48  * Chunk format (simplified):
49  *
50  * <HEX SIZE>[ chunk extension ] CRLF
51  * <DATA> CRLF
52  *
 * Highlights from RFC 2616 section 3.6.1 (Chunked Transfer Coding) say:
54 
55    The chunked encoding modifies the body of a message in order to
56    transfer it as a series of chunks, each with its own size indicator,
57    followed by an OPTIONAL trailer containing entity-header fields. This
58    allows dynamically produced content to be transferred along with the
59    information necessary for the recipient to verify that it has
60    received the full message.
61 
62        Chunked-Body   = *chunk
63                         last-chunk
64                         trailer
65                         CRLF
66 
67        chunk          = chunk-size [ chunk-extension ] CRLF
68                         chunk-data CRLF
69        chunk-size     = 1*HEX
70        last-chunk     = 1*("0") [ chunk-extension ] CRLF
71 
72        chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
73        chunk-ext-name = token
74        chunk-ext-val  = token | quoted-string
75        chunk-data     = chunk-size(OCTET)
76        trailer        = *(entity-header CRLF)
77 
78    The chunk-size field is a string of hex digits indicating the size of
79    the chunk. The chunked encoding is ended by any chunk whose size is
80    zero, followed by the trailer, which is terminated by an empty line.
81 
82  */
83 
/* Tell whether 'digit' is an ASCII hexadecimal digit: 0-9, A-F or a-f.
   The test is made against the raw ASCII code points instead of going
   through isxdigit(), so that it gives the same answer on non-ASCII hosts. */
static bool Curl_isxdigit(char digit)
{
  const int is_decimal = (digit >= 0x30) && (digit <= 0x39); /* 0-9 */
  const int is_upper   = (digit >= 0x41) && (digit <= 0x46); /* A-F */
  const int is_lower   = (digit >= 0x61) && (digit <= 0x66); /* a-f */

  return (bool)(is_decimal || is_upper || is_lower);
}
92 
Curl_httpchunk_init(struct connectdata * conn)93 void Curl_httpchunk_init(struct connectdata *conn)
94 {
95   struct Curl_chunker *chunk = &conn->chunk;
96   chunk->hexindex=0; /* start at 0 */
97   chunk->dataleft=0; /* no data left yet! */
98   chunk->state = CHUNK_HEX; /* we get hex first! */
99 }
100 
101 /*
102  * chunk_read() returns a OK for normal operations, or a positive return code
103  * for errors. STOP means this sequence of chunks is complete.  The 'wrote'
104  * argument is set to tell the caller how many bytes we actually passed to the
105  * client (for byte-counting and whatever).
106  *
107  * The states and the state-machine is further explained in the header file.
108  *
109  * This function always uses ASCII hex values to accommodate non-ASCII hosts.
110  * For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
111  */
Curl_httpchunk_read(struct connectdata * conn,char * datap,ssize_t datalen,ssize_t * wrotep)112 CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
113                               char *datap,
114                               ssize_t datalen,
115                               ssize_t *wrotep)
116 {
117   CURLcode result=CURLE_OK;
118   struct SessionHandle *data = conn->data;
119   struct Curl_chunker *ch = &conn->chunk;
120   struct SingleRequest *k = &data->req;
121   size_t piece;
122   size_t length = (size_t)datalen;
123   size_t *wrote = (size_t *)wrotep;
124 
125   *wrote = 0; /* nothing's written yet */
126 
127   /* the original data is written to the client, but we go on with the
128      chunk read process, to properly calculate the content length*/
129   if(data->set.http_te_skip && !k->ignorebody) {
130     result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen);
131     if(result)
132       return CHUNKE_WRITE_ERROR;
133   }
134 
135   while(length) {
136     switch(ch->state) {
137     case CHUNK_HEX:
138       if(Curl_isxdigit(*datap)) {
139         if(ch->hexindex < MAXNUM_SIZE) {
140           ch->hexbuffer[ch->hexindex] = *datap;
141           datap++;
142           length--;
143           ch->hexindex++;
144         }
145         else {
146           return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
147         }
148       }
149       else {
150         if(0 == ch->hexindex) {
151           /* This is illegal data, we received junk where we expected
152              a hexadecimal digit. */
153           return CHUNKE_ILLEGAL_HEX;
154         }
155         /* length and datap are unmodified */
156         ch->hexbuffer[ch->hexindex]=0;
157 #ifdef CURL_DOES_CONVERSIONS
158         /* convert to host encoding before calling strtoul */
159         result = Curl_convert_from_network(conn->data,
160                                            ch->hexbuffer,
161                                            ch->hexindex);
162         if(result != CURLE_OK) {
163           /* Curl_convert_from_network calls failf if unsuccessful */
164           /* Treat it as a bad hex character */
165           return(CHUNKE_ILLEGAL_HEX);
166         }
167 #endif /* CURL_DOES_CONVERSIONS */
168         ch->datasize=strtoul(ch->hexbuffer, NULL, 16);
169         ch->state = CHUNK_POSTHEX;
170       }
171       break;
172 
173     case CHUNK_POSTHEX:
174       /* In this state, we're waiting for CRLF to arrive. We support
175          this to allow so called chunk-extensions to show up here
176          before the CRLF comes. */
177       if(*datap == 0x0d)
178         ch->state = CHUNK_CR;
179       length--;
180       datap++;
181       break;
182 
183     case CHUNK_CR:
184       /* waiting for the LF */
185       if(*datap == 0x0a) {
186         /* we're now expecting data to come, unless size was zero! */
187         if(0 == ch->datasize) {
188           if(k->trailerhdrpresent!=TRUE) {
189             /* No Trailer: header found - revert to original Curl processing */
190             ch->state = CHUNK_STOPCR;
191 
192             /* We need to increment the datap here since we bypass the
193                increment below with the immediate break */
194             length--;
195             datap++;
196 
197             /* This is the final byte, continue to read the final CRLF */
198             break;
199           }
200           else {
201             ch->state = CHUNK_TRAILER; /* attempt to read trailers */
202             conn->trlPos=0;
203           }
204         }
205         else {
206           ch->state = CHUNK_DATA;
207         }
208       }
209       else
210         /* previously we got a fake CR, go back to CR waiting! */
211         ch->state = CHUNK_CR;
212       datap++;
213       length--;
214       break;
215 
216     case CHUNK_DATA:
217       /* we get pure and fine data
218 
219          We expect another 'datasize' of data. We have 'length' right now,
220          it can be more or less than 'datasize'. Get the smallest piece.
221       */
222       piece = (ch->datasize >= length)?length:ch->datasize;
223 
224       /* Write the data portion available */
225 #ifdef HAVE_LIBZ
226       switch (conn->data->set.http_ce_skip?
227               IDENTITY : data->req.content_encoding) {
228       case IDENTITY:
229 #endif
230         if(!k->ignorebody) {
231           if( !data->set.http_te_skip )
232             result = Curl_client_write(conn, CLIENTWRITE_BODY, datap,
233                                        piece);
234           else
235             result = CURLE_OK;
236         }
237 #ifdef HAVE_LIBZ
238         break;
239 
240       case DEFLATE:
241         /* update data->req.keep.str to point to the chunk data. */
242         data->req.str = datap;
243         result = Curl_unencode_deflate_write(conn, &data->req,
244                                              (ssize_t)piece);
245         break;
246 
247       case GZIP:
248         /* update data->req.keep.str to point to the chunk data. */
249         data->req.str = datap;
250         result = Curl_unencode_gzip_write(conn, &data->req,
251                                           (ssize_t)piece);
252         break;
253 
254       case COMPRESS:
255       default:
256         failf (conn->data,
257                "Unrecognized content encoding type. "
258                "libcurl understands `identity', `deflate' and `gzip' "
259                "content encodings.");
260         return CHUNKE_BAD_ENCODING;
261       }
262 #endif
263 
264       if(result)
265         return CHUNKE_WRITE_ERROR;
266 
267       *wrote += piece;
268 
269       ch->datasize -= piece; /* decrease amount left to expect */
270       datap += piece;    /* move read pointer forward */
271       length -= piece;   /* decrease space left in this round */
272 
273       if(0 == ch->datasize)
274         /* end of data this round, we now expect a trailing CRLF */
275         ch->state = CHUNK_POSTCR;
276       break;
277 
278     case CHUNK_POSTCR:
279       if(*datap == 0x0d) {
280         ch->state = CHUNK_POSTLF;
281         datap++;
282         length--;
283       }
284       else {
285         return CHUNKE_BAD_CHUNK;
286       }
287       break;
288 
289     case CHUNK_POSTLF:
290       if(*datap == 0x0a) {
291         /*
292          * The last one before we go back to hex state and start all
293          * over.
294          */
295         Curl_httpchunk_init(conn);
296         datap++;
297         length--;
298       }
299       else {
300         return CHUNKE_BAD_CHUNK;
301       }
302 
303       break;
304 
305     case CHUNK_TRAILER:
306       /* conn->trailer is assumed to be freed in url.c on a
307          connection basis */
308       if(conn->trlPos >= conn->trlMax) {
309         char *ptr;
310         if(conn->trlMax) {
311           conn->trlMax *= 2;
312           ptr = realloc(conn->trailer,conn->trlMax);
313         }
314         else {
315           conn->trlMax=128;
316           ptr = malloc(conn->trlMax);
317         }
318         if(!ptr)
319           return CHUNKE_OUT_OF_MEMORY;
320         conn->trailer = ptr;
321       }
322       conn->trailer[conn->trlPos++]=*datap;
323 
324       if(*datap == 0x0d)
325         ch->state = CHUNK_TRAILER_CR;
326       else {
327         datap++;
328         length--;
329       }
330       break;
331 
332     case CHUNK_TRAILER_CR:
333       if(*datap == 0x0d) {
334         ch->state = CHUNK_TRAILER_POSTCR;
335         datap++;
336         length--;
337       }
338       else
339         return CHUNKE_BAD_CHUNK;
340       break;
341 
342     case CHUNK_TRAILER_POSTCR:
343       if(*datap == 0x0a) {
344         conn->trailer[conn->trlPos++]=0x0a;
345         conn->trailer[conn->trlPos]=0;
346         if(conn->trlPos==2) {
347           ch->state = CHUNK_STOP;
348           datap++;
349           length--;
350 
351           /*
352            * Note that this case skips over the final STOP states since we've
353            * already read the final CRLF and need to return
354            */
355 
356           ch->dataleft = length;
357 
358           return CHUNKE_STOP; /* return stop */
359         }
360         else {
361 #ifdef CURL_DOES_CONVERSIONS
362           /* Convert to host encoding before calling Curl_client_write */
363           result = Curl_convert_from_network(conn->data,
364                                              conn->trailer,
365                                              conn->trlPos);
366           if(result != CURLE_OK) {
367             /* Curl_convert_from_network calls failf if unsuccessful */
368             /* Treat it as a bad chunk */
369             return(CHUNKE_BAD_CHUNK);
370           }
371 #endif /* CURL_DOES_CONVERSIONS */
372           if(!data->set.http_te_skip) {
373             result = Curl_client_write(conn, CLIENTWRITE_HEADER,
374                                        conn->trailer, conn->trlPos);
375             if(result)
376               return CHUNKE_WRITE_ERROR;
377           }
378         }
379         ch->state = CHUNK_TRAILER;
380         conn->trlPos=0;
381         datap++;
382         length--;
383       }
384       else
385         return CHUNKE_BAD_CHUNK;
386       break;
387 
388     case CHUNK_STOPCR:
389       /* Read the final CRLF that ends all chunk bodies */
390 
391       if(*datap == 0x0d) {
392         ch->state = CHUNK_STOP;
393         datap++;
394         length--;
395       }
396       else {
397         return CHUNKE_BAD_CHUNK;
398       }
399       break;
400 
401     case CHUNK_STOP:
402       if(*datap == 0x0a) {
403         datap++;
404         length--;
405 
406         /* Record the length of any data left in the end of the buffer
407            even if there's no more chunks to read */
408 
409         ch->dataleft = length;
410         return CHUNKE_STOP; /* return stop */
411       }
412       else {
413         return CHUNKE_BAD_CHUNK;
414       }
415 
416     default:
417       return CHUNKE_STATE_ERROR;
418     }
419   }
420   return CHUNKE_OK;
421 }
422 #endif /* CURL_DISABLE_HTTP */
423