1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 /*
3 * soup-multipart.c: multipart HTTP message bodies
4 *
5 * Copyright (C) 2008 Red Hat, Inc.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <string.h>
13
14 #include "soup-multipart.h"
15 #include "soup-headers.h"
16 #include "soup-message-headers-private.h"
17 #include "soup.h"
18
19 /**
20 * SECTION:soup-multipart
21 * @short_description: multipart HTTP message bodies
22 * @see_also: #SoupMessageBody, #SoupMessageHeaders
23 *
24 * Functions to use multi-part HTTP messages.
25 **/
26
27 /**
28 * SoupMultipart:
29 *
30 * Represents a multipart HTTP message body, parsed according to the
31 * syntax of RFC 2046. Of particular interest to HTTP are
 * <literal>multipart/byteranges</literal> and
33 * <literal>multipart/form-data</literal>.
34 *
35 * Although the headers of a #SoupMultipart body part will contain the
36 * full headers from that body part, libsoup does not interpret them
37 * according to MIME rules. For example, each body part is assumed to
38 * have "binary" Content-Transfer-Encoding, even if its headers
39 * explicitly state otherwise. In other words, don't try to use
40 * #SoupMultipart for handling real MIME multiparts.
41 *
42 **/
43
44 struct _SoupMultipart {
45 char *mime_type, *boundary;
46 GPtrArray *headers, *bodies;
47 };
48
49 static SoupMultipart *
soup_multipart_new_internal(char * mime_type,char * boundary)50 soup_multipart_new_internal (char *mime_type, char *boundary)
51 {
52 SoupMultipart *multipart;
53
54 multipart = g_slice_new (SoupMultipart);
55 multipart->mime_type = mime_type;
56 multipart->boundary = boundary;
57 multipart->headers = g_ptr_array_new_with_free_func ((GDestroyNotify)soup_message_headers_unref);
58 multipart->bodies = g_ptr_array_new_with_free_func ((GDestroyNotify)g_bytes_unref);
59
60 return multipart;
61 }
62
63 static char *
generate_boundary(void)64 generate_boundary (void)
65 {
66 guint32 data[2];
67
68 data[0] = g_random_int ();
69 data[1] = g_random_int ();
70
71 /* The maximum boundary string length is 69 characters, and a
72 * stringified SHA256 checksum is 64 bytes long.
73 */
74 return g_compute_checksum_for_data (G_CHECKSUM_SHA256,
75 (const guchar *)&data,
76 sizeof (data));
77 }
78
79 /**
80 * soup_multipart_new:
81 * @mime_type: the MIME type of the multipart to create.
82 *
83 * Creates a new empty #SoupMultipart with a randomly-generated
84 * boundary string. Note that @mime_type must be the full MIME type,
85 * including "multipart/".
86 *
87 * Returns: a new empty #SoupMultipart of the given @mime_type
88 *
89 * See also: soup_message_new_from_multipart()
90 **/
91 SoupMultipart *
soup_multipart_new(const char * mime_type)92 soup_multipart_new (const char *mime_type)
93 {
94 return soup_multipart_new_internal (g_strdup (mime_type),
95 generate_boundary ());
96 }
97
/* Searches the bytes in [@start, @end) for a boundary delimiter line
 * and returns a pointer to its leading "--", or NULL if none is found.
 *
 * A match is "--" followed by the @boundary_len bytes of @boundary,
 * located either at @start itself or directly after a "\r\n" that lies
 * inside the range, and followed by either "--" (close-delimiter) or
 * "\r\n". The data does not need to be NUL-terminated, and no byte
 * outside [@start, @end) is read.
 */
static const char *
find_boundary (const char *start, const char *end,
	       const char *boundary, int boundary_len)
{
	const char *b;

	/* A candidate occupies boundary_len + 4 bytes: "--", the
	 * boundary, and the two bytes that follow it. It may end exactly
	 * at @end, since nothing has to follow a close-delimiter.
	 * Comparing the remaining length (instead of computing
	 * b + boundary_len + 4) avoids forming a pointer past the buffer.
	 */
	for (b = memchr (start, '-', end - start);
	     b && end - b >= boundary_len + 4;
	     b = memchr (b + 2, '-', end - (b + 2))) {
		/* Check for "--boundary" */
		if (b[1] != '-' ||
		    memcmp (b + 2, boundary, boundary_len) != 0)
			continue;

		/* Check that it's at start of line. The preceding
		 * "\r\n" is only examined when both of its bytes are
		 * inside the search range; otherwise b[-2] would be
		 * read from before @start.
		 */
		if (b != start &&
		    !(b - start >= 2 && b[-1] == '\n' && b[-2] == '\r'))
			continue;

		/* Check for "--" or "\r\n" after boundary */
		if ((b[boundary_len + 2] == '-' && b[boundary_len + 3] == '-') ||
		    (b[boundary_len + 2] == '\r' && b[boundary_len + 3] == '\n'))
			return b;
	}
	return NULL;
}
123
124 /**
125 * soup_multipart_new_from_message:
126 * @headers: the headers of the HTTP message to parse
127 * @body: the body of the HTTP message to parse
128 *
129 * Parses @headers and @body to form a new #SoupMultipart
130 *
131 * Returns: (nullable): a new #SoupMultipart (or %NULL if the
132 * message couldn't be parsed or wasn't multipart).
133 *
134 **/
135 SoupMultipart *
soup_multipart_new_from_message(SoupMessageHeaders * headers,GBytes * body)136 soup_multipart_new_from_message (SoupMessageHeaders *headers,
137 GBytes *body)
138 {
139 SoupMultipart *multipart;
140 const char *content_type, *boundary;
141 GHashTable *params;
142 int boundary_len;
143 const char *start, *split, *end, *body_end;
144 SoupMessageHeaders *part_headers;
145 GBytes *part_body;
146
147 content_type = soup_message_headers_get_content_type (headers, ¶ms);
148 if (!content_type)
149 return NULL;
150
151 boundary = g_hash_table_lookup (params, "boundary");
152 if (strncmp (content_type, "multipart/", 10) != 0 || !boundary) {
153 g_hash_table_destroy (params);
154 return NULL;
155 }
156
157 multipart = soup_multipart_new_internal (
158 g_strdup (content_type), g_strdup (boundary));
159 g_hash_table_destroy (params);
160
161 gsize body_size;
162 const char *body_data = g_bytes_get_data (body, &body_size);
163 body_end = body_data + body_size;
164 boundary = multipart->boundary;
165 boundary_len = strlen (boundary);
166
167 /* skip preamble */
168 start = find_boundary (body_data, body_end,
169 boundary, boundary_len);
170 if (!start) {
171 soup_multipart_free (multipart);
172 return NULL;
173 }
174
175 while (start[2 + boundary_len] != '-') {
176 end = find_boundary (start + 2 + boundary_len, body_end,
177 boundary, boundary_len);
178 if (!end) {
179 soup_multipart_free (multipart);
180 return NULL;
181 }
182
183 split = strstr (start, "\r\n\r\n");
184 if (!split || split > end) {
185 soup_multipart_free (multipart);
186 return NULL;
187 }
188 split += 4;
189
190 /* @start points to the start of the boundary line
191 * preceding this part, and @split points to the end
192 * of the headers / start of the body.
193 *
194 * We tell soup_headers_parse() to start parsing at
195 * @start, because it skips the first line of the
196 * input anyway (expecting it to be either a
197 * Request-Line or Status-Line).
198 */
199 part_headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
200 g_ptr_array_add (multipart->headers, part_headers);
201 if (!soup_headers_parse (start, split - 2 - start,
202 part_headers)) {
203 soup_multipart_free (multipart);
204 return NULL;
205 }
206
207 /* @split, as previously mentioned, points to the
208 * start of the body, and @end points to the start of
209 * the following boundary line, which is to say 2 bytes
210 * after the end of the body.
211 */
212 part_body = g_bytes_new_from_bytes (body, // FIXME
213 split - body_data,
214 end - 2 - split);
215 g_ptr_array_add (multipart->bodies, part_body);
216
217 start = end;
218 }
219
220 return multipart;
221 }
222
223 /**
224 * soup_multipart_get_length:
225 * @multipart: a #SoupMultipart
226 *
227 * Gets the number of body parts in @multipart
228 *
229 * Returns: the number of body parts in @multipart
230 *
231 **/
232 int
soup_multipart_get_length(SoupMultipart * multipart)233 soup_multipart_get_length (SoupMultipart *multipart)
234 {
235 return multipart->bodies->len;
236 }
237
238 /**
239 * soup_multipart_get_part:
240 * @multipart: a #SoupMultipart
241 * @part: the part number to get (counting from 0)
242 * @headers: (out) (transfer none): return location for the MIME part
243 * headers
244 * @body: (out) (transfer none): return location for the MIME part
245 * body
246 *
247 * Gets the indicated body part from @multipart.
248 *
249 * Returns: %TRUE on success, %FALSE if @part is out of range (in
250 * which case @headers and @body won't be set)
251 *
252 **/
253 gboolean
soup_multipart_get_part(SoupMultipart * multipart,int part,SoupMessageHeaders ** headers,GBytes ** body)254 soup_multipart_get_part (SoupMultipart *multipart, int part,
255 SoupMessageHeaders **headers, GBytes **body)
256 {
257 if (part < 0 || part >= multipart->bodies->len)
258 return FALSE;
259 *headers = multipart->headers->pdata[part];
260 *body = multipart->bodies->pdata[part];
261 return TRUE;
262 }
263
264 /**
265 * soup_multipart_append_part:
266 * @multipart: a #SoupMultipart
267 * @headers: the MIME part headers
268 * @body: the MIME part body
269 *
 * Adds a new MIME part to @multipart with the given headers and body.
 * (The multipart will make its own copy of @headers and take its own
 * reference to @body, so you should free your copies if you are not
 * using them for anything else.)
274 *
275 **/
276 void
soup_multipart_append_part(SoupMultipart * multipart,SoupMessageHeaders * headers,GBytes * body)277 soup_multipart_append_part (SoupMultipart *multipart,
278 SoupMessageHeaders *headers,
279 GBytes *body)
280 {
281 SoupMessageHeaders *headers_copy;
282 SoupMessageHeadersIter iter;
283 const char *name, *value;
284
285 /* Copying @headers is annoying, but the alternatives seem
286 * worse:
287 *
288 * 1) We don't want to use g_boxed_copy, because
289 * SoupMessageHeaders actually implements that as just a
290 * ref, which would be confusing since SoupMessageHeaders
291 * is mutable and the caller might modify @headers after
292 * appending it.
293 *
294 * 2) We can't change SoupMessageHeaders to not just do a ref
295 * from g_boxed_copy, because that would break language
296 * bindings (which need to be able to hold a ref on
297 * soup_message_get_request_headers (msg), but don't want
298 * to duplicate it).
299 *
300 * 3) We don't want to steal the reference to @headers,
301 * because then we'd have to either also steal the
302 * reference to @body (which would be inconsistent with
303 * other GBytes methods), or NOT steal the reference to
304 * @body, in which case there'd be inconsistency just
305 * between the two arguments of this method!
306 */
307 headers_copy = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
308 soup_message_headers_iter_init (&iter, headers);
309 while (soup_message_headers_iter_next (&iter, &name, &value))
310 soup_message_headers_append (headers_copy, name, value);
311
312 g_ptr_array_add (multipart->headers, headers_copy);
313 g_ptr_array_add (multipart->bodies, g_bytes_ref (body));
314 }
315
316 /**
317 * soup_multipart_append_form_string:
318 * @multipart: a multipart (presumably of type "multipart/form-data")
319 * @control_name: the name of the control associated with @data
320 * @data: the body data
321 *
322 * Adds a new MIME part containing @data to @multipart, using
323 * "Content-Disposition: form-data", as per the HTML forms
324 * specification.
325 *
326 **/
327 void
soup_multipart_append_form_string(SoupMultipart * multipart,const char * control_name,const char * data)328 soup_multipart_append_form_string (SoupMultipart *multipart,
329 const char *control_name, const char *data)
330 {
331 GBytes *body;
332
333 body = g_bytes_new (data, strlen (data));
334 soup_multipart_append_form_file (multipart, control_name,
335 NULL, NULL, body);
336 g_bytes_unref (body);
337 }
338
339 /**
340 * soup_multipart_append_form_file:
341 * @multipart: a multipart (presumably of type "multipart/form-data")
342 * @control_name: the name of the control associated with this file
343 * @filename: the name of the file, or %NULL if not known
344 * @content_type: the MIME type of the file, or %NULL if not known
345 * @body: the file data
346 *
347 * Adds a new MIME part containing @body to @multipart, using
348 * "Content-Disposition: form-data", as per the HTML forms
349 * specification.
350 *
351 **/
352 void
soup_multipart_append_form_file(SoupMultipart * multipart,const char * control_name,const char * filename,const char * content_type,GBytes * body)353 soup_multipart_append_form_file (SoupMultipart *multipart,
354 const char *control_name, const char *filename,
355 const char *content_type, GBytes *body)
356 {
357 SoupMessageHeaders *headers;
358 GString *disposition;
359
360 headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
361 disposition = g_string_new ("form-data; ");
362 soup_header_g_string_append_param_quoted (disposition, "name", control_name);
363 if (filename) {
364 g_string_append (disposition, "; ");
365 soup_header_g_string_append_param_quoted (disposition, "filename", filename);
366 }
367 soup_message_headers_append_common (headers, SOUP_HEADER_CONTENT_DISPOSITION,
368 disposition->str);
369 g_string_free (disposition, TRUE);
370
371 if (content_type) {
372 soup_message_headers_append_common (headers, SOUP_HEADER_CONTENT_TYPE,
373 content_type);
374 }
375
376 g_ptr_array_add (multipart->headers, headers);
377 g_ptr_array_add (multipart->bodies, g_bytes_ref (body));
378 }
379
380 /**
381 * soup_multipart_to_message:
382 * @multipart: a #SoupMultipart
383 * @dest_headers: the headers of the HTTP message to serialize @multipart to
384 * @dest_body: (out): the body of the HTTP message to serialize @multipart to
385 *
386 * Serializes @multipart to @dest_headers and @dest_body.
387 *
388 **/
389 void
soup_multipart_to_message(SoupMultipart * multipart,SoupMessageHeaders * dest_headers,GBytes ** dest_body)390 soup_multipart_to_message (SoupMultipart *multipart,
391 SoupMessageHeaders *dest_headers,
392 GBytes **dest_body)
393 {
394 SoupMessageHeaders *part_headers;
395 GBytes *part_body;
396 SoupMessageHeadersIter iter;
397 const char *name, *value;
398 GString *str;
399 GHashTable *params;
400 guint i;
401
402 params = g_hash_table_new (g_str_hash, g_str_equal);
403 g_hash_table_insert (params, "boundary", multipart->boundary);
404 soup_message_headers_set_content_type (dest_headers,
405 multipart->mime_type,
406 params);
407 g_hash_table_destroy (params);
408
409 str = g_string_new (NULL);
410
411 for (i = 0; i < multipart->bodies->len; i++) {
412 part_headers = multipart->headers->pdata[i];
413 part_body = multipart->bodies->pdata[i];
414
415 if (i > 0)
416 g_string_append (str, "\r\n");
417 g_string_append (str, "--");
418 g_string_append (str, multipart->boundary);
419 g_string_append (str, "\r\n");
420 soup_message_headers_iter_init (&iter, part_headers);
421 while (soup_message_headers_iter_next (&iter, &name, &value))
422 g_string_append_printf (str, "%s: %s\r\n", name, value);
423 g_string_append (str, "\r\n");
424 g_string_append_len (str,
425 g_bytes_get_data (part_body, NULL),
426 g_bytes_get_size (part_body));
427 }
428
429 g_string_append (str, "\r\n--");
430 g_string_append (str, multipart->boundary);
431 g_string_append (str, "--\r\n");
432
433 /* (The "\r\n" after the close-delimiter seems wrong according
434 * to my reading of RFCs 2046 and 2616, but that's what
435 * everyone else does.)
436 */
437
438 *dest_body = g_string_free_to_bytes (str);
439 }
440
441 /**
442 * soup_multipart_free:
443 * @multipart: a #SoupMultipart
444 *
445 * Frees @multipart
446 *
447 **/
448 void
soup_multipart_free(SoupMultipart * multipart)449 soup_multipart_free (SoupMultipart *multipart)
450 {
451 g_free (multipart->mime_type);
452 g_free (multipart->boundary);
453 g_ptr_array_free (multipart->headers, TRUE);
454 g_ptr_array_free (multipart->bodies, TRUE);
455
456 g_slice_free (SoupMultipart, multipart);
457 }
458
459 static SoupMultipart *
soup_multipart_copy(SoupMultipart * multipart)460 soup_multipart_copy (SoupMultipart *multipart)
461 {
462 SoupMultipart *copy;
463 guint i;
464
465 copy = soup_multipart_new_internal (g_strdup (multipart->mime_type),
466 g_strdup (multipart->boundary));
467 for (i = 0; i < multipart->bodies->len; i++) {
468 soup_multipart_append_part (copy,
469 multipart->headers->pdata[i],
470 multipart->bodies->pdata[i]);
471 }
472 return copy;
473 }
474
475 G_DEFINE_BOXED_TYPE (SoupMultipart, soup_multipart, soup_multipart_copy, soup_multipart_free)
476