1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 /*
3  * soup-multipart.c: multipart HTTP message bodies
4  *
5  * Copyright (C) 2008 Red Hat, Inc.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11 
12 #include <string.h>
13 
14 #include "soup-multipart.h"
15 #include "soup-headers.h"
16 #include "soup-message-headers-private.h"
17 #include "soup.h"
18 
19 /**
20  * SECTION:soup-multipart
21  * @short_description: multipart HTTP message bodies
22  * @see_also: #SoupMessageBody, #SoupMessageHeaders
23  *
24  * Functions to use multi-part HTTP messages.
25  **/
26 
27 /**
28  * SoupMultipart:
29  *
30  * Represents a multipart HTTP message body, parsed according to the
31  * syntax of RFC 2046. Of particular interest to HTTP are
32  * <literal>multipart/byte-ranges</literal> and
33  * <literal>multipart/form-data</literal>.
34  *
35  * Although the headers of a #SoupMultipart body part will contain the
36  * full headers from that body part, libsoup does not interpret them
37  * according to MIME rules. For example, each body part is assumed to
38  * have "binary" Content-Transfer-Encoding, even if its headers
39  * explicitly state otherwise. In other words, don't try to use
40  * #SoupMultipart for handling real MIME multiparts.
41  *
42  **/
43 
44 struct _SoupMultipart {
45 	char *mime_type, *boundary;
46 	GPtrArray *headers, *bodies;
47 };
48 
49 static SoupMultipart *
soup_multipart_new_internal(char * mime_type,char * boundary)50 soup_multipart_new_internal (char *mime_type, char *boundary)
51 {
52 	SoupMultipart *multipart;
53 
54 	multipart = g_slice_new (SoupMultipart);
55 	multipart->mime_type = mime_type;
56 	multipart->boundary = boundary;
57 	multipart->headers = g_ptr_array_new_with_free_func ((GDestroyNotify)soup_message_headers_unref);
58 	multipart->bodies = g_ptr_array_new_with_free_func ((GDestroyNotify)g_bytes_unref);
59 
60 	return multipart;
61 }
62 
63 static char *
generate_boundary(void)64 generate_boundary (void)
65 {
66 	guint32 data[2];
67 
68 	data[0] = g_random_int ();
69 	data[1] = g_random_int ();
70 
71 	/* The maximum boundary string length is 69 characters, and a
72 	 * stringified SHA256 checksum is 64 bytes long.
73 	 */
74 	return g_compute_checksum_for_data (G_CHECKSUM_SHA256,
75 					    (const guchar *)&data,
76 					    sizeof (data));
77 }
78 
79 /**
80  * soup_multipart_new:
81  * @mime_type: the MIME type of the multipart to create.
82  *
83  * Creates a new empty #SoupMultipart with a randomly-generated
84  * boundary string. Note that @mime_type must be the full MIME type,
85  * including "multipart/".
86  *
87  * Returns: a new empty #SoupMultipart of the given @mime_type
88  *
89  * See also: soup_message_new_from_multipart()
90  **/
91 SoupMultipart *
soup_multipart_new(const char * mime_type)92 soup_multipart_new (const char *mime_type)
93 {
94 	return soup_multipart_new_internal (g_strdup (mime_type),
95 					    generate_boundary ());
96 }
97 
/* Searches the byte range [@start, @end) for a multipart delimiter.
 *
 * A delimiter is "--" followed by @boundary (@boundary_len bytes),
 * located either at @start itself or immediately after a "\r\n", and
 * followed by either "--" (close-delimiter) or "\r\n".
 *
 * The data does not need to be NUL-terminated; nothing outside
 * [@start, @end) is ever read.
 *
 * Returns: a pointer to the first '-' of the delimiter, or %NULL if
 * the range contains none.
 */
static const char *
find_boundary (const char *start, const char *end,
	       const char *boundary, int boundary_len)
{
	const char *b;

	/* An empty or missing buffer cannot contain a boundary; bail out
	 * before handing a NULL pointer to memchr().
	 */
	if (!start || !end || start >= end)
		return NULL;

	/* The loop condition guarantees that at least
	 * boundary_len + 5 bytes remain at @b, so every index used
	 * below is inside the buffer. It is written as a pointer
	 * difference so that no pointer beyond @end is ever formed.
	 */
	for (b = memchr (start, '-', end - start);
	     b && end - b > boundary_len + 4;
	     b = memchr (b + 2, '-', end - (b + 2))) {
		/* Check for "--boundary" */
		if (b[1] != '-' ||
		    memcmp (b + 2, boundary, boundary_len) != 0)
			continue;

		/* Check that it's at start of line. The preceding
		 * "\r\n" must lie inside the searched range: when @b
		 * is less than two bytes past @start, looking at
		 * b[-2] would read before the buffer.
		 */
		if (!(b == start ||
		      (b - start >= 2 && b[-1] == '\n' && b[-2] == '\r')))
			continue;

		/* Check for "--" or "\r\n" after boundary */
		if ((b[boundary_len + 2] == '-' && b[boundary_len + 3] == '-') ||
		    (b[boundary_len + 2] == '\r' && b[boundary_len + 3] == '\n'))
			return b;
	}
	return NULL;
}
123 
124 /**
125  * soup_multipart_new_from_message:
126  * @headers: the headers of the HTTP message to parse
127  * @body: the body of the HTTP message to parse
128  *
129  * Parses @headers and @body to form a new #SoupMultipart
130  *
131  * Returns: (nullable): a new #SoupMultipart (or %NULL if the
132  * message couldn't be parsed or wasn't multipart).
133  *
134  **/
135 SoupMultipart *
soup_multipart_new_from_message(SoupMessageHeaders * headers,GBytes * body)136 soup_multipart_new_from_message (SoupMessageHeaders *headers,
137 				 GBytes             *body)
138 {
139 	SoupMultipart *multipart;
140 	const char *content_type, *boundary;
141 	GHashTable *params;
142 	int boundary_len;
143 	const char *start, *split, *end, *body_end;
144 	SoupMessageHeaders *part_headers;
145 	GBytes *part_body;
146 
147 	content_type = soup_message_headers_get_content_type (headers, &params);
148 	if (!content_type)
149 		return NULL;
150 
151 	boundary = g_hash_table_lookup (params, "boundary");
152 	if (strncmp (content_type, "multipart/", 10) != 0 || !boundary) {
153 		g_hash_table_destroy (params);
154 		return NULL;
155 	}
156 
157 	multipart = soup_multipart_new_internal (
158 		g_strdup (content_type), g_strdup (boundary));
159 	g_hash_table_destroy (params);
160 
161         gsize body_size;
162         const char *body_data = g_bytes_get_data (body, &body_size);
163 	body_end = body_data + body_size;
164 	boundary = multipart->boundary;
165 	boundary_len = strlen (boundary);
166 
167 	/* skip preamble */
168 	start = find_boundary (body_data, body_end,
169 			       boundary, boundary_len);
170 	if (!start) {
171 		soup_multipart_free (multipart);
172 		return NULL;
173 	}
174 
175 	while (start[2 + boundary_len] != '-') {
176 		end = find_boundary (start + 2 + boundary_len, body_end,
177 				     boundary, boundary_len);
178 		if (!end) {
179 			soup_multipart_free (multipart);
180 			return NULL;
181 		}
182 
183 		split = strstr (start, "\r\n\r\n");
184 		if (!split || split > end) {
185 			soup_multipart_free (multipart);
186 			return NULL;
187 		}
188 		split += 4;
189 
190 		/* @start points to the start of the boundary line
191 		 * preceding this part, and @split points to the end
192 		 * of the headers / start of the body.
193 		 *
194 		 * We tell soup_headers_parse() to start parsing at
195 		 * @start, because it skips the first line of the
196 		 * input anyway (expecting it to be either a
197 		 * Request-Line or Status-Line).
198 		 */
199 		part_headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
200 		g_ptr_array_add (multipart->headers, part_headers);
201 		if (!soup_headers_parse (start, split - 2 - start,
202 					 part_headers)) {
203 			soup_multipart_free (multipart);
204 			return NULL;
205 		}
206 
207 		/* @split, as previously mentioned, points to the
208 		 * start of the body, and @end points to the start of
209 		 * the following boundary line, which is to say 2 bytes
210 		 * after the end of the body.
211 		 */
212 		part_body = g_bytes_new_from_bytes (body, // FIXME
213 						    split - body_data,
214 						    end - 2 - split);
215 		g_ptr_array_add (multipart->bodies, part_body);
216 
217 		start = end;
218 	}
219 
220 	return multipart;
221 }
222 
223 /**
224  * soup_multipart_get_length:
225  * @multipart: a #SoupMultipart
226  *
227  * Gets the number of body parts in @multipart
228  *
229  * Returns: the number of body parts in @multipart
230  *
231  **/
232 int
soup_multipart_get_length(SoupMultipart * multipart)233 soup_multipart_get_length (SoupMultipart *multipart)
234 {
235 	return multipart->bodies->len;
236 }
237 
238 /**
239  * soup_multipart_get_part:
240  * @multipart: a #SoupMultipart
241  * @part: the part number to get (counting from 0)
242  * @headers: (out) (transfer none): return location for the MIME part
243  * headers
244  * @body: (out) (transfer none): return location for the MIME part
245  * body
246  *
247  * Gets the indicated body part from @multipart.
248  *
249  * Returns: %TRUE on success, %FALSE if @part is out of range (in
250  * which case @headers and @body won't be set)
251  *
252  **/
253 gboolean
soup_multipart_get_part(SoupMultipart * multipart,int part,SoupMessageHeaders ** headers,GBytes ** body)254 soup_multipart_get_part (SoupMultipart *multipart, int part,
255 			 SoupMessageHeaders **headers, GBytes **body)
256 {
257 	if (part < 0 || part >= multipart->bodies->len)
258 		return FALSE;
259 	*headers = multipart->headers->pdata[part];
260 	*body = multipart->bodies->pdata[part];
261 	return TRUE;
262 }
263 
264 /**
265  * soup_multipart_append_part:
266  * @multipart: a #SoupMultipart
267  * @headers: the MIME part headers
268  * @body: the MIME part body
269  *
 * Adds a new MIME part to @multipart with the given headers and body.
 * (The multipart makes its own copy of @headers and takes its own
 * reference on @body, so you should release yours if you are not
 * using them for anything else.)
274  *
275  **/
276 void
soup_multipart_append_part(SoupMultipart * multipart,SoupMessageHeaders * headers,GBytes * body)277 soup_multipart_append_part (SoupMultipart      *multipart,
278 			    SoupMessageHeaders *headers,
279 			    GBytes         *body)
280 {
281 	SoupMessageHeaders *headers_copy;
282 	SoupMessageHeadersIter iter;
283 	const char *name, *value;
284 
285 	/* Copying @headers is annoying, but the alternatives seem
286 	 * worse:
287 	 *
288 	 * 1) We don't want to use g_boxed_copy, because
289 	 *    SoupMessageHeaders actually implements that as just a
290 	 *    ref, which would be confusing since SoupMessageHeaders
291 	 *    is mutable and the caller might modify @headers after
292 	 *    appending it.
293 	 *
294 	 * 2) We can't change SoupMessageHeaders to not just do a ref
295 	 *    from g_boxed_copy, because that would break language
296 	 *    bindings (which need to be able to hold a ref on
297 	 *    soup_message_get_request_headers (msg), but don't want
298          *    to duplicate it).
299 	 *
300 	 * 3) We don't want to steal the reference to @headers,
301 	 *    because then we'd have to either also steal the
302 	 *    reference to @body (which would be inconsistent with
303 	 *    other GBytes methods), or NOT steal the reference to
304 	 *    @body, in which case there'd be inconsistency just
305 	 *    between the two arguments of this method!
306 	 */
307 	headers_copy = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
308 	soup_message_headers_iter_init (&iter, headers);
309 	while (soup_message_headers_iter_next (&iter, &name, &value))
310 		soup_message_headers_append (headers_copy, name, value);
311 
312 	g_ptr_array_add (multipart->headers, headers_copy);
313 	g_ptr_array_add (multipart->bodies, g_bytes_ref (body));
314 }
315 
316 /**
317  * soup_multipart_append_form_string:
318  * @multipart: a multipart (presumably of type "multipart/form-data")
319  * @control_name: the name of the control associated with @data
320  * @data: the body data
321  *
322  * Adds a new MIME part containing @data to @multipart, using
323  * "Content-Disposition: form-data", as per the HTML forms
324  * specification.
325  *
326  **/
327 void
soup_multipart_append_form_string(SoupMultipart * multipart,const char * control_name,const char * data)328 soup_multipart_append_form_string (SoupMultipart *multipart,
329 				   const char *control_name, const char *data)
330 {
331 	GBytes *body;
332 
333 	body = g_bytes_new (data, strlen (data));
334 	soup_multipart_append_form_file (multipart, control_name,
335 					 NULL, NULL, body);
336 	g_bytes_unref (body);
337 }
338 
339 /**
340  * soup_multipart_append_form_file:
341  * @multipart: a multipart (presumably of type "multipart/form-data")
342  * @control_name: the name of the control associated with this file
343  * @filename: the name of the file, or %NULL if not known
344  * @content_type: the MIME type of the file, or %NULL if not known
345  * @body: the file data
346  *
347  * Adds a new MIME part containing @body to @multipart, using
348  * "Content-Disposition: form-data", as per the HTML forms
349  * specification.
350  *
351  **/
352 void
soup_multipart_append_form_file(SoupMultipart * multipart,const char * control_name,const char * filename,const char * content_type,GBytes * body)353 soup_multipart_append_form_file (SoupMultipart *multipart,
354 				 const char *control_name, const char *filename,
355 				 const char *content_type, GBytes *body)
356 {
357 	SoupMessageHeaders *headers;
358 	GString *disposition;
359 
360 	headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
361 	disposition = g_string_new ("form-data; ");
362 	soup_header_g_string_append_param_quoted (disposition, "name", control_name);
363 	if (filename) {
364 		g_string_append (disposition, "; ");
365 		soup_header_g_string_append_param_quoted (disposition, "filename", filename);
366 	}
367 	soup_message_headers_append_common (headers, SOUP_HEADER_CONTENT_DISPOSITION,
368                                             disposition->str);
369 	g_string_free (disposition, TRUE);
370 
371 	if (content_type) {
372 		soup_message_headers_append_common (headers, SOUP_HEADER_CONTENT_TYPE,
373                                                     content_type);
374 	}
375 
376 	g_ptr_array_add (multipart->headers, headers);
377 	g_ptr_array_add (multipart->bodies, g_bytes_ref (body));
378 }
379 
380 /**
381  * soup_multipart_to_message:
382  * @multipart: a #SoupMultipart
383  * @dest_headers: the headers of the HTTP message to serialize @multipart to
384  * @dest_body: (out): the body of the HTTP message to serialize @multipart to
385  *
386  * Serializes @multipart to @dest_headers and @dest_body.
387  *
388  **/
389 void
soup_multipart_to_message(SoupMultipart * multipart,SoupMessageHeaders * dest_headers,GBytes ** dest_body)390 soup_multipart_to_message (SoupMultipart      *multipart,
391 			   SoupMessageHeaders *dest_headers,
392 			   GBytes            **dest_body)
393 {
394 	SoupMessageHeaders *part_headers;
395 	GBytes *part_body;
396 	SoupMessageHeadersIter iter;
397 	const char *name, *value;
398 	GString *str;
399 	GHashTable *params;
400 	guint i;
401 
402 	params = g_hash_table_new (g_str_hash, g_str_equal);
403 	g_hash_table_insert (params, "boundary", multipart->boundary);
404 	soup_message_headers_set_content_type (dest_headers,
405 					       multipart->mime_type,
406 					       params);
407 	g_hash_table_destroy (params);
408 
409 	str = g_string_new (NULL);
410 
411 	for (i = 0; i < multipart->bodies->len; i++) {
412 		part_headers = multipart->headers->pdata[i];
413 		part_body = multipart->bodies->pdata[i];
414 
415 		if (i > 0)
416 			g_string_append (str, "\r\n");
417 		g_string_append (str, "--");
418 		g_string_append (str, multipart->boundary);
419 		g_string_append (str, "\r\n");
420 		soup_message_headers_iter_init (&iter, part_headers);
421 		while (soup_message_headers_iter_next (&iter, &name, &value))
422 			g_string_append_printf (str, "%s: %s\r\n", name, value);
423 		g_string_append (str, "\r\n");
424 		g_string_append_len (str,
425 				     g_bytes_get_data (part_body, NULL),
426 				     g_bytes_get_size (part_body));
427 	}
428 
429 	g_string_append (str, "\r\n--");
430 	g_string_append (str, multipart->boundary);
431 	g_string_append (str, "--\r\n");
432 
433 	/* (The "\r\n" after the close-delimiter seems wrong according
434 	 * to my reading of RFCs 2046 and 2616, but that's what
435 	 * everyone else does.)
436 	 */
437 
438 	*dest_body = g_string_free_to_bytes (str);
439 }
440 
441 /**
442  * soup_multipart_free:
443  * @multipart: a #SoupMultipart
444  *
445  * Frees @multipart
446  *
447  **/
448 void
soup_multipart_free(SoupMultipart * multipart)449 soup_multipart_free (SoupMultipart *multipart)
450 {
451 	g_free (multipart->mime_type);
452 	g_free (multipart->boundary);
453 	g_ptr_array_free (multipart->headers, TRUE);
454 	g_ptr_array_free (multipart->bodies, TRUE);
455 
456 	g_slice_free (SoupMultipart, multipart);
457 }
458 
459 static SoupMultipart *
soup_multipart_copy(SoupMultipart * multipart)460 soup_multipart_copy (SoupMultipart *multipart)
461 {
462 	SoupMultipart *copy;
463 	guint i;
464 
465 	copy = soup_multipart_new_internal (g_strdup (multipart->mime_type),
466 					    g_strdup (multipart->boundary));
467 	for (i = 0; i < multipart->bodies->len; i++) {
468 		soup_multipart_append_part (copy,
469 					    multipart->headers->pdata[i],
470 					    multipart->bodies->pdata[i]);
471 	}
472 	return copy;
473 }
474 
/* Defines the boxed type for SoupMultipart, with soup_multipart_copy()
 * as its copy function and soup_multipart_free() as its free function.
 */
G_DEFINE_BOXED_TYPE (SoupMultipart, soup_multipart, soup_multipart_copy, soup_multipart_free)
476