1 /*
2  * Copyright (C) 2001-2003 FhG Fokus
3  *
4  * This file is part of Kamailio, a free SIP server.
5  *
6  * Kamailio is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version
10  *
11  * Kamailio is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  */
21 
22 /*! \file
23  * \brief Parser :: Content part
24  *
25  * \ingroup parser
26  */
27 
28 
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include "../mem/mem.h"
#include "../dprint.h"
#include "../str.h"
#include "../ut.h"
#include "parse_content.h"
38 
39 
/*! \brief
 * True if _c_ is accepted as part of a mime type/subtype name by this
 * parser: an alphanumeric char or one of  - + . _ ! % * ' ` ~
 * The char is converted to unsigned char before being handed to isalnum(),
 * since the ctype functions are undefined for negative values.
 * NOTE: _c_ is evaluated more than once - pass side-effect free expressions.
 */
#define is_mime_char(_c_) \
	(isalnum((unsigned char)(_c_)) || (_c_)=='-' || (_c_)=='+' \
			|| (_c_)=='.' || (_c_)=='_' \
			|| (_c_)=='!' || (_c_)=='%' || (_c_)=='*' \
			|| (_c_)=='\'' || (_c_)=='`' || (_c_)=='~')
/*! \brief
 * True (1) if _c_ matches the tree char _cs_; letters are compared after
 * forcing bit 0x20 (i.e. case-insensitively against a lower-case _cs_),
 * any other char must match exactly.
 * NOTE: _c_ and _cs_ are evaluated more than once.
 */
#define is_char_equal(_c_,_cs_) \
	( (isalpha((unsigned char)(_c_)) \
			?(((_c_)|0x20)==(_cs_)):((_c_)==(_cs_)))==1 )
46 
47 
/*! \brief
 * Node of the type's tree; this tree contains all the known types.
 *
 * The trees are stored as flat arrays: when a node matches and has
 * sub-nodes, the lookup continues at the following array element; when it
 * does not match, the lookup jumps to the array index stored in \c next.
 */
typedef struct type_node_s {
	char c;                      /*!< char contained by this node */
	unsigned char final;         /*!< says what mime type/subtype was detected
									 if the string ends at this node */
	unsigned char nr_sons;       /*!< the number of sub-nodes; the lookup code
									 only tests it for being non-zero */
	int next;                    /*!< array index of the next sibling node,
									 -1 if there is none */
}type_node_t;
58 
59 
60 static type_node_t type_tree[] = {
61 	{'t',TYPE_UNKNOWN,1,4}, /* 0 */
62 		{'e',TYPE_UNKNOWN,1,-1},
63 			{'x',TYPE_UNKNOWN,1,-1},
64 				{'t',TYPE_TEXT,0,-1},
65 	{'m',TYPE_UNKNOWN,2,19}, /* 4 */
66 		{'e',TYPE_UNKNOWN,1,11}, /* 5 */
67 			{'s',TYPE_UNKNOWN,1,-1},
68 				{'s',TYPE_UNKNOWN,1,-1},
69 					{'a',TYPE_UNKNOWN,1,-1},
70 						{'g',TYPE_UNKNOWN,1,-1},
71 							{'e',TYPE_MESSAGE,0,-1},
72 		{'u',TYPE_UNKNOWN,1,-1}, /* 11 */
73 			{'l',TYPE_UNKNOWN,1,-1},
74 				{'t',TYPE_UNKNOWN,1,-1},
75 					{'i',TYPE_UNKNOWN,1,-1},
76 						{'p',TYPE_UNKNOWN,1,-1},
77 							{'a',TYPE_UNKNOWN,1,-1},
78 								{'r',TYPE_UNKNOWN,1,-1},
79 									{'t',TYPE_MULTIPART,0,-1},
80 	{'a',TYPE_UNKNOWN,1,-1}, /* 19 */
81 		{'p',TYPE_UNKNOWN,1,-1},
82 			{'p',TYPE_UNKNOWN,1,-1},
83 				{'l',TYPE_UNKNOWN,1,-1},
84 					{'i',TYPE_UNKNOWN,1,-1},
85 						{'c',TYPE_UNKNOWN,1,-1},
86 							{'a',TYPE_UNKNOWN,1,-1},
87 								{'t',TYPE_UNKNOWN,1,-1},
88 									{'i',TYPE_UNKNOWN,1,-1},
89 										{'o',TYPE_UNKNOWN,1,-1},
90 											{'n',TYPE_APPLICATION,0,-1},
91 	};
92 
93 static type_node_t subtype_tree[] = {
94 	{'p',SUBTYPE_UNKNOWN,2,13},
95 		{'l',SUBTYPE_UNKNOWN,1,5},
96 			{'a',SUBTYPE_UNKNOWN,1,-1},
97 				{'i',SUBTYPE_UNKNOWN,1,-1},
98 					{'n',SUBTYPE_PLAIN,0,-1},
99 		{'i',SUBTYPE_UNKNOWN,1,-1}, /* 5 */
100 			{'d',SUBTYPE_UNKNOWN,1,-1},
101 				{'f',SUBTYPE_UNKNOWN,1,-1},
102 					{'+',TYPE_UNKNOWN,1,-1},
103 						{'x',TYPE_UNKNOWN,1,-1},
104 							{'m',TYPE_UNKNOWN,1,-1},
105 								{'l',SUBTYPE_PIDFXML,0,-1},
106 									{'l',SUBTYPE_PIDFXML,0,-1},
107 	{'s',SUBTYPE_UNKNOWN,1,16}, /* 13 */
108 		{'d',SUBTYPE_UNKNOWN,1,-1},
109 			{'p',SUBTYPE_SDP,0,-1},
110 	{'c',SUBTYPE_UNKNOWN,1,34}, /* 16 */
111 		{'p',SUBTYPE_UNKNOWN,2,-1},
112 			{'i',SUBTYPE_UNKNOWN,1,29},
113 				{'m',SUBTYPE_CPIM,1,-1},
114 					{'-',SUBTYPE_UNKNOWN,1,-1},
115 						{'p',SUBTYPE_UNKNOWN,1,-1},
116 							{'i',SUBTYPE_UNKNOWN,1,-1},
117 								{'d',SUBTYPE_UNKNOWN,1,-1},
118 									{'f',SUBTYPE_UNKNOWN,1,-1},
119 										{'+',SUBTYPE_UNKNOWN,1,-1},
120 											{'x',SUBTYPE_UNKNOWN,1,-1},
121 												{'m',SUBTYPE_UNKNOWN,1,-1},
122 													{'l',SUBTYPE_CPIM_PIDFXML,0,-1},
123 			{'l',SUBTYPE_UNKNOWN,1,-1}, /* 29 */
124 				{'+',TYPE_UNKNOWN,1,-1},
125 					{'x',TYPE_UNKNOWN,1,-1},
126 						{'m',TYPE_UNKNOWN,1,-1},
127 							{'l',SUBTYPE_CPLXML,0,-1},
128 	{'r',SUBTYPE_UNKNOWN,2,48}, /* 34 */
129 		{'l',SUBTYPE_UNKNOWN,1,42},/* 35 */
130 			{'m',SUBTYPE_UNKNOWN,1,-1},
131 				{'i',SUBTYPE_UNKNOWN,1,-1},
132 					{'+',TYPE_UNKNOWN,1,-1},
133 						{'x',TYPE_UNKNOWN,1,-1},
134 							{'m',TYPE_UNKNOWN,1,-1},
135 								{'l',SUBTYPE_RLMIXML,0,-1},
136 		{'e',SUBTYPE_UNKNOWN,1,-1}, /* 42 */
137 			{'l',SUBTYPE_UNKNOWN,1,-1},
138 				{'a',SUBTYPE_UNKNOWN,1,-1},
139 					{'t',SUBTYPE_UNKNOWN,1,-1},
140 						{'e',SUBTYPE_UNKNOWN,1,-1},
141 							{'d',SUBTYPE_RELATED,0,-1},
142 	{'l',SUBTYPE_UNKNOWN,1,57}, /* 48 */
143 		{'p',SUBTYPE_UNKNOWN,1,-1},
144 			{'i',SUBTYPE_UNKNOWN,1,-1},
145 				{'d',SUBTYPE_UNKNOWN,1,-1},
146 					{'f',SUBTYPE_UNKNOWN,1,-1},
147 						{'+',SUBTYPE_UNKNOWN,1,-1},
148 							{'x',SUBTYPE_UNKNOWN,1,-1},
149 								{'m',SUBTYPE_UNKNOWN,1,-1},
150 									{'l',SUBTYPE_LPIDFXML,0,-1},
151 	{'w',SUBTYPE_UNKNOWN,1,72}, /* 57 */
152 		{'a',SUBTYPE_UNKNOWN,1,-1},
153 			{'t',SUBTYPE_UNKNOWN,1,-1},
154 				{'c',SUBTYPE_UNKNOWN,1,-1},
155 					{'h',SUBTYPE_UNKNOWN,1,-1},
156 						{'e',SUBTYPE_UNKNOWN,1,-1},
157 							{'r',SUBTYPE_UNKNOWN,1,-1},
158 								{'i',TYPE_UNKNOWN,1,-1},
159 									{'n',TYPE_UNKNOWN,1,-1},
160 										{'f',TYPE_UNKNOWN,1,-1},
161 											{'o',TYPE_UNKNOWN,1,-1},
162 												{'+',TYPE_UNKNOWN,1,-1},
163 													{'x',TYPE_UNKNOWN,1,-1},
164 														{'m',TYPE_UNKNOWN,1,-1},
165 															{'l',SUBTYPE_WATCHERINFOXML,0,-1},
166 	{'x',SUBTYPE_UNKNOWN,2,94}, /* 72 */
167 		{'p',SUBTYPE_UNKNOWN,1,81}, /* 73 */
168 			{'i',SUBTYPE_UNKNOWN,1,-1},
169 				{'d',SUBTYPE_UNKNOWN,1,-1},
170 					{'f',SUBTYPE_UNKNOWN,1,-1},
171 						{'+',SUBTYPE_UNKNOWN,1,-1},
172 							{'x',SUBTYPE_UNKNOWN,1,-1},
173 								{'m',SUBTYPE_UNKNOWN,1,-1},
174 									{'l',SUBTYPE_XPIDFXML,0,-1},
175 		{'m',SUBTYPE_UNKNOWN,1,-1}, /* 81 */
176 			{'l',SUBTYPE_UNKNOWN,1,-1},
177 				{'+',SUBTYPE_UNKNOWN,1,-1},
178 					{'m',SUBTYPE_UNKNOWN,1,-1},
179 						{'s',SUBTYPE_UNKNOWN,1,-1},
180 							{'r',SUBTYPE_UNKNOWN,1,-1},
181 								{'t',SUBTYPE_UNKNOWN,1,-1},
182 									{'c',SUBTYPE_UNKNOWN,1,-1},
183 										{'.',SUBTYPE_UNKNOWN,1,-1},
184 											{'p',SUBTYPE_UNKNOWN,1,-1},
185 												{'i',SUBTYPE_UNKNOWN,1,-1},
186 													{'d',SUBTYPE_UNKNOWN,1,-1},
187 														{'f',SUBTYPE_XML_MSRTC_PIDF,0,-1},
188 	{'e',SUBTYPE_UNKNOWN,1,107}, /* 94 */
189 		{'x',SUBTYPE_UNKNOWN,1,-1},
190 			{'t',SUBTYPE_UNKNOWN,1,-1},
191 				{'e',SUBTYPE_UNKNOWN,1,-1},
192 					{'r',SUBTYPE_UNKNOWN,1,-1},
193 						{'n',SUBTYPE_UNKNOWN,1,-1},
194 							{'a',SUBTYPE_UNKNOWN,1,-1},
195 								{'l',SUBTYPE_UNKNOWN,1,-1},
196 									{'-',SUBTYPE_UNKNOWN,1,-1},
197 										{'b',SUBTYPE_UNKNOWN,1,-1},
198 											{'o',SUBTYPE_UNKNOWN,1,-1},
199 												{'d',SUBTYPE_UNKNOWN,1,-1},
200 													{'y',SUBTYPE_EXTERNAL_BODY,0,-1},
201 	{'m',SUBTYPE_UNKNOWN,1,112}, /* 107 */
202 		{'i',SUBTYPE_UNKNOWN,1,-1},
203 			{'x',SUBTYPE_UNKNOWN,1,-1},
204 				{'e',SUBTYPE_UNKNOWN,1,-1},
205 					{'d',SUBTYPE_MIXED,0,-1},
206 	{'i',SUBTYPE_UNKNOWN,1,-1}, /* 112 */
207 		{'s',SUBTYPE_UNKNOWN,1,-1},
208 			{'u',SUBTYPE_UNKNOWN,1,-1},
209 				{'p',SUBTYPE_ISUP,0,-1},
210 };
211 
212 
213 
/*! \brief
 * Parse the body of a Content-Length header.
 *
 * Skips the leading white space (space, tab, or a LF that is followed by a
 * space/tab, i.e. a folded line), reads a decimal number, skips the
 * trailing white space and requires the header to end with LF or CRLF.
 *
 * \param buffer start of the header body
 * \param end    first byte past the data to be parsed; it is never read
 * \param length receives the parsed value on success, untouched on error
 * \return pointer to the first char after the terminating LF on success,
 *         0 on error (no digits, value not fitting an int, garbage after
 *         the number, missing line terminator or data ending too early)
 */
char* parse_content_length(char* const buffer, const char* const end,
		int* const length)
{
	int number;
	int digit;
	char *p;
	int  size;

	p = buffer;
	/* search the beginning of the number; a LF is white space only if the
	 * next char (still inside the buffer) is a space or a tab */
	while ( p<end && (*p==' ' || *p=='\t' ||
	(*p=='\n' && p+1<end && (*(p+1)==' '||*(p+1)=='\t')) ))
		p++;
	if (p==end)
		goto error;
	/* parse the number */
	size = 0;
	number = 0;
	while (p<end && *p>='0' && *p<='9') {
		digit = *p - '0';
		/* number*10+digit would exceed INT_MAX: reject the header rather
		 * than run into signed overflow */
		if (number > (INT_MAX - digit)/10) {
			LM_ERR("content length value is too large\n");
			return 0;
		}
		number = number*10 + digit;
		size ++;
		p++;
	}
	if (p==end || size==0)
		goto error;
	/* now we should have only spaces at the end */
	while ( p<end && (*p==' ' || *p=='\t' ||
	(*p=='\n' && p+1<end && (*(p+1)==' '||*(p+1)=='\t')) ))
		p++;
	if (p==end)
		goto error;
	/* the header has to end with LF or CRLF; the LF of a CRLF pair is
	 * looked at only if it lies inside the buffer */
	if (*p=='\n') {
		p++;
	} else if (*p=='\r' && p+1<end && *(p+1)=='\n') {
		p += 2;
	} else {
		goto error;
	}

	*length = number;
	return p;
error:
	/* p may have reached end, in which case there is no char to report */
	if (p<end)
		LM_ERR("parse error near char [%d][%c]\n", *p, *p);
	else
		LM_ERR("parse error: unexpected end of buffer\n");
	return 0;
}
254 
255 
256 
decode_mime_type(char * const start,const char * const end,unsigned int * const mime_type)257 char* decode_mime_type(char* const start, const char* const end,
258 		unsigned int* const mime_type)
259 {
260 	int node;
261 	char *mark;
262 	char *p;
263 	unsigned int type_candidate;
264 
265 	p = start;
266 
267 	/* search the begining of the type */
268 	while ( p<end && (*p==' ' || *p=='\t' ||
269 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
270 		p++;
271 	if (p==end)
272 		goto error;
273 
274 	/* parse the type */
275 	if (*p=='*') {
276 		*mime_type = TYPE_ALL<<16;
277 		p++;
278 	} else {
279 		node = 0;
280 		mark = p;
281 		type_candidate = TYPE_UNKNOWN;
282 		while (p<end && is_mime_char(*p)  ) {
283 			while ( node!=-1 && !is_char_equal(*p,type_tree[node].c) ) {
284 				node = type_tree[node].next;
285 			}
286 			if (node!=-1) {
287 				type_candidate = type_tree[node].final;
288 				if (type_tree[node].nr_sons)
289 					node++;
290 				else
291 					node = -1;
292 			} else {
293 				/* end of the type tree has reached,
294 				but the type has still some remaining
295 				characters (Miklos) */
296 				type_candidate = TYPE_UNKNOWN;
297 			}
298 			p++;
299 		}
300 		if (p==end || mark==p)
301 			goto error;
302 		*mime_type = type_candidate<<16;
303 	}
304 
305 	/* search the '/' separator */
306 	while ( p<end && (*p==' ' || *p=='\t' ||
307 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
308 		p++;
309 	if ( p==end || *(p++)!='/')
310 		goto error;
311 
312 	/* search the begining of the sub-type */
313 	while ( p<end && (*p==' ' || *p=='\t' ||
314 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
315 		p++;
316 	if (p==end)
317 		goto error;
318 
319 	/* parse the sub-type */
320 	if (*p=='*') {
321 		*mime_type |= SUBTYPE_ALL;
322 		p++;
323 	} else {
324 		node = 0;
325 		mark = p;
326 		type_candidate = SUBTYPE_UNKNOWN;
327 		while (p<end && is_mime_char(*p) ) {
328 			while(node!=-1 && !is_char_equal(*p,subtype_tree[node].c) )
329 				node = subtype_tree[node].next;
330 			if (node!=-1) {
331 				type_candidate = subtype_tree[node].final;
332 				if (subtype_tree[node].nr_sons)
333 					node++;
334 				else
335 					node = -1;
336 			} else {
337 				/* end of the subtype tree has reached,
338 				but the subtype has still some remaining
339 				characters (Miklos) */
340 				type_candidate = SUBTYPE_UNKNOWN;
341 			}
342 			p++;
343 		}
344 		if (p==mark)
345 			goto error;
346 		*mime_type |= type_candidate;;
347 	}
348 
349 	/* now its possible to have some spaces */
350 	while ( p<end && (*p==' ' || *p=='\t' ||
351 	(*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
352 		p++;
353 
354 	/* if there are params, ignore them!! -> eat everything to
355 	 * the end or to the first ',' */
356 	if ( p<end && *p==';' )
357 		for(p++; p<end && *p!=','; p++);
358 
359 	/* is this the correct end? */
360 	if (p!=end && *p!=',' )
361 		goto error;
362 
363 	/* check the format of the decoded mime */
364 	if ((*mime_type)>>16==TYPE_ALL && ((*mime_type)&0x00ff)!=SUBTYPE_ALL) {
365 		LM_ERR("invalid mime format found "
366 			" <*/submime> in [%.*s]!!\n", (int)(end-start), start);
367 		return 0;
368 	}
369 
370 	return p;
371 error:
372 	LM_ERR("parse error near in [%.*s] char"
373 		"[%d][%c] offset=%d\n", (int)(end-start),start,*p,*p,(int)(p-start));
374 	return 0;
375 }
376 
377 
378 
379 /*! \brief
380  * \return
381  *  	-   > 0 mime found
382  *      -   = 0 hdr not found
383  *      -   =-1 error */
parse_content_type_hdr(struct sip_msg * const msg)384 int parse_content_type_hdr(struct sip_msg* const msg)
385 {
386 	char *end;
387 	const char *ret;
388 	unsigned int  mime;
389 
390 	/* is the header already found? */
391 	if ( msg->content_type==0 ) {
392 		/* if not, found it */
393 		if ( parse_headers(msg, HDR_CONTENTTYPE_F, 0)==-1)
394 			goto error;
395 		if ( msg->content_type==0 ) {
396 			LM_DBG("missing Content-Type header\n");
397 			return 0;
398 		}
399 	}
400 
401 	/* maybe the header is already parsed! */
402 	if ( msg->content_type->parsed!=0)
403 		return get_content_type(msg);
404 
405 	/* it seams we have to parse it! :-( */
406 	end = msg->content_type->body.s + msg->content_type->body.len;
407 	ret = decode_mime_type(msg->content_type->body.s, end , &mime);
408 	if (ret==0)
409 		goto error;
410 	if (ret!=end) {
411 		LM_ERR("Content-Type hdr contains more then one mime type!\n");
412 		goto error;
413 	}
414 	if ((mime&0x00ff)==SUBTYPE_ALL || (mime>>16)==TYPE_ALL) {
415 		LM_ERR("invalid mime with wildcard '*' in Content-Type hdr!\n");
416 		goto error;
417 	}
418 
419 	msg->content_type->parsed = (void*)(unsigned long)mime;
420 	return mime;
421 
422 error:
423 	return -1;
424 }
425 
parse_accept_body(struct hdr_field * const hdr)426 int parse_accept_body(struct hdr_field* const hdr)
427 {
428 	static unsigned int mimes[MAX_MIMES_NR];
429 	int nr_mimes;
430 	unsigned int mime;
431 	char *end;
432 	char *ret;
433 
434 	if (!hdr) return -1;
435 
436 	/* maybe the header is already parsed! */
437 	if (hdr->parsed!=0) return 1;
438 
439 	/* it seams we have to parse it! :-( */
440 	ret = hdr->body.s;
441 	end = ret + hdr->body.len;
442 	nr_mimes = 0;
443 	while (1){
444 		ret = decode_mime_type(ret, end , &mime);
445 		if (ret==0)
446 			goto error;
447 		/* a new mime was found  -> put it into array */
448 		if (nr_mimes==MAX_MIMES_NR) {
449 			LM_ERR("Accept hdr contains more than %d mime type"
450 					" -> buffer overflow!!\n", MAX_MIMES_NR);
451 			goto error;
452 		}
453 		mimes[nr_mimes++] = mime;
454 		/* is another mime following? */
455 		if (ret==end )
456 			break;
457 		/* parse the mime separator ',' */
458 		if (*ret!=',' || ret+1==end) {
459 			LM_ERR("parse error between mimes at "
460 				"char <%x> (offset=%d) in <%.*s>!\n",
461 				*ret, (int)(ret-hdr->body.s),
462 				hdr->body.len, hdr->body.s);
463 			goto error;
464 		}
465 		/* skip the ',' */
466 		ret++;
467 	}
468 
469 	/* copy and link the mime buffer into the message */
470 	hdr->parsed = (void*)pkg_malloc((nr_mimes+1)*sizeof(int));
471 	if (hdr->parsed==0) {
472 		PKG_MEM_ERROR;
473 		goto error;
474 	}
475 	memcpy(hdr->parsed,mimes,nr_mimes*sizeof(int));
476 	/* make the buffer null terminated */
477 	((int*)hdr->parsed)[nr_mimes] = 0;
478 
479 	return 1;
480 error:
481 	return -1;
482 }
483 
484 /*! \brief
485  * returns: > 0 ok
486  *          = 0 hdr not found
487  *          = -1 error */
parse_accept_hdr(struct sip_msg * const msg)488 int parse_accept_hdr(struct sip_msg* const msg)
489 {
490 	static unsigned int mimes[MAX_MIMES_NR];
491 	int nr_mimes;
492 	unsigned int mime;
493 	char *end;
494 	char *ret;
495 
496 	/* is the header already found? */
497 	if ( msg->accept==0 ) {
498 		/* if not, found it */
499 		if ( parse_headers(msg, HDR_ACCEPT_F, 0)==-1)
500 			goto error;
501 		if ( msg->accept==0 ) {
502 			LM_DBG("missing Accept header\n");
503 			return 0;
504 		}
505 	}
506 
507 	/* maybe the header is already parsed! */
508 	if ( msg->accept->parsed!=0)
509 		return 1;
510 
511 	/* it seams we have to parse it! :-( */
512 	ret = msg->accept->body.s;
513 	end = ret + msg->accept->body.len;
514 	nr_mimes = 0;
515 	while (1){
516 		ret = decode_mime_type(ret, end , &mime);
517 		if (ret==0)
518 			goto error;
519 		/* a new mime was found  -> put it into array */
520 		if (nr_mimes==MAX_MIMES_NR) {
521 			LM_ERR("Accept hdr contains more than"
522 				" %d mime type -> buffer overflow!!\n",MAX_MIMES_NR);
523 			goto error;
524 		}
525 		mimes[nr_mimes++] = mime;
526 		/* is another mime following? */
527 		if (ret==end )
528 			break;
529 		/* parse the mime separator ',' */
530 		if (*ret!=',' || ret+1==end) {
531 			LM_ERR("parse error between mimes at "
532 				"char <%x> (offset=%d) in <%.*s>!\n",
533 				*ret, (int)(ret-msg->accept->body.s),
534 				msg->accept->body.len, msg->accept->body.s);
535 			goto error;
536 		}
537 		/* skip the ',' */
538 		ret++;
539 	}
540 
541 	/* copy and link the mime buffer into the message */
542 	msg->accept->parsed = (void*)pkg_malloc((nr_mimes+1)*sizeof(int));
543 	if (msg->accept->parsed==0) {
544 		PKG_MEM_ERROR;
545 		goto error;
546 	}
547 	memcpy(msg->accept->parsed,mimes,nr_mimes*sizeof(int));
548 	/* make the buffer null terminated */
549 	((int*)msg->accept->parsed)[nr_mimes] = 0;
550 
551 	return 1;
552 error:
553 	return -1;
554 }
555