1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2014 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  *    if any, must include the following acknowledgment:
22  *       "This product includes software developed by the
23  *        Kannel Group (http://www.kannel.org/)."
24  *    Alternately, this acknowledgment may appear in the software itself,
25  *    if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  *    endorse or promote products derived from this software without
29  *    prior written permission. For written permission, please
30  *    contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  *    nor may "Kannel" appear in their name, without prior written
34  *    permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group.  For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  * http.h - HTTP protocol implementation
59  *
60  * This header file defines the interface to the HTTP implementation
61  * in Kannel.
62  *
63  * We implement both the client and the server side of the protocol.
64  * We don't implement HTTP completely - only those parts that Kannel needs.
65  * You may or may not be able to use this code for other projects. It has
66  * not been a goal, but it might be possible, though you do need other
67  * parts of Kannel's gwlib as well.
68  *
69  * Initialization
70  * ==============
71  *
72  * The library MUST be initialized by a call to http_init. Failure to
73  * initialize means the library WILL NOT work. Note that the library
74  * can't initialize itself implicitly, because it cannot reliably
75  * create a mutex to protect the initialization. Therefore, it is the
76  * caller's responsibility to call http_init exactly once (no more, no
77  * less) at the beginning of the process, before any other thread makes
78  * any calls to the library.
79  *
80  * Client functionality
81  * ====================
82  *
83  * The library will invisibly keep the connections to HTTP servers open,
84  * so that it is possible to make several HTTP requests over a single
85  * TCP connection. This makes it much more efficient in high-load situations.
86  * On the other hand, if one request takes long, the library will still
87  * use several connections to the same server anyway.
88  *
89  * The library user can specify an HTTP proxy to be used. There can be only
90  * one proxy at a time, but it is possible to specify a list of hosts for
91  * which the proxy is not used. The proxy can be changed at run time.
92  *
93  * Server functionality
94  * ====================
95  *
96  * The library allows the implementation of an HTTP server by having
97  * functions to specify which ports should be open, and receiving requests
98  * from those ports.
99  *
100  * Header manipulation
101  * ===================
102  *
103  * The library additionally has some functions for manipulating lists of
104  * headers. These take a `List' (see gwlib/list.h) of Octstr's. The list
105  * represents a list of headers in an HTTP request or reply. The functions
106  * manipulate the list by adding and removing headers by name. It is a
107  * very bad idea to manipulate the list without using the header
108  * manipulation functions, however.
109  *
110  * Basic Authentication
111  * ====================
112  *
 * Basic Authentication is the standard way for a client to authenticate
 * itself to a server. It is done by adding an "Authorization" header
 * to the request. Apart from the http_add_basic_auth helper, the interface
 * in this header therefore doesn't mention it; the client and the server
 * can do it by setting and checking the headers using the generic
 * functions provided.
118  *
119  * Acknowledgements
120  * ================
121  *
122  * Design: Lars Wirzenius, Richard Braakman
123  * Implementation: Lars Wirzenius
124  */
125 
126 
127 #ifndef HTTP_H
128 #define HTTP_H
129 
130 #include "gwlib/list.h"
131 #include "gwlib/octstr.h"
132 
133 
/*
 * Well-known status codes returned by HTTP servers. This is the complete
 * list of the status codes defined in RFC 2616, section 10.
 *
 * HTTP_NON_AUTHORATIVE_INFORMATION is a historical misspelling that is
 * kept so that existing code keeps compiling; new code should use the
 * correctly spelled HTTP_NON_AUTHORITATIVE_INFORMATION, which has the
 * same value.
 */

enum {
    HTTP_CONTINUE                           = 100,
    HTTP_SWITCHING_PROTOCOLS                = 101,
    HTTP_OK                                 = 200,
    HTTP_CREATED                            = 201,
    HTTP_ACCEPTED                           = 202,
    HTTP_NON_AUTHORATIVE_INFORMATION        = 203,
    /* correctly spelled alias of the constant above */
    HTTP_NON_AUTHORITATIVE_INFORMATION      = 203,
    HTTP_NO_CONTENT                         = 204,
    HTTP_RESET_CONTENT                      = 205,
    HTTP_PARTIAL_CONTENT                    = 206,
    HTTP_MULTIPLE_CHOICES                   = 300,
    HTTP_MOVED_PERMANENTLY                  = 301,
    HTTP_FOUND                              = 302,
    HTTP_SEE_OTHER                          = 303,
    HTTP_NOT_MODIFIED                       = 304,
    HTTP_USE_PROXY                          = 305,
    /* HTTP 306 is not used and reserved */
    HTTP_TEMPORARY_REDIRECT                 = 307,
    HTTP_BAD_REQUEST                        = 400,
    HTTP_UNAUTHORIZED                       = 401,
    HTTP_PAYMENT_REQUIRED                   = 402,
    HTTP_FORBIDDEN                          = 403,
    HTTP_NOT_FOUND                          = 404,
    HTTP_BAD_METHOD                         = 405,
    HTTP_NOT_ACCEPTABLE                     = 406,
    HTTP_PROXY_AUTHENTICATION_REQUIRED      = 407,
    HTTP_REQUEST_TIMEOUT                    = 408,
    HTTP_CONFLICT                           = 409,
    HTTP_GONE                               = 410,
    HTTP_LENGTH_REQUIRED                    = 411,
    HTTP_PRECONDITION_FAILED                = 412,
    HTTP_REQUEST_ENTITY_TOO_LARGE           = 413,
    HTTP_REQUEST_URI_TOO_LARGE              = 414,
    HTTP_UNSUPPORTED_MEDIA_TYPE             = 415,
    HTTP_REQUESTED_RANGE_NOT_SATISFIABLE    = 416,
    HTTP_EXPECTATION_FAILED                 = 417,
    HTTP_INTERNAL_SERVER_ERROR              = 500,
    HTTP_NOT_IMPLEMENTED                    = 501,
    HTTP_BAD_GATEWAY                        = 502,
    HTTP_SERVICE_UNAVAILABLE                = 503,
    HTTP_GATEWAY_TIMEOUT                    = 504,
    HTTP_HTTP_VERSION_NOT_SUPPORTED         = 505
};
182 
/*
 * Classes into which the HTTP status codes are grouped; this is what
 * the http_status_class() function returns for a given status code.
 */

enum {
    HTTP_STATUS_UNKNOWN         = 0,
    HTTP_STATUS_PROVISIONAL     = 100,
    HTTP_STATUS_SUCCESSFUL      = 200,
    HTTP_STATUS_REDIRECTION     = 300,
    HTTP_STATUS_CLIENT_ERROR    = 400,
    HTTP_STATUS_SERVER_ERROR    = 500
};
196 
197 
/*
 * The HTTP methods supported by this HTTP library. These are the
 * HTTP_METHOD_xxx codes that http_name2method() returns.
 */
enum {
    HTTP_METHOD_GET     = 1,
    HTTP_METHOD_POST    = 2,
    HTTP_METHOD_HEAD    = 3
};
207 
208 /*
209  * A structure describing a CGI-BIN argument/variable.
210  */
211 typedef struct {
212 	Octstr *name;
213 	Octstr *value;
214 } HTTPCGIVar;
215 
216 
/*
 * Initialize the HTTP library. No other function declared in this
 * header file may be called before this one has been called.
 */
void http_init(void);
222 
223 
/*
 * Shut down the HTTP library. This MUST be called once no other function
 * declared in this header file is going to be called anymore.
 */
void http_shutdown(void);
229 
230 
231 /***********************************************************************
232  * HTTP URL parsing.
233  */
234 
235 /*
236  * A structure describing a full URL with it's components.
237  */
238 typedef struct {
239     Octstr *url;
240     Octstr *scheme;
241     Octstr *host;
242     unsigned long port;
243     Octstr *user;
244     Octstr *pass;
245     Octstr *path;
246     Octstr *query;
247     Octstr *fragment;
248 } HTTPURLParse;
249 
250 /*
251  * Create an URL parsing structure.
252  */
253 HTTPURLParse *http_urlparse_create(void);
254 
255 /*
256  * Destroy an URL parsing structure.
257  */
258 void http_urlparse_destroy(HTTPURLParse *p);
259 
260 /*
261  * Parse the given URL and return a parsed struct containing all
262  * parsed components. If parsing failed, returns NULL.
263  */
264 HTTPURLParse *parse_url(Octstr *url);
265 
266 /*
267  * Dump the parsed struct to debug log level.
268  */
269 void parse_dump(HTTPURLParse *p);
270 
271 
272 /***********************************************************************
273  * HTTP proxy interface.
274  */
275 
276 
277 /*
278  * Functions for controlling proxy use. http_use_proxy sets the proxy to
279  * use; if another proxy was already in use, it is closed and forgotten
280  * about as soon as all existing requests via it have been served.
281  *
282  * http_close_proxy closes the current proxy connection, after any
283  * pending requests have been served.
284  */
285 void http_use_proxy(Octstr *hostname, int port, int ssl, List *exceptions,
286     	    	    Octstr *username, Octstr *password, Octstr *exceptions_regex);
287 void http_close_proxy(void);
288 
289 
290 /***********************************************************************
291  * HTTP client interface.
292  */
293 
294 /*
295  * Define interface from which all http requestes will be served
296  */
297 void http_set_interface(const Octstr *our_host);
298 
/**
 * Define the timeout, in seconds, for which the HTTP client will wait
 * for a response. Set it to -1 to disable timeouts.
 */
void http_set_client_timeout(long timeout);
304 
305 /*
306  * Functions for doing a GET request. The difference is that _real follows
307  * redirections, plain http_get does not. Return value is the status
308  * code of the request as a numeric value, or -1 if a response from the
309  * server was not received. If return value is not -1, reply_headers and
310  * reply_body are set and MUST be destroyed by caller.
311  *
312  * XXX these are going away in the future
313  */
314 int http_get_real(int method, Octstr *url, List *request_headers,
315                   Octstr **final_url, List **reply_headers,
316                   Octstr **reply_body);
317 
318 /*
319  * An identification for a caller of HTTP. This is used with
320  * http_start_request, and http_receive_result to route results to the right
321  * callers.
322  *
323  * Implementation note: We use a List as the type so that we can use
324  * that list for communicating the results. This makes it unnecessary
325  * to map the caller identifier to a List internally in the HTTP module.
326  */
327 typedef List HTTPCaller;
328 
329 
330 /*
331  * Create an HTTP caller identifier.
332  */
333 HTTPCaller *http_caller_create(void);
334 
335 
336 /*
337  * Destroy an HTTP caller identifier. Those that aren't destroyed
338  * explicitly are destroyed by http_shutdown.
339  */
340 void http_caller_destroy(HTTPCaller *caller);
341 
342 
343 /*
344  * Signal to a caller (presumably waiting in http_receive_result) that
345  * we're entering shutdown phase. This will make http_receive_result
346  * no longer block if the queue is empty.
347  */
348 void http_caller_signal_shutdown(HTTPCaller *caller);
349 
350 
351 /*
352  * Start an HTTP request. It will be completed in the background, and
353  * the result will eventually be received by http_receive_result.
354  * http_receive_result will return the id parameter passed to this function,
355  * and the caller can use this to keep track of which request and which
356  * response belong together. If id is NULL, it is changed to a non-null
357  * value (NULL replies from http_receive_result are reserved for cases
358  * when it doesn't return a reply).
359  *
360  * If `body' is NULL, it is a GET request, otherwise as POST request.
361  * If `follow' is true, HTTP redirections are followed, otherwise not.
362  *
363  * 'certkeyfile' defines a filename where openssl looks for a PEM-encoded
364  * certificate and a private key, if openssl is compiled in and an https
365  * URL is used. It can be NULL, in which case none is used and thus there
366  * is no ssl authentication, unless you have set a global one with
367  * use_global_certkey_file() from conn.c.
368  */
369 void http_start_request(HTTPCaller *caller, int method, Octstr *url,
370                         List *headers, Octstr *body, int follow, void *id,
371     	    	    	Octstr *certkeyfile);
372 
373 
374 /*
375  * Get the result of a GET or a POST request. Returns either the id pointer
376  * (the one passed to http_start request if non-NULL) or NULL if
377  * http_caller_signal_shutdown has been called and there are no queued results.
378  */
379 void *http_receive_result_real(HTTPCaller *caller, int *status, Octstr **final_url,
380     	    	    	 List **headers, Octstr **body, int blocking);
381 
/*
 * Old compatibility mode: always blocking, i.e. http_receive_result_real
 * is called with 1 as its `blocking' argument.
 */
#define http_receive_result(caller, status, final_url, headers, body) \
    http_receive_result_real((caller), (status), (final_url), (headers), \
                             (body), 1)
385 
386 /***********************************************************************
387  * HTTP server interface.
388  */
389 
390 
/*
 * Represents an HTTP client that has connected to the server we
 * implement. It is used for routing the responses correctly.
 */
typedef struct HTTPClient HTTPClient;
396 
397 
/**
 * Define the timeout, in seconds, for which the HTTP server will wait
 * for a request. Set it to -1 to disable timeouts.
 * NOTE(review): presumably the timeout applies to the server opened at
 * `port' - confirm against the implementation.
 */
void http_set_server_timeout(int port, long timeout);
403 
/*
 * Open an HTTP server at the given port. Returns 0 for OK and -1 for
 * errors (invalid port number, etc). This also starts a background
 * thread that listens for connections to that port and reads the
 * requests from them. The second, boolean argument indicates whether the
 * HTTP server should be started for SSL-enabled connections.
 */
int http_open_port(int port, int ssl);
412 
413 
414 /*
415  * Same as above, but bind to a specific interface.
416  */
417 int http_open_port_if(int port, int ssl, Octstr *interface);
418 
419 
420 /*
421  * Accept a request from a client to the specified open port. Return NULL
422  * if the port is closed, otherwise a pointer to a client descriptor.
423  * Return the IP number (as a string) and other related information about
424  * the request via arguments if function return value is non-NULL. The
425  * caller is responsible for destroying the values returned via arguments,
426  * the caller descriptor is destroyed by http_send_reply.
427  *
428  * The requests are actually read by a background thread handled by the
429  * HTTP implementation, so it is not necessary by the HTTP user to have
430  * many threads to be fast. The HTTP user should use a single thread,
431  * unless requests can block.
432  */
433 HTTPClient *http_accept_request(int port, Octstr **client_ip,
434     	    	    	    	Octstr **url, List **headers, Octstr **body,
435 				List **cgivars);
436 
437 
438 /*
439  * Send a reply to a previously accepted request. The caller is responsible
440  * for destroying the headers and body after the call to http_send_reply
441  * finishes. This allows using them in several replies in an efficient way.
442  */
443 void http_send_reply(HTTPClient *client, int status, List *headers,
444     	    	     Octstr *body);
445 
446 
447 /*
448  * Don't send a reply to a previously accepted request, but only close
449  * the connection to the client. This can be used to reject requests from
450  * clients that are not authorized to access us.
451  */
452 void http_close_client(HTTPClient *client);
453 
454 
/*
 * Close a currently open port and stop the corresponding background
 * threads.
 */
void http_close_port(int port);
459 
460 
/*
 * Close every currently open port and stop the background threads.
 */
void http_close_all_ports(void);
465 
466 
467 /*
468  * Destroy a list of HTTPCGIVar objects.
469  */
470 void http_destroy_cgiargs(List *args);
471 
472 
473 /*
474  * Return reference to CGI argument 'name', or NULL if not matching.
475  */
476 Octstr *http_cgi_variable(List *list, char *name);
477 
478 /*
479  * Return METHOD used by client
480  */
481 int http_method(HTTPClient *client);
482 
483 /*
484  * Return URL used by client
485  */
486 Octstr *http_request_url(HTTPClient *client);
487 
488 
489 /***********************************************************************
490  * HTTP header interface.
491  */
492 
493 
494 /*
495  * Functions for manipulating a list of headers. You can use a list of
496  * headers returned by one of the functions above, or create an empty
497  * list with http_create_empty_headers. Use http_destroy_headers to
498  * destroy a list of headers (not just the list, but the headers
499  * themselves). You can also use http_parse_header_string to create a list:
500  * it takes a textual representation of headers as an Octstr and returns
501  * the corresponding List. http_generate_header_string goes the other
502  * way.
503  *
504  * Once you have a list of headers, you can use http_header_add and the
505  * other functions to manipulate it.
506  */
507 List *http_create_empty_headers(void);
508 void http_destroy_headers(List *headers);
509 void http_header_add(List *headers, char *name, char *contents);
510 void http_header_get(List *headers, long i, Octstr **name, Octstr **value);
511 List *http_header_duplicate(List *headers);
512 void http_header_pack(List *headers);
513 void http_append_headers(List *to, List *from);
514 Octstr *http_header_value(List *headers, Octstr *header);
515 
516 
517 /*
518  * Append all headers from new_headers to old_headers.  Headers from
519  * new_headers _replace_ the ones in old_headers if they have the same
520  * name.  For example, if you have:
521  * old_headers
522  *    Accept: text/html
523  *    Accept: text/plain
524  *    Accept: image/jpeg
525  *    Accept-Language: en
526  * new_headers
527  *    Accept: text/html
528  *    Accept: text/plain
529  * then after the operation, old_headers will have
530  *    Accept-Language: en
531  *    Accept: text/html
532  *    Accept: text/plain
533  */
534 void http_header_combine(List *old_headers, List *new_headers);
535 
536 /*
537  * Return the length of the quoted-string (a HTTP field element)
538  * starting at position pos in the header.  Return -1 if there
539  * is no quoted-string at that position.
540  */
541 long http_header_quoted_string_len(Octstr *header, long pos);
542 
543 
544 /*
545  * Take the value part of a header that has a format that allows
546  * multiple comma-separated elements, and split it into a list of
547  * those elements.  Note that the function may have surprising
548  * results for values of headers that are not in this format.
549  */
550 List *http_header_split_value(Octstr *value);
551 
552 
553 /*
554  * The same as http_header_split_value, except that it splits
555  * headers containing 'credentials' or 'challenge' lists, which
556  * have a slightly different format.  It also normalizes the list
557  * elements, so that parameters are introduced with ';'.
558  */
559 List *http_header_split_auth_value(Octstr *value);
560 
561 
562 /*
563  * Remove all headers with name 'name' from the list.  Return the
564  * number of headers removed.
565  */
566 long http_header_remove_all(List *headers, char *name);
567 
568 
569 /*
570  * Remove the hop-by-hop headers from a header list.  These are the
571  * headers that describe a specific connection, not anything about
572  * the content.  RFC2616 section 13.5.1 defines these.
573  */
574 void http_remove_hop_headers(List *headers);
575 
576 
577 /*
578  * Update the headers to reflect that a transformation has been
579  * applied to the entity body.
580  */
581 void http_header_mark_transformation(List *headers, Octstr *new_body,
582     	    	    	    	     Octstr *new_type);
583 
584 
585 /*
586  * Find the first header called `name' in `headers'. Returns its contents
587  * as a new Octet string, which the caller must free. Return NULL for
588  * not found.
589  */
590 Octstr *http_header_find_first_real(List *headers, char *name,
591                                     const char *file, long line, const char *func);
592 #define http_header_find_first(headers, name) \
593     gw_claim_area(http_header_find_first_real((headers), (name), __FILE__, __LINE__, __func__))
594 List *http_header_find_all(List *headers, char *name);
595 
596 
597 /*
598  * Find the Content-Type header and returns the type and charset.
599  */
600 void http_header_get_content_type(List *headers, Octstr **type,
601 	Octstr **charset);
602 
603 
604 /*
605  * Check if a specific mime-type can be handled by a client. This is
606  * indicated via 'Accept' headers. Returns 1 if the mime-type is acceptable,
607  * otherwise 0.
608  */
609 int http_type_accepted(List *headers, char *type);
610 
611 
612 /*
613  * Dump the contents of a header list with debug.
614  */
615 void http_header_dump(List *headers);
616 
617 /*
618  * Ditto with cgi variables. Do not panic, when an empty are found from the
619  * list.
620  */
621 void http_cgivar_dump(List *cgiargs);
622 
623 /*
624  * As above function except that dump appended to Octstr.
625  */
626 void http_cgivar_dump_into(List *cgiargs, Octstr *os);
627 
628 /*
629  * Check if the passed charset is in the 'Accept-Charset' header list
630  * alues of the client. Returns 1 if the charset is acceptable, otherwise 0.
631  */
632 int http_charset_accepted(List *headers, char *charset);
633 
634 
635 /*
636  * Add Basic Authentication headers headers.
637  */
638 void http_add_basic_auth(List *headers, Octstr *username, Octstr *password);
639 
640 
641 /*
642  * Many HTTP field elements can take parameters in a standardized
643  * form: parameters appear after the main value, each is introduced
644  * by a semicolon (;), and consists of a key=value pair or just
645  * a key, where the key is a token and the value is either a token
646  * or a quoted-string.
647  * The main value itself is a series of tokens, separators, and
648  * quoted-strings.
649  *
650  * This function will take such a field element, and look for the
651  * value of a specific key, which is then returned. If the key
652  * is not found within the header value NULL is returned.
653  *
654  * BEWARE: value is *only* the header value, not the whole header with
655  * field name.
656  *
657  * Example:
658  *    * assume to have "Content-Type: application/xml; charset=UTF-8"
659  *    * within List *headers
660  *   value = http_header_value(headers, octstr_imm("Content-Type"))
661  *   val = http_get_header_parameter(value, octstr_imm("charset"));
662  * will return "UTF-8" to lvalue.
663  */
664 Octstr *http_get_header_parameter(Octstr *value, Octstr *parameter);
665 
666 
/*
 * Return the general class of a status code: for example, every 2xx
 * code is HTTP_STATUS_SUCCESSFUL. The classes are the HTTP_STATUS_xxx
 * constants listed at the top of this file.
 */
int http_status_class(int code);
673 
674 
675 /*
676  * Return the HTTP_METHOD_xxx enum code for a Octstr containing
677  * the HTTP method name.
678  */
679 int http_name2method(Octstr *method);
680 
681 
/*
 * Return the HTTP method name, as a char string, for a method code.
 */
char *http_method2name(int method);
686 
687 #endif
688