1 #ifndef CONNECT___HTTP_CONNECTOR__H
2 #define CONNECT___HTTP_CONNECTOR__H
3 
4 /* $Id: ncbi_http_connector.h 598971 2019-12-17 18:49:26Z lavr $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Denis Vakatov
30  *
31  * File Description:
32  *   Implement CONNECTOR for the HTTP-based network connection
33  *
34  *   See in "ncbi_connector.h" for the detailed specification of the underlying
35  *   connector ("CONNECTOR", "SConnectorTag") methods and structures.
36  *
37  */
38 
39 #include <connect/ncbi_connutil.h>
40 
/* Deprecation marker used for the legacy declarations of this header:
 * expands to NCBI_DEPRECATED when that macro is available, and to nothing
 * otherwise (so that the header still compiles without it).
 */
#ifdef NCBI_DEPRECATED
#  define NCBI_HTTP_CONNECTOR_DEPRECATED NCBI_DEPRECATED
#else
#  define NCBI_HTTP_CONNECTOR_DEPRECATED
#endif
46 
47 
48 /** @addtogroup Connectors
49  *
50  * @{
51  */
52 
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58 
/** HTTP connector flags.
 *
 * @var fHTTP_Flushable
 *
 *    HTTP/1.0 or when fHTTP_WriteThru is not set:
 *       by default all data written to the connection are kept until read
 *       begins (even though CONN_Flush() might have been called in between the
 *       writes);  with this flag set, CONN_Flush() causes the data to be
 *       actually sent to the server side, so the following write will form a
 *       new request, and not get added to the previous one;  also this flag
 *       assures that the connector sends at least an HTTP header on "CLOSE"
 *       and re-"CONNECT", even if no data for HTTP body have been written.
 *
 *    HTTP/1.1 and when fHTTP_WriteThru is set:
 *       CONN_Flush() attempts to send all pending data down to server.
 *
 * @var fHTTP_KeepHeader
 *       Do not strip HTTP header (i.e. everything up to the first "\r\n\r\n",
 *       including the "\r\n\r\n") from the incoming HTTP response (including
 *       any server error, which then is made available for reading as well).
 *       *NOTE* this flag disables automatic authorization and redirection.
 *
 * @var fHCC_UrlDecodeInput
 *       (Member of the deprecated EHCC_Flag.)
 *       Assume the response body as single-part, URL-encoded;  perform the
 *       URL-decoding on read, and deliver decoded data to the user.  Obsolete!
 *
 * @var fHTTP_PushAuth
 *       Present credentials to the server if they are set in the connection
 *       parameters when sending 1st request.  Normally, the credentials are
 *       only presented on a retry when the server rejects the initial request
 *       with 401 / 407.  This saves a hit, but is only honored with HTTP/1.1.
 *
 * @var fHTTP_WriteThru
 *       Valid only with HTTP/1.1:  Connection to the server is made upon a
 *       first CONN_Write(), or CONN_Flush() if fHTTP_Flushable is set, or
 *       CONN_Wait(eIO_Write), and each CONN_Write() forms a chunk of HTTP
 *       data to be sent to the server.  Reading / waiting for read from the
 *       connector finalizes the body and, if reading, fetches the response.
 *
 * @var fHTTP_NoUpread
 *       Do *not* do internal reading into temporary buffer while sending data
 *       to HTTP server;  by default any send operation tries to fetch data as
 *       they are coming back from the server in order to prevent stalling due
 *       to data clogging the connection.
 *
 * @var fHTTP_DropUnread
 *       Do not collect incoming data in "Read" mode before switching into
 *       "Write" mode for preparing next request;  by default all data sent by
 *       the server get stored even if not all of it has been requested prior
 *       to a "Write" that followed data reading (stream emulation).
 *
 * @var fHTTP_NoAutoRetry
 *       Do not attempt any auto-retries in case of failing connections
 *       (this flag effectively overrides SConnNetInfo::max_try with 1).
 *
 * @var fHTTP_UnsafeRedirects
 *       The following redirects pose a security risk and thus are prohibited
 *       by default:  switching from https to http, and/or re-POSTing data
 *       (regardless of the transport, either http or https);
 *       this flag allows such redirects (when encountered) to be honored.
 *
 * @note
 *  URL encoding/decoding (in the "fHCC_Url*" cases and "net_info->args")
 *  is performed by URL_Encode() and URL_Decode() -- see "ncbi_connutil.[ch]".
 *
 * @note
 *  Bit 0x8 is intentionally left unused here, and bits 0x10 / 0x20 share
 *  their numeric values with the deprecated fHCC_Url* flags (shown
 *  commented out in the enum body for reference).
 *
 * @sa
 *  SConnNetInfo, ConnNetInfo_OverrideUserHeader, URL_Encode, URL_Decode
 */
enum EHTTP_Flag {
    fHTTP_AutoReconnect   = 0x1,  /**< See HTTP_CreateConnectorEx()          */
    fHTTP_Flushable       = 0x2,  /**< Connector will really flush on Flush()*/
    fHTTP_KeepHeader      = 0x4,  /**< Keep HTTP header (see limitations)    */
  /*fHCC_UrlEncodeArgs    = 0x8,       URL-encode "info->args" (w/o fragment)*/
  /*fHCC_UrlDecodeInput   = 0x10,      URL-decode response body              */
  /*fHCC_UrlEncodeOutput  = 0x20,      URL-encode all output data            */
  /*fHCC_UrlCodec         = 0x30,      fHTTP_UrlDecodeInput | ...EncodeOutput*/
    fHTTP_PushAuth        = 0x10, /**< HTTP/1.1 pushes out auth if present   */
    fHTTP_WriteThru       = 0x20, /**< HTTP/1.1 writes through (chunked)     */
    fHTTP_NoUpread        = 0x40, /**< Do not use SOCK_SetReadOnWrite()      */
    fHTTP_DropUnread      = 0x80, /**< Each microsession drops unread data   */
    fHTTP_NoAutoRetry     = 0x100,/**< No auto-retries allowed               */
    fHTTP_NoAutomagicSID  = 0x200,/**< Do not add NCBI SID automagically     */
    fHTTP_UnsafeRedirects = 0x400,/**< Any redirect will be honored          */
    fHTTP_AdjustOnRedirect= 0x800,/**< Call adjust routine for redirects, too*/
    fHTTP_SuppressMessages= 0x1000/**< Most annoying ones reduced to traces  */
};
typedef unsigned int THTTP_Flags; /**< Bitwise OR of EHTTP_Flag              */
146 NCBI_HTTP_CONNECTOR_DEPRECATED
147 /** DEPRECATED, do not use! */
148 typedef enum {
149   /*fHCC_AutoReconnect    = fHTTP_AutoReconnect,                             */
150   /*fHCC_Flushable        = fHTTP_Flushable,                                 */
151   /*fHCC_SureFlush        = fHTTP_Flushable,                                 */
152   /*fHCC_KeepHeader       = fHTTP_KeepHeader,                                */
153     fHCC_UrlEncodeArgs    = 0x8,  /**< NB: Error-prone semantics, do not use!*/
154     fHCC_UrlDecodeInput   = 0x10, /**< Obsolete, may not work, do not use!   */
155     fHCC_UrlEncodeOutput  = 0x20, /**< Obsolete, may not work, do not use!   */
156     fHCC_UrlCodec         = 0x30  /**< fHCC_UrlDecodeInput | ...EncodeOutput */
157   /*fHCC_NoUpread         = fHTTP_NoUpread,                                  */
158   /*fHCC_DropUnread       = fHTTP_DropUnread,                                */
159   /*fHCC_NoAutoRetry      = fHTTP_NoAutoRetry                                */
160 } EHCC_Flag;
161 NCBI_HTTP_CONNECTOR_DEPRECATED
162 typedef unsigned int THCC_Flags;  /**< bitwise OR of EHCC_Flag, deprecated   */
163 
164 
165 /** Same as HTTP_CreateConnector(net_info, flags, 0, 0, 0, 0)
166  * with the passed "user_header" overriding the value provided in
167  * "net_info->http_user_header".
168  * @sa
169  *  HTTP_CreateConnectorEx, ConnNetInfo_OverrideUserHeader
170  */
171 extern NCBI_XCONNECT_EXPORT CONNECTOR HTTP_CreateConnector
172 (const SConnNetInfo* net_info,
173  const char*         user_header,
174  THTTP_Flags         flags
175  );
176 
177 
/** The extended version HTTP_CreateConnectorEx() is able to track the HTTP
 * response chain and also change the URL of the server "on-the-fly":
 * - FHTTP_ParseHeader() gets called every time a new HTTP response header is
 *   received from the server, and only if fHTTP_KeepHeader is NOT set.
 *   Return code from the parser adjusts the existing server error condition
 *   (if any) as the following:
 *
 *   + eHTTP_HeaderError:    unconditionally flag a server error;
 *   + eHTTP_HeaderSuccess:  header parse successful, retain existing condition
 *                           (note that in case of an already existing server
 *                           error condition the response body can be logged
 *                           but will not be made available for the user code
 *                           to read, and eIO_Unknown will result on read);
 *   + eHTTP_HeaderContinue: if there was already a server error condition,
 *                           the response body will be made available for the
 *                           user code to read (but only if HTTP connector
 *                           cannot post-process the request such as for
 *                           redirects, authorization etc);  otherwise, this
 *                           code has the same effect as eHTTP_HeaderSuccess;
 *   + eHTTP_HeaderComplete: flag this request as processed completely, and do
 *                           not do any post-processing (such as redirects,
 *                           authorization etc), yet make the response body (if
 *                           any, and regardless of whether there was a server
 *                           error or not) available for reading.
 *
 * - FHTTP_Adjust() gets invoked every time before starting a new "HTTP
 *   micro-session" to make a hit when a previous hit has failed;  it is passed
 *   "net_info" as stored within the connector, and the number of previously
 *   unsuccessful consecutive attempts (in the least significant word) since
 *   the connector was opened;  it is passed 0 in that parameter if calling for
 *   a redirect (when fHTTP_AdjustOnRedirect was set).  A zero (false) return
 *   value ends the retries;  a non-zero continues with the request:  an
 *   advisory value of greater than 0 means an adjustment was made, and a
 *   negative value indicates no changes.
 *   This very same callback is also invoked when a new request is about to be
 *   made, for solicitation of a new URL for the hit -- in this case return 1
 *   if the SConnNetInfo was updated with new parameters;  or -1 if no changes
 *   were made;  or 0 to stop the request with an error.
 *
 * - FHTTP_Cleanup() gets called when the connector is about to be destroyed;
 *   "user_data" is guaranteed not to be referenced anymore (so this is a good
 *   place to clean up "user_data" if necessary).
 *
 * @sa
 *   SConnNetInfo::max_try
 */
typedef enum {
    eHTTP_HeaderError    = 0,  /**< Parse failed, treat as a server error */
    eHTTP_HeaderSuccess  = 1,  /**< Parse succeeded, retain server status */
    eHTTP_HeaderContinue = 2,  /**< Parse succeeded, continue with body   */
    eHTTP_HeaderComplete = 3   /**< Parse succeeded, no more processing   */
} EHTTP_HeaderParse;
typedef EHTTP_HeaderParse (*FHTTP_ParseHeader)
(const char*         http_header,   /**< HTTP header to parse                */
 void*               user_data,     /**< supplemental user data              */
 int                 server_error   /**< != 0 if HTTP error (NOT 2xx code)   */
 );
235 
236 
237 /* Called with failure_count == 0 for redirects; and with failure_count == -1
238  * for a new URL before starting new successive request(s).  Return value 0
239  * means an error, and stops processing;  return value 1 means changes were
240  * made, and request should proceed;  and return value -1 means no changes.
241  */
242 typedef int/*bool*/ (*FHTTP_Adjust)
243 (SConnNetInfo*       net_info,      /**< net_info to adjust (in place)       */
244  void*               user_data,     /**< supplemental user data              */
245  unsigned int        failure_count  /**< low word: # of failures since open  */
246  );
247 
/** Cleanup callback:  gets called when the connector is about to be
 * destroyed;  "user_data" is guaranteed not to be referenced anymore, so this
 * is a good place to clean it up if necessary.
 */
typedef void        (*FHTTP_Cleanup)
(void*               user_data      /**< supplemental user data              */
 );
251 
252 
253 /** Create new CONNECTOR structure to hit the specified URL using HTTP with
254  * either POST / GET (or ANY) method.  Use the configuration values stored in
255  * "net_info".  If "net_info" is NULL, then use the default info as created by
256  * ConnNetInfo_Create(0).
257  *
258  * If "net_info" does not explicitly specify an HTTP request method (i.e. it
259  * has it as "eReqMethod_Any"), then the actual method sent to the HTTP server
260  * depends on whether any data has been written to the connection with
261  * CONN_Write():  the presence of pending data will cause a POST request (with
262  * a "Content-Length:" tag supplied automatically and reflecting the total
263  * pending data size), and GET request method will result in the absence of any
264  * data.  An explicit value for the request method will cause the specified
265  * request to be used regardless of pending data, and will flag an error if any
266  * data will have to be sent with a GET (per the standard).
267  *
268  * When not using HTTP/1.1's fHTTP_WriteThru mode, in order to work around
269  * some HTTP communication features, this code does:
270  *
271  *  1. Accumulate all output data in an internal memory buffer until the
272  *     first CONN_Read() (including peek) or CONN_Wait(on read) is attempted
273  *     (also see fHTTP_Flushable flag below).
274  *  2. On the first CONN_Read() or CONN_Wait(on read), compose and send the
275  *     whole HTTP request as:
276  *        @verbatim
277  *        METHOD <net_info->path>?<net_info->args> HTTP/1.0\r\n
278  *        <user_header\r\n>
279  *        Content-Length: <accumulated_data_length>\r\n
280  *        \r\n
281  *        <accumulated_data>
282  *        @endverbatim
283  *     @note
284  *       If <user_header> is neither a NULL pointer nor an empty string, then:
285  *       - it must NOT contain any "empty lines":  "\r\n\r\n";
286  *       - multiple tags must be separated by "\r\n" (*not* just "\n");
287  *       - it should be terminated by a single "\r\n" (will be added, if not);
288  *       - it gets inserted to the HTTP header "as is", without any automatic
289  *         checking and / or encoding (except for the trailing "\r\n");
290  *       - the "user_header" specified in the arguments overrides any user
291  *         header that can be provided via the "net_info" argument, see
292  *         ConnNetInfo_OverrideUserHeader() from <connect/ncbi_connutil.h>.
293  *     @note
294  *       Data may depart to the server side earlier if CONN_Flush()'ed in a
295  *       fHTTP_Flushable connector, see "flags".
296  *  3. After the request has been sent, then the response data from the peer
297  *     (usually, a CGI program) can be actually read out.
298  *  4. On a CONN_Write() operation, which follows data reading, the connection
299  *     to the peer is read out until EOF (the data stored internally) then
300  *     forcedly closed (the peer CGI process will presumably die if it has not
301  *     done so yet on its own), and data to be written again get stored in the
302  *     buffer until next "Read" etc, see item 1).  The subsequent read will
303  *     first see the leftovers (if any) of data stored previously, then the
304  *     new data generated in response to the latest request.  The behavior can
305  *     be changed by the fHTTP_DropUnread flag.
306  *
307  *  When fHTTP_WriteThru is set with HTTP/1.1, writing to the connector begins
308  *  upon any write operations, and reading from the connector causes the
309  *  request body to finalize and response to be fetched from the server.
310  *  Request method must be explicitly specified with fHTTP_WriteThru, "ANY"
311  *  does not get accepted (the eIO_NotSupported error returned).
312  *
313  *  @note
314  *     If "fHTTP_AutoReconnect" is set in "flags", then the connector makes an
315  *     automatic reconnect to the same URL with just the same parameters for
316  *     each micro-session steps (1,2,3) repeated.
317  *  @note
318  *     If "fHTTP_AutoReconnect" is not set then only a single
319  *     "Write ... Write Read ... Read" micro-session is allowed, and any
320  *     following write attempt fails with "eIO_Closed".
321  *
322  * @sa
323  *  EHTTP_Flag
324  */
325 extern NCBI_XCONNECT_EXPORT CONNECTOR HTTP_CreateConnectorEx
326 (const SConnNetInfo* net_info,
327  THTTP_Flags         flags,
328  FHTTP_ParseHeader   parse_header,  /**< may be NULL, then no addtl. parsing */
329  void*               user_data,     /**< user data for HTTP CBs (callbacks)  */
330  FHTTP_Adjust        adjust,        /**< may be NULL                         */
331  FHTTP_Cleanup       cleanup        /**< may be NULL                         */
332  );
333 
334 
335 /** Create a tunnel to "net_info->host:net_info->port" via an HTTP proxy server
336  * located at "net_info->http_proxy_host:net_info->http_proxy_port".  Return
337  * the tunnel as a socket via the last parameter.  For compatibility with
338  * future API extensions, please make sure *sock is NULL when making the call.
339  * The proxy gets contacted via HTTPS if "net_info->scheme == eURL_Https", and
340  * the resultant socket is returned secure (with the SSL session still active).
341  * @note
342  *  "net_info" can be passed as NULL to be constructed from the environment.
343  * @note
344  *  "sock" parameter must be non-NULL but must point to a NULL SOCK (checked!).
345  * @note
346  *  Some HTTP proxies do not process "data" correctly (e.g. Squid 3) when sent
347  *  along with the tunnel creation request (despite the standard specifically
348  *  allows such use), so they may require separate SOCK I/O calls to write the
349  *  data to the tunnel.
350  * @return
351  *  eIO_Success if the tunnel has been successfully created;
352  *  otherwise, return an error code and set "*sock" to NULL upon return.
353  * @sa
354  *  THTTP_Flags, SOCK_CreateEx, SOCK_Close
355  */
356 extern NCBI_XCONNECT_EXPORT EIO_Status HTTP_CreateTunnelEx
357 (const SConnNetInfo* net_info,
358  THTTP_Flags         flags,
359  const void*         init_data,  /**< initial data block to send via tunnel  */
360  size_t              init_size,  /**< size of the initial data block         */
361  void*               user_data,  /**< user data for the adjust callback      */
362  FHTTP_Adjust        adjust,     /**< adjust callback, may be NULL           */
363  SOCK*               sock        /**< return socket; must be non-NULL        */
364  );
365 
366 
367 /** Same as HTTP_CreateTunnelEx(net_info, flags, 0, 0, 0, 0, sock) */
368 extern NCBI_XCONNECT_EXPORT EIO_Status HTTP_CreateTunnel
369 (const SConnNetInfo* net_info,
370  THTTP_Flags         flags,
371  SOCK*               sock
372  );
373 
374 
/** Hook procedure type for messages originating from NCBI via HTTP;  it is
 * passed the message text.
 * @sa
 *  HTTP_SetNcbiMessageHook
 */
typedef void (*FHTTP_NcbiMessageHook)(const char* message);
376 
377 /** Set a message hook procedure for messages originating from NCBI via HTTP.
378  *  Any hook will be called no more than once.  Until no hook is installed,
379  *  and exactly one message is caught, a critical error will be generated in
380  *  the standard log file upon acceptance of every message.  *Not MT-safe*.
381  */
382 extern NCBI_XCONNECT_EXPORT void HTTP_SetNcbiMessageHook
383 (FHTTP_NcbiMessageHook  /**< New hook to be installed, NULL to reset */
384  );
385 
386 
387 #ifdef __cplusplus
388 }  /* extern "C" */
389 #endif
390 
391 
392 /* @} */
393 
394 #endif /* CONNECT___HTTP_CONNECTOR__H */
395