1 #ifndef CONNECT___HTTP_CONNECTOR__H 2 #define CONNECT___HTTP_CONNECTOR__H 3 4 /* $Id: ncbi_http_connector.h 598971 2019-12-17 18:49:26Z lavr $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Denis Vakatov 30 * 31 * File Description: 32 * Implement CONNECTOR for the HTTP-based network connection 33 * 34 * See in "ncbi_connector.h" for the detailed specification of the underlying 35 * connector ("CONNECTOR", "SConnectorTag") methods and structures. 36 * 37 */ 38 39 #include <connect/ncbi_connutil.h> 40 41 #ifndef NCBI_DEPRECATED 42 # define NCBI_HTTP_CONNECTOR_DEPRECATED 43 #else 44 # define NCBI_HTTP_CONNECTOR_DEPRECATED NCBI_DEPRECATED 45 #endif 46 47 48 /** @addtogroup Connectors 49 * 50 * @{ 51 */ 52 53 54 #ifdef __cplusplus 55 extern "C" { 56 #endif 57 58 59 /** HTTP connector flags. 60 * 61 * @var fHTTP_Flushable 62 * 63 * HTTP/1.0 or when fHTTP_WriteThru is not set: 64 * by default all data written to the connection are kept until read 65 * begins (even though CONN_Flush() might have been called in between the 66 * writes); with this flag set, CONN_Flush() will result the data to be 67 * actually sent to the server side, so the following write will form a 68 * new request, and not get added to the previous one; also this flag 69 * assures that the connector sends at least an HTTP header on "CLOSE" 70 * and re-"CONNECT", even if no data for HTTP body have been written. 71 * 72 * HTTP/1.1 and when fHTTP_WriteThru is set: 73 * CONN_Flush() attempts to send all pending data down to server. 74 * 75 * @var fHTTP_KeepHeader 76 * Do not strip HTTP header (i.e. everything up to the first "\r\n\r\n", 77 * including the "\r\n\r\n") from the incomning HTTP response (including 78 * any server error, which then is made available for reading as well). 79 * *NOTE* this flag disables automatic authorization and redirection. 80 * 81 * @var fHCC_UrlDecodeInput 82 * Assume the response body as single-part, URL-encoded; perform the 83 * URL-decoding on read, and deliver decoded data to the user. Obsolete! 84 * 85 * @var fHTTP_PushAuth 86 * Present credentials to the server if they are set in the connection 87 * parameters when sending 1st request. Normally, the credentials are 88 * only presented on a retry when the server rejects the initial request 89 * with 401 / 407. This saves a hit, but is only honored with HTTP/1.1. 90 * 91 * @var fHTTP_WriteThru 92 * Valid only with HTTP/1.1: Connection to the server is made upon a 93 * first CONN_Write(), or CONN_Flush() if fHTTP_Flushable is set, or 94 * CONN_Wait(eIO_Write), and each CONN_Write() forms a chunk of HTTP 95 * data to be sent to the server. Reading / waiting for read from the 96 * connector finalizes the body and, if reading, fetches the response. 97 * 98 * @var fHTTP_NoUpread 99 * Do *not* do internal reading into temporary buffer while sending data 100 * to HTTP server; by default any send operation tries to fetch data as 101 * they are coming back from the server in order to prevent stalling due 102 * to data clogging the connection. 103 * 104 * @var fHTTP_DropUnread 105 * Do not collect incoming data in "Read" mode before switching into 106 * "Write" mode for preparing next request; by default all data sent by 107 * the server get stored even if not all of it have been requested prior 108 * to a "Write" that followed data reading (stream emulation). 109 * 110 * @var fHTTP_NoAutoRetry 111 * Do not attempt any auto-retries in case of failing connections 112 * (this flag effectively overrides SConnNetInfo::max_try with 1). 113 * 114 * @var fHTTP_UnsafeRedirects 115 * For security reasons the following redirects comprise security risk, 116 * and thus, are prohibited: switching from https to http, and/or 117 * re-POSTing data (regardless of the transport, either http or https); 118 * this flag allows such redirects (when encountered) to be honored. 119 * 120 * @note 121 * URL encoding/decoding (in the "fHCC_Url*" cases and "net_info->args") 122 * is performed by URL_Encode() and URL_Decode() -- see "ncbi_connutil.[ch]". 123 * 124 * @sa 125 * SConnNetInfo, ConnNetInfo_OverrideUserHeader, URL_Encode, URL_Decode 126 */ 127 enum EHTTP_Flag { 128 fHTTP_AutoReconnect = 0x1, /**< See HTTP_CreateConnectorEx() */ 129 fHTTP_Flushable = 0x2, /**< Connector will really flush on Flush()*/ 130 fHTTP_KeepHeader = 0x4, /**< Keep HTTP header (see limitations) */ 131 /*fHCC_UrlEncodeArgs = 0x8, URL-encode "info->args" (w/o fragment)*/ 132 /*fHCC_UrlDecodeInput = 0x10, URL-decode response body */ 133 /*fHCC_UrlEncodeOutput = 0x20, URL-encode all output data */ 134 /*fHCC_UrlCodec = 0x30, fHTTP_UrlDecodeInput | ...EncodeOutput*/ 135 fHTTP_PushAuth = 0x10, /**< HTTP/1.1 pushes out auth if present */ 136 fHTTP_WriteThru = 0x20, /**< HTTP/1.1 writes through (chunked) */ 137 fHTTP_NoUpread = 0x40, /**< Do not use SOCK_SetReadOnWrite() */ 138 fHTTP_DropUnread = 0x80, /**< Each microsession drops unread data */ 139 fHTTP_NoAutoRetry = 0x100,/**< No auto-retries allowed */ 140 fHTTP_NoAutomagicSID = 0x200,/**< Do not add NCBI SID automagically */ 141 fHTTP_UnsafeRedirects = 0x400,/**< Any redirect will be honored */ 142 fHTTP_AdjustOnRedirect= 0x800,/**< Call adjust routine for redirects, too*/ 143 fHTTP_SuppressMessages= 0x1000/**< Most annoying ones reduced to traces */ 144 }; 145 typedef unsigned int THTTP_Flags; /**< Bitwise OR of EHTTP_Flag */ 146 NCBI_HTTP_CONNECTOR_DEPRECATED 147 /** DEPRECATED, do not use! */ 148 typedef enum { 149 /*fHCC_AutoReconnect = fHTTP_AutoReconnect, */ 150 /*fHCC_Flushable = fHTTP_Flushable, */ 151 /*fHCC_SureFlush = fHTTP_Flushable, */ 152 /*fHCC_KeepHeader = fHTTP_KeepHeader, */ 153 fHCC_UrlEncodeArgs = 0x8, /**< NB: Error-prone semantics, do not use!*/ 154 fHCC_UrlDecodeInput = 0x10, /**< Obsolete, may not work, do not use! */ 155 fHCC_UrlEncodeOutput = 0x20, /**< Obsolete, may not work, do not use! */ 156 fHCC_UrlCodec = 0x30 /**< fHCC_UrlDecodeInput | ...EncodeOutput */ 157 /*fHCC_NoUpread = fHTTP_NoUpread, */ 158 /*fHCC_DropUnread = fHTTP_DropUnread, */ 159 /*fHCC_NoAutoRetry = fHTTP_NoAutoRetry */ 160 } EHCC_Flag; 161 NCBI_HTTP_CONNECTOR_DEPRECATED 162 typedef unsigned int THCC_Flags; /**< bitwise OR of EHCC_Flag, deprecated */ 163 164 165 /** Same as HTTP_CreateConnector(net_info, flags, 0, 0, 0, 0) 166 * with the passed "user_header" overriding the value provided in 167 * "net_info->http_user_header". 168 * @sa 169 * HTTP_CreateConnectorEx, ConnNetInfo_OverrideUserHeader 170 */ 171 extern NCBI_XCONNECT_EXPORT CONNECTOR HTTP_CreateConnector 172 (const SConnNetInfo* net_info, 173 const char* user_header, 174 THTTP_Flags flags 175 ); 176 177 178 /** The extended version HTTP_CreateConnectorEx() is able to track the HTTP 179 * response chain and also change the URL of the server "on-the-fly": 180 * - FHTTP_ParseHeader() gets called every time a new HTTP response header is 181 * received from the server, and only if fHTTP_KeepHeader is NOT set. 182 * Return code from the parser adjusts the existing server error condition 183 * (if any) as the following: 184 * 185 * + eHTTP_HeaderError: unconditionally flag a server error; 186 * + eHTTP_HeaderSuccess: header parse successful, retain existing condition 187 * (note that in case of an already existing server 188 * error condition the response body can be logged 189 * but will not be made available for the user code 190 * to read, and eIO_Unknown will result on read); 191 * + eHTTP_HeaderContinue: if there was already a server error condition, 192 * the response body will be made available for the 193 * user code to read (but only if HTTP connector 194 * cannot post-process the request such as for 195 * redirects, authorization etc); otherwise, this 196 * code has the same effect as eHTTP_HeaderSuccess; 197 * + eHTTP_HeaderComplete: flag this request as processed completely, and do 198 * not do any post-processing (such as redirects, 199 * authorization etc), yet make the response body (if 200 * any, and regardless of whether there was a server 201 * error or not) available for reading. 202 * 203 * - FHTTP_Adjust() gets invoked every time before starting a new "HTTP 204 * micro-session" to make a hit when a previous hit has failed; it is passed 205 * "net_info" as stored within the connector, and the number of previously 206 * unsuccessful consecutive attempts (in the least significant word) since 207 * the connector was opened; it is passed 0 in that parameter if calling for 208 * a redirect (when fHTTP_AdjustOnRedirect was set). A zero (false) return 209 * value ends the retries; a non-zero continues with the request: an 210 * advisory value of greater than 0 means an adjustment was made, and a 211 * negative value indicates no changes. 212 * This very same callback is also invoked when a new request is about to be 213 * made for solicitaiton of new URL for the hit -- in this case return 1 if 214 * the SConnNetInfo was updated with a new parameters; or -1 of no changes 215 * were made; or 0 to stop the request with an error. 216 * 217 * - FHTTP_Cleanup() gets called when the connector is about to be destroyed; 218 * "user_data" is guaranteed not to be referenced anymore (so this is a good 219 * place to clean up "user_data" if necessary). 220 * 221 * @sa 222 * SConnNetInfo::max_try 223 */ 224 typedef enum { 225 eHTTP_HeaderError = 0, /**< Parse failed, treat as a server error */ 226 eHTTP_HeaderSuccess = 1, /**< Parse succeeded, retain server status */ 227 eHTTP_HeaderContinue = 2, /**< Parse succeeded, continue with body */ 228 eHTTP_HeaderComplete = 3 /**< Parse succeeded, no more processing */ 229 } EHTTP_HeaderParse; 230 typedef EHTTP_HeaderParse (*FHTTP_ParseHeader) 231 (const char* http_header, /**< HTTP header to parse */ 232 void* user_data, /**< supplemental user data */ 233 int server_error /**< != 0 if HTTP error (NOT 2xx code) */ 234 ); 235 236 237 /* Called with failure_count == 0 for redirects; and with failure_count == -1 238 * for a new URL before starting new successive request(s). Return value 0 239 * means an error, and stops processing; return value 1 means changes were 240 * made, and request should proceed; and return value -1 means no changes. 241 */ 242 typedef int/*bool*/ (*FHTTP_Adjust) 243 (SConnNetInfo* net_info, /**< net_info to adjust (in place) */ 244 void* user_data, /**< supplemental user data */ 245 unsigned int failure_count /**< low word: # of failures since open */ 246 ); 247 248 typedef void (*FHTTP_Cleanup) 249 (void* user_data /**< supplemental user data */ 250 ); 251 252 253 /** Create new CONNECTOR structure to hit the specified URL using HTTP with 254 * either POST / GET (or ANY) method. Use the configuration values stored in 255 * "net_info". If "net_info" is NULL, then use the default info as created by 256 * ConnNetInfo_Create(0). 257 * 258 * If "net_info" does not explicitly specify an HTTP request method (i.e. it 259 * has it as "eReqMethod_Any"), then the actual method sent to the HTTP server 260 * depends on whether any data has been written to the connection with 261 * CONN_Write(): the presence of pending data will cause a POST request (with 262 * a "Content-Length:" tag supplied automatically and reflecting the total 263 * pending data size), and GET request method will result in the absence of any 264 * data. An explicit value for the request method will cause the specified 265 * request to be used regardless of pending data, and will flag an error if any 266 * data will have to be sent with a GET (per the standard). 267 * 268 * When not using HTTP/1.1's fHTTP_WriteThru mode, in order to work around 269 * some HTTP communication features, this code does: 270 * 271 * 1. Accumulate all output data in an internal memory buffer until the 272 * first CONN_Read() (including peek) or CONN_Wait(on read) is attempted 273 * (also see fHTTP_Flushable flag below). 274 * 2. On the first CONN_Read() or CONN_Wait(on read), compose and send the 275 * whole HTTP request as: 276 * @verbatim 277 * METHOD <net_info->path>?<net_info->args> HTTP/1.0\r\n 278 * <user_header\r\n> 279 * Content-Length: <accumulated_data_length>\r\n 280 * \r\n 281 * <accumulated_data> 282 * @endverbatim 283 * @note 284 * If <user_header> is neither a NULL pointer nor an empty string, then: 285 * - it must NOT contain any "empty lines": "\r\n\r\n"; 286 * - multiple tags must be separated by "\r\n" (*not* just "\n"); 287 * - it should be terminated by a single "\r\n" (will be added, if not); 288 * - it gets inserted to the HTTP header "as is", without any automatic 289 * checking and / or encoding (except for the trailing "\r\n"); 290 * - the "user_header" specified in the arguments overrides any user 291 * header that can be provided via the "net_info" argument, see 292 * ConnNetInfo_OverrideUserHeader() from <connect/ncbi_connutil.h>. 293 * @note 294 * Data may depart to the server side earlier if CONN_Flush()'ed in a 295 * fHTTP_Flushable connector, see "flags". 296 * 3. After the request has been sent, then the response data from the peer 297 * (usually, a CGI program) can be actually read out. 298 * 4. On a CONN_Write() operation, which follows data reading, the connection 299 * to the peer is read out until EOF (the data stored internally) then 300 * forcedly closed (the peer CGI process will presumably die if it has not 301 * done so yet on its own), and data to be written again get stored in the 302 * buffer until next "Read" etc, see item 1). The subsequent read will 303 * first see the leftovers (if any) of data stored previously, then the 304 * new data generated in response to the latest request. The behavior can 305 * be changed by the fHTTP_DropUnread flag. 306 * 307 * When fHTTP_WriteThru is set with HTTP/1.1, writing to the connector begins 308 * upon any write operations, and reading from the connector causes the 309 * request body to finalize and response to be fetched from the server. 310 * Request method must be explicitly specified with fHTTP_WriteThru, "ANY" 311 * does not get accepted (the eIO_NotSupported error returned). 312 * 313 * @note 314 * If "fHTTP_AutoReconnect" is set in "flags", then the connector makes an 315 * automatic reconnect to the same URL with just the same parameters for 316 * each micro-session steps (1,2,3) repeated. 317 * @note 318 * If "fHTTP_AutoReconnect" is not set then only a single 319 * "Write ... Write Read ... Read" micro-session is allowed, and any 320 * following write attempt fails with "eIO_Closed". 321 * 322 * @sa 323 * EHTTP_Flag 324 */ 325 extern NCBI_XCONNECT_EXPORT CONNECTOR HTTP_CreateConnectorEx 326 (const SConnNetInfo* net_info, 327 THTTP_Flags flags, 328 FHTTP_ParseHeader parse_header, /**< may be NULL, then no addtl. parsing */ 329 void* user_data, /**< user data for HTTP CBs (callbacks) */ 330 FHTTP_Adjust adjust, /**< may be NULL */ 331 FHTTP_Cleanup cleanup /**< may be NULL */ 332 ); 333 334 335 /** Create a tunnel to "net_info->host:net_info->port" via an HTTP proxy server 336 * located at "net_info->http_proxy_host:net_info->http_proxy_port". Return 337 * the tunnel as a socket via the last parameter. For compatibility with 338 * future API extensions, please make sure *sock is NULL when making the call. 339 * The proxy gets contacted via HTTPS if "net_info->scheme == eURL_Https", and 340 * the resultant socket is returned secure (with the SSL session still active). 341 * @note 342 * "net_info" can be passed as NULL to be constructed from the environment. 343 * @note 344 * "sock" parameter must be non-NULL but must point to a NULL SOCK (checked!). 345 * @note 346 * Some HTTP proxies do not process "data" correctly (e.g. Squid 3) when sent 347 * along with the tunnel creation request (despite the standard specifically 348 * allows such use), so they may require separate SOCK I/O calls to write the 349 * data to the tunnel. 350 * @return 351 * eIO_Success if the tunnel has been successfully created; 352 * otherwise, return an error code and set "*sock" to NULL upon return. 353 * @sa 354 * THTTP_Flags, SOCK_CreateEx, SOCK_Close 355 */ 356 extern NCBI_XCONNECT_EXPORT EIO_Status HTTP_CreateTunnelEx 357 (const SConnNetInfo* net_info, 358 THTTP_Flags flags, 359 const void* init_data, /**< initial data block to send via tunnel */ 360 size_t init_size, /**< size of the initial data block */ 361 void* user_data, /**< user data for the adjust callback */ 362 FHTTP_Adjust adjust, /**< adjust callback, may be NULL */ 363 SOCK* sock /**< return socket; must be non-NULL */ 364 ); 365 366 367 /** Same as HTTP_CreateTunnelEx(net_info, flags, 0, 0, 0, 0, sock) */ 368 extern NCBI_XCONNECT_EXPORT EIO_Status HTTP_CreateTunnel 369 (const SConnNetInfo* net_info, 370 THTTP_Flags flags, 371 SOCK* sock 372 ); 373 374 375 typedef void (*FHTTP_NcbiMessageHook)(const char* message); 376 377 /** Set a message hook procedure for messages originating from NCBI via HTTP. 378 * Any hook will be called no more than once. Until no hook is installed, 379 * and exactly one message is caught, a critical error will be generated in 380 * the standard log file upon acceptance of every message. *Not MT-safe*. 381 */ 382 extern NCBI_XCONNECT_EXPORT void HTTP_SetNcbiMessageHook 383 (FHTTP_NcbiMessageHook /**< New hook to be installed, NULL to reset */ 384 ); 385 386 387 #ifdef __cplusplus 388 } /* extern "C" */ 389 #endif 390 391 392 /* @} */ 393 394 #endif /* CONNECT___HTTP_CONNECTOR__H */ 395