1 /* $Id: ncbiwww.h,v 6.8 2007/06/20 22:05:40 vakatov Exp $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  $RCSfile: ncbiwww.h,v $
27 *
28 * Author:  Sergei Shavirin
29 *
30 * Version Creation Date: 11/03/1996
31 *
32 * $Revision: 6.8 $
33 *
34 * File Description:
35 *   This file contains main definitions to read and process HTTP
36 *   protocols input for WWW CGI programs
37 *   Currently it works for all ncbi supported platforms.
38 *
39 * $Log: ncbiwww.h,v $
40 * Revision 6.8  2007/06/20 22:05:40  vakatov
41 * MAX_WWW_ENTRIES -- increased from 4096 to 32768.
42 * It is just a quick-fix to let this code live a little longer, requested
43 * by Karl and Vasuki.
44 *
45 * Revision 6.7  2002/02/07 14:48:22  ivanov
46 * Added WWWGetEntriesEx(), WWWGetEntriesFormDataEx(), WWWReadFileInMemoryEx(),
47 * WWWGetValueSizeByIndex() -- support binary files in the multipart form-data.
48 *
49 * Revision 6.6  2002/01/28 21:27:00  ivanov
50 * Added WWWGetArgsEx() and WWWGetArgsAttr_...() functions.
51 * Added structure SWWWGetArgsAttr definition.
52 *
53 * Revision 6.5  2001/05/10 14:58:35  shavirin
54 * Fixed typo.
55 *
56 * Revision 6.4  2001/05/09 19:25:35  shavirin
57 * Added function WWWGetProxiedIP() to get 'real' address of the client
58 * using PROXIED_IP environment set by proxy server.
59 *
60 * Revision 6.3  1999/09/29 19:08:43  shavirin
61 * Added new functions: WWWGetLastValueByName and WWWFindNameEx
62 *
63 * Revision 6.2  1999/08/18 18:48:40  shavirin
64 * Increased number of MAX_WWW_ENTRIES to 4096.
65 *
66 * Revision 6.1  1999/01/26 19:43:27  vakatov
67 * Adopted for 32-bit MS-Windows DLLs
68 *
69 * Revision 5.1  1997/05/09 16:01:25  vakatov
70 * "ncbicoll.[ch]" is not being used anywhere anymore -- remove it!
71 * Move "ncbiwww.h" and "wwwutils.c" from /network/www2(ncbiwww2.lib)
72 * to /corelib(ncbi.lib)
73 *
74 * Revision 1.7  1997/04/04  21:26:32  savchuk
75 * WWWInfoPtr definition has been changed.
76 * WWWInfoNew() prototype has been removed
77 *
78 * Revision 1.6  1997/02/26  15:20:50  shavirin
79 * Added definition of function WWWGetDocRoot()
80 *
81 * Revision 1.5  1996/12/18  17:44:48  shavirin
82 * Added support for CC++ compiler usage.
83 *
84 * Revision 1.4  1996/12/13  22:53:18  shavirin
85 * Added definitions to new functions.
86 * ..
87 *
88 * Revision 1.3  1996/12/12  19:24:35  shavirin
89 * Changed definitions of WWWReadPosting() and entered new function
90 * WWWGetArgs(). Added WWWErrorCode definitions.
91 *
92 * Revision 1.2  1996/12/11  18:13:31  shavirin
93 * Main WWWInfoPtr changed to Void Pointer to hide real structure,
94 * that called now WWWInfoDataPtr
95 *
96 * Revision 1.1  1996/12/03  22:47:18  shavirin
97 * Initial revision
98 *
99 * ==========================================================================
100 */
101 
102 #ifndef _NCBI_WWW_
103 #define _NCBI_WWW_ ncbiwww
104 
105 #ifndef _NCBI_  /* skip the include when _NCBI_ is already defined (presumably by ncbi.h itself) */
106 #include <ncbi.h>
107 #endif
108 
109 #undef NLM_EXTERN  /* re-target NLM_EXTERN for the declarations in this header */
110 #ifdef NLM_IMPORT
111 #define NLM_EXTERN NLM_IMPORT
112 #else
113 #define NLM_EXTERN extern
114 #endif
115 
116 
117 /****************************************************************************/
118 /* DEFINES */
119 /****************************************************************************/
120 
121 #define MAX_WWW_ENTRIES  32768 /* maximum number of name=value entries (HTML tags) in input */
122 #define WWW_MAX_NAME_LEN 512  /* Limit for the length of a Name in an HTML tag */
123 
124 #define MISC_BROWSER     0    /* Any other browser, Netscape Ver. 1 included */
125 #define NETSCAPE         1    /* Netscape Ver. 2 and higher */
126 #define EXPLORER         2    /* Microsoft Internet Explorer. Any Version */
127 
128 #define COMMAND_LINE 0        /* program used from command line */
129 #define WWW_GET      1        /* GET method: input in ?name=value&name=value&.. form */
130 #define WWW_POST     2        /* POST method: input through stdin in ?..=..&..=.. form */
131 #define FORM_DATA    3        /* RFC 1867 multipart/form-data */
132 
133 #define LIVE_SERVER_PORT 80   /* default HTTPD live port */
134 
135 #define INIT_BUFF_SIZE 4028   /* temporary buffer to read from file/stdin; NOTE(review): 4028, not 4096 -- confirm intended */
136 
137 /****************************************************************************/
138 /* TYPEDEFS */
139 /****************************************************************************/
140 
141 typedef struct WWWEntry { /* one name, value pair extracted from the request input */
142   CharPtr name;           /* HTML tag NAME=..  */
143   CharPtr val;            /* HTML tag VALUE=.. */
144   Int4    size;           /* Size of data in "val" (presumably in bytes -- confirm) */
145 } WWWEntry, PNTR WWWEntryPtr;
146 
147 /* typedef VoidPtr WWWInfo; */
148 typedef VoidPtr WWWInfoPtr; /* void pointer handle taken by the WWW...() functions; hides the real structure (WWWInfoData) */
149 
150 typedef struct WWWInfoData { /* real structure hidden behind the void-pointer WWWInfoPtr */
151   Int4    method;            /* WWW_GET, WWW_POST or COMMAND_LINE (presumably FORM_DATA too -- confirm) */
152   Int4    port;              /* Server port - current server */
153   CharPtr server_name;       /* Server name - current server */
154   CharPtr doc_root;          /* Document directory of current server */
155   CharPtr script_name;       /* Script name - CGI program */
156   CharPtr host;              /* remote host of client */
157   CharPtr address;           /* remote address of client - may be proxy */
158   CharPtr proxied_ip;        /* 'real' remote addr of client as set by proxy */
159   CharPtr agent;             /* Label of remote client */
160   CharPtr query;             /* Complete input buffer */
161   Int4    query_len;         /* Length of data in input buffer */
162   Int4    browser;           /* Value derived from Label (presumably MISC_BROWSER, NETSCAPE or EXPLORER) */
163   WWWEntryPtr PNTR entries;  /* Parsed input data */
164   Int4    num_entries;       /* Number of HTML tags */
165 } WWWInfoData, PNTR WWWInfoDataPtr;
166 
167 typedef enum {  /* status type returned by WWWGetArgs(), WWWGetArgsEx() and WWWReadPosting() */
168   WWWErrOk      = 0,
169   WWWErrNoMem   = 1,
170   WWWErrNetwork = 2
171 } WWWErrorCode;
172 
173 
174 struct SWWWGetArgsAttr_tag;  /* incomplete type: the layout is not visible in this header */
175 typedef struct SWWWGetArgsAttr_tag PNTR SWWWGetArgsAttr;  /* attribute handle taken by WWWGetArgsEx() */
176 
177 /****************************************************************************/
178 /* FUNCTION DECLARATIONS */
179 /****************************************************************************/
180 
181 #ifdef __cplusplus
182 extern "C" {
183 #endif
184 
185 /* ---------------  WWWGetEntries / WWWGetEntriesEx  ----------------
186    Purpose:     Assuming that the input buffer is in HTTP or RFC 1867
187                 format, these functions convert the input into an array
188                 of name, value pairs in the form of WWWEntry-es.
189    Parameters:  num_entries - number of pairs returned
190                 WWWBuffer   - main input HTTP buffer
191                 NetscapeOK  - (WWWGetEntries only) if TRUE check for RFC 1867
192                 will be performed before standard processing (Not used now)
193                 WWWBuffer_len - (WWWGetEntriesEx only) length of data in the WWWBuffer
194 
195    Returns:     Pointer to array of WWWEntry pairs
196    NOTE:        RFC 1867 may be enabled only with Netscape Version 2 and
197                 higher.
198   ------------------------------------------------------------------*/
199 NLM_EXTERN WWWEntryPtr PNTR WWWGetEntries(Int4Ptr num_entries,
200                                           CharPtr WWWBuffer,
201                                           Boolean NetscapeOK);
202 
203 NLM_EXTERN WWWEntryPtr PNTR WWWGetEntriesEx(Int4Ptr num_entries,
204                                             CharPtr WWWBuffer,
205                                             Int4    WWWBuffer_len);
206 
207 
208 /* --------  WWWGetEntriesFormData / WWWGetEntriesFormDataEx  --------
209    Purpose:     Assuming that the input buffer is in RFC 1867
210                 ftp://ds.internic.net/rfc/rfc1867.txt
211                 (multipart/form-data) encoding, these functions
212                 convert the input into an array of name, value pairs
213                 in the form of WWWEntry-es.
214    Parameters:  entries     - pointer to array of WWWEntry-es
215                 WWWBuffer   - main input HTTP buffer
216                 WWWBuffer_len - (Ex variant only) presumably length of data in WWWBuffer
217    Returns:     Number of WWW entries returned
218    NOTE:        RFC 1867 may be enabled only with Netscape Version 2 and higher.
219   ------------------------------------------------------------------*/
220 NLM_EXTERN Int4 WWWGetEntriesFormData(WWWEntryPtr PNTR entries,
221                                       CharPtr WWWBuffer);
222 
223 NLM_EXTERN Int4 WWWGetEntriesFormDataEx(WWWEntryPtr PNTR entries,
224                                         CharPtr WWWBuffer,
225                                         Int4 WWWBuffer_len);
226 
227 
228 /* -----------------------  WWWGetArgs  ---------------------------
229    Purpose:     This function reads HTML input in POST, GET or
230                 multipart/form-data encoding - depending upon the
231                 environment. If used from command-line this
232                 function will return a valid WWWInfo structure
233                 with all fields blank except info->method, that
234                 will be set to COMMAND_LINE.
235                 If argc == 1 this function will read WWW buffer
236                 from STDIN, otherwise it will treat argv[1] as
237                 WWW buffer.
238    Parameters:  info - receives the WWWInfoPtr structure with
239                 processed HTTP input and environment
240    Returns:     WWWErrorCode status
241    NOTE:        This function will filter input for non-printing
242                 characters. Transfer of binary files is not supported.
243 
244   ------------------------------------------------------------------*/
245 NLM_EXTERN WWWErrorCode WWWGetArgs(WWWInfoPtr PNTR info);
246 
247 
248 /* -----------------------  WWWGetArgsEx  ---------------------------
249    Purpose:     Identical to WWWGetArgs(), but it has an additional
250                 parameter with working attributes
251    Parameters:  info - receives the WWWInfoPtr structure with processed
252                 HTTP input and environment
253                 attr - function's working attributes
254    Returns:     WWWErrorCode status
255   ------------------------------------------------------------------*/
256 
257 
258 NLM_EXTERN WWWErrorCode WWWGetArgsEx(WWWInfoPtr PNTR info,
259                                      SWWWGetArgsAttr attr);
260 
261 /* -----------------------  WWWGetArgsAttr...  -----------------------
262    Purpose:     These functions create, destroy and set parameter
263                 values in the SWWWGetArgsAttr structure.
264    NOTE:        The created structure is used by WWWGetArgsEx()
265   ------------------------------------------------------------------*/
266 
267 NLM_EXTERN SWWWGetArgsAttr WWWGetArgsAttr_Create(void);
268 
269 NLM_EXTERN void WWWGetArgsAttr_Destroy(SWWWGetArgsAttr attr);
270 
271 NLM_EXTERN Boolean WWWGetArgsAttr_SetFilter(SWWWGetArgsAttr attr,
272                                             Boolean filter_non_print);
273 NLM_EXTERN Boolean WWWGetArgsAttr_SetReadArgv(SWWWGetArgsAttr attr,
274                                             Boolean read_argv);
275 
276 
277 /* ----------------------  WWWReadPosting  -------------------------
278    Purpose:     This function reads HTML input in POST, GET or
279                 multipart/form-data encoding - depending upon the
280                 environment. If used from command-line this
281                 function will return a valid WWWInfo structure
282                 with all fields blank except info->method, that
283                 will be set to COMMAND_LINE
284                 No more processing will be performed.
285    Parameters:  info - receives the WWWInfoPtr structure with
286                 processed HTTP input and environment
287    Returns:     WWWErrorCode status
288    NOTE:        This function will filter input for non-printing
289                 characters. Transfer of binary files is not supported.
290 
291   ------------------------------------------------------------------*/
292 NLM_EXTERN WWWErrorCode WWWReadPosting(WWWInfoPtr PNTR info);
293 
294 
295 /* ---------  WWWReadFileInMemory / WWWReadFileInMemoryEx  ----------
296    Purpose:     Function reads data from file or stdin into
297                 string buffer (terminated by NULLB).
298 
299    Parameters:  fd - opened file
300                 len - number of bytes to read. If this value is set
301                       to 0 the file will be read until EOF or closing
302                       of the external connection (for sockets).
303                       If len != 0 NOT MORE THAN len bytes will
304                       be read from the input stream
305                 filter - if TRUE filtering of non-printing characters
306                       will be performed
307                 rsize - (Ex variant only) returns size of read data
308    Returns:     Pointer to allocated buffer.
309    NOTE:        Please be careful with "len": the function reads input
310                 absolutely differently if len == 0 or len != 0
311 
312   ------------------------------------------------------------------*/
313 NLM_EXTERN CharPtr WWWReadFileInMemory(FILE *fd, Int4 len, Boolean filter);
314 
315 NLM_EXTERN CharPtr WWWReadFileInMemoryEx(FILE *fd, Int4 len, Boolean filter,
316                                          Int4Ptr rsize);
317 
318 
319 /* ----------------------  WWWInfoFree  -------------------------
320    Purpose:     Frees a WWWInfo structure
321    Parameters:  info - WWWInfo structure to free
322    Returns:     None
323   ------------------------------------------------------------------*/
324 NLM_EXTERN void WWWInfoFree(WWWInfoPtr info);
325 
326 
327 /* ----------------------  WWWGetWWWEntries  -----------------------
328    Purpose:     Return pointer to array of name=value tags
329    Parameters:  info - WWWInfoPtr
330    Returns:     Pointer to the array of entries (presumably NULL if error -- confirm)
331   ------------------------------------------------------------------*/
332 NLM_EXTERN WWWEntryPtr PNTR WWWGetWWWEntries(WWWInfoPtr info);
333 
334 
335 /* ----------------------  WWWGetMethod  -------------------------
336    Purpose:     Return the method used in the WWW Request, or COMMAND_LINE
337    Parameters:  info - WWWInfoPtr
338    Returns:     Method used or -1 if error
339   ------------------------------------------------------------------*/
340 NLM_EXTERN Int4 WWWGetMethod(WWWInfoPtr info);
341 
342 
343 /* ----------------------  WWWGetBrowser  -------------------------
344    Purpose:     Return the browser used in the WWW Request
345    Parameters:  info - WWWInfoPtr
346    Returns:     Browser used (presumably MISC_BROWSER, NETSCAPE or EXPLORER) or -1 if error
347   ------------------------------------------------------------------*/
348 NLM_EXTERN Int4 WWWGetBrowser(WWWInfoPtr info);
349 
350 
351 /* ----------------------  WWWGetNumEntries  -------------------------
352    Purpose:     Return the number of name=value tags in the WWW Request
353    Parameters:  info - WWWInfoPtr
354    Returns:     Number of Entries or -1 if error
355   ------------------------------------------------------------------*/
356 NLM_EXTERN Int4 WWWGetNumEntries(WWWInfoPtr info);
357 
358 
359 /* ----------------------  WWWGetAgent  -------------------------
360    Purpose:     Return the agent used in the WWW Request
361    Parameters:  info - WWWInfoPtr
362    Returns:     Agent used or NULL if error
363   ------------------------------------------------------------------*/
364 NLM_EXTERN CharPtr WWWGetAgent(WWWInfoPtr info);
365 
366 
367 /* ----------------------  WWWGetAddress  -------------------------
368    Purpose:     Return the address used in the WWW Request
369    Parameters:  info - WWWInfoPtr
370    Returns:     Address used or NULL if error
371    ------------------------------------------------------------------*/
372 NLM_EXTERN CharPtr WWWGetAddress(WWWInfoPtr info);
373 
374 
375 /* ----------------------  WWWGetDocRoot  -------------------------
376    Purpose:     Return the DOCUMENT_ROOT directory of the current server
377    Parameters:  info_in - WWWInfoPtr
378    Returns:     Document root directory or NULL if error
379   ------------------------------------------------------------------*/
380 NLM_EXTERN CharPtr WWWGetDocRoot(WWWInfoPtr info_in);
381 
382 /* ----------------------  WWWGetProxiedIP  ------------------------
383    Purpose:     Return the 'real' client address as set by the proxy server
384    Parameters:  info_in - WWWInfoPtr
385    Returns:     Proxied client address or NULL if error
386   ------------------------------------------------------------------*/
387 NLM_EXTERN CharPtr WWWGetProxiedIP(WWWInfoPtr info_in);
388 
389 
390 /* ----------------------  WWWGetHost  -------------------------
391    Purpose:     Return the host used in the WWW Request
392    Parameters:  info - WWWInfoPtr
393    Returns:     Host used or NULL if error
394   ------------------------------------------------------------------*/
395 NLM_EXTERN CharPtr WWWGetHost(WWWInfoPtr info);
396 
397 
398 /* ----------------------  WWWGetServer  -------------------------
399    Purpose:     Return the HTTPD server name used in the WWW Request
400    Parameters:  info_in - WWWInfoPtr
401    Returns:     Server name used or NULL if error
402   ------------------------------------------------------------------*/
403 NLM_EXTERN CharPtr WWWGetServer(WWWInfoPtr info_in);
404 
405 
406 /* ----------------------  WWWGetQuery  -------------------------
407    Purpose:     Return the full query used in the WWW Request
408 
409    Parameters:  info - WWWInfoPtr
410 
411    Returns:     Query used or NULL if error
412   ------------------------------------------------------------------*/
413 NLM_EXTERN CharPtr WWWGetQuery(WWWInfoPtr info);
414 
415 
416 /* ----------------------  WWWGetPort  -------------------------
417    Purpose:     Return the port used in the WWW Request
418    Parameters:  info - WWWInfoPtr
419    Returns:     Port used or -1 if error
420   ------------------------------------------------------------------*/
421 NLM_EXTERN Int4 WWWGetPort(WWWInfoPtr info);
422 
423 
424 /* ----------------------  WWWInfoNew  -------------------------
425    Purpose:     Allocates WWWInfo structure
426    Parameters:  None
427    Returns:     WWWInfo structure
428   ------------------------------------------------------------------*/
429 /* NLM_EXTERN WWWInfoPtr WWWInfoNew(void); */
430 
431 
432 /* ----------------------  WWWFindName  -------------------------
433    Purpose:     This function looks for Name in the WWW Entries structure
434    Parameters:  info - WWWInfo structure
435                 find - Name to find
436    Returns:     index in WWWEntry structure if "find" found and -1 if not
437   ------------------------------------------------------------------*/
438 NLM_EXTERN Int4 WWWFindName(WWWInfoPtr info, CharPtr find);
439 
440 
441 /* ----------------------  WWWFindNameEx  ------------------------
442    Purpose:     This function looks for Name in the WWW Entries structure
443                 starting from a specific index value
444    Parameters:  info_in - WWWInfo structure
445                 find - Name to find
446                 index - index value to start with
447    Returns:     index in WWWEntry structure if "find" found and -1 if not
448   ------------------------------------------------------------------*/
449 NLM_EXTERN Int4 WWWFindNameEx(WWWInfoPtr info_in, CharPtr find, Int4 index);
450 
451 
452 /* ----------------------  WWWGetNameByIndex  ----------------------
453    Purpose:     This function gets the Name corresponding to a specific
454                 index.
455    Parameters:  info - WWWInfo structure
456                 index - Index in WWW Entries structure
457    Returns:     Pointer to Name or NULL if index invalid
458   ------------------------------------------------------------------*/
459 NLM_EXTERN CharPtr WWWGetNameByIndex(WWWInfoPtr info, Int4 index);
460 
461 
462 /* -------------------  WWWGetValueByIndex  ---------------------
463    Purpose:     This function gets the Value corresponding to a specific
464                 index.
465    Parameters:  info - WWWInfo structure
466                 index - Index in WWW Entries structure
467    Returns:     Pointer to Value or NULL if index invalid
468   ------------------------------------------------------------------*/
469 NLM_EXTERN CharPtr WWWGetValueByIndex(WWWInfoPtr info, Int4 index);
470 
471 
472 /* -------------------  WWWGetValueSizeByIndex  ---------------------
473    Purpose:     This function gets the size of the Value corresponding
474                 to a specific index.
475    Parameters:  info - WWWInfo structure
476                 index - Index in WWW Entries structure
477    Returns:     Number of bytes stored in the Value
478   ------------------------------------------------------------------*/
479 NLM_EXTERN Int4 WWWGetValueSizeByIndex(WWWInfoPtr info, Int4 index);
480 
481 
482 /* -------------------  WWWGetValueByName  ---------------------
483    Purpose:     This function gets the Value corresponding to a specific
484                 Name.
485    Parameters:  info - WWWInfo structure
486                 name - name to look for
487    Returns:     Pointer to Value or NULL if Name was not
488                 found
489   ------------------------------------------------------------------*/
490 NLM_EXTERN CharPtr WWWGetValueByName(WWWInfoPtr info, CharPtr name);
491 
492 
493 /* -------------------  WWWGetLastValueByName  ---------------------
494    Purpose:     This function gets the LAST Value corresponding to a
495                 specific Name if there is more than one.
496    Parameters:  info_in - WWWInfo structure
497                 find - name to look for
498    Returns:     Pointer to Value or NULL if Name was not
499                 found
500   ------------------------------------------------------------------*/
501 NLM_EXTERN CharPtr WWWGetLastValueByName(WWWInfoPtr info_in, CharPtr find);
502 
503 
504 /* -------------------  WWWSubstituteValue  ---------------------
505    Purpose:     This function substitutes the "old" value by "new_value"
506                 in the WWWInfo structure.
507    Parameters:  info_in - WWWInfo structure
508                 old - value to change
509                 new_value - new value to assign
510    Returns:     FALSE if "old" value not found, otherwise TRUE
511   ------------------------------------------------------------------*/
512 NLM_EXTERN Boolean WWWSubstituteValue(WWWInfoPtr info_in,
513                                       CharPtr old, CharPtr new_value);
514 
515 
516 /* -------------------  WWWSubstituteValueByName  -------------------
517    Purpose:     This function substitutes the value corresponding to "name"
518                 by "new_value" in the WWWInfo structure.
519    Parameters:  info_in - WWWInfo structure
520                 new_value - new value to assign (passed BEFORE name)
521                 name - corresponding name
522    Returns:     FALSE if "name" was not found, otherwise TRUE
523   ------------------------------------------------------------------*/
524 NLM_EXTERN Boolean WWWSubstituteValueByName(WWWInfoPtr info_in,
525                                             CharPtr new_value, CharPtr name);
526 
527 #ifdef __cplusplus
528 }
529 #endif
530 
531 #undef NLM_EXTERN  /* leave NLM_EXTERN as NLM_EXPORT (or empty) for whatever follows this header */
532 #ifdef NLM_EXPORT
533 #define NLM_EXTERN NLM_EXPORT
534 #else
535 #define NLM_EXTERN
536 #endif
537 
538 #endif
539