1 /***************************************************************************/
2 /*    This code is part of WWW grabber called pavuk                        */
3 /*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
4 /*    Distributed under GPL 2 or later                                     */
5 /***************************************************************************/
6 
7 #ifndef _url_h_
8 #define _url_h_
9 
10 #include "tools.h"
11 #include "dllist.h"
12 #include "mt.h"
13 
/*
 * Components of an HTTP URL.
 */
typedef struct
{
  char *host;                   /* HTTP host address */
  unsigned short port;          /* HTTP service port number */
  char *document;               /* path of the document */
  char *searchstr;              /* query string */
  char *anchor_name;            /* name of the anchor */
  char *user;                   /* user name used for authorization */
  char *password;               /* password used for authorization */
} url_http_t;
24 
25 typedef struct
26 {
27   char *host;                           /*** FTP host address ***/
28   unsigned short port;                  /*** FTP service port number ***/
29   char *user;                           /*** username for authorization ***/
30   char *password;                       /*** password for authorization ***/
31   char *path;                           /*** document path ***/
32   char *anchor_name;                    /*** anchor name ***/
33   bool_t dir;                           /*** is this FTP directory URL ? ***/
34 } url_ftp_t;
35 
/*
 * Components of a file URL.
 */
typedef struct
{
  char *filename;               /* path of the file */
  char *searchstr;              /* query string */
  char *anchor_name;            /* name of the anchor */
} url_file_t;
42 
/*
 * Components of a GOPHER URL.
 */
typedef struct
{
  char *host;                   /* GOPHER host address */
  unsigned short port;          /* GOPHER service port number */
  char *selector;               /* selector of the document */
} url_gopher_t;
49 
/*
 * Holder for URLs of an unsupported type: only the URL string is kept.
 */
typedef struct
{
  char *urlstr;                 /* URL string of the unsupported URL */
} url_unsup_t;
54 
/*
 * Identifiers for the URL types.
 * The values are spelled out; they match the implicit numbering 0..7.
 */
typedef enum
{
  URLT_UNKNOWN = 0,
  URLT_HTTP = 1,
  URLT_HTTPS = 2,
  URLT_FTP = 3,
  URLT_FTPS = 4,
  URLT_FILE = 5,
  URLT_GOPHER = 6,
  URLT_FROMPARENT = 7
} protocol;
66 
/*
 * Character sets labelled "unsafe" for individual URL parts; the sets
 * refer to RFC 2396.
 * NOTE(review): presumably these are the characters that get escaped when
 * the respective part is encoded - confirm against the users of the macros.
 */
#define URL_QUERY_UNSAFE " <>\"#%{}|\\^[]`"
#define URL_RQUERY_UNSAFE " ;/?:@&=+$,<>#%\"{}|\\^[]`"
#define URL_PATH_UNSAFE " <>\"#%{}|\\^[]`+@=&$?;"
#define URL_AUTH_UNSAFE " <>\"#%{}|\\^[]`+@=&:$?;/"
72 
73 
/*
 * Single-bit flag constants of type unsigned int, occupying bits 0..20.
 * NOTE(review): presumably these are OR-ed into the `status` member of
 * the url structure ("status flags of URL") - confirm against the users.
 *
 * Each expansion is one parenthesized expression, so it stays intact next
 * to any operator (e.g. `sizeof URL_REDIRECT`), and the shift is done in
 * unsigned arithmetic.
 */
#define URL_REDIRECT            (1U << 0)
#define URL_INLINE_OBJ          (1U << 1)
#define URL_PROCESSED           (1U << 2)
#define URL_DOWNLOADED          (1U << 3)
#define URL_ERR_UNREC           (1U << 4)
#define URL_MOVED               (1U << 5)
#define URL_REJECTED            (1U << 6)
#define URL_USER_DISABLED       (1U << 7)
#define URL_NOT_FOUND           (1U << 8)
#define URL_TRUNCATED           (1U << 9)
#define URL_ERR_REC             (1U << 10)
#define URL_STYLE               (1U << 11)
#define URL_INNSCACHE           (1U << 12)
#define URL_ISHTML              (1U << 13)
#define URL_ISLOCAL             (1U << 14)
#define URL_NORECURSE           (1U << 15)
#define URL_FORM_ACTION         (1U << 16)
#define URL_HAVE_FORMS          (1U << 17)
#define URL_ISFIRST             (1U << 18)
#define URL_ISSTARTING          (1U << 19)
#define URL_ISSCRIPT            (1U << 20)
95 
96 typedef struct _protinfo
97 {
98   protocol id;
99   char *dirname;
100   char *urlid;
101   char *typestr;
102   int default_port;
103   bool_t supported;
104 } protinfo;
105 
/*
 * Properties of a document.
 */
typedef struct
{
  char *type;                   /* MIME type of the document */
  ssize_t size;                 /* size of the document */
  time_t mdtm;                  /* time of modification */
} url_prop;
112 
113 typedef union
114 {
115   url_http_t http;
116   url_file_t file;
117   url_ftp_t ftp;
118   url_gopher_t gopher;
119   url_unsup_t unsup;
120 } url_union_t;
121 
122 typedef struct _url
123 {
124   protocol type;                        /*** type of URL ***/
125 
126   dllist *parent_url;                   /*** list of parent URLs ***/
127   struct _url *moved_to;                /*** pointer to new URL if document was moved ***/
128 
129   unsigned short level;                 /*** tree level of document ***/
130   unsigned short ref_cnt;               /*** number of references to this URL structure ***/
131   unsigned int status;                  /*** status flags of URL ***/
132 
133   url_union_t p;                        /*** parsed URL infos ***/
134   char *local_name;                     /*** assigned local filename ***/
135   void *extension;                      /*** posible url extensions ***/
136 
137 #ifdef WITH_TREE
138 #ifdef I_FACE
139   url_prop *prop;                       /*** document properties ***/
140   void **tree_nfo;                      /*** UI representation of tree nodes ***/
141 #endif
142 #endif                          /* WITH_TREE */
143 #ifdef HAVE_MT
144   pthread_mutex_t lock;                 /*** mt lock ***/
145 #endif
146 } url;
147 
/*
 * URLI_* kind constants.
 * NOTE(review): presumably the values stored in the `type` member of
 * url_info - confirm against the users.
 */
#define URLI_NORMAL 1
#define URLI_FORM   2
150 
/*
 * Form method identifiers: GET, POST, or unknown.
 * The values are spelled out; they match the implicit numbering 0..2.
 */
typedef enum
{
  FORM_M_GET = 0,
  FORM_M_POST = 1,
  FORM_M_UNKNOWN = 2
} form_method;
157 
/*
 * Form encoding identifiers: multipart, urlencoded, or unknown.
 * The values are spelled out; they match the implicit numbering 0..2.
 */
typedef enum
{
  FORM_E_MULTIPART = 0,
  FORM_E_URLENCODED = 1,
  FORM_E_UNKNOWN = 2
} form_encoding;
164 
165 typedef struct
166 {
167   char *urlstr;
168   int type;
169   form_method method;
170   form_encoding encoding;
171   dllist *fields;
172   char *localname;
173 } url_info;
174 
175 extern url_info *url_info_new(char *);
176 extern url_info *url_info_parse(char *);
177 extern char *url_info_dump(url_info *);
178 extern void url_info_free(url_info *);
179 extern url_info *url_info_duplicate(url_info *);
180 
181 extern url *url_parse(char *);
182 extern url *url_dup_url(url *);
183 extern char *url_parse_scheme(char *);
184 extern protocol url_scheme_to_schemeid(char *);
185 extern int dllist_url_compare(dllist_t key1, dllist_t key2);
186 extern int url_compare(url *, url *);
187 extern char *url_to_absolute_url(char *, char *, url *, char *);
188 extern char *url_encode_str(char *, char *);
189 extern char *url_decode_str(const char *, int);
190 extern url *new_url(url *);
191 extern char *url_to_filename(url *, int);
192 extern char *url_to_filename_with_type(url *, const char *, int);
193 extern char *url_get_default_local_name(url *);
194 extern char *url_get_local_name_real(url *, const char *, int);
195 extern void url_changed_filename(url *);
196 extern char *url_to_in_filename(url *);
197 extern void free_deep_url(url *);
198 extern char *get_redirect_abs_path(url *, char *);
199 extern char *url_to_urlstr(url *, int);
200 extern char *url_to_request_urlstr(url *, int);
201 extern void url_path_abs(url *);
202 extern url *filename_to_url(char *);
203 extern void cat_links_to_url_list(dllist *);
204 extern void append_url_to_list(url *);
205 extern void append_url_list_to_list(dllist *, dllist *);
206 extern char *url_get_site(url *);
207 extern int url_get_port(url *);
208 extern char *url_get_pass(url *, char *);
209 extern char *url_get_user(url *, char *);
210 extern int url_get_auth_scheme(url *, char *);
211 extern char *url_get_path(url *);
212 extern char *url_get_full_path(url *);
213 extern char *url_get_anchor_name(url *);
214 extern void url_clear_anchor(url *);
215 extern char *url_get_search_str(url *);
216 extern void url_set_path(url *, char *);
217 extern int url_is_dir_index(url *);
218 extern int url_is_same_site(url *, url *);
219 extern void url_add_to_url_hash_tab(url *);
220 extern void url_remove_from_url_hash_tab(url *);
221 extern void url_add_to_file_hash_tab(url *);
222 extern void url_remove_from_file_hash_tab(url *);
223 extern url *url_was_befor(url *);
224 extern void replace_url_in_list(url *, int);
225 extern void link_url_in_list(url *, url *);
226 extern int url_redirect_to(url *, url *, int);
227 extern void url_forget_filename(url *);
228 extern void url_set_filename(url *, char *);
229 
230 extern const protinfo prottable[9];
231 
232 #endif
233