1 /***************************************************************************/ 2 /* This code is part of WWW grabber called pavuk */ 3 /* Copyright (c) 1997 - 2001 Stefan Ondrejicka */ 4 /* Distributed under GPL 2 or later */ 5 /***************************************************************************/ 6 7 #ifndef _url_h_ 8 #define _url_h_ 9 10 #include "tools.h" 11 #include "dllist.h" 12 #include "mt.h" 13 14 typedef struct 15 { 16 char *host; /*** HTTP host address ***/ 17 unsigned short port; /*** HTTP service port number ***/ 18 char *document; /*** document path ***/ 19 char *searchstr; /*** query string ***/ 20 char *anchor_name; /*** anchor name ***/ 21 char *user; /*** username for authorization ***/ 22 char *password; /*** password for authorization ***/ 23 } url_http_t; 24 25 typedef struct 26 { 27 char *host; /*** FTP host address ***/ 28 unsigned short port; /*** FTP service port number ***/ 29 char *user; /*** username for authorization ***/ 30 char *password; /*** password for authorization ***/ 31 char *path; /*** document path ***/ 32 char *anchor_name; /*** anchor name ***/ 33 bool_t dir; /*** is this FTP directory URL ? ***/ 34 } url_ftp_t; 35 36 typedef struct 37 { 38 char *filename; /*** file path ***/ 39 char *searchstr; /*** query string ***/ 40 char *anchor_name; /*** anchor name ***/ 41 } url_file_t; 42 43 typedef struct 44 { 45 char *host; /*** GOPHER host address ***/ 46 unsigned short port; /*** GOPHER service port number ***/ 47 char *selector; /*** document selector ***/ 48 } url_gopher_t; 49 50 typedef struct 51 { 52 char *urlstr; /*** url string for unsupported type of URLs ***/ 53 } url_unsup_t; 54 55 typedef enum 56 { /*** id for URL types ***/ 57 URLT_UNKNOWN, 58 URLT_HTTP, 59 URLT_HTTPS, 60 URLT_FTP, 61 URLT_FTPS, 62 URLT_FILE, 63 URLT_GOPHER, 64 URLT_FROMPARENT 65 } protocol; 66 67 /* refers to RFC 2396 */ 68 #define URL_PATH_UNSAFE " <>\"#%{}|\\^[]`+@=&$?;" 69 #define URL_QUERY_UNSAFE " <>\"#%{}|\\^[]`" 70 #define URL_RQUERY_UNSAFE " ;/?:@&=+$,<>#%\"{}|\\^[]`" 71 #define URL_AUTH_UNSAFE " <>\"#%{}|\\^[]`+@=&:$?;/" 72 73 74 #define URL_REDIRECT (unsigned int) (1 << 0) 75 #define URL_INLINE_OBJ (unsigned int) (1 << 1) 76 #define URL_PROCESSED (unsigned int) (1 << 2) 77 #define URL_DOWNLOADED (unsigned int) (1 << 3) 78 #define URL_ERR_UNREC (unsigned int) (1 << 4) 79 #define URL_MOVED (unsigned int) (1 << 5) 80 #define URL_REJECTED (unsigned int) (1 << 6) 81 #define URL_USER_DISABLED (unsigned int) (1 << 7) 82 #define URL_NOT_FOUND (unsigned int) (1 << 8) 83 #define URL_TRUNCATED (unsigned int) (1 << 9) 84 #define URL_ERR_REC (unsigned int) (1 << 10) 85 #define URL_STYLE (unsigned int) (1 << 11) 86 #define URL_INNSCACHE (unsigned int) (1 << 12) 87 #define URL_ISHTML (unsigned int) (1 << 13) 88 #define URL_ISLOCAL (unsigned int) (1 << 14) 89 #define URL_NORECURSE (unsigned int) (1 << 15) 90 #define URL_FORM_ACTION (unsigned int) (1 << 16) 91 #define URL_HAVE_FORMS (unsigned int) (1 << 17) 92 #define URL_ISFIRST (unsigned int) (1 << 18) 93 #define URL_ISSTARTING (unsigned int) (1 << 19) 94 #define URL_ISSCRIPT (unsigned int) (1 << 20) 95 96 typedef struct _protinfo 97 { 98 protocol id; 99 char *dirname; 100 char *urlid; 101 char *typestr; 102 int default_port; 103 bool_t supported; 104 } protinfo; 105 106 typedef struct 107 { /*** properties of document ***/ 108 char *type; /*** MIME type of document ***/ 109 ssize_t size; /*** size of document ***/ 110 time_t mdtm; /*** modification time ***/ 111 } url_prop; 112 113 typedef union 114 { 115 url_http_t http; 116 url_file_t file; 117 url_ftp_t ftp; 118 url_gopher_t gopher; 119 url_unsup_t unsup; 120 } url_union_t; 121 122 typedef struct _url 123 { 124 protocol type; /*** type of URL ***/ 125 126 dllist *parent_url; /*** list of parent URLs ***/ 127 struct _url *moved_to; /*** pointer to new URL if document was moved ***/ 128 129 unsigned short level; /*** tree level of document ***/ 130 unsigned short ref_cnt; /*** number of references to this URL structure ***/ 131 unsigned int status; /*** status flags of URL ***/ 132 133 url_union_t p; /*** parsed URL infos ***/ 134 char *local_name; /*** assigned local filename ***/ 135 void *extension; /*** posible url extensions ***/ 136 137 #ifdef WITH_TREE 138 #ifdef I_FACE 139 url_prop *prop; /*** document properties ***/ 140 void **tree_nfo; /*** UI representation of tree nodes ***/ 141 #endif 142 #endif /* WITH_TREE */ 143 #ifdef HAVE_MT 144 pthread_mutex_t lock; /*** mt lock ***/ 145 #endif 146 } url; 147 148 #define URLI_NORMAL 1 149 #define URLI_FORM 2 150 151 typedef enum 152 { 153 FORM_M_GET, 154 FORM_M_POST, 155 FORM_M_UNKNOWN 156 } form_method; 157 158 typedef enum 159 { 160 FORM_E_MULTIPART, 161 FORM_E_URLENCODED, 162 FORM_E_UNKNOWN 163 } form_encoding; 164 165 typedef struct 166 { 167 char *urlstr; 168 int type; 169 form_method method; 170 form_encoding encoding; 171 dllist *fields; 172 char *localname; 173 } url_info; 174 175 extern url_info *url_info_new(char *); 176 extern url_info *url_info_parse(char *); 177 extern char *url_info_dump(url_info *); 178 extern void url_info_free(url_info *); 179 extern url_info *url_info_duplicate(url_info *); 180 181 extern url *url_parse(char *); 182 extern url *url_dup_url(url *); 183 extern char *url_parse_scheme(char *); 184 extern protocol url_scheme_to_schemeid(char *); 185 extern int dllist_url_compare(dllist_t key1, dllist_t key2); 186 extern int url_compare(url *, url *); 187 extern char *url_to_absolute_url(char *, char *, url *, char *); 188 extern char *url_encode_str(char *, char *); 189 extern char *url_decode_str(const char *, int); 190 extern url *new_url(url *); 191 extern char *url_to_filename(url *, int); 192 extern char *url_to_filename_with_type(url *, const char *, int); 193 extern char *url_get_default_local_name(url *); 194 extern char *url_get_local_name_real(url *, const char *, int); 195 extern void url_changed_filename(url *); 196 extern char *url_to_in_filename(url *); 197 extern void free_deep_url(url *); 198 extern char *get_redirect_abs_path(url *, char *); 199 extern char *url_to_urlstr(url *, int); 200 extern char *url_to_request_urlstr(url *, int); 201 extern void url_path_abs(url *); 202 extern url *filename_to_url(char *); 203 extern void cat_links_to_url_list(dllist *); 204 extern void append_url_to_list(url *); 205 extern void append_url_list_to_list(dllist *, dllist *); 206 extern char *url_get_site(url *); 207 extern int url_get_port(url *); 208 extern char *url_get_pass(url *, char *); 209 extern char *url_get_user(url *, char *); 210 extern int url_get_auth_scheme(url *, char *); 211 extern char *url_get_path(url *); 212 extern char *url_get_full_path(url *); 213 extern char *url_get_anchor_name(url *); 214 extern void url_clear_anchor(url *); 215 extern char *url_get_search_str(url *); 216 extern void url_set_path(url *, char *); 217 extern int url_is_dir_index(url *); 218 extern int url_is_same_site(url *, url *); 219 extern void url_add_to_url_hash_tab(url *); 220 extern void url_remove_from_url_hash_tab(url *); 221 extern void url_add_to_file_hash_tab(url *); 222 extern void url_remove_from_file_hash_tab(url *); 223 extern url *url_was_befor(url *); 224 extern void replace_url_in_list(url *, int); 225 extern void link_url_in_list(url *, url *); 226 extern int url_redirect_to(url *, url *, int); 227 extern void url_forget_filename(url *); 228 extern void url_set_filename(url *, char *); 229 230 extern const protinfo prottable[9]; 231 232 #endif 233