1 /*
2 * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3 * Version 2, December 2004
4 *
5 * Copyright (C) 2012-2013 Sebastien Tricaud <sebastien@honeynet.org>
6 *
7 * Everyone is permitted to copy and distribute verbatim or modified
8 * copies of this license document, and changing it is allowed as long
9 * as the name is changed.
10 *
11 * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
12 * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
13 *
14 * 0. You just DO WHAT THE FUCK YOU WANT TO.
15 */
16
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <faup/faup.h>
#include <faup/features.h>
23
/*
 * Reset a feature set before a URL is scanned.
 *
 * Every feature gets pos = -1 (the value faup_features_exist() treats as
 * "not found") and is tagged with its own FAUP_FEATURES_FIELD_* identifier.
 * No other member of the features is touched here.
 */
void faup_features_init(faup_features_t* features)
{
	features->scheme.pos = -1;
	features->scheme.field = FAUP_FEATURES_FIELD_SCHEME;

	features->hierarchical.pos = -1;
	features->hierarchical.field = FAUP_FEATURES_FIELD_HIERARCHICAL;

	features->credential.pos = -1;
	features->credential.field = FAUP_FEATURES_FIELD_CREDENTIAL;

	features->host.pos = -1;
	features->host.field = FAUP_FEATURES_FIELD_HOST;

	features->subdomain.pos = -1;
	features->subdomain.field = FAUP_FEATURES_FIELD_SUBDOMAIN;

	features->domain.pos = -1;
	features->domain.field = FAUP_FEATURES_FIELD_DOMAIN;

	features->domain_without_tld.pos = -1;
	features->domain_without_tld.field = FAUP_FEATURES_FIELD_DOMAIN_WITHOUT_TLD;

	features->port.pos = -1;
	features->port.field = FAUP_FEATURES_FIELD_PORT;

	features->resource_path.pos = -1;
	features->resource_path.field = FAUP_FEATURES_FIELD_RESOURCE_PATH;

	features->query_string.pos = -1;
	features->query_string.field = FAUP_FEATURES_FIELD_QUERY_STRING;

	features->fragment.pos = -1;
	features->fragment.field = FAUP_FEATURES_FIELD_FRAGMENT;

	features->tld.pos = -1;
	features->tld.field = FAUP_FEATURES_FIELD_TLD;
}
52
/*
 * Return the character stored just before offset `pos` in `buffer`,
 * or -1 when `pos` is 0 and there is no previous character.
 */
#ifdef WIN32
static __inline char get_last_c(const char *buffer, size_t pos)
#else
static inline char get_last_c(const char *buffer, size_t pos)
#endif
{
	return (pos > 0) ? buffer[pos - 1] : -1;
}
65
/*
 * Tell whether a feature has been located.
 *
 * Returns 1 when feature.pos is zero or positive, 0 when it is negative
 * (faup_features_init() stores -1 for "not found").
 */
int faup_features_exist(faup_feature_t feature)
{
	if (feature.pos < 0) {
		return 0;
	}

	return 1;
}
70
/*
 * Consistency check over a located feature set.
 *
 * No check is currently active: the only one ever drafted here (reporting
 * a scheme that comes without a hierarchical part) is disabled, so the
 * function ignores its argument and always returns 0.
 */
int faup_features_errors_lookup(faup_features_t const* url_features)
{
	(void)url_features; /* intentionally unused while no check is enabled */

	return 0;
}
81
/*
 * Locate the components of a URL.
 *
 * Walks the first `url_len` bytes of `url` once and stores, in
 * fh->faup.features, the starting offset (.pos) of every component it
 * recognises: scheme, hierarchical, credential, host, port, resource_path,
 * query_string and fragment. All features are first reset with
 * faup_features_init(), so a component that is not found keeps pos == -1.
 * Only offsets are written here; this function sets no other member of a
 * feature. When a "::" sequence is taken as the start of the host,
 * fh->faup.url_type is set to FAUP_URL_IPV6.
 *
 * fh       handler whose feature set is filled in; a NULL handler is ignored.
 * url      buffer to scan (it is read by length, never by terminator); with a
 *          NULL buffer the features are reset and nothing is scanned.
 * url_len  number of bytes of `url` to examine.
 */
void faup_features_find(faup_handler_t *fh, const char *url, const size_t url_len)
{
	faup_features_t *url_features;
	faup_last_slash_t last_slash_meaning = FAUP_LAST_SLASH_NOTFOUND;
	size_t nb_slashes = 0;
	char host_is_ipv6 = 0;
	/* Offset of a non-alphanumeric, non-':' character found right after a ':'; -1 while none was seen.
	   Kept signed so that it compares cleanly against the (signed) positions below. */
	ssize_t special_char_after_colons_pos = -1;
	size_t i;

	if (fh == NULL) {
		return;
	}

	url_features = &fh->faup.features;
	faup_features_init(url_features);

	if (url == NULL) {
		return;
	}

	for (i = 0; i < url_len; i++) {
		const ssize_t current_pos = (ssize_t)i;
		const char c = url[i];
		const char next_c = ((i + 1) < url_len) ? url[i + 1] : '\0';

		/* Every '/' is counted, even one that is skipped below */
		if (c == '/') {
			nb_slashes++;
		}

		/* Inside an IPv6 host nothing is interpreted until the closing ']' */
		if (host_is_ipv6 && c != ']') {
			continue;
		}

		switch(c) {
		case '/':
			/* This is for URLs such as "http://test:\/test@example.com": an escaped '/' is ignored */
			if (get_last_c(url, i) == '\\') {
				break;
			}
			/* If it is the first time we have a '/' and previous char is ':' */
			if ((nb_slashes == 1) && (get_last_c(url, i) == ':')) {
				if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
					last_slash_meaning = FAUP_LAST_SLASH_HIERARCHICAL;
					url_features->hierarchical.pos = current_pos - 1;
					/* A scheme is recorded only when a letter precedes the ':'.
					   The cast keeps isalpha() defined for bytes >= 0x80. */
					if ((i >= 2) && isalpha((unsigned char)url[i - 2])) {
						url_features->scheme.pos = 0;
					}
					url_features->host.pos = -1; /* So finally we don't start with a host */
					url_features->port.pos = -1; /* So the last ':' we've found was not for a port */
				}
			} else {
				/* We now check for the resource path */
				if (!faup_features_exist(url_features->resource_path)) {
					if (!faup_features_exist(url_features->scheme)) {
						if (!faup_features_exist(url_features->hierarchical)) {
							/* This host has a '/' with no hierarchy: */
							/* the seen '/' is not a hierarchy so it is something like foo/bar.html */
							last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
							url_features->resource_path.pos = current_pos;
						}
					} else {
						if (faup_features_exist(url_features->host)) {
							last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
							url_features->resource_path.pos = current_pos;
						}
					}
				}
			}

			if (faup_features_exist(url_features->host)) {
				last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
			}
			break;
		case '@':
			if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
				if (special_char_after_colons_pos != current_pos) {
					if (!faup_features_exist(url_features->credential)) {
						/* Second '@' of a "@@" pair: ignored */
						if (get_last_c(url, i) == '@') {
							break;
						}

						if ((last_slash_meaning == FAUP_LAST_SLASH_HIERARCHICAL) || /* This '@' belongs to the authentication if http://foo:bar@host/blah */
						    (last_slash_meaning == FAUP_LAST_SLASH_NOTFOUND)) {     /* This '@' belongs to the authentication if foo:bar@host/blah */
							url_features->credential.pos = url_features->host.pos; /* The credential starts where we thought the host was */
							url_features->host.pos = current_pos + 1;
							url_features->port.pos = -1; /* So the last ':' we've found was not for a port but for credential */
						} else {
							if (special_char_after_colons_pos != (ssize_t)url_features->hierarchical.pos) {
								/* That '/' belongs to the password after colons ':' */
								last_slash_meaning = FAUP_LAST_SLASH_HIERARCHICAL;
								url_features->credential.pos = url_features->host.pos;
								url_features->host.pos = current_pos + 1;
								url_features->port.pos = -1;
								url_features->resource_path.pos = -1;
							}
						}
					}
				}
			}
			break;
		case ':':
			/* We have three cases here:
			   - a ':' for the credential
			   - a ':' for the port number
			   - a ':' in the query request */
			if (!faup_features_exist(url_features->port)) {
				if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
					if (isalnum((unsigned char)next_c)) {
						/* Provisional: reset to -1 later if this ':' turns out to belong to a scheme or a credential */
						url_features->port.pos = current_pos + 1;
					} else {
						if (next_c != ':') {
							/* Remember the special char following the ':' so it is not interpreted on its own */
							special_char_after_colons_pos = current_pos + 1;
						} else {
							/* In this case we discovered a colon after the other. It is most likely an IPv6 address */
							if (url_features->host.pos < 0) {
								url_features->host.pos = current_pos;
								host_is_ipv6 = 1;
								fh->faup.url_type = FAUP_URL_IPV6;
							}
						}
					}
				}
			}
			break;
		case '?':
			if (special_char_after_colons_pos != current_pos) {
				if (last_slash_meaning == FAUP_LAST_SLASH_AFTER_DOMAIN) {
					if (!faup_features_exist(url_features->query_string)) {
						url_features->query_string.pos = current_pos;
					}
				} else if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
					/* A '?' seen before any path: same decision tree as for '/', but it starts the query string */
					if (!faup_features_exist(url_features->resource_path)) {
						if (!faup_features_exist(url_features->scheme)) {
							if (!faup_features_exist(url_features->hierarchical)) {
								last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
								if (!faup_features_exist(url_features->query_string)) {
									url_features->query_string.pos = current_pos;
								}
							}
						} else {
							if (faup_features_exist(url_features->host)) {
								last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
								if (!faup_features_exist(url_features->query_string)) {
									url_features->query_string.pos = current_pos;
								}
							}
						}
					}
				}
			}
			break;
		case '#':
			if (special_char_after_colons_pos != current_pos) {
				/* Only the first '#' starts the fragment, wherever it appears */
				if (!faup_features_exist(url_features->fragment)) {
					url_features->fragment.pos = current_pos;
				}
			}
			break;
		case '[': /* This can be an IPv6 URL, see RFC 2732 */
			/* NOTE(review): unlike the "::" case, url_type is not set to FAUP_URL_IPV6 here - confirm this is intended */
			if (url_features->host.pos < 0) {
				url_features->host.pos = current_pos;
				host_is_ipv6 = 1;
			}
			break;
		case ']':
			host_is_ipv6 = 0; /* We stop handling the special IPv6 case */
			break;
		default:
			if (current_pos == 0) {
				/* We assume we have a host to start. We shall turn it back to -1 if we have a scheme or hierarchy */
				url_features->host.pos = 0;
			}
			/* No host and no credential yet: the host starts here, until a credential takes its place */
			if ((!faup_features_exist(url_features->host)) &&
			    (!faup_features_exist(url_features->credential))) {
				url_features->host.pos = current_pos;
			}
			break;
		}
	}
}
306
/*
 * Print one feature on stdout as "<string>:<pos>,<size>" followed by a
 * newline. Nothing is printed when `pos` is negative.
 */
void faup_features_debug_print(char *string, int32_t pos, uint32_t size)
{
	if (pos < 0) {
		return;
	}

	printf("%s:%d,%u\n", string, pos, size);
}
313
/*
 * Map the `field` identifier of a feature to its printable name.
 *
 * Returns a pointer to a string literal (the caller must neither modify
 * nor free it); "Unknown field!" is returned for an identifier that is
 * not handled below.
 */
char *faup_features_get_field_name(faup_feature_t feature)
{
	char *name;

	switch(feature.field) {
	case FAUP_FEATURES_FIELD_SCHEME:
		name = "scheme";
		break;
	case FAUP_FEATURES_FIELD_HIERARCHICAL:
		name = "hierarchical";
		break;
	case FAUP_FEATURES_FIELD_CREDENTIAL:
		name = "credential";
		break;
	case FAUP_FEATURES_FIELD_SUBDOMAIN:
		name = "subdomain";
		break;
	case FAUP_FEATURES_FIELD_DOMAIN:
		name = "domain";
		break;
	case FAUP_FEATURES_FIELD_DOMAIN_WITHOUT_TLD:
		name = "domain_without_tld";
		break;
	case FAUP_FEATURES_FIELD_HOST:
		name = "host";
		break;
	case FAUP_FEATURES_FIELD_TLD:
		name = "tld";
		break;
	case FAUP_FEATURES_FIELD_PORT:
		name = "port";
		break;
	case FAUP_FEATURES_FIELD_RESOURCE_PATH:
		name = "resource_path";
		break;
	case FAUP_FEATURES_FIELD_QUERY_STRING:
		name = "query_string";
		break;
	case FAUP_FEATURES_FIELD_FRAGMENT:
		name = "fragment";
		break;
	default:
		name = "Unknown field!";
		break;
	}

	return name;
}
357
358
/*
 * Dump a feature set on stdout: first a "url:<url>" line, then one
 * "features-><name>:<pos>,<size>" line per feature, through
 * faup_features_debug_print() (which skips features whose pos is negative).
 */
void faup_features_debug(const char *url, faup_features_t const* features)
{
/* Expands to the same call as the hand-written form; the label is built at compile time */
#define FAUP_DEBUG_FEATURE(member) \
	faup_features_debug_print("features->" #member, features->member.pos, features->member.size)

	printf("url:%s\n", url);

	FAUP_DEBUG_FEATURE(scheme);
	FAUP_DEBUG_FEATURE(hierarchical);
	FAUP_DEBUG_FEATURE(credential);
	FAUP_DEBUG_FEATURE(host);
	FAUP_DEBUG_FEATURE(domain);
	FAUP_DEBUG_FEATURE(domain_without_tld);
	FAUP_DEBUG_FEATURE(subdomain);
	FAUP_DEBUG_FEATURE(tld);
	FAUP_DEBUG_FEATURE(port);
	FAUP_DEBUG_FEATURE(resource_path);
	FAUP_DEBUG_FEATURE(query_string);
	FAUP_DEBUG_FEATURE(fragment);

#undef FAUP_DEBUG_FEATURE
}
375
/*
 * Copy the text of one feature out of the handler's original string.
 *
 * The copy is taken from fh->faup.org_str, starting at feature.pos and
 * feature.size bytes long, and is NUL-terminated.
 *
 * Returns a buffer obtained from malloc() that the caller must free(), or
 * NULL when the feature was not found (pos < 0), is empty (size <= 0), or
 * the allocation failed.
 */
char *_get_feature_string(faup_handler_t *fh, faup_feature_t feature)
{
	char *retstring = NULL;
	size_t length;

	if (feature.pos < 0) return NULL;
	if (feature.size <= 0) return NULL;

	/* Widen before adding the terminator so that size + 1 cannot wrap in the field's own type */
	length = (size_t)feature.size;

	retstring = malloc(length + 1);
	if (retstring == NULL) {
		return NULL;
	}

	memcpy(retstring, fh->faup.org_str + feature.pos, length);
	retstring[length] = '\0';

	return retstring;
}
386
/*
 * Return a copy of the text of the feature designated by `field`.
 *
 * The feature is picked from fh->faup.features and handed to
 * _get_feature_string(), whose result is returned unchanged. NULL is
 * returned for a `field` value that is not handled below.
 */
char *faup_features_get_string(faup_handler_t *fh, faup_features_field_t field)
{
	faup_feature_t *selected = NULL;

	switch(field) {
	case FAUP_FEATURES_FIELD_SCHEME:
		selected = &fh->faup.features.scheme;
		break;
	case FAUP_FEATURES_FIELD_HIERARCHICAL:
		selected = &fh->faup.features.hierarchical;
		break;
	case FAUP_FEATURES_FIELD_CREDENTIAL:
		selected = &fh->faup.features.credential;
		break;
	case FAUP_FEATURES_FIELD_SUBDOMAIN:
		selected = &fh->faup.features.subdomain;
		break;
	case FAUP_FEATURES_FIELD_DOMAIN:
		selected = &fh->faup.features.domain;
		break;
	case FAUP_FEATURES_FIELD_DOMAIN_WITHOUT_TLD:
		selected = &fh->faup.features.domain_without_tld;
		break;
	case FAUP_FEATURES_FIELD_HOST:
		selected = &fh->faup.features.host;
		break;
	case FAUP_FEATURES_FIELD_TLD:
		selected = &fh->faup.features.tld;
		break;
	case FAUP_FEATURES_FIELD_PORT:
		selected = &fh->faup.features.port;
		break;
	case FAUP_FEATURES_FIELD_RESOURCE_PATH:
		selected = &fh->faup.features.resource_path;
		break;
	case FAUP_FEATURES_FIELD_QUERY_STRING:
		selected = &fh->faup.features.query_string;
		break;
	case FAUP_FEATURES_FIELD_FRAGMENT:
		selected = &fh->faup.features.fragment;
		break;
	default:
		break;
	}

	if (selected == NULL) {
		return NULL;
	}

	return _get_feature_string(fh, *selected);
}
432