1 /*
2  * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3  * Version 2, December 2004
4  *
5  * Copyright (C) 2012-2013 Sebastien Tricaud <sebastien@honeynet.org>
6  *
7  * Everyone is permitted to copy and distribute verbatim or modified
8  * copies of this license document, and changing it is allowed as long
9  * as the name is changed.
10  *
11  * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
12  * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
13  *
14  *  0. You just DO WHAT THE FUCK YOU WANT TO.
15  */
16 
17 #include <stdio.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20 
21 #include <faup/faup.h>
22 #include <faup/features.h>
23 
/*
 * Reset every feature slot of `features` before a new URL is scanned.
 *
 * Each slot gets pos = -1, which is the "feature not present" marker tested
 * by faup_features_exist(), and is tagged with its FAUP_FEATURES_FIELD_*
 * identifier. The size members are not touched here.
 */
void faup_features_init(faup_features_t* features)
{
	/* Scheme and hierarchical separator */
	features->scheme.pos   = -1;
	features->scheme.field = FAUP_FEATURES_FIELD_SCHEME;

	features->hierarchical.pos   = -1;
	features->hierarchical.field = FAUP_FEATURES_FIELD_HIERARCHICAL;

	/* Authority: credential, host and its sub-parts, port */
	features->credential.pos   = -1;
	features->credential.field = FAUP_FEATURES_FIELD_CREDENTIAL;

	features->host.pos   = -1;
	features->host.field = FAUP_FEATURES_FIELD_HOST;

	features->subdomain.pos   = -1;
	features->subdomain.field = FAUP_FEATURES_FIELD_SUBDOMAIN;

	features->domain.pos   = -1;
	features->domain.field = FAUP_FEATURES_FIELD_DOMAIN;

	features->domain_without_tld.pos   = -1;
	features->domain_without_tld.field = FAUP_FEATURES_FIELD_DOMAIN_WITHOUT_TLD;

	features->tld.pos   = -1;
	features->tld.field = FAUP_FEATURES_FIELD_TLD;

	features->port.pos   = -1;
	features->port.field = FAUP_FEATURES_FIELD_PORT;

	/* Everything after the authority */
	features->resource_path.pos   = -1;
	features->resource_path.field = FAUP_FEATURES_FIELD_RESOURCE_PATH;

	features->query_string.pos   = -1;
	features->query_string.field = FAUP_FEATURES_FIELD_QUERY_STRING;

	features->fragment.pos   = -1;
	features->fragment.field = FAUP_FEATURES_FIELD_FRAGMENT;
}
52 
/*
 * Return the character stored immediately before index `pos` in `buffer`,
 * or -1 when `pos` is 0 and therefore has no predecessor.
 *
 * The two signatures only differ in the spelling of the inline keyword.
 */
#ifdef WIN32
static __inline char get_last_c(const char *buffer, size_t pos)
#else
static inline char get_last_c(const char *buffer, size_t pos)
#endif
{
	return (pos == 0) ? -1 : buffer[pos - 1];
}
65 
/*
 * Tell whether a feature has been located.
 *
 * Returns 1 when feature.pos is non-negative and 0 otherwise.
 */
int faup_features_exist(faup_feature_t feature)
{
	if (feature.pos < 0) {
		return 0;
	}

	return 1;
}
70 
/*
 * Consistency check over the features found in a URL.
 *
 * No check is currently active: a former rule that reported a scheme
 * without a hierarchical part (printing an error and returning 1) is
 * disabled, so the function always returns 0 and ignores its argument.
 */
int faup_features_errors_lookup(faup_features_t const* url_features)
{
	(void)url_features;

	return 0;
}
81 
/*
 * Scan `url` once and record where each URL feature starts.
 *
 * fh       handler whose faup.features is reset (faup_features_init) and
 *          then filled in; faup.url_type is set to FAUP_URL_IPV6 when a
 *          "::" sequence is taken as the start of the host.
 * url      buffer to scan; it does not need to be NUL-terminated.
 * url_len  number of bytes of `url` to look at.
 *
 * Only the `pos` member of the features is written here; the sizes are
 * left untouched. A feature that was not found keeps pos == -1.
 */
void faup_features_find(faup_handler_t *fh, const char *url, const size_t url_len)
{
	faup_features_t* url_features = &fh->faup.features;
	char c;
	char next_c;
	size_t nb_slashes = 0;

	faup_last_slash_t last_slash_meaning = FAUP_LAST_SLASH_NOTFOUND;
	char host_is_ipv6 = 0;

	ssize_t current_pos = 0;
	size_t i;

	/* Position of a non-alphanumeric character directly following a ':'.
	 * Such a character belongs to what follows the colon (e.g. a password)
	 * and must not be interpreted as a delimiter. */
	size_t special_char_after_colons_pos = -1;


	faup_features_init(url_features);

	for (i = 0; i < url_len; i++) {
		c = url[i];

		/* One character of look-ahead; '\0' stands for "end of input" */
		if ((i + 1) < url_len) {
			next_c = url[i+1];
		} else {
			next_c = '\0';
		}

		if (c == '/') {
			nb_slashes++;
		}

		/* While inside an IPv6 host nothing is a delimiter until ']' */
		if (host_is_ipv6 && c != ']') {
			current_pos++;
			continue;
		}

		switch(c) {
			case '/':
				/* This is for URLs such as "http://test:\/test@example.com" */
				if (get_last_c(url, current_pos) == '\\') {
					break;
				}
				/* If it is the first time we have a '/' and previous char is ':' */
				if ((nb_slashes == 1) && (get_last_c(url, current_pos) == ':')) {
					if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
						last_slash_meaning = FAUP_LAST_SLASH_HIERARCHICAL;
						url_features->hierarchical.pos = current_pos -1;
						/* A letter right before the ':' means a scheme precedes it.
						 * The character is read directly and passed as unsigned char:
						 * <ctype.h> functions are undefined for negative values
						 * other than EOF. */
						if ((current_pos > 1) && isalpha((unsigned char)url[current_pos - 2])) {
							url_features->scheme.pos = 0;
						}
						url_features->host.pos = -1; /* So finally we don't start with a host */
						url_features->port.pos = -1; /* So the last ':' we've found was not for a port but for the hierarchical part */
					} /* if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) */
				} else {
					/* We now check for the resource path */
					if (!faup_features_exist(url_features->resource_path)) {
						if (!faup_features_exist(url_features->scheme)) {
							if (!faup_features_exist(url_features->hierarchical)) {
								/* This host has a '/' with no hierarchy */
								/* The seen '/' is not a hierarchy so it is something like foo/bar.html */
								last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
								url_features->resource_path.pos = current_pos;
							}
						} else {
							if (faup_features_exist(url_features->host)) {
								last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
								url_features->resource_path.pos = current_pos;
							}
						}
					}
				}

				if (faup_features_exist(url_features->host)) {
					last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
				}

				break;
			case '@':
				if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
					if (special_char_after_colons_pos != current_pos) {
						if (!faup_features_exist(url_features->credential)) {
							/* Ignore the second '@' of a "@@" sequence */
							if (get_last_c(url, current_pos) == '@') {
								break;
							}

							if ((last_slash_meaning == FAUP_LAST_SLASH_HIERARCHICAL) || /* This '@' belongs to the authentication if http://foo:bar@host/blah */
									(last_slash_meaning == FAUP_LAST_SLASH_NOTFOUND)) {     /* This '@' belongs to the authentication if foo:bar@host/blah */
								url_features->credential.pos = url_features->host.pos; /* The credential starts where we thought the host was */
								url_features->host.pos = current_pos + 1;
								url_features->port.pos = -1; /* So the last ':' we've found was not for a port but for credential */
							} else {
								if (special_char_after_colons_pos != url_features->hierarchical.pos) {
									/* That '/' belongs to the password after colons ':' */
									last_slash_meaning = FAUP_LAST_SLASH_HIERARCHICAL;
									url_features->credential.pos = url_features->host.pos;
									url_features->host.pos = current_pos + 1;
									url_features->port.pos = -1;
									url_features->resource_path.pos = -1;
								}
							}
						}
					}
				}
				break;
			case ':':
				/* We have three cases here:
				   - a ':' for the credential
				   - a ':' for the port number
				   - a ':' in the query request */
				if (!faup_features_exist(url_features->port)) {
					if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
						if (isalnum((unsigned char)next_c)) {
							/* An alphanumeric character follows: tentatively a port.
							 * A later '/' or '@' may reset it to -1. */
							url_features->port.pos = current_pos + 1;
						} else {
							if (next_c != ':') {
								/* Remember the special char after the ':' so it is not treated as a delimiter */
								special_char_after_colons_pos = current_pos + 1;
							} else {
								/* In this case we discovered a colon right after another. It is most likely an IPv6 address */
								if (url_features->host.pos < 0) {
									url_features->host.pos = current_pos;
									host_is_ipv6 = 1;
									fh->faup.url_type = FAUP_URL_IPV6;
								}
							}
						}
					}
				}

				break;
			case '?':
				if (special_char_after_colons_pos != current_pos) {
					if (last_slash_meaning == FAUP_LAST_SLASH_AFTER_DOMAIN) {
						if (!faup_features_exist(url_features->query_string)) {
							url_features->query_string.pos = current_pos;
						}
					} else if (last_slash_meaning < FAUP_LAST_SLASH_AFTER_DOMAIN) {
						/* A '?' seen before any resource path: the query string directly follows the host */
						if (!faup_features_exist(url_features->resource_path)) {
							if (!faup_features_exist(url_features->scheme)) {
								if (!faup_features_exist(url_features->hierarchical)) {
									last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
									if (!faup_features_exist(url_features->query_string)) {
										url_features->query_string.pos = current_pos;
									}
								}
							} else {
								if (faup_features_exist(url_features->host)) {
									last_slash_meaning = FAUP_LAST_SLASH_AFTER_DOMAIN;
									if (!faup_features_exist(url_features->query_string)) {
										url_features->query_string.pos = current_pos;
									}
								}
							}
						}
					}
				}

				break;
			case '#':
				if (special_char_after_colons_pos != current_pos) {
					/* Only the first '#' starts the fragment, wherever it appears */
					if (!faup_features_exist(url_features->fragment)) {
						url_features->fragment.pos = current_pos;
					}
				}

				break;
			case '[': /* This can be an IPv6 URL, see RFC 2732 */
				if (url_features->host.pos < 0) {
					url_features->host.pos = current_pos;
					host_is_ipv6 = 1;
				}
				break;
			case ']':
				host_is_ipv6 = 0; /* We stop handling the special IPv6 case */
				break;
			default:
				if (current_pos == 0) {
					/* We assume we have a host to start. We shall turn it back to -1 if we have a scheme or hierarchy */
					url_features->host.pos = 0;
				}
				/* We have a scheme, but no host and no credential: the host starts at current_pos until a credential is found to replace it */
				if ((!faup_features_exist(url_features->host)) &&
						(!faup_features_exist(url_features->credential))) {
					url_features->host.pos = current_pos;
				}

				break;
		}

		current_pos++;
	}
}
306 
/*
 * Print one "<label>:<pos>,<size>" line on stdout for a located feature.
 *
 * string  label written before the colon.
 * pos     feature position; nothing is printed when it is negative.
 * size    feature size, printed as an unsigned value.
 */
void faup_features_debug_print(char *string, int32_t pos, uint32_t size)
{
	if (pos < 0) {
		return;
	}

	printf("%s:%d,%u\n", string, pos, size);
}
313 
/*
 * Map the `field` tag of a feature to its printable name.
 *
 * Returns a pointer to a string literal (never to be freed or modified);
 * an unrecognised tag yields "Unknown field!".
 */
char *faup_features_get_field_name(faup_feature_t feature)
{
	char *name = "Unknown field!";

	switch (feature.field) {
		case FAUP_FEATURES_FIELD_SCHEME:
			name = "scheme";
			break;
		case FAUP_FEATURES_FIELD_HIERARCHICAL:
			name = "hierarchical";
			break;
		case FAUP_FEATURES_FIELD_CREDENTIAL:
			name = "credential";
			break;
		case FAUP_FEATURES_FIELD_SUBDOMAIN:
			name = "subdomain";
			break;
		case FAUP_FEATURES_FIELD_DOMAIN:
			name = "domain";
			break;
		case FAUP_FEATURES_FIELD_DOMAIN_WITHOUT_TLD:
			name = "domain_without_tld";
			break;
		case FAUP_FEATURES_FIELD_HOST:
			name = "host";
			break;
		case FAUP_FEATURES_FIELD_TLD:
			name = "tld";
			break;
		case FAUP_FEATURES_FIELD_PORT:
			name = "port";
			break;
		case FAUP_FEATURES_FIELD_RESOURCE_PATH:
			name = "resource_path";
			break;
		case FAUP_FEATURES_FIELD_QUERY_STRING:
			name = "query_string";
			break;
		case FAUP_FEATURES_FIELD_FRAGMENT:
			name = "fragment";
			break;
		default:
			break;
	}

	return name;
}
357 
358 
/* Dump one member of `features`, labelled "features-><member>". */
#define FAUP_FEATURES_DEBUG_ONE(member) \
	faup_features_debug_print("features->" #member, features->member.pos, features->member.size)

/*
 * Print the URL followed by the position and size of every feature that
 * was found (features with a negative position are skipped by
 * faup_features_debug_print). Output goes to stdout.
 */
void faup_features_debug(const char *url, faup_features_t const* features)
{
	printf("url:%s\n", url);

	FAUP_FEATURES_DEBUG_ONE(scheme);
	FAUP_FEATURES_DEBUG_ONE(hierarchical);
	FAUP_FEATURES_DEBUG_ONE(credential);
	FAUP_FEATURES_DEBUG_ONE(host);
	FAUP_FEATURES_DEBUG_ONE(domain);
	FAUP_FEATURES_DEBUG_ONE(domain_without_tld);
	FAUP_FEATURES_DEBUG_ONE(subdomain);
	FAUP_FEATURES_DEBUG_ONE(tld);
	FAUP_FEATURES_DEBUG_ONE(port);
	FAUP_FEATURES_DEBUG_ONE(resource_path);
	FAUP_FEATURES_DEBUG_ONE(query_string);
	FAUP_FEATURES_DEBUG_ONE(fragment);
}

#undef FAUP_FEATURES_DEBUG_ONE
375 
/*
 * Copy the text of one feature out of the handler's original string.
 *
 * fh       handler whose faup.org_str holds the text the feature refers to.
 * feature  position and size of the span to copy.
 *
 * Returns a newly malloc()ed, NUL-terminated copy that the caller must
 * free(), or NULL when the feature is absent (pos < 0), empty (size <= 0)
 * or when the allocation fails.
 */
char *_get_feature_string(faup_handler_t *fh, faup_feature_t feature)
{
	char *retstring;

	if (feature.pos < 0) return NULL;
	if (feature.size <= 0) return NULL;

	/* Compute the buffer length in size_t so the +1 cannot wrap in a narrower type */
	retstring = malloc((size_t)feature.size + 1);
	if (retstring == NULL) {
		return NULL;
	}

	memcpy(retstring, fh->faup.org_str + feature.pos, feature.size);
	retstring[feature.size] = '\0';

	return retstring;
}
386 
/*
 * Return a copy of the text of the feature identified by `field`.
 *
 * The copy is produced by _get_feature_string(): it is heap-allocated and
 * must be released by the caller. NULL is returned when `field` is not one
 * of the known FAUP_FEATURES_FIELD_* values or when _get_feature_string()
 * returns NULL for the selected feature.
 */
char *faup_features_get_string(faup_handler_t *fh, faup_features_field_t field)
{
  const faup_feature_t *selected = NULL;

  switch (field) {
  case FAUP_FEATURES_FIELD_SCHEME:
    selected = &fh->faup.features.scheme;
    break;
  case FAUP_FEATURES_FIELD_HIERARCHICAL:
    selected = &fh->faup.features.hierarchical;
    break;
  case FAUP_FEATURES_FIELD_CREDENTIAL:
    selected = &fh->faup.features.credential;
    break;
  case FAUP_FEATURES_FIELD_SUBDOMAIN:
    selected = &fh->faup.features.subdomain;
    break;
  case FAUP_FEATURES_FIELD_DOMAIN:
    selected = &fh->faup.features.domain;
    break;
  case FAUP_FEATURES_FIELD_DOMAIN_WITHOUT_TLD:
    selected = &fh->faup.features.domain_without_tld;
    break;
  case FAUP_FEATURES_FIELD_HOST:
    selected = &fh->faup.features.host;
    break;
  case FAUP_FEATURES_FIELD_TLD:
    selected = &fh->faup.features.tld;
    break;
  case FAUP_FEATURES_FIELD_PORT:
    selected = &fh->faup.features.port;
    break;
  case FAUP_FEATURES_FIELD_RESOURCE_PATH:
    selected = &fh->faup.features.resource_path;
    break;
  case FAUP_FEATURES_FIELD_QUERY_STRING:
    selected = &fh->faup.features.query_string;
    break;
  case FAUP_FEATURES_FIELD_FRAGMENT:
    selected = &fh->faup.features.fragment;
    break;
  default:
    break;
  }

  if (selected == NULL) {
    return NULL;
  }

  return _get_feature_string(fh, *selected);
}
432