1 /*
2 *			GPAC - Multimedia Framework C SDK
3 *
4 *			Authors: Jean Le Feuvre
5 *			Copyright (c) Telecom ParisTech 2000-2012
6 *					All rights reserved
7 *
8 *  This file is part of GPAC / common tools sub-project
9 *
10 *  GPAC is free software; you can redistribute it and/or modify
11 *  it under the terms of the GNU Lesser General Public License as published by
12 *  the Free Software Foundation; either version 2, or (at your option)
13 *  any later version.
14 *
15 *  GPAC is distributed in the hope that it will be useful,
16 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 *  GNU Lesser General Public License for more details.
19 *
20 *  You should have received a copy of the GNU Lesser General Public
21 *  License along with this library; see the file COPYING.  If not, write to
22 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 */
25 
26 #include <gpac/network.h>
27 
/* number of characters in a URL scheme separator ("://" or "|//") */
#define URL_SEP_LENGTH	3

/* classification of a path or URL, as computed by URL_GetProtocolType() */
enum
{
	/* absolute file path, or "file"-scheme URL */
	GF_URL_TYPE_FILE = 0,
	/* relative path: no absolute prefix and no scheme separator */
	GF_URL_TYPE_RELATIVE = 1,
	/* everything else: other schemes, "data:" URLs, NULL input */
	GF_URL_TYPE_ANY = 2
};
41 
42 /*resolve the protocol type, for a std URL: http:// or ftp:// ...*/
URL_GetProtocolType(const char * pathName)43 static u32 URL_GetProtocolType(const char *pathName)
44 {
45 	char *begin;
46 	if (!pathName) return GF_URL_TYPE_ANY;
47 
48 	/* URL with the data scheme are not relative to avoid concatenation */
49 	if (!strnicmp(pathName, "data:", 5)) return GF_URL_TYPE_ANY;
50 
51 	if ((pathName[0] == '/') || (pathName[0] == '\\')
52 		|| (pathName[1] == ':')
53 		|| ((pathName[0] == ':') && (pathName[1] == ':'))
54 		) return GF_URL_TYPE_FILE;
55 
56 	begin = strstr(pathName, "://");
57 	if (!begin) begin = strstr(pathName, "|//");
58 	if (!begin) return GF_URL_TYPE_RELATIVE;
59 	if (!strnicmp(pathName, "file", 4)) return GF_URL_TYPE_FILE;
60 	return GF_URL_TYPE_ANY;
61 }
62 
63 /*gets protocol type*/
gf_url_is_local(const char * pathName)64 Bool gf_url_is_local(const char *pathName)
65 {
66 	u32 mode = URL_GetProtocolType(pathName);
67 	return (mode == GF_URL_TYPE_ANY) ? GF_FALSE : GF_TRUE;
68 }
69 
gf_url_get_absolute_path(const char * pathName,const char * parentPath)70 char *gf_url_get_absolute_path(const char *pathName, const char *parentPath)
71 {
72 	u32 prot_type = URL_GetProtocolType(pathName);
73 
74 	/*abs path name*/
75 	if (prot_type == GF_URL_TYPE_FILE) {
76 		/*abs path*/
77 		if (!strstr(pathName, "://") && !strstr(pathName, "|//")) return gf_strdup(pathName);
78 		pathName += 6;
79 		/*not sure if "file:///C:\..." is std, but let's handle it anyway*/
80 		if ((pathName[0] == '/') && (pathName[2] == ':')) pathName += 1;
81 		return gf_strdup(pathName);
82 	}
83 	if (prot_type == GF_URL_TYPE_ANY) return NULL;
84 	if (!parentPath) return gf_strdup(pathName);
85 
86 	/*try with the parent URL*/
87 	prot_type = URL_GetProtocolType(parentPath);
88 	/*if abs parent path concatenate*/
89 	if (prot_type == GF_URL_TYPE_FILE) return gf_url_concatenate(parentPath, pathName);
90 	if (prot_type != GF_URL_TYPE_RELATIVE) return NULL;
91 	/*if we are here, parentPath is also relative... return the original PathName*/
92 	return gf_strdup(pathName);
93 }
94 
95 GF_EXPORT
gf_url_concatenate(const char * parentName,const char * pathName)96 char *gf_url_concatenate(const char *parentName, const char *pathName)
97 {
98 	u32 pathSepCount, i, prot_type;
99 	char *outPath, *name, *rad, *tmp2;
100 	char tmp[GF_MAX_PATH];
101 
102 	if (!pathName && !parentName) return NULL;
103 	if (!pathName) return gf_strdup(parentName);
104 	if (!parentName) return gf_strdup(pathName);
105 
106 	if (!strncmp(pathName, "data:", 5)) return gf_strdup(pathName);
107 
108 	if ((strlen(parentName) > GF_MAX_PATH) || (strlen(pathName) > GF_MAX_PATH)) {
109 		GF_LOG(GF_LOG_ERROR, GF_LOG_CORE, ("URL too long for concatenation: \n%s\n", pathName));
110 		return NULL;
111 	}
112 
113 	prot_type = URL_GetProtocolType(pathName);
114 	if (prot_type != GF_URL_TYPE_RELATIVE) {
115 		char *sep = NULL;
116 		if (pathName[0] == '/') sep = strstr(parentName, "://");
117 		if (sep) sep = strchr(sep + 3, '/');
118 		if (sep) {
119 			u32 len;
120 			sep[0] = 0;
121 			len = (u32)strlen(parentName);
122 			outPath = (char*)gf_malloc(sizeof(char)*(len + 1 + strlen(pathName)));
123 			strcpy(outPath, parentName);
124 			strcat(outPath, pathName);
125 			sep[0] = '/';
126 		}
127 		else {
128 			outPath = gf_strdup(pathName);
129 		}
130 		goto check_spaces;
131 	}
132 
133 	/*old upnp addressing a la Platinum*/
134 	rad = strstr(parentName, "%3fpath=");
135 	if (!rad) rad = strstr(parentName, "%3Fpath=");
136 	if (!rad) rad = strstr(parentName, "?path=");
137 	if (rad) {
138 		char *the_path;
139 		rad = strchr(rad, '=');
140 		rad[0] = 0;
141 		the_path = gf_strdup(rad + 1);
142 		i = 0;
143 		while (1) {
144 			if (the_path[i] == 0) break;
145 			if (!strnicmp(the_path + i, "%5c", 3) || !strnicmp(the_path + i, "%2f", 3)) {
146 				the_path[i] = '/';
147 				memmove(the_path + i + 1, the_path + i + 3, strlen(the_path + i + 3) + 1);
148 			}
149 			else if (!strnicmp(the_path + i, "%05c", 4) || !strnicmp(the_path + i, "%02f", 4)) {
150 				the_path[i] = '/';
151 				memmove(the_path + i + 1, the_path + i + 4, strlen(the_path + i + 4) + 1);
152 			}
153 			i++;
154 		}
155 		name = gf_url_concatenate(the_path, pathName);
156 		outPath = (char*)gf_malloc(strlen(parentName) + strlen(name) + 2);
157 		sprintf(outPath, "%s=%s", parentName, name);
158 		rad[0] = '=';
159 		gf_free(name);
160 		gf_free(the_path);
161 		return outPath;
162 	}
163 
164 	/*rewrite path to use / not % encoding*/
165 	rad = strchr(parentName, '%');
166 	if (rad && (!strnicmp(rad, "%5c", 3) || !strnicmp(rad, "%05c", 4) || !strnicmp(rad, "%2f", 3) || !strnicmp(rad, "%02f", 4))) {
167 		char *the_path = gf_strdup(parentName);
168 		i = 0;
169 		while (1) {
170 			if (the_path[i] == 0) break;
171 			if (!strnicmp(the_path + i, "%5c", 3) || !strnicmp(the_path + i, "%2f", 3)) {
172 				the_path[i] = '/';
173 				memmove(the_path + i + 1, the_path + i + 3, strlen(the_path + i + 3) + 1);
174 			}
175 			else if (!strnicmp(the_path + i, "%05c", 4) || !strnicmp(the_path + i, "%02f", 4)) {
176 				the_path[i] = '/';
177 				memmove(the_path + i + 1, the_path + i + 4, strlen(the_path + i + 4) + 1);
178 			}
179 			i++;
180 		}
181 		name = gf_url_concatenate(the_path, pathName);
182 		gf_free(the_path);
183 		return name;
184 	}
185 
186 
187 	pathSepCount = 0;
188 	name = NULL;
189 	if (pathName[0] == '.') {
190 		if (!strcmp(pathName, "..")) {
191 			pathSepCount = 1;
192 			name = "";
193 		}
194 		if (!strcmp(pathName, "./")) {
195 			pathSepCount = 0;
196 			name = "";
197 		}
198 		for (i = 0; i< strlen(pathName) - 2; i++) {
199 			/*current dir*/
200 			if ((pathName[i] == '.')
201 				&& ((pathName[i + 1] == GF_PATH_SEPARATOR) || (pathName[i + 1] == '/'))) {
202 				i++;
203 				continue;
204 			}
205 			/*parent dir*/
206 			if ((pathName[i] == '.') && (pathName[i + 1] == '.')
207 				&& ((pathName[i + 2] == GF_PATH_SEPARATOR) || (pathName[i + 2] == '/'))
208 				) {
209 				pathSepCount++;
210 				i += 2;
211 				name = (char *)&pathName[i + 1];
212 			}
213 			else {
214 				name = (char *)&pathName[i];
215 				break;
216 			}
217 		}
218 	}
219 	if (!name) name = (char *)pathName;
220 
221 	strcpy(tmp, parentName);
222 	while (strchr(" \r\n\t", tmp[strlen(tmp) - 1])) {
223 		tmp[strlen(tmp) - 1] = 0;
224 	}
225 	//strip query part or fragment part
226 	rad = strchr(tmp, '?');
227 	if (rad) rad[0] = 0;
228 	tmp2 = strrchr(tmp, '/');
229 	if (!tmp2) tmp2 = strrchr(tmp, '\\');
230 	if (!tmp2) tmp2 = tmp;
231 	rad = strchr(tmp2, '#');
232 	if (rad) rad[0] = 0;
233 
234 	/*remove the last /*/
235 	for (i = (u32)strlen(tmp); i > 0; i--) {
236 		//break our path at each separator
237 		if ((tmp[i - 1] == GF_PATH_SEPARATOR) || (tmp[i - 1] == '/')) {
238 			tmp[i - 1] = 0;
239 			if (!pathSepCount) break;
240 			pathSepCount--;
241 		}
242 	}
243 	//if i==0, the parent path was relative, just return the pathName
244 	if (!i) {
245 		tmp[i] = 0;
246 		while (pathSepCount) {
247 			strcat(tmp, "../");
248 			pathSepCount--;
249 		}
250 	}
251 	else {
252 		strcat(tmp, "/");
253 	}
254 
255 	i = (u32)strlen(tmp);
256 	outPath = (char *)gf_malloc(i + strlen(name) + 1);
257 	sprintf(outPath, "%s%s", tmp, name);
258 
259 	/*cleanup paths sep for win32*/
260 	for (i = 0; i<strlen(outPath); i++)
261 		if (outPath[i] == '\\') outPath[i] = '/';
262 
263 check_spaces:
264 	i = 0;
265 	while (outPath[i]) {
266 		if (outPath[i] == '?') break;
267 
268 		if (outPath[i] != '%') {
269 			i++;
270 			continue;
271 		}
272 		if (!strnicmp(outPath + i, "%3f", 3)) break;
273 		if (!strnicmp(outPath + i, "%20", 3)) {
274 			outPath[i] = ' ';
275 			memmove(outPath + i + 1, outPath + i + 3, strlen(outPath + i) - 2);
276 		}
277 		i++;
278 	}
279 	return outPath;
280 }
281 
282 GF_EXPORT
gf_url_to_fs_path(char * sURL)283 void gf_url_to_fs_path(char *sURL)
284 {
285 	if (!strnicmp(sURL, "file://", 7)) {
286 		/*file:///C:\ scheme*/
287 		if ((sURL[7] == '/') && (sURL[9] == ':')) {
288 			memmove(sURL, sURL + 8, strlen(sURL) - 7);
289 		}
290 		else {
291 			memmove(sURL, sURL + 7, strlen(sURL) - 6);
292 		}
293 	}
294 
295 	while (1) {
296 		char *sep = strstr(sURL, "%20");
297 		if (!sep) break;
298 		sep[0] = ' ';
299 		memmove(sep + 1, sep + 3, strlen(sep) - 2);
300 	}
301 }
302 
303 //TODO handle reserved characters
304 const char *pce_special = " %";
305 const char *pce_encoded = "0123456789ABCDEF";
306 
gf_url_percent_encode(const char * path)307 char *gf_url_percent_encode(const char *path)
308 {
309 	char *outpath;
310 	u32 i, count, len;
311 	if (!path) return NULL;
312 
313 	len = (u32)strlen(path);
314 	count = 0;
315 	for (i = 0; i<len; i++) {
316 		u8 c = path[i];
317 		if (strchr(pce_special, c) != NULL) {
318 			if ((i + 2<len) && ((strchr(pce_encoded, path[i + 1]) == NULL) || (strchr(pce_encoded, path[i + 2]) == NULL))) {
319 				count += 2;
320 			}
321 		}
322 		else if (c >> 7) {
323 			count += 2;
324 		}
325 	}
326 	if (!count) return gf_strdup(path);
327 	outpath = (char*)gf_malloc(sizeof(char) * (len + count + 1));
328 	strcpy(outpath, path);
329 
330 	count = 0;
331 	for (i = 0; i<len; i++) {
332 		Bool do_enc = GF_FALSE;
333 		u8 c = path[i];
334 
335 		if (strchr(pce_special, c) != NULL) {
336 			if ((i + 2<len) && ((strchr(pce_encoded, path[i + 1]) == NULL) || (strchr(pce_encoded, path[i + 2]) == NULL))) {
337 				do_enc = GF_TRUE;
338 			}
339 		}
340 		else if (c >> 7) {
341 			do_enc = GF_TRUE;
342 		}
343 
344 		if (do_enc) {
345 			char szChar[3];
346 			sprintf(szChar, "%02X", c);
347 			outpath[i + count] = '%';
348 			outpath[i + count + 1] = szChar[0];
349 			outpath[i + count + 2] = szChar[1];
350 			count += 2;
351 		}
352 		else {
353 			outpath[i + count] = c;
354 		}
355 	}
356 	outpath[i + count] = 0;
357 	return outpath;
358 }
359 
360 GF_EXPORT
const char *gf_url_get_resource_name(const char *sURL)
{
	/* Returns the part of sURL following its last '/', or following its last '\\' when it
	   has no '/'; sURL itself when it has neither (NULL for a NULL input). The result
	   points inside sURL. */
	const char *last_sep = NULL;

	if (sURL) {
		last_sep = strrchr(sURL, '/');
		if (!last_sep) last_sep = strrchr(sURL, '\\');
	}
	if (!last_sep) return sURL;
	return last_sep + 1;
}
370 
371 GF_EXPORT
gf_url_get_resource_path(const char * sURL,char * res_path)372 Bool gf_url_get_resource_path(const char *sURL, char *res_path)
373 {
374 	char *sep;
375 	strcpy(res_path, sURL);
376 	sep = strrchr(res_path, '/');
377 	if (!sep) sep = strrchr(res_path, '\\');
378 	if (sep) {
379 		sep[1] = 0;
380 		return GF_TRUE;
381 	}
382 	return GF_FALSE;
383 }
384 
385 
386 GF_EXPORT
gf_url_remove_last_delimiter(const char * sURL,char * res_path)387 Bool gf_url_remove_last_delimiter(const char *sURL, char *res_path)
388 {
389 	strcpy(res_path, sURL);
390 	if (sURL[strlen(sURL) - 1] == GF_PATH_SEPARATOR) {
391 		res_path[strlen(sURL) - 1] = 0;
392 		return GF_TRUE;
393 	}
394 
395 	return GF_FALSE;
396 }
397 
398 GF_EXPORT
const char* gf_url_get_ressource_extension(const char *sURL) {
	/* Returns the text following the last '.' of the resource name of sURL (the part
	   after its last '/' or '\\'), pointing inside sURL. Returns "" when sURL is NULL,
	   when the resource name has no '.', or when the only '.' is the first character
	   of sURL. */
	const char *name, *sep, *dot;
	if (!sURL) return "";

	/* only look at the resource name, so that a dot in a directory name
	   ("/site.d/index") is not taken for an extension */
	name = sURL;
	sep = strrchr(name, '/');
	if (sep) name = sep + 1;
	sep = strrchr(name, '\\');
	if (sep) name = sep + 1;

	dot = strrchr(name, '.');
	if (!dot || dot == sURL) return "";
	return dot + 1;
}