1 /* GIMP - The GNU Image Manipulation Program
2 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
3 *
4 * The GIMP Help Browser - URI functions
5 * Copyright (C) 2001 Jacob Schroeder <jacob@convergence.de>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
19 */
20
21 #include "config.h"
22
23 #include <string.h>
24
25 #include <glib.h>
26
27 #include "uri.h"
28
29 /* #define URI_DEBUG 1 */
30
/* Kinds of URI reference told apart by uri_get_type(), which looks only
 * at the leading characters of the string.
 */
typedef enum
{
  URI_UNKNOWN  = 0,  /* no string to classify (NULL pointer)          */
  URI_ABSURI   = 1,  /* starts with a scheme followed by ':'          */
  URI_NETPATH  = 2,  /* starts with "//"                              */
  URI_ABSPATH  = 3,  /* starts with a single '/'                      */
  URI_RELPATH  = 4,  /* anything else that is not listed here         */
  URI_QUERY    = 5,  /* starts with '?'                               */
  URI_EMPTY    = 6,  /* the empty string                              */
  URI_FRAGMENT = 7,  /* starts with '#'                               */
  URI_INVALID  = 8   /* never produced by uri_get_type(); rejected by
                      * uri_to_abs()                                  */
} UriType;
43
44
45 static UriType
uri_get_type(const gchar * uri)46 uri_get_type (const gchar *uri)
47 {
48 gchar c;
49 const gchar *cptr;
50 UriType type = URI_UNKNOWN;
51
52 if (!uri)
53 return type;
54
55 cptr = uri;
56 c = *cptr++;
57
58 if (g_ascii_isalpha (c))
59 {
60 type = URI_RELPATH; /* assume relative path */
61
62 while ((c = *cptr++))
63 {
64 if (g_ascii_isalnum (c) || c == '+' || c == '-' || c == '.')
65 continue;
66
67 if (c == ':')
68 {
69 /* it was a scheme */
70 type = URI_ABSURI;
71 }
72 break;
73 }
74 }
75 else
76 {
77 switch (c)
78 {
79 case '/':
80 if (*cptr == '/')
81 {
82 cptr++;
83 type = URI_NETPATH;
84 }
85 else
86 {
87 type = URI_ABSPATH;
88 }
89 break;
90 case '?':
91 type = URI_QUERY;
92 break;
93 case '#':
94 type = URI_FRAGMENT;
95 break;
96 case '\0':
97 type = URI_EMPTY;
98 break;
99 default:
100 type = URI_RELPATH;
101 break;
102 }
103 }
104
105 #ifdef URI_DEBUG
106 g_print ("uri_get_type (\"%s\") -> ", uri);
107 switch (type)
108 {
109 case URI_UNKNOWN: g_print ("unknown"); break;
110 case URI_ABSURI: g_print ("absuri"); break;
111 case URI_NETPATH: g_print ("netpath"); break;
112 case URI_ABSPATH: g_print ("abspath"); break;
113 case URI_RELPATH: g_print ("relpath"); break;
114 case URI_QUERY: g_print ("query"); break;
115 case URI_EMPTY: g_print ("empty"); break;
116 case URI_FRAGMENT: g_print ("fragment"); break;
117 case URI_INVALID: g_print ("invalid"); break;
118 }
119 g_print ("\n");
120 #endif
121
122 return type;
123 }
124
125 gchar *
uri_to_abs(const gchar * uri,const gchar * base_uri)126 uri_to_abs (const gchar *uri,
127 const gchar *base_uri)
128 {
129 gchar c;
130 const gchar *cptr;
131 gchar *retval = NULL;
132 UriType uri_type = URI_UNKNOWN;
133 UriType base_type = URI_UNKNOWN;
134
135 gint base_cnt = 0; /* no of chars to be copied from base URI */
136 gint uri_cnt = 0; /* no of chars to be copied from URI */
137 gint sep_cnt = 0; /* no of chars to be inserted between them */
138
139 const gchar *sep_str = ""; /* string to insert between base and uri */
140 const gchar *part;
141 const gchar *last_segment = NULL;
142
143 #ifdef URI_DEBUG
144 g_print ("uri_to_abs (\"%s\", \"%s\")\n", uri, base_uri);
145 #endif
146
147 /* this function does not use the algorithm that is being proposed
148 * in RFC 2396. Instead it analyses the first characters of each
149 * URI to determine its kind (abs, net, path, ...).
150 * After that it locates the missing parts in the base URI and then
151 * concats everything into a newly allocated string.
152 */
153
154 /* determine the kind of the URIs */
155 uri_type = uri_get_type (uri);
156
157 if (uri_type != URI_ABSURI)
158 {
159 base_type = uri_get_type (base_uri);
160
161 if (base_type != URI_ABSURI)
162 return NULL; /* neither uri nor base uri are absolute */
163 }
164
165 /* find missing parts in base URI */
166 switch (uri_type)
167 {
168 case URI_ABSURI:
169 /* base uri not needed */
170 break;
171
172 case URI_QUERY:
173 /* ??? last segment? */
174 uri_type = URI_RELPATH;
175 case URI_NETPATH: /* base scheme */
176 case URI_ABSPATH: /* base scheme and authority */
177 case URI_RELPATH: /* base scheme, authority and path */
178 cptr = base_uri;
179
180 /* skip scheme */
181 while ((c = *cptr++) && c != ':')
182 ; /* nada */
183
184 base_cnt = cptr - base_uri; /* incl : */
185
186 if (*cptr != '/')
187 {
188 /* completion not possible */
189 return NULL;
190 }
191
192 if (uri_type == URI_NETPATH)
193 break;
194
195 /* skip authority */
196 if (cptr[0] == '/' && cptr[1] == '/')
197 {
198 part = cptr;
199 cptr += 2;
200
201 while ((c = *cptr++) && c != '/' && c != '?' && c != '#')
202 ; /* nada */
203
204 cptr--;
205 base_cnt += cptr - part;
206 }
207
208 if (uri_type == URI_ABSPATH)
209 break;
210
211 /* skip path */
212 if (*cptr != '/')
213 {
214 sep_cnt = 1;
215 sep_str = "/";
216 break;
217 }
218
219 part = cptr;
220
221 g_assert (*cptr == '/');
222
223 while ((c = *cptr++) && c != '?' && c != '#')
224 {
225 if (c == '/')
226 last_segment = cptr - 1;
227 };
228
229 g_assert (last_segment);
230
231 cptr = last_segment;
232
233 while ((c = *uri) && c == '.' && cptr > part)
234 {
235 gint shift_segment = 0;
236
237 c = uri[1];
238
239 if (c == '.' )
240 {
241 c = uri[2];
242 shift_segment = 1;
243 }
244
245 if (c == '/')
246 {
247 uri += 2;
248 }
249 else if (c == 0 || c == '?' || c == '#')
250 {
251 uri += 1;
252 }
253 else
254 {
255 break;
256 }
257
258 g_assert (*cptr == '/');
259
260 if (shift_segment)
261 {
262 uri += 1;
263 while (cptr > part && *--cptr != '/')
264 ; /* nada */
265 }
266 }
267
268 base_cnt += cptr - part + 1;
269 break;
270
271 case URI_EMPTY:
272 case URI_FRAGMENT:
273 /* use whole base uri */
274 base_cnt = strlen (base_uri);
275 break;
276
277 case URI_UNKNOWN:
278 case URI_INVALID:
279 return NULL;
280 }
281
282 /* do not include fragment part from the URI reference */
283 for (cptr = uri; (c = *cptr) && c != '#'; cptr++)
284 ; /* nada */
285
286 uri_cnt = cptr - uri;
287
288 /* allocate string and copy characters */
289
290 retval = g_new (gchar, base_cnt + sep_cnt + uri_cnt + 1);
291
292 if (base_cnt)
293 strncpy (retval, base_uri, base_cnt);
294
295 if (sep_cnt)
296 strncpy (retval + base_cnt, sep_str, sep_cnt);
297
298 if (uri_cnt)
299 strncpy (retval + base_cnt + sep_cnt, uri, uri_cnt);
300
301 retval[base_cnt + sep_cnt + uri_cnt] = '\0';
302
303 #ifdef URI_DEBUG
304 g_print (" -> \"%s\"\n", retval);
305 #endif
306
307 return retval;
308 }
309
310 #if 0
311 RFC 2396 URI Generic Syntax August 1998
312
313
314 A. Collected BNF for URI
315
316 URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
317 absoluteURI = scheme ":" ( hier_part | opaque_part )
318 relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
319
320 hier_part = ( net_path | abs_path ) [ "?" query ]
321 opaque_part = uric_no_slash *uric
322
323 uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
324 "&" | "=" | "+" | "$" | ","
325
326 net_path = "//" authority [ abs_path ]
327 abs_path = "/" path_segments
328 rel_path = rel_segment [ abs_path ]
329
330 rel_segment = 1*( unreserved | escaped |
331 ";" | "@" | "&" | "=" | "+" | "$" | "," )
332
333 scheme = alpha *( alpha | digit | "+" | "-" | "." )
334
335 authority = server | reg_name
336
337 reg_name = 1*( unreserved | escaped | "$" | "," |
338 ";" | ":" | "@" | "&" | "=" | "+" )
339
340 server = [ [ userinfo "@" ] hostport ]
341 userinfo = *( unreserved | escaped |
342 ";" | ":" | "&" | "=" | "+" | "$" | "," )
343
344 hostport = host [ ":" port ]
345 host = hostname | IPv4address
346 hostname = *( domainlabel "." ) toplabel [ "." ]
347 domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
348 toplabel = alpha | alpha *( alphanum | "-" ) alphanum
349 IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
350 port = *digit
351
352 path = [ abs_path | opaque_part ]
353 path_segments = segment *( "/" segment )
354 segment = *pchar *( ";" param )
355 param = *pchar
356 pchar = unreserved | escaped |
357 ":" | "@" | "&" | "=" | "+" | "$" | ","
358
359 query = *uric
360
361 fragment = *uric
362
363 uric = reserved | unreserved | escaped
364 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
365 "$" | ","
366 unreserved = alphanum | mark
367 mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
368 "(" | ")"
369
370 escaped = "%" hex hex
371 hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
372 "a" | "b" | "c" | "d" | "e" | "f"
373
374 alphanum = alpha | digit
375 alpha = lowalpha | upalpha
376
377 lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
378 "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
379 "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
380 upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
381 "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
382 "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
383 digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
384 "8" | "9"
385
386 #endif
387