1 /* GIMP - The GNU Image Manipulation Program
2  * Copyright (C) 1995 Spencer Kimball and Peter Mattis
3  *
4  * The GIMP Help Browser - URI functions
5  * Copyright (C) 2001  Jacob Schroeder  <jacob@convergence.de>
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  */
20 
21 #include "config.h"
22 
23 #include <string.h>
24 
25 #include <glib.h>
26 
27 #include "uri.h"
28 
29 /*  #define URI_DEBUG 1  */
30 
/* Kinds of URI reference, as told apart by uri_get_type() from the
 * first characters of the string.  The enumerators keep their implicit
 * values (0 .. 8).
 */
typedef enum
{
  URI_UNKNOWN,   /* no string was given (NULL pointer)               */
  URI_ABSURI,    /* starts with a scheme followed by ':'             */
  URI_NETPATH,   /* starts with "//"                                 */
  URI_ABSPATH,   /* starts with a single '/'                         */
  URI_RELPATH,   /* anything else that is not listed here            */
  URI_QUERY,     /* starts with '?'                                  */
  URI_EMPTY,     /* the empty string                                 */
  URI_FRAGMENT,  /* starts with '#'                                  */
  URI_INVALID    /* never assigned by uri_get_type()                 */
} UriType;
43 
44 
45 static UriType
uri_get_type(const gchar * uri)46 uri_get_type (const gchar *uri)
47 {
48   gchar        c;
49   const gchar *cptr;
50   UriType      type = URI_UNKNOWN;
51 
52   if (!uri)
53     return type;
54 
55   cptr = uri;
56   c = *cptr++;
57 
58   if (g_ascii_isalpha (c))
59     {
60       type = URI_RELPATH;  /* assume relative path */
61 
62       while ((c = *cptr++))
63         {
64           if (g_ascii_isalnum (c) || c == '+' || c == '-' || c == '.')
65             continue;
66 
67           if (c == ':')
68             {
69               /* it was a scheme */
70               type = URI_ABSURI;
71             }
72           break;
73         }
74     }
75   else
76     {
77       switch (c)
78         {
79         case '/':
80           if (*cptr == '/')
81             {
82               cptr++;
83               type = URI_NETPATH;
84             }
85           else
86             {
87               type = URI_ABSPATH;
88             }
89           break;
90         case '?':
91           type = URI_QUERY;
92           break;
93         case '#':
94           type = URI_FRAGMENT;
95           break;
96         case '\0':
97           type = URI_EMPTY;
98           break;
99         default:
100           type = URI_RELPATH;
101           break;
102         }
103     }
104 
105 #ifdef URI_DEBUG
106   g_print ("uri_get_type (\"%s\") -> ", uri);
107   switch (type)
108     {
109     case URI_UNKNOWN:  g_print ("unknown");  break;
110     case URI_ABSURI:   g_print ("absuri");   break;
111     case URI_NETPATH:  g_print ("netpath");  break;
112     case URI_ABSPATH:  g_print ("abspath");  break;
113     case URI_RELPATH:  g_print ("relpath");  break;
114     case URI_QUERY:    g_print ("query");    break;
115     case URI_EMPTY:    g_print ("empty");    break;
116     case URI_FRAGMENT: g_print ("fragment"); break;
117     case URI_INVALID:  g_print ("invalid");  break;
118     }
119   g_print ("\n");
120 #endif
121 
122   return type;
123 }
124 
125 gchar *
uri_to_abs(const gchar * uri,const gchar * base_uri)126 uri_to_abs (const gchar *uri,
127             const gchar *base_uri)
128 {
129   gchar        c;
130   const gchar *cptr;
131   gchar       *retval    = NULL;
132   UriType      uri_type  = URI_UNKNOWN;
133   UriType      base_type = URI_UNKNOWN;
134 
135   gint base_cnt    =  0;  /* no of chars to be copied from base URI  */
136   gint uri_cnt     =  0;  /* no of chars to be copied from URI       */
137   gint sep_cnt     =  0;  /* no of chars to be inserted between them */
138 
139   const gchar *sep_str = ""; /* string to insert between base and uri */
140   const gchar *part;
141   const gchar *last_segment = NULL;
142 
143 #ifdef URI_DEBUG
144   g_print ("uri_to_abs (\"%s\", \"%s\")\n", uri, base_uri);
145 #endif
146 
147   /* this function does not use the algorithm that is being proposed
148    * in RFC 2396. Instead it analyses the first characters of each
149    * URI to determine its kind (abs, net, path, ...).
150    * After that it locates the missing parts in the base URI and then
151    * concats everything into a newly allocated string.
152    */
153 
154   /* determine the kind of the URIs */
155   uri_type = uri_get_type (uri);
156 
157   if (uri_type != URI_ABSURI)
158     {
159       base_type = uri_get_type (base_uri);
160 
161       if (base_type != URI_ABSURI)
162         return NULL;  /*  neither uri nor base uri are absolute  */
163     }
164 
165   /* find missing parts in base URI */
166   switch (uri_type)
167     {
168     case URI_ABSURI:
169       /* base uri not needed */
170       break;
171 
172     case URI_QUERY:
173       /* ??? last segment? */
174       uri_type = URI_RELPATH;
175     case URI_NETPATH:  /* base scheme */
176     case URI_ABSPATH:  /* base scheme and authority */
177     case URI_RELPATH:  /* base scheme, authority and path */
178       cptr = base_uri;
179 
180       /* skip scheme */
181       while ((c = *cptr++) && c != ':')
182         ; /* nada */
183 
184       base_cnt = cptr - base_uri; /* incl : */
185 
186       if (*cptr != '/')
187         {
188           /* completion not possible */
189           return NULL;
190         }
191 
192       if (uri_type == URI_NETPATH)
193         break;
194 
195       /* skip authority */
196       if (cptr[0] == '/' && cptr[1] == '/')
197         {
198           part = cptr;
199           cptr += 2;
200 
201           while ((c = *cptr++) && c != '/' && c != '?' && c != '#')
202             ; /* nada */
203 
204           cptr--;
205           base_cnt += cptr - part;
206         }
207 
208       if (uri_type == URI_ABSPATH)
209         break;
210 
211       /* skip path */
212       if (*cptr != '/')
213         {
214           sep_cnt = 1;
215           sep_str = "/";
216           break;
217         }
218 
219       part = cptr;
220 
221       g_assert (*cptr == '/');
222 
223       while ((c = *cptr++) && c != '?' && c != '#')
224         {
225           if (c == '/')
226             last_segment = cptr - 1;
227         };
228 
229       g_assert (last_segment);
230 
231       cptr = last_segment;
232 
233       while ((c = *uri) && c == '.' && cptr > part)
234         {
235           gint shift_segment = 0;
236 
237           c = uri[1];
238 
239           if (c == '.' )
240             {
241               c = uri[2];
242               shift_segment = 1;
243             }
244 
245           if (c == '/')
246             {
247               uri += 2;
248             }
249           else if (c == 0 || c == '?' || c == '#')
250             {
251               uri += 1;
252             }
253           else
254             {
255               break;
256             }
257 
258           g_assert (*cptr == '/');
259 
260           if (shift_segment)
261             {
262               uri += 1;
263               while (cptr > part && *--cptr != '/')
264                 ; /* nada */
265             }
266         }
267 
268       base_cnt += cptr - part + 1;
269       break;
270 
271     case URI_EMPTY:
272     case URI_FRAGMENT:
273       /* use whole base uri */
274       base_cnt = strlen (base_uri);
275       break;
276 
277     case URI_UNKNOWN:
278     case URI_INVALID:
279       return NULL;
280     }
281 
282   /* do not include fragment part from the URI reference */
283   for (cptr = uri; (c = *cptr) && c != '#'; cptr++)
284     ; /* nada */
285 
286   uri_cnt = cptr - uri;
287 
288   /* allocate string and copy characters */
289 
290   retval = g_new (gchar, base_cnt + sep_cnt + uri_cnt + 1);
291 
292   if (base_cnt)
293     strncpy (retval, base_uri, base_cnt);
294 
295   if (sep_cnt)
296     strncpy (retval + base_cnt, sep_str, sep_cnt);
297 
298   if (uri_cnt)
299     strncpy (retval + base_cnt + sep_cnt, uri, uri_cnt);
300 
301   retval[base_cnt + sep_cnt + uri_cnt] = '\0';
302 
303 #ifdef URI_DEBUG
304   g_print ("  ->  \"%s\"\n", retval);
305 #endif
306 
307   return retval;
308 }
309 
310 #if 0
311 RFC 2396                   URI Generic Syntax                August 1998
312 
313 
314 A. Collected BNF for URI
315 
316       URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
317       absoluteURI   = scheme ":" ( hier_part | opaque_part )
318       relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
319 
320       hier_part     = ( net_path | abs_path ) [ "?" query ]
321       opaque_part   = uric_no_slash *uric
322 
323       uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
324                       "&" | "=" | "+" | "$" | ","
325 
326       net_path      = "//" authority [ abs_path ]
327       abs_path      = "/"  path_segments
328       rel_path      = rel_segment [ abs_path ]
329 
330       rel_segment   = 1*( unreserved | escaped |
331                           ";" | "@" | "&" | "=" | "+" | "$" | "," )
332 
333       scheme        = alpha *( alpha | digit | "+" | "-" | "." )
334 
335       authority     = server | reg_name
336 
337       reg_name      = 1*( unreserved | escaped | "$" | "," |
338                           ";" | ":" | "@" | "&" | "=" | "+" )
339 
340       server        = [ [ userinfo "@" ] hostport ]
341       userinfo      = *( unreserved | escaped |
342                          ";" | ":" | "&" | "=" | "+" | "$" | "," )
343 
344       hostport      = host [ ":" port ]
345       host          = hostname | IPv4address
346       hostname      = *( domainlabel "." ) toplabel [ "." ]
347       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
348       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
349       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
350       port          = *digit
351 
352       path          = [ abs_path | opaque_part ]
353       path_segments = segment *( "/" segment )
354       segment       = *pchar *( ";" param )
355       param         = *pchar
356       pchar         = unreserved | escaped |
357                       ":" | "@" | "&" | "=" | "+" | "$" | ","
358 
359       query         = *uric
360 
361       fragment      = *uric
362 
363       uric          = reserved | unreserved | escaped
364       reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
365                       "$" | ","
366       unreserved    = alphanum | mark
367       mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
368                       "(" | ")"
369 
370       escaped       = "%" hex hex
371       hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
372                               "a" | "b" | "c" | "d" | "e" | "f"
373 
374       alphanum      = alpha | digit
375       alpha         = lowalpha | upalpha
376 
377       lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
378                  "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
379                  "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
380       upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
381                  "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
382                  "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
383       digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
384                  "8" | "9"
385 
386 #endif
387