1 /* giowin32-private.c - private glib-gio functions for W32 GAppInfo
2  *
3  * Copyright 2019 Руслан Ижбулатов
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this library; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 
20 static gsize
g_utf16_len(const gunichar2 * str)21 g_utf16_len (const gunichar2 *str)
22 {
23   gsize result;
24 
25   for (result = 0; str[0] != 0; str++, result++)
26     ;
27 
28   return result;
29 }
30 
31 static gunichar2 *
g_wcsdup(const gunichar2 * str,gssize str_len)32 g_wcsdup (const gunichar2 *str, gssize str_len)
33 {
34   gsize str_len_unsigned;
35   gsize str_size;
36 
37   g_return_val_if_fail (str != NULL, NULL);
38 
39   if (str_len < 0)
40     str_len_unsigned = g_utf16_len (str);
41   else
42     str_len_unsigned = (gsize) str_len;
43 
44   g_assert (str_len_unsigned <= G_MAXSIZE / sizeof (gunichar2) - 1);
45   str_size = (str_len_unsigned + 1) * sizeof (gunichar2);
46 
47   return g_memdup2 (str, str_size);
48 }
49 
50 static const gunichar2 *
g_utf16_wchr(const gunichar2 * str,const wchar_t wchr)51 g_utf16_wchr (const gunichar2 *str, const wchar_t wchr)
52 {
53   for (; str != NULL && str[0] != 0; str++)
54     if ((wchar_t) str[0] == wchr)
55       return str;
56 
57   return NULL;
58 }
59 
60 static gboolean
g_utf16_to_utf8_and_fold(const gunichar2 * str,gssize length,gchar ** str_u8,gchar ** str_u8_folded)61 g_utf16_to_utf8_and_fold (const gunichar2  *str,
62                           gssize            length,
63                           gchar           **str_u8,
64                           gchar           **str_u8_folded)
65 {
66   gchar *u8;
67   gchar *folded;
68   u8 = g_utf16_to_utf8 (str, length, NULL, NULL, NULL);
69 
70   if (u8 == NULL)
71     return FALSE;
72 
73   folded = g_utf8_casefold (u8, -1);
74 
75   if (str_u8)
76     *str_u8 = g_steal_pointer (&u8);
77 
78   g_free (u8);
79 
80   if (str_u8_folded)
81     *str_u8_folded = g_steal_pointer (&folded);
82 
83   g_free (folded);
84 
85   return TRUE;
86 }
87 
88 /* Finds the last directory separator in @filename,
89  * returns a pointer to the position after that separator.
90  * If the string ends with a separator, returned value
91  * will be pointing at the NUL terminator.
92  * If the string does not contain separators, returns the
93  * string itself.
94  */
95 static const gunichar2 *
g_utf16_find_basename(const gunichar2 * filename,gssize len)96 g_utf16_find_basename (const gunichar2 *filename,
97                        gssize           len)
98 {
99   const gunichar2 *result;
100 
101   if (len < 0)
102     len = g_utf16_len (filename);
103   if (len == 0)
104     return filename;
105 
106   result = &filename[len - 1];
107 
108   while (result > filename)
109     {
110       if ((wchar_t) result[0] == L'/' ||
111           (wchar_t) result[0] == L'\\')
112         {
113           result += 1;
114           break;
115         }
116 
117       result -= 1;
118     }
119 
120   return result;
121 }
122 
123 /* Finds the last directory separator in @filename,
124  * returns a pointer to the position after that separator.
125  * If the string ends with a separator, returned value
126  * will be pointing at the NUL terminator.
127  * If the string does not contain separators, returns the
128  * string itself.
129  */
130 static const gchar *
g_utf8_find_basename(const gchar * filename,gssize len)131 g_utf8_find_basename (const gchar *filename,
132                       gssize       len)
133 {
134   const gchar *result;
135 
136   if (len < 0)
137     len = strlen (filename);
138   if (len == 0)
139     return filename;
140 
141   result = &filename[len - 1];
142 
143   while (result > filename)
144     {
145       if (result[0] == '/' ||
146           result[0] == '\\')
147         {
148           result += 1;
149           break;
150         }
151 
152       result -= 1;
153     }
154 
155   return result;
156 }
157 
158 /**
159  * Parses @commandline, figuring out what the filename being invoked
160  * is. All returned strings are pointers into @commandline.
161  * @commandline must be a valid UTF-16 string and not be NULL.
162  * @after_executable is the first character after executable
163  * (usually a space, but not always).
164  * If @comma_separator is TRUE, accepts ',' as a separator between
165  * the filename and the following argument.
166  */
167 static void
_g_win32_parse_filename(const gunichar2 * commandline,gboolean comma_separator,const gunichar2 ** executable_start,gssize * executable_len,const gunichar2 ** executable_basename,const gunichar2 ** after_executable)168 _g_win32_parse_filename (const gunichar2  *commandline,
169                          gboolean          comma_separator,
170                          const gunichar2 **executable_start,
171                          gssize           *executable_len,
172                          const gunichar2 **executable_basename,
173                          const gunichar2 **after_executable)
174 {
175   const gunichar2 *p;
176   const gunichar2 *first_argument;
177   gboolean quoted;
178   gssize len;
179   gssize execlen;
180   gboolean found;
181 
182   while ((wchar_t) commandline[0] == L' ')
183     commandline++;
184 
185   quoted = FALSE;
186   execlen = 0;
187   found = FALSE;
188   first_argument = NULL;
189 
190   if ((wchar_t) commandline[0] == L'"')
191     {
192       quoted = TRUE;
193       commandline += 1;
194     }
195 
196   len = g_utf16_len (commandline);
197   p = commandline;
198 
199   while (p < &commandline[len])
200     {
201       switch ((wchar_t) p[0])
202         {
203         case L'"':
204           if (quoted)
205             {
206               first_argument = p + 1;
207               /* Note: this is a valid commandline for opening "c:/file.txt":
208                * > "notepad"c:/file.txt
209                */
210               p = &commandline[len];
211               found = TRUE;
212             }
213           else
214             execlen += 1;
215           break;
216         case L' ':
217           if (!quoted)
218             {
219               first_argument = p;
220               p = &commandline[len];
221               found = TRUE;
222             }
223           else
224             execlen += 1;
225           break;
226         case L',':
227           if (!quoted && comma_separator)
228             {
229               first_argument = p;
230               p = &commandline[len];
231               found = TRUE;
232             }
233           else
234             execlen += 1;
235           break;
236         default:
237           execlen += 1;
238           break;
239         }
240       p += 1;
241     }
242 
243   if (!found)
244     first_argument = &commandline[len];
245 
246   if (executable_start)
247     *executable_start = commandline;
248 
249   if (executable_len)
250     *executable_len = execlen;
251 
252   if (executable_basename)
253     *executable_basename = g_utf16_find_basename (commandline, execlen);
254 
255   if (after_executable)
256     *after_executable = first_argument;
257 }
258 
259 /* Make sure @commandline is a valid UTF-16 string before
260  * calling this function!
261  * follow_class_chain_to_handler() does perform such validation.
262  */
263 static void
_g_win32_extract_executable(const gunichar2 * commandline,gchar ** ex_out,gchar ** ex_basename_out,gchar ** ex_folded_out,gchar ** ex_folded_basename_out,gchar ** dll_function_out)264 _g_win32_extract_executable (const gunichar2  *commandline,
265                              gchar           **ex_out,
266                              gchar           **ex_basename_out,
267                              gchar           **ex_folded_out,
268                              gchar           **ex_folded_basename_out,
269                              gchar           **dll_function_out)
270 {
271   gchar *ex;
272   gchar *ex_folded;
273   const gunichar2 *first_argument;
274   const gunichar2 *executable;
275   const gunichar2 *executable_basename;
276   gboolean quoted;
277   gboolean folded;
278   gssize execlen;
279 
280   _g_win32_parse_filename (commandline, FALSE, &executable, &execlen, &executable_basename, &first_argument);
281 
282   commandline = executable;
283 
284   while ((wchar_t) first_argument[0] == L' ')
285     first_argument++;
286 
287   folded = g_utf16_to_utf8_and_fold (executable, (gssize) execlen, &ex, &ex_folded);
288   /* This should never fail as @executable has to be valid UTF-16. */
289   g_assert (folded);
290 
291   if (dll_function_out)
292     *dll_function_out = NULL;
293 
294   /* See if the executable basename is "rundll32.exe". If so, then
295    * parse the rest of the commandline as r'"?path-to-dll"?[ ]*,*[ ]*dll_function_to_invoke'
296    */
297   /* Using just "rundll32.exe", without an absolute path, seems
298    * very exploitable, but MS does that sometimes, so we have
299    * to accept that.
300    */
301   if ((g_strcmp0 (ex_folded, "rundll32.exe") == 0 ||
302        g_str_has_suffix (ex_folded, "\\rundll32.exe") ||
303        g_str_has_suffix (ex_folded, "/rundll32.exe")) &&
304       first_argument[0] != 0 &&
305       dll_function_out != NULL)
306     {
307       /* Corner cases:
308        * > rundll32.exe c:\some,file,with,commas.dll,some_function
309        * is treated by rundll32 as:
310        * dll=c:\some
311        * function=file,with,commas.dll,some_function
312        * unless the dll name is surrounded by double quotation marks:
313        * > rundll32.exe "c:\some,file,with,commas.dll",some_function
314        * in which case everything works normally.
315        * Also, quoting only works if it surrounds the file name, i.e:
316        * > rundll32.exe "c:\some,file"",with,commas.dll",some_function
317        * will not work.
318        * Also, comma is optional when filename is quoted or when function
319        * name is separated from the filename by space(s):
320        * > rundll32.exe "c:\some,file,with,commas.dll"some_function
321        * will work,
322        * > rundll32.exe c:\some_dll_without_commas_or_spaces.dll some_function
323        * will work too.
324        * Also, any number of commas is accepted:
325        * > rundll32.exe c:\some_dll_without_commas_or_spaces.dll , , ,,, , some_function
326        * works just fine.
327        * And the ultimate example is:
328        * > "rundll32.exe""c:\some,file,with,commas.dll"some_function
329        * and it also works.
330        * Good job, Microsoft!
331        */
332       const gunichar2 *filename_end = NULL;
333       gssize filename_len = 0;
334       gssize function_len = 0;
335       const gunichar2 *dllpart;
336 
337       quoted = FALSE;
338 
339       if ((wchar_t) first_argument[0] == L'"')
340         quoted = TRUE;
341 
342       _g_win32_parse_filename (first_argument, TRUE, &dllpart, &filename_len, NULL, &filename_end);
343 
344       if (filename_end[0] != 0 && filename_len > 0)
345         {
346           const gunichar2 *function_begin = filename_end;
347 
348           while ((wchar_t) function_begin[0] == L',' || (wchar_t) function_begin[0] == L' ')
349             function_begin += 1;
350 
351           if (function_begin[0] != 0)
352             {
353               gchar *dllpart_utf8;
354               gchar *dllpart_utf8_folded;
355               gchar *function_utf8;
356               gboolean folded;
357               const gunichar2 *space = g_utf16_wchr (function_begin, L' ');
358 
359               if (space)
360                 function_len = space - function_begin;
361               else
362                 function_len = g_utf16_len (function_begin);
363 
364               if (quoted)
365                 first_argument += 1;
366 
367               folded = g_utf16_to_utf8_and_fold (first_argument, filename_len, &dllpart_utf8, &dllpart_utf8_folded);
368               g_assert (folded);
369 
370               function_utf8 = g_utf16_to_utf8 (function_begin, function_len, NULL, NULL, NULL);
371 
372               /* We only take this branch when dll_function_out is not NULL */
373               *dll_function_out = g_steal_pointer (&function_utf8);
374 
375               g_free (function_utf8);
376 
377               /*
378                * Free our previous output candidate (rundll32) and replace it with the DLL path,
379                * then proceed forward as if nothing has changed.
380                */
381               g_free (ex);
382               g_free (ex_folded);
383 
384               ex = dllpart_utf8;
385               ex_folded = dllpart_utf8_folded;
386             }
387         }
388     }
389 
390   if (ex_out)
391     {
392       if (ex_basename_out)
393         *ex_basename_out = (gchar *) g_utf8_find_basename (ex, -1);
394 
395       *ex_out = g_steal_pointer (&ex);
396     }
397 
398   g_free (ex);
399 
400   if (ex_folded_out)
401     {
402       if (ex_folded_basename_out)
403         *ex_folded_basename_out = (gchar *) g_utf8_find_basename (ex_folded, -1);
404 
405       *ex_folded_out = g_steal_pointer (&ex_folded);
406     }
407 
408   g_free (ex_folded);
409 }
410 
411 /**
412  * rundll32 accepts many different commandlines. Among them is this:
413  * > rundll32.exe "c:/program files/foo/bar.dll",,, , ,,,, , function_name %1
414  * rundll32 just reads the first argument as a potentially quoted
415  * filename until the quotation ends (if quoted) or until a comma,
416  * or until a space. Then ignores all subsequent spaces (if any) and commas (if any;
417  * at least one comma is mandatory only if the filename is not quoted),
418  * and then interprets the rest of the commandline (until a space or a NUL-byte)
419  * as a name of a function.
420  * When GLib tries to run a program, it attempts to correctly re-quote the arguments,
421  * turning the first argument into "c:/program files/foo/bar.dll,,,".
422  * This breaks rundll32 parsing logic.
423  * Try to work around this by ensuring that the syntax is like this:
424  * > rundll32.exe "c:/program files/foo/bar.dll" function_name
425  * This syntax is valid for rundll32 *and* GLib spawn routines won't break it.
426  *
427  * @commandline must have at least 2 arguments, and the second argument
428  * must contain a (possibly quoted) filename, followed by a space or
429  * a comma. This can be checked for with an extract_executable() call -
430  * it should return a non-null dll_function.
431  */
432 static void
_g_win32_fixup_broken_microsoft_rundll_commandline(gunichar2 * commandline)433 _g_win32_fixup_broken_microsoft_rundll_commandline (gunichar2 *commandline)
434 {
435   const gunichar2 *first_argument;
436   gunichar2 *after_first_argument;
437 
438   _g_win32_parse_filename (commandline, FALSE, NULL, NULL, NULL, &first_argument);
439 
440   while ((wchar_t) first_argument[0] == L' ')
441     first_argument++;
442 
443   _g_win32_parse_filename (first_argument, TRUE, NULL, NULL, NULL, (const gunichar2 **) &after_first_argument);
444 
445   if ((wchar_t) after_first_argument[0] == L',')
446     after_first_argument[0] = 0x0020;
447   /* Else everything is ok (first char after filename is ' ' or the first char
448    * of the function name - either way this will work).
449    */
450 }
451