1 /***
2 *stdargv.c - standard & wildcard _setargv routine
3 *
4 * Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 * processes program command line, with or without wildcard expansion
8 *
9 *******************************************************************************/
10
11 #include <corecrt_internal.h>
12 #include <corecrt_internal_traits.h>
13 #include <limits.h>
14 #include <mbstring.h>
15 #include <stdlib.h>
16
17
18
19 // In the function below, we need to ensure that we've initialized the mbc table
20 // before we start performing character transformations.
do_locale_initialization(char)21 static void do_locale_initialization(char) throw() { __acrt_initialize_multibyte(); }
// Wide-character overload: intentionally does nothing.  The unnamed wchar_t
// parameter is used only to select this overload.
static void do_locale_initialization(wchar_t) throw()
{
    // Nothing to initialize for wide characters.
}
23
get_command_line(char)24 static char* get_command_line(char) throw() { return _acmdln; }
get_command_line(wchar_t)25 static wchar_t* get_command_line(wchar_t) throw() { return _wcmdln; }
26
get_argv(char)27 static char**& get_argv(char) throw() { return __argv; }
get_argv(wchar_t)28 static wchar_t**& get_argv(wchar_t) throw() { return __wargv; }
29
expand_argv_wildcards(_In_z_ char ** const argv,_Out_ _Deref_post_z_ char *** const expanded_argv)30 static errno_t expand_argv_wildcards(
31 _In_z_ char** const argv,
32 _Out_ _Deref_post_z_ char*** const expanded_argv) throw()
33 {
34 return __acrt_expand_narrow_argv_wildcards(argv, expanded_argv);
35 }
36
expand_argv_wildcards(_In_z_ wchar_t ** const argv,_Out_ _Deref_post_z_ wchar_t *** const expanded_argv)37 static errno_t expand_argv_wildcards(
38 _In_z_ wchar_t** const argv,
39 _Out_ _Deref_post_z_ wchar_t*** const expanded_argv) throw()
40 {
41 return __acrt_expand_wide_argv_wildcards(argv, expanded_argv);
42 }
43
44
45
/***
*static void parse_command_line(cmdstart, argv, args, argument_count, character_count)
*
*Purpose:
*       Parses the command line and sets up the argv[] array.
*       On entry, cmdstart should point to the command line,
*       argv should point to memory for the argv array, args
*       points to memory to place the text of the arguments.
*       If these are nullptr, then no storing (only counting)
*       is done.  On exit, *argument_count has the number of
*       arguments (plus one for a final nullptr argument),
*       and *character_count has the number of characters
*       (including the null terminators) used in the buffer
*       pointed to by args.
*
*Entry:
*       Character *cmdstart - pointer to command line of the form
*           <progname><whitespace><args><nul>
*           where <whitespace> is one or more spaces or tabs
*       Character **argv - where to build argv array; nullptr means don't
*                       build array
*       Character *args - where to place argument text; nullptr means don't
*                       store text
*
*Exit:
*       no return value
*       size_t *argument_count - returns number of argv entries created
*       size_t *character_count - number of characters used in args buffer
*
*Exceptions:
*
*******************************************************************************/
76
77
78 // should_copy_another_character helper functions
79 // should_copy_another_character is *ONLY* checking for DBCS lead bytes to see if there
80 // might be a following trail byte. This works because the callers are only concerned
81 // about escaped quote sequences and other codepages aren't using those quotes.
should_copy_another_character(char const c)82 static bool __cdecl should_copy_another_character(char const c) throw()
83 {
84 // This is OK for UTF-8 as a quote is never a trail byte.
85 return _ismbblead(c) != 0;
86 }
87
// Wide-character overload: always reports that no extra character needs to be
// copied.  The unnamed wchar_t parameter is used only to select this overload.
static bool __cdecl should_copy_another_character(wchar_t) throw()
{
    // This is OK for UTF-16 as a quote is never part of a surrogate pair.
    bool const needs_second_unit = false;
    return needs_second_unit;
}
93
94 template <typename Character>
parse_command_line(Character * cmdstart,Character ** argv,Character * args,size_t * argument_count,size_t * character_count)95 static void __cdecl parse_command_line(
96 Character* cmdstart,
97 Character** argv,
98 Character* args,
99 size_t* argument_count,
100 size_t* character_count
101 ) throw()
102 {
103 *character_count = 0;
104 *argument_count = 1; // We'll have at least the program name
105
106 Character c;
107 int copy_character; /* 1 = copy char to *args */
108 unsigned numslash; /* num of backslashes seen */
109
110 /* first scan the program name, copy it, and count the bytes */
111 Character* p = cmdstart;
112 if (argv)
113 *argv++ = args;
114
115 // A quoted program name is handled here. The handling is much
116 // simpler than for other arguments. Basically, whatever lies
117 // between the leading double-quote and next one, or a terminal null
118 // character is simply accepted. Fancier handling is not required
119 // because the program name must be a legal NTFS/HPFS file name.
120 // Note that the double-quote characters are not copied, nor do they
121 // contribute to character_count.
122 bool in_quotes = false;
123 do
124 {
125 if (*p == '"')
126 {
127 in_quotes = !in_quotes;
128 c = *p++;
129 continue;
130 }
131
132 ++*character_count;
133 if (args)
134 *args++ = *p;
135
136 c = *p++;
137
138 if (should_copy_another_character(c))
139 {
140 ++*character_count;
141 if (args)
142 *args++ = *p; // Copy 2nd byte too
143 ++p; // skip over trail byte
144 }
145 }
146 while (c != '\0' && (in_quotes || (c != ' ' && c != '\t')));
147
148 if (c == '\0')
149 {
150 p--;
151 }
152 else
153 {
154 if (args)
155 *(args - 1) = '\0';
156 }
157
158 in_quotes = false;
159
160 // Loop on each argument
161 for (;;)
162 {
163 if (*p)
164 {
165 while (*p == ' ' || *p == '\t')
166 ++p;
167 }
168
169 if (*p == '\0')
170 break; // End of arguments
171
172 // Scan an argument:
173 if (argv)
174 *argv++ = args;
175
176 ++*argument_count;
177
178 // Loop through scanning one argument:
179 for (;;)
180 {
181 copy_character = 1;
182
183 // Rules:
184 // 2N backslashes + " ==> N backslashes and begin/end quote
185 // 2N + 1 backslashes + " ==> N backslashes + literal "
186 // N backslashes ==> N backslashes
187 numslash = 0;
188
189 while (*p == '\\')
190 {
191 // Count number of backslashes for use below
192 ++p;
193 ++numslash;
194 }
195
196 if (*p == '"')
197 {
198 // if 2N backslashes before, start/end quote, otherwise
199 // copy literally:
200 if (numslash % 2 == 0)
201 {
202 if (in_quotes && p[1] == '"')
203 {
204 p++; // Double quote inside quoted string
205 }
206 else
207 {
208 // Skip first quote char and copy second:
209 copy_character = 0; // Don't copy quote
210 in_quotes = !in_quotes;
211 }
212 }
213
214 numslash /= 2;
215 }
216
217 // Copy slashes:
218 while (numslash--)
219 {
220 if (args)
221 *args++ = '\\';
222 ++*character_count;
223 }
224
225 // If at end of arg, break loop:
226 if (*p == '\0' || (!in_quotes && (*p == ' ' || *p == '\t')))
227 break;
228
229 // Copy character into argument:
230 if (copy_character)
231 {
232 if (args)
233 *args++ = *p;
234
235 if (should_copy_another_character(*p))
236 {
237 ++p;
238 ++*character_count;
239
240 if (args)
241 *args++ = *p;
242 }
243
244 ++*character_count;
245 }
246
247 ++p;
248 }
249
250 // Null-terminate the argument:
251 if (args)
252 *args++ = '\0'; // Terminate the string
253
254 ++*character_count;
255 }
256
257 // We put one last argument in -- a null pointer:
258 if (argv)
259 *argv++ = nullptr;
260
261 ++*argument_count;
262 }
263
264
265
__acrt_allocate_buffer_for_argv(size_t const argument_count,size_t const character_count,size_t const character_size)266 extern "C" unsigned char* __cdecl __acrt_allocate_buffer_for_argv(
267 size_t const argument_count,
268 size_t const character_count,
269 size_t const character_size
270 )
271 {
272 if (argument_count >= SIZE_MAX / sizeof(void*))
273 return nullptr;
274
275 if (character_count >= SIZE_MAX / character_size)
276 return nullptr;
277
278 size_t const argument_array_size = argument_count * sizeof(void*);
279 size_t const character_array_size = character_count * character_size;
280
281 if (SIZE_MAX - argument_array_size <= character_array_size)
282 return nullptr;
283
284 size_t const total_size = argument_array_size + character_array_size;
285 __crt_unique_heap_ptr<unsigned char> buffer(_calloc_crt_t(unsigned char, total_size));
286 if (!buffer)
287 return nullptr;
288
289 return buffer.detach();
290 }
291
292
293
/***
*_setargv, __setargv - set up "argc" and "argv" for C programs
*
*Purpose:
*       Read the command line and create the argv array for C
*       programs.
*
*Entry:
*       Arguments are retrieved from the program command line,
*       pointed to by _acmdln (narrow) or _wcmdln (wide).  If there is
*       no command line, the program name is parsed in its place.
*
*Exit:
*       Returns 0 if successful.  Otherwise returns an error code:
*       EINVAL if the mode is not recognized, ENOMEM if memory
*       allocation failed, or the error reported by wildcard expansion.
*       "argv" points to a null-terminated list of pointers to ASCIZ
*       strings, each of which is an argument from the command line.
*       "argc" is the number of arguments.  The strings are copied from
*       the command line into space allocated on the heap.
*       The list of pointers is also located on the heap.
*       _pgmptr points to the program name.
*
*Exceptions:
*       Memory allocation failure is reported through the return value
*       (ENOMEM) rather than by terminating.
*
*******************************************************************************/
318 template <typename Character>
common_configure_argv(_crt_argv_mode const mode)319 static errno_t __cdecl common_configure_argv(_crt_argv_mode const mode) throw()
320 {
321 typedef __crt_char_traits<Character> traits;
322
323 if (mode == _crt_argv_no_arguments)
324 {
325 return 0;
326 }
327
328 _VALIDATE_RETURN_ERRCODE(
329 mode == _crt_argv_expanded_arguments ||
330 mode == _crt_argv_unexpanded_arguments, EINVAL);
331
332 do_locale_initialization(Character());
333
334
335 static Character program_name[MAX_PATH + 1];
336 traits::get_module_file_name(nullptr, program_name, MAX_PATH);
337 traits::set_program_name(&program_name[0]);
338
339 // If there's no command line at all, then use the program name as the
340 // command line to parse, so that argv[0] is initialized with the program
341 // name. (This won't happen when the program is run by cmd.exe, but it
342 // could happen if the program is spawned via some other means.)
343 Character* const raw_command_line = get_command_line(Character());
344 Character* const command_line = raw_command_line == nullptr || raw_command_line[0] == '\0'
345 ? program_name
346 : raw_command_line;
347
348 size_t argument_count = 0;
349 size_t character_count = 0;
350 parse_command_line(
351 command_line,
352 static_cast<Character**>(nullptr),
353 static_cast<Character*>(nullptr),
354 &argument_count,
355 &character_count);
356
357 __crt_unique_heap_ptr<unsigned char> buffer(__acrt_allocate_buffer_for_argv(
358 argument_count,
359 character_count,
360 sizeof(Character)));
361
362 _VALIDATE_RETURN_ERRCODE_NOEXC(buffer, ENOMEM);
363
364 Character** const first_argument = reinterpret_cast<Character**>(buffer.get());
365 Character* const first_string = reinterpret_cast<Character*>(buffer.get() + argument_count * sizeof(Character*));
366
367 parse_command_line(command_line, first_argument, first_string, &argument_count, &character_count);
368
369 // If we are not expanding wildcards, then we are done...
370 if (mode == _crt_argv_unexpanded_arguments)
371 {
372 __argc = static_cast<int>(argument_count - 1);
373 get_argv(Character()) = reinterpret_cast<Character**>(buffer.detach());
374 return 0;
375 }
376
377 // ... otherwise, we try to do the wildcard expansion:
378 __crt_unique_heap_ptr<Character*> expanded_argv;
379 errno_t const argv_expansion_status = expand_argv_wildcards(first_argument, expanded_argv.get_address_of());
380 if (argv_expansion_status != 0)
381 return argv_expansion_status;
382
383 __argc = [&]()
384 {
385 size_t n = 0;
386 for (auto it = expanded_argv.get(); *it; ++it, ++n) { }
387 return static_cast<int>(n);
388 }();
389
390 get_argv(Character()) = expanded_argv.detach();
391 return 0;
392 }
393
394
395
_configure_narrow_argv(_crt_argv_mode const mode)396 extern "C" errno_t __cdecl _configure_narrow_argv(_crt_argv_mode const mode)
397 {
398 return common_configure_argv<char>(mode);
399 }
400
_configure_wide_argv(_crt_argv_mode const mode)401 extern "C" errno_t __cdecl _configure_wide_argv(_crt_argv_mode const mode)
402 {
403 return common_configure_argv<wchar_t>(mode);
404 }
405