1 /* buildcmd.c -- build command lines from a list of arguments.
2    Copyright (C) 1990-2021 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.
16 */
17 /* config.h must be included first. */
18 #include <config.h>
19 
20 /* system headers. */
21 #include <assert.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdbool.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #ifndef _POSIX_SOURCE
28 # include <sys/param.h>
29 #endif
30 #include <unistd.h>
31 #include <wchar.h>
32 #include <xalloc.h>
33 
34 /* gnulib headers. */
35 #include "xstrtol.h"
36 
37 /* find headers. */
38 #include "system.h"
39 #include "buildcmd.h"
40 #include "die.h"
41 
42 
/* COMPAT:  the SYSV version defaults the size to 470, which is also its
   maximum value.  We try to make it as large as possible.  See
   bc_get_arg_max() below. */
45 #if defined NCARGS && !defined ARG_MAX
46 /* We include sys/param.h in order to detect this case. */
47 # error "You have an unusual system.  Once you remove this error message from buildcmd.c, it should work, but please make sure that DejaGnu is installed on your system and that 'make check' passes before using the findutils programs.  Please mail bug-findutils@gnu.org to tell us about your system."
48 # define ARG_MAX NCARGS
49 #endif
50 
51 
/* Sentinel passed to bc_push_arg to request termination of the argument
   list.  It is recognised by address, never by content, so the text
   itself is irrelevant.  */
static const char special_terminating_arg[] = "do_not_care";
53 
54 
55 
56 /* Add a terminator to the argument list. */
57 static void
bc_args_complete(struct buildcmd_control * ctl,struct buildcmd_state * state)58 bc_args_complete (struct buildcmd_control *ctl,
59 		  struct buildcmd_state *state)
60 {
61   bc_push_arg (ctl, state, special_terminating_arg, 0, NULL, 0, 0);
62 }
63 
64 
65 /* Replace all instances of `replace_pat' in ARG with `linebuf',
66    and add the resulting string to the list of arguments for the command
67    to execute.
68    ARGLEN is the length of ARG, not including the null.
69    LBLEN is the length of LINEBUF, not including the null.
70    PFXLEN is the length of PREFIX.  Substitution is not performed on
71    the prefix.   The prefix is used if the argument contains replace_pat.
72 
73    COMPAT: insertions on the SYSV version are limited to 255 chars per line,
74    and a max of 5 occurrences of replace_pat in the initial-arguments.
75    Those restrictions do not exist here.  */
76 
77 void
bc_do_insert(struct buildcmd_control * ctl,struct buildcmd_state * state,char * arg,size_t arglen,const char * prefix,size_t pfxlen,const char * linebuf,size_t lblen,int initial_args)78 bc_do_insert (struct buildcmd_control *ctl,
79               struct buildcmd_state *state,
80               char *arg, size_t arglen,
81               const char *prefix, size_t pfxlen,
82               const char *linebuf, size_t lblen,
83               int initial_args)
84 {
85   /* Temporary copy of each arg with the replace pattern replaced by the
86      real arg.  */
87   static char *insertbuf;
88   char *p;
89   size_t bytes_left = ctl->arg_max - 1;    /* Bytes left on the command line.  */
90 
91   /* XXX: on systems lacking an upper limit for exec args, ctl->arg_max
92    *      may have been set to LONG_MAX (see bc_get_arg_max()).  Hence
93    *      this xmalloc call may be a bad idea, especially since we are
94    *      adding 1 to it...
95    */
96   if (!insertbuf)
97     insertbuf = xmalloc (ctl->arg_max + 1);
98   p = insertbuf;
99 
100   do
101     {
102       size_t len;               /* Length in ARG before `replace_pat'.  */
103       char *s = mbsstr (arg, ctl->replace_pat);
104       if (s)
105         {
106           len = s - arg;
107         }
108       else
109         {
110           len = arglen;
111         }
112 
113       if (bytes_left <= len)
114         break;
115       else
116 	bytes_left -= len;
117 
118       strncpy (p, arg, len);
119       p += len;
120       arg += len;
121       arglen -= len;
122 
123       if (s)
124         {
125 	  if (bytes_left <= (lblen + pfxlen))
126 	    break;
127 	  else
128 	    bytes_left -= (lblen + pfxlen);
129 
130 	  if (prefix)
131 	    {
132 	      strcpy (p, prefix);
133 	      p += pfxlen;
134 	    }
135           strcpy (p, linebuf);
136           p += lblen;
137 
138           arg += ctl->rplen;
139           arglen -= ctl->rplen;
140         }
141     }
142   while (*arg);
143   if (*arg)
144     die (EXIT_FAILURE, 0, _("command too long"));
145   *p++ = '\0';
146 
147   bc_push_arg (ctl, state,
148 	       insertbuf, p - insertbuf,
149                NULL, 0,
150                initial_args);
151 }
152 
153 
154 /* Update our best guess as to how many arguments we should pass to the next
155  * invocation of the command.
156  */
157 static size_t
update_limit(struct buildcmd_control * ctl,struct buildcmd_state * state,bool success,size_t limit)158 update_limit (struct buildcmd_control *ctl,
159 	      struct buildcmd_state *state,
160 	      bool success,
161 	      size_t limit)
162 {
163   if (success)
164     {
165       if (limit > state->largest_successful_arg_count)
166 	state->largest_successful_arg_count = limit;
167     }
168   else
169     {
170       if (limit < state->smallest_failed_arg_count
171 	  || (0 == state->smallest_failed_arg_count))
172 	state->smallest_failed_arg_count = limit;
173     }
174 
175   if (0 == (state->largest_successful_arg_count)
176       || (state->smallest_failed_arg_count <= state->largest_successful_arg_count))
177     {
178       /* No success yet, or running on a system which has
179 	 limits on total argv length, but not arg count. */
180       if (success)
181 	{
182 	  if (limit < SIZE_MAX)
183 	    ++limit;
184 	}
185       else
186 	{
187 	  limit /= 2;
188 	}
189     }
190   else  /* We can use bisection. */
191     {
192       const size_t shift = (state->smallest_failed_arg_count
193 			  - state->largest_successful_arg_count) / 2;
194       if (success)
195 	{
196 	  if (shift)
197 	    limit += shift;
198 	  else
199 	    ++limit;
200 	}
201       else
202 	{
203 	  if (shift)
204 	    limit -= shift;
205 	  else
206 	    --limit;
207 	}
208     }
209 
210   /* Make sure the returned value is such that progress is
211    * actually possible.
212    */
213   if (ctl->initial_argc && (limit <= ctl->initial_argc + 1u))
214     limit = ctl->initial_argc + 1u;
215   if (0 == limit)
216     limit = 1u;
217 
218   return limit;
219 }
220 
221 
222 /* Copy some of the program arguments into an argv list.   Copy all the
223  * initial arguments, plus up to LIMIT additional arguments.
224  */
225 static size_t
copy_args(struct buildcmd_control * ctl,struct buildcmd_state * state,char ** working_args,size_t limit,size_t done)226 copy_args (struct buildcmd_control *ctl,
227 	   struct buildcmd_state *state,
228 	   char** working_args, size_t limit, size_t done)
229 {
230   size_t dst_pos = 0;
231   size_t src_pos = 0;
232 
233   while (src_pos < ctl->initial_argc)
234     {
235       working_args[dst_pos++] = state->cmd_argv[src_pos++];
236     }
237   src_pos += done;
238   while (src_pos < state->cmd_argc && dst_pos < limit)
239     {
240       working_args[dst_pos++] = state->cmd_argv[src_pos++];
241     }
242   assert (dst_pos >= ctl->initial_argc);
243   working_args[dst_pos] = NULL;
244   return dst_pos;
245 }
246 
247 
248 
249 
250 /* Execute the program with the currently-built list of arguments. */
251 void
bc_do_exec(struct buildcmd_control * ctl,struct buildcmd_state * state)252 bc_do_exec (struct buildcmd_control *ctl,
253 	    struct buildcmd_state *state)
254 {
255     char** working_args;
256     size_t limit, done;
257 
258     /* Terminate the args. */
259     bc_args_complete (ctl, state);
260     /* Verify that the argument list is terminated. */
261     assert (state->cmd_argc > 0);
262     assert (state->cmd_argv[state->cmd_argc-1] == NULL);
263 
264     working_args = xmalloc ((1+state->cmd_argc) * sizeof (char*));
265     done = 0;
266     limit = state->cmd_argc;
267 
268     do
269       {
270 	const size_t dst_pos = copy_args (ctl, state, working_args,
271 					  limit, done);
272 	if (ctl->exec_callback (ctl, state->usercontext, dst_pos, working_args))
273 	  {
274 	    limit = update_limit (ctl, state, true, limit);
275 	    done += (dst_pos - ctl->initial_argc);
276 	  }
277 	else  /* got E2BIG, adjust arguments */
278 	  {
279 	    if (limit <= ctl->initial_argc + 1)
280 	      {
281 		/* No room to reduce the length of the argument list.
282 		   Issue an error message and give up. */
283 		die (EXIT_FAILURE, 0,
284 		     _("can't call exec() due to argument size restrictions"));
285 	      }
286 	    else
287 	      {
288 		/* Try fewer arguments. */
289 		limit = update_limit (ctl, state, false, limit);
290 	      }
291 	  }
292       }
293     while ((done + 1) < (state->cmd_argc - ctl->initial_argc));
294     /* (state->cmd_argc - ctl->initial_argc) includes the terminating NULL,
295      * which is why we add 1 to done in the test above. */
296 
297     free (working_args);
298     bc_clear_args (ctl, state);
299 }
300 
301 
302 /* Return nonzero if there would not be enough room for an additional
303  * argument.  We check the total number of arguments only, not the space
304  * occupied by those arguments.
305  *
306  * If we return zero, there still may not be enough room for the next
307  * argument, depending on its length.
308  */
309 static int
bc_argc_limit_reached(int initial_args,const struct buildcmd_control * ctl,struct buildcmd_state * state)310 bc_argc_limit_reached (int initial_args,
311 		       const struct buildcmd_control *ctl,
312 		       struct buildcmd_state *state)
313 {
314   /* Check to see if we about to exceed a limit set by xargs' -n option */
315   if (!initial_args && ctl->args_per_exec &&
316       ( (state->cmd_argc - ctl->initial_argc) == ctl->args_per_exec))
317     return 1;
318 
319   /* We deliberately use an equality test here rather than >= in order
320    * to force a software failure if the code is modified in such a way
321    * that it fails to call this function for every new argument.
322    */
323   return state->cmd_argc == ctl->max_arg_count;
324 }
325 
326 
327 /* Add ARG to the end of the list of arguments `cmd_argv' to pass
328    to the command.
329    LEN is the length of ARG, including the terminating null.
330    If this brings the list up to its maximum size, execute the command.
331 */
332 void
bc_push_arg(struct buildcmd_control * ctl,struct buildcmd_state * state,const char * arg,size_t len,const char * prefix,size_t pfxlen,int initial_args)333 bc_push_arg (struct buildcmd_control *ctl,
334              struct buildcmd_state *state,
335              const char *arg, size_t len,
336              const char *prefix, size_t pfxlen,
337              int initial_args)
338 {
339   const int terminate = (arg == special_terminating_arg);
340 
341   assert (arg != NULL);
342 
343   if (!terminate)
344     {
345       if (state->cmd_argv_chars + len + pfxlen > ctl->arg_max)
346         {
347           if (initial_args || state->cmd_argc == ctl->initial_argc)
348             die (EXIT_FAILURE, 0,
349 		 _("cannot fit single argument within argument list size limit"));
350 
351           /* xargs option -i (replace_pat) implies -x (exit_if_size_exceeded) */
352           if (ctl->replace_pat
353               || (ctl->exit_if_size_exceeded &&
354                   (ctl->lines_per_exec || ctl->args_per_exec)))
355             die (EXIT_FAILURE, 0, _("argument list too long"));
356           bc_do_exec (ctl, state);
357         }
358       if (bc_argc_limit_reached (initial_args, ctl, state))
359             bc_do_exec (ctl, state);
360     }
361 
362   if (!initial_args)
363     {
364       state->todo = 1;
365     }
366 
367   if (state->cmd_argc >= state->cmd_argv_alloc)
368     {
369       /* XXX: we could use extendbuf() here. */
370       if (!state->cmd_argv)
371         {
372           state->cmd_argv_alloc = 64;
373           state->cmd_argv = xmalloc (sizeof (char *) * state->cmd_argv_alloc);
374         }
375       else
376         {
377           state->cmd_argv_alloc *= 2;
378           state->cmd_argv = xrealloc (state->cmd_argv,
379 				      sizeof (char *) * state->cmd_argv_alloc);
380         }
381     }
382 
383   if (terminate)
384     state->cmd_argv[state->cmd_argc++] = NULL;
385   else
386     {
387       state->cmd_argv[state->cmd_argc++] = state->argbuf + state->cmd_argv_chars;
388       if (prefix)
389         {
390           strcpy (state->argbuf + state->cmd_argv_chars, prefix);
391           state->cmd_argv_chars += pfxlen;
392         }
393 
394       strcpy (state->argbuf + state->cmd_argv_chars, arg);
395       state->cmd_argv_chars += len;
396 
397       /* If we have now collected enough arguments,
398        * do the exec immediately.
399        */
400       if (bc_argc_limit_reached (initial_args, ctl, state))
401 	{
402 	  bc_do_exec (ctl, state);
403 	}
404     }
405 
406   /* If this is an initial argument, set the high-water mark. */
407   if (initial_args)
408     {
409       state->cmd_initial_argv_chars = state->cmd_argv_chars;
410     }
411 }
412 
413 
/* Return the system's limit on the size of the arguments to exec.
 * sysconf (_SC_ARG_MAX) is preferred, then the ARG_MAX macro; each is
 * used only where available and positive.  If neither yields a limit,
 * LONG_MAX is returned.
 */
size_t
bc_get_arg_max (void)
{
  long val = -1;

  /* We may resort to using LONG_MAX, so check it fits. */
  /* XXX: better to do a compile-time check */
  assert ( (~(size_t)0) >= LONG_MAX);

#ifdef _SC_ARG_MAX
  val = sysconf (_SC_ARG_MAX);
#endif

  if (val <= 0)
    {
      /* either _SC_ARG_MAX was not available or
       * there is no particular limit.
       */
#ifdef ARG_MAX
      val = ARG_MAX;
#endif

      /* The value returned by this function bounds the
       * value applied as the ceiling for the -s option.
       * Hence if the system won't tell us what its limit
       * is, we allow the user to specify more or less
       * whatever value they like.
       */
      if (val <= 0)
        val = LONG_MAX;
    }

  return val;
}
449 
450 
/* Placeholder exec callback which ignores all of its arguments and
   returns 0.  bc_init_controlinfo installs it as the initial value of
   exec_callback.  */
static int
cb_exec_noop (struct buildcmd_control * ctl,
              void *usercontext,
              int argc,
              char **argv)
{
  /* Nothing is used; the casts merely silence unused-parameter warnings.  */
  (void) argv;
  (void) argc;
  (void) usercontext;
  (void) ctl;

  return 0;
}
465 
466 
467 /* Return how much of ARG_MAX is used by the environment.  */
468 size_t
bc_size_of_environment(void)469 bc_size_of_environment (void)
470 {
471   size_t len = 0u;
472   char **envp = environ;
473 
474   while (*envp)
475     len += strlen (*envp++) + 1;
476 
477   return len;
478 }
479 
480 
481 enum BC_INIT_STATUS
bc_init_controlinfo(struct buildcmd_control * ctl,size_t headroom)482 bc_init_controlinfo (struct buildcmd_control *ctl,
483 		     size_t headroom)
484 {
485   size_t size_of_environment = bc_size_of_environment ();
486 
487   /* POSIX requires that _POSIX_ARG_MAX is 4096.  That is the lowest
488    * possible value for ARG_MAX on a POSIX compliant system.  See
489    * https://www.opengroup.org/onlinepubs/009695399/basedefs/limits.h.html
490    */
491   ctl->posix_arg_size_min = _POSIX_ARG_MAX;
492   ctl->posix_arg_size_max = bc_get_arg_max ();
493 
494   ctl->exit_if_size_exceeded = 0;
495 
496   /* Take the size of the environment into account.  */
497   if (size_of_environment > ctl->posix_arg_size_max)
498     {
499       return BC_INIT_ENV_TOO_BIG;
500     }
501   else if ((headroom + size_of_environment) >= ctl->posix_arg_size_max)
502     {
503       /* POSIX.2 requires xargs to subtract 2048, but ARG_MAX is
504        * guaranteed to be at least 4096.  Although xargs could use an
505        * assertion here, we use a runtime check which returns an error
506        * code, because our caller may not be xargs.
507        */
508       return BC_INIT_CANNOT_ACCOMODATE_HEADROOM;
509     }
510   else
511     {
512       ctl->posix_arg_size_max -= size_of_environment;
513       ctl->posix_arg_size_max -= headroom;
514     }
515 
516   /* need to subtract 2 on the following line - for Linux/PPC */
517   ctl->max_arg_count = (ctl->posix_arg_size_max / sizeof (char*)) - 2u;
518   assert (ctl->max_arg_count > 0);
519   ctl->rplen = 0u;
520   ctl->replace_pat = NULL;
521   ctl->initial_argc = 0;
522   ctl->exec_callback = cb_exec_noop;
523   ctl->lines_per_exec = 0;
524   ctl->args_per_exec = 0;
525 
526   /* Set the initial value of arg_max to the largest value we can
527    * tolerate.
528    */
529   ctl->arg_max = ctl->posix_arg_size_max;
530 
531   return BC_INIT_OK;
532 }
533 
534 void
bc_use_sensible_arg_max(struct buildcmd_control * ctl)535 bc_use_sensible_arg_max (struct buildcmd_control *ctl)
536 {
537 #ifdef DEFAULT_ARG_SIZE
538   enum { arg_size = DEFAULT_ARG_SIZE };
539 #else
540   enum { arg_size = (128u * 1024u) };
541 #endif
542 
543   /* Check against the upper and lower limits. */
544   if (arg_size > ctl->posix_arg_size_max)
545     ctl->arg_max = ctl->posix_arg_size_max;
546   else if (arg_size < ctl->posix_arg_size_min)
547     ctl->arg_max = ctl->posix_arg_size_min;
548   else
549     ctl->arg_max = arg_size;
550 }
551 
552 
553 
554 
555 void
bc_init_state(const struct buildcmd_control * ctl,struct buildcmd_state * state,void * context)556 bc_init_state (const struct buildcmd_control *ctl,
557 	       struct buildcmd_state *state,
558 	       void *context)
559 {
560   state->cmd_argc = 0;
561   state->cmd_argv_chars = 0;
562   state->cmd_argv = NULL;
563   state->cmd_argv_alloc = 0;
564   state->largest_successful_arg_count = 0;
565   state->smallest_failed_arg_count = 0;
566 
567   /* XXX: the following memory allocation is inadvisable on systems
568    * with no ARG_MAX, because ctl->arg_max may actually be close to
569    * LONG_MAX.   Adding one to it is safe though because earlier we
570    * subtracted 2048.
571    */
572   assert (ctl->arg_max <= (LONG_MAX - 2048L));
573   state->argbuf = xmalloc (ctl->arg_max + 1u);
574 
575   state->cmd_argv_chars = state->cmd_initial_argv_chars = 0;
576   state->todo = 0;
577   state->dir_fd = -1;
578   state->usercontext = context;
579 }
580 
581 void
bc_clear_args(const struct buildcmd_control * ctl,struct buildcmd_state * state)582 bc_clear_args (const struct buildcmd_control *ctl,
583 	       struct buildcmd_state *state)
584 {
585   state->cmd_argc = ctl->initial_argc;
586   state->cmd_argv_chars = state->cmd_initial_argv_chars;
587   state->todo = 0;
588   state->dir_fd = -1;
589 }
590 
591 
592 /* Return nonzero if the value stored in the environment variable ENV_VAR_NAME
593  * exceeds QUANTITY.
594  */
595 static int
exceeds(const char * env_var_name,size_t quantity)596 exceeds (const char *env_var_name, size_t quantity)
597 {
598   const char *val = getenv (env_var_name);
599   if (val)
600     {
601       char *tmp;
602       unsigned long limit;
603 
604       if (xstrtoul (val, &tmp, 10, &limit, NULL) == LONGINT_OK)
605 	{
606 	  if (quantity > limit)
607 	    return 1;
608 	}
609       else
610 	{
611 	  die (EXIT_FAILURE, errno,
612 	       _("Environment variable %s is not set to a "
613 		 "valid decimal number"),
614 	       env_var_name);
615 	  return 0;
616 	}
617     }
618   return 0;
619 }
620 
/* Return true if the NULL-terminated argument list ARGV exceeds a testing
 * limit: either its number of arguments is greater than the value of
 * __GNU_FINDUTILS_EXEC_ARG_COUNT_LIMIT, or the sum of the string lengths
 * of its arguments (terminating nulls not counted) is greater than the
 * value of __GNU_FINDUTILS_EXEC_ARG_LENGTH_LIMIT.
 * NOTE: argv could be declared 'const char *const *argv', but it works as
 * expected only with C++ compilers <http://c-faq.com/ansi/constmismatch.html>.
 */
bool
bc_args_exceed_testing_limit (char **argv)
{
  size_t arg_count = 0;
  size_t char_count = 0;
  char **cursor;

  for (cursor = argv; *cursor; ++cursor)
    {
      char_count += strlen (*cursor);
      ++arg_count;
    }

  /* The length limit is consulted only if the count limit is not
     exceeded.  */
  if (exceeds ("__GNU_FINDUTILS_EXEC_ARG_COUNT_LIMIT", arg_count))
    return true;

  return exceeds ("__GNU_FINDUTILS_EXEC_ARG_LENGTH_LIMIT", char_count) != 0;
}
639