xref: /dragonfly/contrib/cvs-1.12/lib/quotearg.c (revision d4ef6694)
1 /* quotearg.c - quote arguments for output
2 
3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005 Free Software
4    Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19 
20 /* Written by Paul Eggert <eggert@twinsun.com> */
21 
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25 
26 #include "quotearg.h"
27 
28 #include "xalloc.h"
29 
30 #include <ctype.h>
31 #include <errno.h>
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "gettext.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
40 
41 #if HAVE_WCHAR_H
42 
43 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared.  */
44 # include <stdio.h>
45 # include <time.h>
46 
47 # include <wchar.h>
48 #endif
49 
50 #if !HAVE_MBRTOWC
51 /* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
52    other macros are defined only for documentation and to satisfy C
53    syntax.  */
54 # undef MB_CUR_MAX
55 # define MB_CUR_MAX 1
56 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
57 # define iswprint(wc) isprint ((unsigned char) (wc))
58 # undef HAVE_MBSINIT
59 #endif
60 
61 #if !defined mbsinit && !HAVE_MBSINIT
62 # define mbsinit(ps) 1
63 #endif
64 
65 #ifndef iswprint
66 # if HAVE_WCTYPE_H
67 #  include <wctype.h>
68 # endif
69 # if !defined iswprint && !HAVE_ISWPRINT
70 #  define iswprint(wc) 1
71 # endif
72 #endif
73 
74 #ifndef SIZE_MAX
75 # define SIZE_MAX ((size_t) -1)
76 #endif
77 
78 #define INT_BITS (sizeof (int) * CHAR_BIT)
79 
/* A set of quoting options: a basic style plus a per-byte override
   mask.  Objects of this type are copied by plain structure
   assignment elsewhere in this file.  */
struct quoting_options
{
  /* Basic quoting style.  */
  enum quoting_style style;

  /* Quote the characters indicated by this bit vector even if the
     quoting style would not normally require them to be quoted.
     There is one bit per unsigned char value, packed INT_BITS bits to
     an element: the bit for byte UC is bit (UC % INT_BITS) of element
     (UC / INT_BITS).  */
  unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
};
89 
/* Names of quoting styles.  The list is terminated by a null pointer,
   and entry K names the style stored at index K of
   quoting_style_vals, so the two tables must be kept in the same
   order.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
102 
/* Correspondences to quoting style names: entry K is the style named
   by entry K of quoting_style_args.  Unlike that table, this one has
   no terminating element.  */
enum quoting_style const quoting_style_vals[] =
{
  literal_quoting_style,
  shell_quoting_style,
  shell_always_quoting_style,
  c_quoting_style,
  escape_quoting_style,
  locale_quoting_style,
  clocale_quoting_style
};
114 
/* The default quoting options, used whenever a caller passes a null
   options pointer.  Having static storage duration and no
   initializer, it starts out all zero: no extra characters are
   quoted, and the style is whichever enumerator has value 0
   (presumably literal_quoting_style; confirm against quotearg.h).  */
static struct quoting_options default_quoting_options;
117 
118 /* Allocate a new set of quoting options, with contents initially identical
119    to O if O is not null, or to the default if O is null.
120    It is the caller's responsibility to free the result.  */
121 struct quoting_options *
122 clone_quoting_options (struct quoting_options *o)
123 {
124   int e = errno;
125   struct quoting_options *p = xmalloc (sizeof *p);
126   *p = *(o ? o : &default_quoting_options);
127   errno = e;
128   return p;
129 }
130 
131 /* Get the value of O's quoting style.  If O is null, use the default.  */
132 enum quoting_style
133 get_quoting_style (struct quoting_options *o)
134 {
135   return (o ? o : &default_quoting_options)->style;
136 }
137 
138 /* In O (or in the default if O is null),
139    set the value of the quoting style to S.  */
140 void
141 set_quoting_style (struct quoting_options *o, enum quoting_style s)
142 {
143   (o ? o : &default_quoting_options)->style = s;
144 }
145 
146 /* In O (or in the default if O is null),
147    set the value of the quoting options for character C to I.
148    Return the old value.  Currently, the only values defined for I are
149    0 (the default) and 1 (which means to quote the character even if
150    it would not otherwise be quoted).  */
151 int
152 set_char_quoting (struct quoting_options *o, char c, int i)
153 {
154   unsigned char uc = c;
155   unsigned int *p =
156     (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
157   int shift = uc % INT_BITS;
158   int r = (*p >> shift) & 1;
159   *p ^= ((i & 1) ^ r) << shift;
160   return r;
161 }
162 
163 /* MSGID approximates a quotation mark.  Return its translation if it
164    has one; otherwise, return either it or "\"", depending on S.  */
165 static char const *
166 gettext_quote (char const *msgid, enum quoting_style s)
167 {
168   char const *translation = _(msgid);
169   if (translation == msgid && s == clocale_quoting_style)
170     translation = "\"";
171   return translation;
172 }
173 
174 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
175    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
176    non-quoting-style part of O to control quoting.
177    Terminate the output with a null character, and return the written
178    size of the output, not counting the terminating null.
179    If BUFFERSIZE is too small to store the output string, return the
180    value that would have been returned had BUFFERSIZE been large enough.
181    If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
182 
183    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
184    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
185    style specified by O, and O may not be null.  */
186 
187 static size_t
188 quotearg_buffer_restyled (char *buffer, size_t buffersize,
189 			  char const *arg, size_t argsize,
190 			  enum quoting_style quoting_style,
191 			  struct quoting_options const *o)
192 {
193   size_t i;
194   size_t len = 0;
195   char const *quote_string = 0;
196   size_t quote_string_len = 0;
197   bool backslash_escapes = false;
198   bool unibyte_locale = MB_CUR_MAX == 1;
199 
200 #define STORE(c) \
201     do \
202       { \
203 	if (len < buffersize) \
204 	  buffer[len] = (c); \
205 	len++; \
206       } \
207     while (0)
208 
209   switch (quoting_style)
210     {
211     case c_quoting_style:
212       STORE ('"');
213       backslash_escapes = true;
214       quote_string = "\"";
215       quote_string_len = 1;
216       break;
217 
218     case escape_quoting_style:
219       backslash_escapes = true;
220       break;
221 
222     case locale_quoting_style:
223     case clocale_quoting_style:
224       {
225 	/* TRANSLATORS:
226 	   Get translations for open and closing quotation marks.
227 
228 	   The message catalog should translate "`" to a left
229 	   quotation mark suitable for the locale, and similarly for
230 	   "'".  If the catalog has no translation,
231 	   locale_quoting_style quotes `like this', and
232 	   clocale_quoting_style quotes "like this".
233 
234 	   For example, an American English Unicode locale should
235 	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
236 	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
237 	   MARK).  A British English Unicode locale should instead
238 	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
239 	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
240 
241 	   If you don't know what to put here, please see
242 	   <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
243 	   and use glyphs suitable for your language.  */
244 
245 	char const *left = gettext_quote (N_("`"), quoting_style);
246 	char const *right = gettext_quote (N_("'"), quoting_style);
247 	for (quote_string = left; *quote_string; quote_string++)
248 	  STORE (*quote_string);
249 	backslash_escapes = true;
250 	quote_string = right;
251 	quote_string_len = strlen (quote_string);
252       }
253       break;
254 
255     case shell_always_quoting_style:
256       STORE ('\'');
257       quote_string = "'";
258       quote_string_len = 1;
259       break;
260 
261     default:
262       break;
263     }
264 
265   for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
266     {
267       unsigned char c;
268       unsigned char esc;
269 
270       if (backslash_escapes
271 	  && quote_string_len
272 	  && i + quote_string_len <= argsize
273 	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
274 	STORE ('\\');
275 
276       c = arg[i];
277       switch (c)
278 	{
279 	case '\0':
280 	  if (backslash_escapes)
281 	    {
282 	      STORE ('\\');
283 	      STORE ('0');
284 	      STORE ('0');
285 	      c = '0';
286 	    }
287 	  break;
288 
289 	case '?':
290 	  switch (quoting_style)
291 	    {
292 	    case shell_quoting_style:
293 	      goto use_shell_always_quoting_style;
294 
295 	    case c_quoting_style:
296 	      if (i + 2 < argsize && arg[i + 1] == '?')
297 		switch (arg[i + 2])
298 		  {
299 		  case '!': case '\'':
300 		  case '(': case ')': case '-': case '/':
301 		  case '<': case '=': case '>':
302 		    /* Escape the second '?' in what would otherwise be
303 		       a trigraph.  */
304 		    c = arg[i + 2];
305 		    i += 2;
306 		    STORE ('?');
307 		    STORE ('\\');
308 		    STORE ('?');
309 		    break;
310 		  }
311 	      break;
312 
313 	    default:
314 	      break;
315 	    }
316 	  break;
317 
318 	case '\a': esc = 'a'; goto c_escape;
319 	case '\b': esc = 'b'; goto c_escape;
320 	case '\f': esc = 'f'; goto c_escape;
321 	case '\n': esc = 'n'; goto c_and_shell_escape;
322 	case '\r': esc = 'r'; goto c_and_shell_escape;
323 	case '\t': esc = 't'; goto c_and_shell_escape;
324 	case '\v': esc = 'v'; goto c_escape;
325 	case '\\': esc = c; goto c_and_shell_escape;
326 
327 	c_and_shell_escape:
328 	  if (quoting_style == shell_quoting_style)
329 	    goto use_shell_always_quoting_style;
330 	c_escape:
331 	  if (backslash_escapes)
332 	    {
333 	      c = esc;
334 	      goto store_escape;
335 	    }
336 	  break;
337 
338 	case '{': case '}': /* sometimes special if isolated */
339 	  if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
340 	    break;
341 	  /* Fall through.  */
342 	case '#': case '~':
343 	  if (i != 0)
344 	    break;
345 	  /* Fall through.  */
346 	case ' ':
347 	case '!': /* special in bash */
348 	case '"': case '$': case '&':
349 	case '(': case ')': case '*': case ';':
350 	case '<':
351 	case '=': /* sometimes special in 0th or (with "set -k") later args */
352 	case '>': case '[':
353 	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
354 	case '`': case '|':
355 	  /* A shell special character.  In theory, '$' and '`' could
356 	     be the first bytes of multibyte characters, which means
357 	     we should check them with mbrtowc, but in practice this
358 	     doesn't happen so it's not worth worrying about.  */
359 	  if (quoting_style == shell_quoting_style)
360 	    goto use_shell_always_quoting_style;
361 	  break;
362 
363 	case '\'':
364 	  switch (quoting_style)
365 	    {
366 	    case shell_quoting_style:
367 	      goto use_shell_always_quoting_style;
368 
369 	    case shell_always_quoting_style:
370 	      STORE ('\'');
371 	      STORE ('\\');
372 	      STORE ('\'');
373 	      break;
374 
375 	    default:
376 	      break;
377 	    }
378 	  break;
379 
380 	case '%': case '+': case ',': case '-': case '.': case '/':
381 	case '0': case '1': case '2': case '3': case '4': case '5':
382 	case '6': case '7': case '8': case '9': case ':':
383 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
384 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
385 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
386 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
387 	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
388 	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
389 	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
390 	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
391 	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
392 	  /* These characters don't cause problems, no matter what the
393 	     quoting style is.  They cannot start multibyte sequences.  */
394 	  break;
395 
396 	default:
397 	  /* If we have a multibyte sequence, copy it until we reach
398 	     its end, find an error, or come back to the initial shift
399 	     state.  For C-like styles, if the sequence has
400 	     unprintable characters, escape the whole sequence, since
401 	     we can't easily escape single characters within it.  */
402 	  {
403 	    /* Length of multibyte sequence found so far.  */
404 	    size_t m;
405 
406 	    bool printable;
407 
408 	    if (unibyte_locale)
409 	      {
410 		m = 1;
411 		printable = isprint (c) != 0;
412 	      }
413 	    else
414 	      {
415 		mbstate_t mbstate;
416 		memset (&mbstate, 0, sizeof mbstate);
417 
418 		m = 0;
419 		printable = true;
420 		if (argsize == SIZE_MAX)
421 		  argsize = strlen (arg);
422 
423 		do
424 		  {
425 		    wchar_t w;
426 		    size_t bytes = mbrtowc (&w, &arg[i + m],
427 					    argsize - (i + m), &mbstate);
428 		    if (bytes == 0)
429 		      break;
430 		    else if (bytes == (size_t) -1)
431 		      {
432 			printable = false;
433 			break;
434 		      }
435 		    else if (bytes == (size_t) -2)
436 		      {
437 			printable = false;
438 			while (i + m < argsize && arg[i + m])
439 			  m++;
440 			break;
441 		      }
442 		    else
443 		      {
444 			/* Work around a bug with older shells that "see" a '\'
445 			   that is really the 2nd byte of a multibyte character.
446 			   In practice the problem is limited to ASCII
447 			   chars >= '@' that are shell special chars.  */
448 			if ('[' == 0x5b && quoting_style == shell_quoting_style)
449 			  {
450 			    size_t j;
451 			    for (j = 1; j < bytes; j++)
452 			      switch (arg[i + m + j])
453 				{
454 				case '[': case '\\': case '^':
455 				case '`': case '|':
456 				  goto use_shell_always_quoting_style;
457 				}
458 			  }
459 
460 			if (! iswprint (w))
461 			  printable = false;
462 			m += bytes;
463 		      }
464 		  }
465 		while (! mbsinit (&mbstate));
466 	      }
467 
468 	    if (1 < m || (backslash_escapes && ! printable))
469 	      {
470 		/* Output a multibyte sequence, or an escaped
471 		   unprintable unibyte character.  */
472 		size_t ilim = i + m;
473 
474 		for (;;)
475 		  {
476 		    if (backslash_escapes && ! printable)
477 		      {
478 			STORE ('\\');
479 			STORE ('0' + (c >> 6));
480 			STORE ('0' + ((c >> 3) & 7));
481 			c = '0' + (c & 7);
482 		      }
483 		    if (ilim <= i + 1)
484 		      break;
485 		    STORE (c);
486 		    c = arg[++i];
487 		  }
488 
489 		goto store_c;
490 	      }
491 	  }
492 	}
493 
494       if (! (backslash_escapes
495 	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
496 	goto store_c;
497 
498     store_escape:
499       STORE ('\\');
500 
501     store_c:
502       STORE (c);
503     }
504 
505   if (i == 0 && quoting_style == shell_quoting_style)
506     goto use_shell_always_quoting_style;
507 
508   if (quote_string)
509     for (; *quote_string; quote_string++)
510       STORE (*quote_string);
511 
512   if (len < buffersize)
513     buffer[len] = '\0';
514   return len;
515 
516  use_shell_always_quoting_style:
517   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
518 				   shell_always_quoting_style, o);
519 }
520 
521 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
522    argument ARG (of size ARGSIZE), using O to control quoting.
523    If O is null, use the default.
524    Terminate the output with a null character, and return the written
525    size of the output, not counting the terminating null.
526    If BUFFERSIZE is too small to store the output string, return the
527    value that would have been returned had BUFFERSIZE been large enough.
528    If ARGSIZE is SIZE_MAX, use the string length of the argument for
529    ARGSIZE.  */
530 size_t
531 quotearg_buffer (char *buffer, size_t buffersize,
532 		 char const *arg, size_t argsize,
533 		 struct quoting_options const *o)
534 {
535   struct quoting_options const *p = o ? o : &default_quoting_options;
536   int e = errno;
537   size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
538 				       p->style, p);
539   errno = e;
540   return r;
541 }
542 
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except that the quoted
   string is returned in newly allocated storage.  A first pass with
   an empty buffer measures the output; a second pass fills the
   allocation.  errno is left as it was on entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;
  size_t needed;
  char *result;

  /* One extra byte for the terminating null.  */
  needed = quotearg_buffer (NULL, 0, arg, argsize, o) + 1;
  result = xmalloc (needed);
  quotearg_buffer (result, needed, arg, argsize, o);

  errno = saved_errno;
  return result;
}
556 
/* Return a quoted version of ARG, held in storage slot N.
   ARG is ARGSIZE bytes long, unless ARGSIZE is SIZE_MAX, in which
   case ARG is a null-terminated string.
   OPTIONS gives the quoting options.
   The result lives in static storage that the next call with the same
   N may overwrite or release.
   N must be nonnegative (the function aborts otherwise); it has type
   "int" on purpose, to leave room for future extensions that use
   negative values.  errno is left as it was on entry.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slot
    {
      size_t size;
      char *val;
    };

  /* Slot 0 starts out with a preallocated buffer, so that a caller
     can always quote one small component of a "memory exhausted"
     message in slot 0.  */
  static char slot0[256];
  static struct slot first_slot = {sizeof slot0, slot0};
  static struct slot *slots = &first_slot;
  static unsigned int slot_count = 1;

  int saved_errno = errno;
  unsigned int slot_no = n;
  struct slot *cur;
  size_t capacity;
  size_t needed;
  char *text;

  if (n < 0)
    abort ();

  /* Grow the slot table so that slot N exists.  */
  if (slot_no >= slot_count)
    {
      unsigned int new_count = slot_no + 1;

      if (xalloc_oversized (new_count, sizeof *slots))
        xalloc_die ();

      /* The initial one-element table is static and cannot be handed
         to xrealloc, so move it to the heap first.  */
      if (slots == &first_slot)
        {
          slots = xmalloc (sizeof *slots);
          *slots = first_slot;
        }
      slots = xrealloc (slots, new_count * sizeof *slots);
      memset (slots + slot_count, 0,
              (new_count - slot_count) * sizeof *slots);
      slot_count = new_count;
    }

  cur = &slots[n];
  capacity = cur->size;
  text = cur->val;
  needed = quotearg_buffer (text, capacity, arg, argsize, options);

  /* The output and its terminating null did not fit: replace the
     slot's buffer with one of exactly the right size and redo.  */
  if (needed >= capacity)
    {
      capacity = needed + 1;
      cur->size = capacity;
      if (text != slot0)
        free (text);
      text = xmalloc (capacity);
      cur->val = text;
      quotearg_buffer (text, capacity, arg, argsize, options);
    }

  errno = saved_errno;
  return text;
}
622 
623 char *
624 quotearg_n (int n, char const *arg)
625 {
626   return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
627 }
628 
/* Return a quoted version of the null-terminated string ARG, using
   storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
634 
635 /* Return quoting options for STYLE, with no extra quoting.  */
636 static struct quoting_options
637 quoting_options_from_style (enum quoting_style style)
638 {
639   struct quoting_options o;
640   o.style = style;
641   memset (o.quote_these_too, 0, sizeof o.quote_these_too);
642   return o;
643 }
644 
645 char *
646 quotearg_n_style (int n, enum quoting_style s, char const *arg)
647 {
648   struct quoting_options const o = quoting_options_from_style (s);
649   return quotearg_n_options (n, arg, SIZE_MAX, &o);
650 }
651 
652 char *
653 quotearg_n_style_mem (int n, enum quoting_style s,
654 		      char const *arg, size_t argsize)
655 {
656   struct quoting_options const o = quoting_options_from_style (s);
657   return quotearg_n_options (n, arg, argsize, &o);
658 }
659 
660 char *
661 quotearg_style (enum quoting_style s, char const *arg)
662 {
663   return quotearg_n_style (0, s, arg);
664 }
665 
666 char *
667 quotearg_char (char const *arg, char ch)
668 {
669   struct quoting_options options;
670   options = default_quoting_options;
671   set_char_quoting (&options, ch, 1);
672   return quotearg_n_options (0, arg, SIZE_MAX, &options);
673 }
674 
/* Like quotearg_char (ARG, ':'): quote ARG with ':' additionally
   marked for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const extra = ':';

  return quotearg_char (arg, extra);
}
680