1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 1999--2021  The R Core Team
4  *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, a copy is available at
18  *  https://www.R-project.org/Licenses/
19  */
20 
21 
22 /* =========
23  * Printing:
24  * =========
25  *
26  * All printing in R is done via the functions Rprintf and REprintf
27  * or their (v) versions Rvprintf and REvprintf.
28  * These routines work exactly like (v)printf(3).  Rprintf writes to
29  * ``standard output''.	 It is redirected by the sink() function,
30  * and is suitable for ordinary output.	 REprintf writes to
31  * ``standard error'' and is useful for error messages and warnings.
32  * It is not redirected by sink().
33  *
34  *  See ./format.c  for the  format_FOO_  functions which provide
35  *	~~~~~~~~~~  the	 length, width, etc.. that are used here.
36  *  See ./print.c  for do_printdefault, do_prmatrix, etc.
37  *
38  *
39  * Here, the following UTILITIES are provided:
40  *
41  * The utilities EncodeLogical, EncodeInteger, EncodeReal
42  * and EncodeString can be used to convert R objects to a form suitable
43  * for printing.  These print the values passed in a formatted form
44  * or, in the case of NA values, an NA indicator.  EncodeString takes
45  * care of printing all the standard ANSI escapes \a, \t \n etc.
46  * so that these appear in their backslash form in the string.	There
47  * is also a routine called Rstrlen which computes the length of the
48  * string in its escaped rather than literal form.
49  *
50  * Finally there is a routine called EncodeElement which will encode
51  * a single R-vector element.  This is used in deparse and write.table.
52  */
53 
54 /* if ESC_BARE_QUOTE is defined, " in an unquoted string is replaced
55    by \".  " in a quoted string is always replaced by \". */
56 
57 #ifdef HAVE_CONFIG_H
58 #include <config.h>
59 #endif
60 
61 #include <Defn.h>
62 #include <Rmath.h>
63 #include <Print.h>
64 #include <R_ext/RS.h>
65 #include <Rconnections.h>
66 
67 #include "RBufferUtils.h"
68 
69 /* At times we want to convert marked UTF-8 strings to wchar_t*. We
70  * can use our facilities to do so in a UTF-8 locale or system
71  * facilities if the platform tells us that wchar_t is UCS-4 or we
72  * know that about the platform. */
73 #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__sun))
74 /* This may not be 100% true (see the comment in rlocale.h),
75    but it seems true in normal locales */
76 # define __STDC_ISO_10646__
77 #endif
78 
79 #ifdef Win32
80 #include <trioremap.h>
81 #endif
82 #ifndef min
83 #define min(a, b) (((a)<(b))?(a):(b))
84 #endif
85 
86 #define BUFSIZE 8192  /* used by Rprintf etc */
87 
88 attribute_hidden
R_Decode2Long(char * p,int * ierr)89 R_size_t R_Decode2Long(char *p, int *ierr)
90 {
91     R_size_t v = strtol(p, &p, 10);
92     *ierr = 0;
93     if(p[0] == '\0') return v;
94     /* else look for letter-code ending : */
95     if(R_Verbose)
96 	REprintf("R_Decode2Long(): v=%ld\n", v);
97     // NOTE: currently, positive *ierr are not differentiated in the callers:
98     if(p[0] == 'G') {
99 	if((Giga * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 4; return(v); }
100 	return (R_size_t) Giga * v;
101     }
102     else if(p[0] == 'M') {
103 	if((Mega * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 1; return(v); }
104 	return (R_size_t) Mega * v;
105     }
106     else if(p[0] == 'K') {
107 	if((1024 * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 2; return(v); }
108 	return (1024*v);
109     }
110     else if(p[0] == 'k') {
111 	if((1000 * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 3; return(v); }
112 	return (1000*v);
113     }
114     else {
115 	*ierr = -1;
116 	return(v);
117     }
118 }
119 
120 /* There is no documented (or enforced) limit on 'w' here,
121    so use snprintf */
122 #define NB 1000
EncodeLogical(int x,int w)123 const char *EncodeLogical(int x, int w)
124 {
125     static char buff[NB];
126     if(x == NA_LOGICAL) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
127     else if(x) snprintf(buff, NB, "%*s", min(w, (NB-1)), "TRUE");
128     else snprintf(buff, NB, "%*s", min(w, (NB-1)), "FALSE");
129     buff[NB-1] = '\0';
130     return buff;
131 }
132 
EncodeInteger(int x,int w)133 const char *EncodeInteger(int x, int w)
134 {
135     static char buff[NB];
136     if(x == NA_INTEGER) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
137     else snprintf(buff, NB, "%*d", min(w, (NB-1)), x);
138     buff[NB-1] = '\0';
139     return buff;
140 }
141 
142 attribute_hidden
EncodeRaw(Rbyte x,const char * prefix)143 const char *EncodeRaw(Rbyte x, const char * prefix)
144 {
145     static char buff[10];
146     sprintf(buff, "%s%02x", prefix, x);
147     return buff;
148 }
149 
150 attribute_hidden
EncodeEnvironment(SEXP x)151 const char *EncodeEnvironment(SEXP x)
152 {
153     const void *vmax = vmaxget();
154     static char ch[1000];
155     if (x == R_GlobalEnv)
156 	sprintf(ch, "<environment: R_GlobalEnv>");
157     else if (x == R_BaseEnv)
158 	sprintf(ch, "<environment: base>");
159     else if (x == R_EmptyEnv)
160 	sprintf(ch, "<environment: R_EmptyEnv>");
161     else if (R_IsPackageEnv(x))
162 	snprintf(ch, 1000, "<environment: %s>",
163 		translateChar(STRING_ELT(R_PackageEnvName(x), 0)));
164     else if (R_IsNamespaceEnv(x))
165 	snprintf(ch, 1000, "<environment: namespace:%s>",
166 		translateChar(STRING_ELT(R_NamespaceEnvSpec(x), 0)));
167     else snprintf(ch, 1000, "<environment: %p>", (void *)x);
168 
169     vmaxset(vmax);
170     return ch;
171 }
172 
EncodeReal(double x,int w,int d,int e,char cdec)173 const char *EncodeReal(double x, int w, int d, int e, char cdec)
174 {
175     char dec[2];
176     dec[0] = cdec; dec[1] = '\0';
177     return EncodeReal0(x, w, d, e, dec);
178 }
179 
EncodeReal0(double x,int w,int d,int e,const char * dec)180 const char *EncodeReal0(double x, int w, int d, int e, const char *dec)
181 {
182     static char buff[NB], buff2[2*NB];
183     char fmt[20], *out = buff;
184 
185     /* IEEE allows signed zeros (yuck!) */
186     if (x == 0.0) x = 0.0;
187     if (!R_FINITE(x)) {
188 	if(ISNA(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
189 	else if(ISNAN(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), "NaN");
190 	else if(x > 0) snprintf(buff, NB, "%*s", min(w, (NB-1)), "Inf");
191 	else snprintf(buff, NB, "%*s", min(w, (NB-1)), "-Inf");
192     }
193     else if (e) {
194 	if(d) {
195 	    sprintf(fmt,"%%#%d.%de", min(w, (NB-1)), d);
196 	    snprintf(buff, NB, fmt, x);
197 	}
198 	else {
199 	    sprintf(fmt,"%%%d.%de", min(w, (NB-1)), d);
200 	    snprintf(buff, NB, fmt, x);
201 	}
202     }
203     else { /* e = 0 */
204 	sprintf(fmt,"%%%d.%df", min(w, (NB-1)), d);
205 	snprintf(buff, NB, fmt, x);
206     }
207     buff[NB-1] = '\0';
208 
209     if(strcmp(dec, ".")) {
210 	char *p, *q;
211 	for(p = buff, q = buff2; *p; p++) {
212 	    if(*p == '.') for(const char *r = dec; *r; r++) *q++ = *r;
213 	    else *q++ = *p;
214 	}
215 	*q = '\0';
216 	out = buff2;
217     }
218 
219     return out;
220 }
221 
222 static const char
EncodeRealDrop0(double x,int w,int d,int e,const char * dec)223 *EncodeRealDrop0(double x, int w, int d, int e, const char *dec)
224 {
225     static char buff[NB], buff2[2*NB];
226     char fmt[20], *out = buff;
227 
228     /* IEEE allows signed zeros (yuck!) */
229     if (x == 0.0) x = 0.0;
230     if (!R_FINITE(x)) {
231 	if(ISNA(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
232 	else if(ISNAN(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), "NaN");
233 	else if(x > 0) snprintf(buff, NB, "%*s", min(w, (NB-1)), "Inf");
234 	else snprintf(buff, NB, "%*s", min(w, (NB-1)), "-Inf");
235     }
236     else if (e) {
237 	if(d) {
238 	    sprintf(fmt,"%%#%d.%de", min(w, (NB-1)), d);
239 	    snprintf(buff, NB, fmt, x);
240 	}
241 	else {
242 	    sprintf(fmt,"%%%d.%de", min(w, (NB-1)), d);
243 	    snprintf(buff, NB, fmt, x);
244 	}
245     }
246     else { /* e = 0 */
247 	sprintf(fmt,"%%%d.%df", min(w, (NB-1)), d);
248 	snprintf(buff, NB, fmt, x);
249     }
250     buff[NB-1] = '\0';
251 
252     // Drop trailing zeroes
253     for (char *p = buff; *p; p++) {
254 	if(*p == '.') {
255 	    char *replace = p++;
256 	    while ('0' <= *p  &&  *p <= '9')
257 		if(*(p++) != '0')
258 		    replace = p;
259 	    if(replace != p)
260 		while((*(replace++) = *(p++)))
261 		    ;
262 	    break;
263 	}
264     }
265 
266     if(strcmp(dec, ".")) {
267 	char *p, *q;
268 	for(p = buff, q = buff2; *p; p++) {
269 	    if(*p == '.') for(const char *r = dec; *r; r++) *q++ = *r;
270 	    else *q++ = *p;
271 	}
272 	*q = '\0';
273 	out = buff2;
274     }
275 
276     return out;
277 }
278 
StringFromReal(double x,int * warn)279 SEXP attribute_hidden StringFromReal(double x, int *warn)
280 {
281     int w, d, e;
282     formatReal(&x, 1, &w, &d, &e, 0);
283     if (ISNA(x)) return NA_STRING;
284     else return mkChar(EncodeRealDrop0(x, w, d, e, OutDec));
285 }
286 
287 
288 attribute_hidden
EncodeReal2(double x,int w,int d,int e)289 const char *EncodeReal2(double x, int w, int d, int e)
290 {
291     static char buff[NB];
292     char fmt[20];
293 
294     /* IEEE allows signed zeros (yuck!) */
295     if (x == 0.0) x = 0.0;
296     if (!R_FINITE(x)) {
297 	if(ISNA(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
298 	else if(ISNAN(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), "NaN");
299 	else if(x > 0) snprintf(buff, NB, "%*s", min(w, (NB-1)), "Inf");
300 	else snprintf(buff, NB, "%*s", min(w, (NB-1)), "-Inf");
301     }
302     else if (e) {
303 	if(d) {
304 	    sprintf(fmt,"%%#%d.%de", min(w, (NB-1)), d);
305 	    snprintf(buff, NB, fmt, x);
306 	}
307 	else {
308 	    sprintf(fmt,"%%%d.%de", min(w, (NB-1)), d);
309 	    snprintf(buff, NB, fmt, x);
310 	}
311     }
312     else { /* e = 0 */
313 	sprintf(fmt,"%%#%d.%df", min(w, (NB-1)), d);
314 	snprintf(buff, NB, fmt, x);
315     }
316     buff[NB-1] = '\0';
317     return buff;
318 }
319 
320 #ifdef formatComplex_USING_signif
321 void z_prec_r(Rcomplex *r, Rcomplex *x, double digits);
322 #endif
323 
324 #define NB3 NB+3
325 const char
EncodeComplex(Rcomplex x,int wr,int dr,int er,int wi,int di,int ei,const char * dec)326 *EncodeComplex(Rcomplex x, int wr, int dr, int er, int wi, int di, int ei,
327 	       const char *dec)
328 {
329     static char buff[NB3];
330 
331     /* IEEE allows signed zeros; strip these here */
332     if (x.r == 0.0) x.r = 0.0;
333     if (x.i == 0.0) x.i = 0.0;
334 
335     if (ISNA(x.r) || ISNA(x.i)) {
336 	snprintf(buff, NB,
337 		 "%*s", /* was "%*s%*s", R_print.gap, "", */
338 		 min(wr+wi+2, (NB-1)), CHAR(R_print.na_string));
339     } else {
340 	char Re[NB];
341 	const char *Im, *tmp;
342 	int flagNegIm = 0;
343 	Rcomplex y;
344 #ifdef formatComplex_USING_signif
345 	/* formatComplex rounded, but this does not, and we need to
346 	   keep it that way so we don't get strange trailing zeros.
347 	   But we do want to avoid printing small exponentials that
348 	   are probably garbage.
349 	 */
350 	z_prec_r(&y, &x, R_print.digits);
351 #endif
352 	/* EncodeReal has static buffer, so copy */
353 	tmp = EncodeReal0(y.r == 0. ? y.r : x.r, wr, dr, er, dec);
354 	strcpy(Re, tmp);
355 	if ( (flagNegIm = (x.i < 0)) ) x.i = -x.i;
356 	Im = EncodeReal0(y.i == 0. ? y.i : x.i, wi, di, ei, dec);
357 	snprintf(buff, NB3, "%s%s%si", Re, flagNegIm ? "-" : "+", Im);
358     }
359     buff[NB3-1] = '\0';
360     return buff;
361 }
362 
363 /* <FIXME>
364    encodeString and Rstrwid assume that the wchar_t representation
365    used to hold multibyte chars is Unicode.  This is usually true, and
366    we warn if it is not known to be true.  Potentially looking at
367    wchar_t ranges as we do is incorrect, but that is even less likely to
368    be problematic.
369 
370    On Windows with surrogate pairs it will not be canonical, but AFAIK
371    they do not occur in any MBCS (so it would only matter if we implement
372    UTF-8, and then only if Windows has surrogate pairs switched on.).
373 */
374 
375 #include <rlocale.h> /* redefines isw* functions */
376 
377 #ifdef Win32
378 #include "rgui_UTF8.h"
379 #endif
380 
381 /* strlen() using escaped rather than literal form.
382    In MBCS locales it works in characters, and reports in display width.
383    Rstrwid is also used in printarray.c.
384 
385    This supported embedded nuls when we had those.
386  */
387 attribute_hidden
Rstrwid(const char * str,int slen,cetype_t ienc,int quote)388 int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
389 {
390     const char *p = str;
391     int len = 0, i;
392 
393     if(ienc == CE_BYTES) { // not currently used for that encoding
394 	for (i = 0; i < slen; i++) {
395 	    unsigned char k = str[i];
396 	    if (k >= 0x20 && k < 0x80) len += 1;
397 	    else len += 4;
398 	}
399 	return len;
400     }
401     /* Future-proof: currently that is all Rstrlen calls it with,
402        and printarray has CE_NATIVE explicitly */
403     if(ienc > 2) // CE_NATIVE, CE_UTF8, CE_BYTES are supported
404 	warning("unsupported encoding (%d) in Rstrwid", ienc);
405     if(mbcslocale || ienc == CE_UTF8) {
406 	Rboolean useUTF8 = (ienc == CE_UTF8);
407 	mbstate_t mb_st;
408 
409 	if(!useUTF8)  mbs_init(&mb_st);
410 	for (i = 0; i < slen; i++) {
411 	    unsigned int k; /* not wint_t as it might be signed */
412 	    wchar_t wc;
413 	    int res = useUTF8 ? (int) utf8toucs(&wc, p):
414 		(int) mbrtowc(&wc, p, R_MB_CUR_MAX, NULL);
415 	    if(res >= 0) {
416 		if (useUTF8 && IS_HIGH_SURROGATE(wc))
417 		    k = utf8toucs32(wc, p);
418 		else
419 		    k = wc;
420 		if(0x20 <= k && k < 0x7f && iswprint(k)) {
421 		    switch(wc) {
422 		    case L'\\':
423 			len += 2;
424 			break;
425 		    case L'\'':
426 		    case L'"':
427 		    case L'`':
428 			len += (quote == *p) ? 2 : 1;
429 			break;
430 		    default:
431 			len++; /* assumes these are all width 1 */
432 			break;
433 		    }
434 		    p++;
435 		} else if (k < 0x80) {
436 		    switch(wc) {
437 		    case L'\a':
438 		    case L'\b':
439 		    case L'\f':
440 		    case L'\n':
441 		    case L'\r':
442 		    case L'\t':
443 		    case L'\v':
444 		    case L'\0':
445 			len += 2; break;
446 		    default:
447 			/* print in octal */
448 			len += 4; break;
449 		    }
450 		    p++;
451 		} else {
452 		    /* no need to worry about truncation as iswprint
453 		     * and wcwidth get replaced on Windows */
454 		    // conceivably an invalid \U escape could use 11 or 12
455 		    len += iswprint(k) ?
456 #ifdef USE_RI18N_WIDTH
457 			Ri18n_wcwidth(k) :
458 #else
459 			/* this is expected to return -1 for
460 			   non-printable (including unassigned)
461 			   characters: that is unlikely to occur,
462 			   although the system's idea of 'printing'
463 			   may differ from the internal tables */
464 			imax2(wcwidth((wchar_t) k), 0) :
465 #endif
466 		    	(k > 0xffff ? 10 : 6);
467 		    i += (res - 1);
468 		    p += res;
469 		}
470 	    } else { /* invalid char */
471 		len += 4;
472 		p++;
473 	    }
474 	}
475     } else // not MBCS nor marked as UTF-8
476 	for (i = 0; i < slen; i++) {
477 	    if((unsigned char) *p < 0x80) {
478 		/* ASCII */
479 		if(isprint((int)*p)) {
480 		    switch(*p) {
481 		    case '\\':
482 			len += 2; break;
483 		    case '\'':
484 		    case '"':
485 		    case '`':
486 			len += (quote == *p)? 2 : 1; break;
487 		    default:
488 			len++; break;
489 		    }
490 		} else switch(*p) {
491 		    case '\a':
492 		    case '\b':
493 		    case '\f':
494 		    case '\n':
495 		    case '\r':
496 		    case '\t':
497 		    case '\v':
498 		    case '\0':
499 			len += 2; break;
500 		    default:
501 			/* print in octal */
502 			len += 4; break;
503 		    }
504 		p++;
505 	    } else { /* 8 bit char */
506 #ifdef Win32 /* It seems Windows does not know what is printable! */
507 		len++;
508 #else
509 		len += isprint((int)*p) ? 1 : 4;
510 #endif
511 		p++;
512 	    }
513 	}
514 
515     return len;
516 }
517 
518 /* Match what EncodeString does with encodings */
519 attribute_hidden
Rstrlen(SEXP s,int quote)520 int Rstrlen(SEXP s, int quote)
521 {
522     cetype_t ienc = getCharCE(s);
523     if (ienc == CE_UTF8 || ienc == CE_BYTES)
524 	return Rstrwid(CHAR(s), LENGTH(s), ienc, quote);
525     const void *vmax = vmaxget();
526     const char *p = translateChar(s);
527     int len = Rstrwid(p, (int)strlen(p), CE_NATIVE, quote);
528     vmaxset(vmax);
529     return len;
530 }
531 
532 /* Here w is the minimum field width
533    If 'quote' is non-zero the result should be quoted (and internal quotes
534    escaped and NA strings handled differently).
535 
536    EncodeString is called from EncodeElement, EncodeChar, cat() (for labels
537    when filling), to (auto)print character vectors, arrays, names and
538    CHARSXPs.  It is also called by do_encodeString, but not from
539    format().
540  */
541 
542 attribute_hidden
EncodeString(SEXP s,int w,int quote,Rprt_adj justify)543 const char *EncodeString(SEXP s, int w, int quote, Rprt_adj justify)
544 {
545     int i, cnt;
546     const char *p; char *q, buf[13];
547     cetype_t ienc = getCharCE(s);
548     Rboolean useUTF8 = w < 0;
549     const void *vmax = vmaxget();
550 
551     if (w < 0) w = w + 1000000;
552 
553     /* We have to do something like this as the result is returned, and
554        passed on by EncodeElement -- so no way could be end user be
555        responsible for freeing it.  However, this is not thread-safe. */
556 
557     static R_StringBuffer gBuffer = {NULL, 0, BUFSIZE};
558     R_StringBuffer *buffer = &gBuffer;
559 
560     if (s == NA_STRING) {
561 	p = quote ? CHAR(R_print.na_string) : CHAR(R_print.na_string_noquote);
562 	cnt = (int)(quote ? strlen(CHAR(R_print.na_string)) :
563 		strlen(CHAR(R_print.na_string_noquote)));
564 	i = (quote ? Rstrlen(R_print.na_string, 0) :
565 		Rstrlen(R_print.na_string_noquote, 0));
566 	quote = 0;
567     } else {
568 	if(IS_BYTES(s)) {
569 	    ienc = CE_NATIVE;
570 #ifdef Win32
571 	    if (WinUTF8out)
572 		ienc = CE_UTF8;
573 #endif
574 	    p = CHAR(s);
575 	    cnt = (int) strlen(p);
576 	    const char *q;
577 	    char *pp = R_alloc(4*cnt+1, 1), *qq = pp, buf[5];
578 	    for (q = p; *q; q++) {
579 		unsigned char k = (unsigned char) *q;
580 		if (k >= 0x20 && k < 0x80) {
581 		    *qq++ = *q;
582 		    if (quote && *q == '"') cnt++;
583 		} else {
584 		    snprintf(buf, 5, "\\x%02x", k);
585 		    for(int j = 0; j < 4; j++) *qq++ = buf[j];
586 		    cnt += 3;
587 		}
588 	    }
589 	    *qq = '\0';
590 	    p = pp;
591 	    i = cnt;
592 #ifdef Win32
593 	} else if(WinUTF8out) {
594 	    if(ienc == CE_UTF8) {
595 		p = CHAR(s);
596 		i = Rstrlen(s, quote);
597 		cnt = LENGTH(s);
598 	    } else {
599 		p = translateCharUTF8(s);
600 		if(p == CHAR(s)) {
601 		    i = Rstrlen(s, quote);
602 		    cnt = LENGTH(s);
603 		} else {
604 		    cnt = (int) strlen(p);
605 		    i = Rstrwid(p, cnt, CE_UTF8, quote);
606 		}
607 		ienc = CE_UTF8;
608 	    }
609 #endif
610 	} else {
611 	    if (useUTF8 && ienc == CE_UTF8) {
612 		p = CHAR(s);
613 		i = Rstrlen(s, quote);
614 		cnt = LENGTH(s);
615 	    } else {
616 		ienc = CE_NATIVE;
617 		p = translateChar(s);
618 		if(p == CHAR(s)) {
619 		    i = Rstrlen(s, quote);
620 		    cnt = LENGTH(s);
621 		} else {
622 		    cnt = (int) strlen(p);
623 		    i = Rstrwid(p, cnt, CE_NATIVE, quote);
624 		}
625 	    }
626 	}
627     }
628 
629     /* We need enough space for the encoded string, including escapes.
630        Octal encoding turns one byte into four.
631        \u encoding can turn a multibyte into six or ten,
632        but it turns 2/3 into 6, and 4 (and perhaps 5/6) into 10.
633        Let's be wasteful here (the worst case appears to be an MBCS with
634        one byte for an upper-plane Unicode point output as ten bytes,
635        but I doubt that such an MBCS exists: two bytes is plausible).
636 
637        +2 allows for quotes, +6 for UTF_8 escapes.
638      */
639     if(5.*cnt + 8 > (double) SIZE_MAX)
640 	error(_("too large string (nchar=%d) => 5*nchar + 8 > SIZE_MAX"));
641     size_t q_len = 5*(size_t)cnt + 8;
642     if(q_len < w) q_len = (size_t) w;
643     q = R_AllocStringBuffer(q_len, buffer);
644 
645     int b = w - i - (quote ? 2 : 0); /* total amount of padding */
646     if(justify == Rprt_adj_none) b = 0;
647     if(b > 0 && justify != Rprt_adj_left) {
648 	int b0 = (justify == Rprt_adj_centre) ? b/2 : b;
649 	for(i = 0 ; i < b0 ; i++) *q++ = ' ';
650 	b -= b0;
651     }
652     if(quote) *q++ = (char) quote;
653     if(mbcslocale || ienc == CE_UTF8) {
654 	Rboolean useUTF8 = (ienc == CE_UTF8);
655 	mbstate_t mb_st;
656 #ifndef __STDC_ISO_10646__
657 	Rboolean Unicode_warning = FALSE;
658 #endif
659 	if(!useUTF8)  mbs_init(&mb_st);
660 #ifdef Win32
661 	else if(WinUTF8out) { memcpy(q, UTF8in, 3); q += 3; }
662 #endif
663 	for (i = 0; i < cnt; i++) {
664 	    wchar_t wc;
665 	    int res = (int)(useUTF8 ? utf8toucs(&wc, p):
666 			    mbrtowc(&wc, p, R_MB_CUR_MAX, NULL));
667 	    if(res >= 0) { /* res = 0 is a terminator */
668 		unsigned int k; /* not wint_t as it might be signed */
669 		if (useUTF8 && IS_HIGH_SURROGATE(wc))
670 		    k = utf8toucs32(wc, p);
671 		else
672 		    k = wc;
673 		/* To be portable, treat \0 explicitly */
674 		if(res == 0) {k = 0; wc = L'\0';}
675 		if(0x20 <= k && k < 0x7f && iswprint(k)) {
676 		    switch(wc) {
677 		    case L'\\': *q++ = '\\'; *q++ = '\\'; p++; break;
678 		    case L'\'':
679 		    case L'"':
680 		    case L'`':
681 			{
682 			    if(quote == *p)  *q++ = '\\';
683 			    *q++ = *p++;
684 			    break;
685 			}
686 		    default:
687 			for(int j = 0; j < res; j++) *q++ = *p++;
688 			break;
689 		    }
690 		} else if (k < 0x80) {
691 		    /* ANSI Escapes */
692 		    switch(wc) {
693 		    case L'\a': *q++ = '\\'; *q++ = 'a'; break;
694 		    case L'\b': *q++ = '\\'; *q++ = 'b'; break;
695 		    case L'\f': *q++ = '\\'; *q++ = 'f'; break;
696 		    case L'\n': *q++ = '\\'; *q++ = 'n'; break;
697 		    case L'\r': *q++ = '\\'; *q++ = 'r'; break;
698 		    case L'\t': *q++ = '\\'; *q++ = 't'; break;
699 		    case L'\v': *q++ = '\\'; *q++ = 'v'; break;
700 		    case L'\0': *q++ = '\\'; *q++ = '0'; break;
701 
702 		    default:
703 			/* print in octal */
704 			// gcc 7 requires cast here
705 			snprintf(buf, 5, "\\%03o", (unsigned char)k);
706 			for(int j = 0; j < 4; j++) *q++ = buf[j];
707 			break;
708 		    }
709 		    p++;
710 		} else {
711 		    /* wc could be an unpaired surrogate and this does
712 		     * not do the same as Rstrwid */
713 		    /* no need to worry about truncation as iswprint
714 		     * gets replaced on Windows */
715 		    if(iswprint(k)) {
716 			/* The problem here is that wc may be
717 			   printable according to the Unicode tables,
718 			   but it may not be printable on the output
719 			   device concerned.
720 
721 			   And the system iswprintf may not correspond
722 			   to the latest Unicode tables.
723 			*/
724 			for(int j = 0; j < res; j++) *q++ = *p++;
725 		    } else {
726 # if !defined (__STDC_ISO_10646__) && !defined (Win32)
727 			Unicode_warning = TRUE;
728 # endif
729 			if(k > 0xffff)
730 			    snprintf(buf, 13, "\\U{%06x}", k);
731 			else
732 			    snprintf(buf, 11, "\\u%04x", k);
733 			int j = (int) strlen(buf);
734 			memcpy(q, buf, j);
735 			q += j;
736 			p += res;
737 		    }
738 		    i += (res - 1);
739 		}
740 	    } else { /* invalid char */
741 		snprintf(q, 5, "\\x%02x", *((unsigned char *)p));
742 		q += 4; p++;
743 	    }
744 	}
745 #ifndef __STDC_ISO_10646__
746 	if(Unicode_warning)
747 	    warning(_("it is not known that wchar_t is Unicode on this platform"));
748 #endif
749 
750     } else
751 	for (i = 0; i < cnt; i++) {
752 
753 	    /* ASCII */
754 	    if((unsigned char) *p < 0x80) {
755 		if(*p != '\t' && isprint((int)*p)) { /* Windows has \t as printable */
756 		    switch(*p) {
757 		    case '\\': *q++ = '\\'; *q++ = '\\'; break;
758 		    case '\'':
759 		    case '"':
760 		    case '`':
761 		    {
762 			if(quote == *p)  *q++ = '\\';
763 			*q++ = *p;
764 			break;
765 		    }
766 		    default: *q++ = *p; break;
767 		    }
768 		} else switch(*p) {
769 			/* ANSI Escapes */
770 		    case '\a': *q++ = '\\'; *q++ = 'a'; break;
771 		    case '\b': *q++ = '\\'; *q++ = 'b'; break;
772 		    case '\f': *q++ = '\\'; *q++ = 'f'; break;
773 		    case '\n': *q++ = '\\'; *q++ = 'n'; break;
774 		    case '\r': *q++ = '\\'; *q++ = 'r'; break;
775 		    case '\t': *q++ = '\\'; *q++ = 't'; break;
776 		    case '\v': *q++ = '\\'; *q++ = 'v'; break;
777 		    case '\0': *q++ = '\\'; *q++ = '0'; break;
778 
779 		    default:
780 			/* print in octal */
781 			snprintf(buf, 5, "\\%03o", (unsigned char) *p);
782 			for(int j = 0; j < 4; j++) *q++ = buf[j];
783 			break;
784 		    }
785 		p++;
786 	    } else {  /* 8 bit char */
787 		if(!isprint((int)*p & 0xff)) {
788 		    /* print in octal */
789 		    snprintf(buf, 5, "\\%03o", (unsigned char) *p);
790 		    for(int j = 0; j < 4; j++) *q++ = buf[j];
791 		    p++;
792 		} else *q++ = *p++;
793 	    }
794 	}
795 
796 #ifdef Win32
797     if(WinUTF8out && ienc == CE_UTF8)  { memcpy(q, UTF8out, 3); q += 3; }
798 #endif
799     if(quote) *q++ = (char) quote;
800     if(b > 0 && justify != Rprt_adj_right) {
801 	for(i = 0 ; i < b ; i++) *q++ = ' ';
802     }
803     *q = '\0';
804 
805     vmaxset(vmax);
806     return buffer->data;
807 }
808 
809 /* EncodeElement is called by cat(), write.table() and deparsing. */
810 
811 /* NB this is called by R.app even though it is in no public header, so
812    alter there if you alter this */
EncodeElement(SEXP x,int indx,int quote,char cdec)813 const char *EncodeElement(SEXP x, int indx, int quote, char cdec)
814 {
815     char dec[2];
816     dec[0] = cdec; dec[1] = '\0';
817     return EncodeElement0(x, indx, quote, dec);
818 }
819 
EncodeElement0(SEXP x,R_xlen_t indx,int quote,const char * dec)820 const char *EncodeElement0(SEXP x, R_xlen_t indx, int quote, const char *dec)
821 {
822     int w, d, e, wi, di, ei;
823     const char *res;
824 
825     switch(TYPEOF(x)) {
826     case LGLSXP:
827 	formatLogical(&LOGICAL_RO(x)[indx], 1, &w);
828 	res = EncodeLogical(LOGICAL_RO(x)[indx], w);
829 	break;
830     case INTSXP:
831 	formatInteger(&INTEGER_RO(x)[indx], 1, &w);
832 	res = EncodeInteger(INTEGER_RO(x)[indx], w);
833 	break;
834     case REALSXP:
835 	formatReal(&REAL_RO(x)[indx], 1, &w, &d, &e, 0);
836 	res = EncodeReal0(REAL_RO(x)[indx], w, d, e, dec);
837 	break;
838     case STRSXP:
839 	formatString(&STRING_PTR_RO(x)[indx], 1, &w, quote);
840 	res = EncodeString(STRING_ELT(x, indx), w, quote, Rprt_adj_left);
841 	break;
842     case CPLXSXP:
843 	formatComplex(&COMPLEX_RO(x)[indx], 1, &w, &d, &e, &wi, &di, &ei, 0);
844 	res = EncodeComplex(COMPLEX_RO(x)[indx], w, d, e, wi, di, ei, dec);
845 	break;
846     case RAWSXP:
847 	res = EncodeRaw(RAW_RO(x)[indx], "");
848 	break;
849     default:
850 	res = NULL; /* -Wall */
851 	UNIMPLEMENTED_TYPE("EncodeElement", x);
852     }
853     return res;
854 }
855 
856 /* EncodeChar is a simple wrapper for EncodeString
857    called by error messages to display CHARSXP values.
858 
859    The pointer returned by EncodeChar points into an internal buffer
860    which is overwritten by subsequent calls to EncodeChar/EncodeString.
861    It is the responsibility of the caller to copy the result before
862    any subsequent call to EncodeChar/EncodeString may happen. Note that
863    particularly it is NOT safe to pass the result of EncodeChar as 3rd
864    argument to errorcall (errorcall_cpy can be used instead). */
865 //attribute_hidden
EncodeChar(SEXP x)866 const char *EncodeChar(SEXP x)
867 {
868     return EncodeString(x, 0, 0, Rprt_adj_left);
869 }
870 
871 
Rprintf(const char * format,...)872 void Rprintf(const char *format, ...)
873 {
874     va_list(ap);
875 
876     va_start(ap, format);
877     Rvprintf(format, ap);
878     va_end(ap);
879 }
880 
881 /*
882   REprintf is used by the error handler do not add
883   anything unless you're sure it won't
884   cause problems
885 */
REprintf(const char * format,...)886 void REprintf(const char *format, ...)
887 {
888     va_list(ap);
889     va_start(ap, format);
890     REvprintf(format, ap);
891     va_end(ap);
892 }
893 
894 #if defined(HAVE_VASPRINTF) && !HAVE_DECL_VASPRINTF
895 int vasprintf(char **strp, const char *fmt, va_list ap)
896 #ifdef __cplusplus
897 	throw ()
898 #endif
899 ;
900 #endif
901 
902 # define R_BUFSIZE BUFSIZE
903 // similar to dummy_vfprintf in connections.c
904 attribute_hidden
Rcons_vprintf(const char * format,va_list arg)905 void Rcons_vprintf(const char *format, va_list arg)
906 {
907     char buf[R_BUFSIZE], *p = buf;
908     int res;
909     const void *vmax = vmaxget();
910     int usedRalloc = FALSE, usedVasprintf = FALSE;
911     va_list aq;
912 
913     va_copy(aq, arg);
914     res = Rvsnprintf_mbcs(buf, R_BUFSIZE, format, aq);
915     va_end(aq);
916 #ifdef HAVE_VASPRINTF
917     if(res >= R_BUFSIZE || res < 0) {
918 	res = vasprintf(&p, format, arg);
919 	if (res < 0) {
920 	    p = buf;
921 	    warning(_("printing of extremely long output is truncated"));
922 	} else usedVasprintf = TRUE;
923     }
924 #else
925     if(res >= R_BUFSIZE) { /* res is the desired output length */
926 	usedRalloc = TRUE;
927 	/* dummy_vfprintf protects against `res` being counted short; we do not
928 	   do that here */
929 	p = R_alloc(res+1, sizeof(char));
930 	vsprintf(p, format, arg);
931     } else if(res < 0) {
932 	/* Some non-C99 conforming vsnprintf implementations return -1 on
933 	   truncation instead of only on error. */
934 	usedRalloc = TRUE;
935 	p = R_alloc(10*R_BUFSIZE, sizeof(char));
936 	res = Rvsnprintf_mbcs(p, 10*R_BUFSIZE, format, arg);
937 	if (res < 0 || res >= 10*R_BUFSIZE)
938 	    warning(_("printing of extremely long output is truncated"));
939     }
940 #endif /* HAVE_VASPRINTF */
941     R_WriteConsole(p, (int) strlen(p));
942     if(usedRalloc) vmaxset(vmax);
943     if(usedVasprintf) free(p);
944 }
945 
Rvprintf(const char * format,va_list arg)946 void Rvprintf(const char *format, va_list arg)
947 {
948     int i=0, con_num=R_OutputCon;
949     Rconnection con;
950     va_list argcopy;
951     static int printcount = 0;
952 
953     if (++printcount > 100) {
954 	R_CheckUserInterrupt();
955 	printcount = 0 ;
956     }
957 
958     do{
959       con = getConnection(con_num);
960       va_copy(argcopy, arg);
961       /* Parentheses added for Fedora with -D_FORTIFY_SOURCE=2 */
962       (con->vfprintf)(con, format, argcopy);
963       va_end(argcopy);
964       con->fflush(con);
965       con_num = getActiveSink(i++);
966     } while(con_num>0);
967 
968 
969 }
970 
971 /*
972    REvprintf is part of the error handler.
973    Do not change it unless you are SURE that
974    your changes are compatible with the
975    error handling mechanism.
976 
977    It is also used in R_Suicide on Unix.
978 */
979 
REvprintf(const char * format,va_list arg)980 void REvprintf(const char *format, va_list arg)
981 {
982     static char *malloc_buf = NULL;
983 
984     if (malloc_buf) {
985 	char *tmp = malloc_buf;
986 	malloc_buf = NULL;
987 	free(tmp);
988     }
989     if(R_ErrorCon != 2) {
990 	Rconnection con = getConnection_no_err(R_ErrorCon);
991 	if(con == NULL) {
992 	    /* should never happen, but in case of corruption... */
993 	    R_ErrorCon = 2;
994 	} else {
995 	    /* Parentheses added for FC4 with gcc4 and -D_FORTIFY_SOURCE=2 */
996 	    (con->vfprintf)(con, format, arg);
997 	    con->fflush(con);
998 	    return;
999 	}
1000     }
1001     if(R_Consolefile) {
1002 	/* try to interleave stdout and stderr carefully */
1003 	if(R_Outputfile && (R_Outputfile != R_Consolefile)) {
1004 	    fflush(R_Outputfile);
1005 	    vfprintf(R_Consolefile, format, arg);
1006 	    /* normally R_Consolefile is stderr and so unbuffered, but
1007 	       it can be something else (e.g. stdout on Win9x) */
1008 	    fflush(R_Consolefile);
1009 	} else vfprintf(R_Consolefile, format, arg);
1010     } else {
1011 	char buf[BUFSIZE];
1012 	Rboolean printed = FALSE;
1013 	va_list aq;
1014 
1015 	va_copy(aq, arg);
1016 	int res = Rvsnprintf_mbcs(buf, BUFSIZE, format, aq);
1017 	va_end(aq);
1018 	if (res >= BUFSIZE) {
1019 	    /* A very long string has been truncated. Try to allocate a large
1020 	       buffer for it to print it in full. Do not use R_alloc() as this
1021 	       can be run due to memory allocation error from the R heap.
1022 	       Do not use contexts and do not throw any errors nor warnings
1023 	       as this may be run from error handling. */
1024 	    int size = res + 1;
1025 	    malloc_buf = (char *)malloc(size * sizeof(char));
1026 	    if (malloc_buf) {
1027 		res = vsnprintf(malloc_buf, size, format, arg);
1028 		if (res == size - 1) {
1029 		    R_WriteConsoleEx(malloc_buf, res, 1);
1030 		    printed = TRUE;
1031 		}
1032 		char *tmp = malloc_buf;
1033 		malloc_buf = NULL;
1034 		free(tmp);
1035 	    }
1036 	}
1037 	if (!printed)
1038 	    R_WriteConsoleEx(buf, (int) strlen(buf), 1);
1039     }
1040 }
1041 
IndexWidth(R_xlen_t n)1042 int attribute_hidden IndexWidth(R_xlen_t n)
1043 {
1044     return (int) (log10(n + 0.5) + 1);
1045 }
1046 
VectorIndex(R_xlen_t i,int w)1047 void attribute_hidden VectorIndex(R_xlen_t i, int w)
1048 {
1049 /* print index label "[`i']" , using total width `w' (left filling blanks) */
1050     Rprintf("%*s[%ld]", w-IndexWidth(i)-2, "", i);
1051 }
1052