1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 1999--2021 The R Core Team
4 * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, a copy is available at
18 * https://www.R-project.org/Licenses/
19 */
20
21
22 /* =========
23 * Printing:
24 * =========
25 *
26 * All printing in R is done via the functions Rprintf and REprintf
27 * or their (v) versions Rvprintf and REvprintf.
28 * These routines work exactly like (v)printf(3). Rprintf writes to
29 * ``standard output''. It is redirected by the sink() function,
30 * and is suitable for ordinary output. REprintf writes to
31 * ``standard error'' and is useful for error messages and warnings.
32 * It is not redirected by sink().
33 *
34 * See ./format.c for the format_FOO_ functions which provide
35 * ~~~~~~~~~~ the length, width, etc.. that are used here.
36 * See ./print.c for do_printdefault, do_prmatrix, etc.
37 *
38 *
39 * Here, the following UTILITIES are provided:
40 *
41 * The utilities EncodeLogical, EncodeInteger, EncodeReal
42 * and EncodeString can be used to convert R objects to a form suitable
43 * for printing. These print the values passed in a formatted form
44 * or, in the case of NA values, an NA indicator. EncodeString takes
45 * care of printing all the standard ANSI escapes \a, \t \n etc.
46 * so that these appear in their backslash form in the string. There
47 * is also a routine called Rstrlen which computes the length of the
48 * string in its escaped rather than literal form.
49 *
50 * Finally there is a routine called EncodeElement which will encode
51 * a single R-vector element. This is used in deparse and write.table.
52 */
53
54 /* if ESC_BARE_QUOTE is defined, " in an unquoted string is replaced
55 by \". " in a quoted string is always replaced by \". */
56
57 #ifdef HAVE_CONFIG_H
58 #include <config.h>
59 #endif
60
61 #include <Defn.h>
62 #include <Rmath.h>
63 #include <Print.h>
64 #include <R_ext/RS.h>
65 #include <Rconnections.h>
66
67 #include "RBufferUtils.h"
68
69 /* At times we want to convert marked UTF-8 strings to wchar_t*. We
70 * can use our facilities to do so in a UTF-8 locale or system
71 * facilities if the platform tells us that wchar_t is UCS-4 or we
72 * know that about the platform. */
73 #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__sun))
74 /* This may not be 100% true (see the comment in rlocale.h),
75 but it seems true in normal locales */
76 # define __STDC_ISO_10646__
77 #endif
78
79 #ifdef Win32
80 #include <trioremap.h>
81 #endif
82 #ifndef min
83 #define min(a, b) (((a)<(b))?(a):(b))
84 #endif
85
86 #define BUFSIZE 8192 /* used by Rprintf etc */
87
88 attribute_hidden
R_Decode2Long(char * p,int * ierr)89 R_size_t R_Decode2Long(char *p, int *ierr)
90 {
91 R_size_t v = strtol(p, &p, 10);
92 *ierr = 0;
93 if(p[0] == '\0') return v;
94 /* else look for letter-code ending : */
95 if(R_Verbose)
96 REprintf("R_Decode2Long(): v=%ld\n", v);
97 // NOTE: currently, positive *ierr are not differentiated in the callers:
98 if(p[0] == 'G') {
99 if((Giga * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 4; return(v); }
100 return (R_size_t) Giga * v;
101 }
102 else if(p[0] == 'M') {
103 if((Mega * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 1; return(v); }
104 return (R_size_t) Mega * v;
105 }
106 else if(p[0] == 'K') {
107 if((1024 * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 2; return(v); }
108 return (1024*v);
109 }
110 else if(p[0] == 'k') {
111 if((1000 * (double)v) > (double) R_SIZE_T_MAX) { *ierr = 3; return(v); }
112 return (1000*v);
113 }
114 else {
115 *ierr = -1;
116 return(v);
117 }
118 }
119
120 /* There is no documented (or enforced) limit on 'w' here,
121 so use snprintf */
122 #define NB 1000
EncodeLogical(int x,int w)123 const char *EncodeLogical(int x, int w)
124 {
125 static char buff[NB];
126 if(x == NA_LOGICAL) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
127 else if(x) snprintf(buff, NB, "%*s", min(w, (NB-1)), "TRUE");
128 else snprintf(buff, NB, "%*s", min(w, (NB-1)), "FALSE");
129 buff[NB-1] = '\0';
130 return buff;
131 }
132
EncodeInteger(int x,int w)133 const char *EncodeInteger(int x, int w)
134 {
135 static char buff[NB];
136 if(x == NA_INTEGER) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
137 else snprintf(buff, NB, "%*d", min(w, (NB-1)), x);
138 buff[NB-1] = '\0';
139 return buff;
140 }
141
142 attribute_hidden
EncodeRaw(Rbyte x,const char * prefix)143 const char *EncodeRaw(Rbyte x, const char * prefix)
144 {
145 static char buff[10];
146 sprintf(buff, "%s%02x", prefix, x);
147 return buff;
148 }
149
150 attribute_hidden
EncodeEnvironment(SEXP x)151 const char *EncodeEnvironment(SEXP x)
152 {
153 const void *vmax = vmaxget();
154 static char ch[1000];
155 if (x == R_GlobalEnv)
156 sprintf(ch, "<environment: R_GlobalEnv>");
157 else if (x == R_BaseEnv)
158 sprintf(ch, "<environment: base>");
159 else if (x == R_EmptyEnv)
160 sprintf(ch, "<environment: R_EmptyEnv>");
161 else if (R_IsPackageEnv(x))
162 snprintf(ch, 1000, "<environment: %s>",
163 translateChar(STRING_ELT(R_PackageEnvName(x), 0)));
164 else if (R_IsNamespaceEnv(x))
165 snprintf(ch, 1000, "<environment: namespace:%s>",
166 translateChar(STRING_ELT(R_NamespaceEnvSpec(x), 0)));
167 else snprintf(ch, 1000, "<environment: %p>", (void *)x);
168
169 vmaxset(vmax);
170 return ch;
171 }
172
EncodeReal(double x,int w,int d,int e,char cdec)173 const char *EncodeReal(double x, int w, int d, int e, char cdec)
174 {
175 char dec[2];
176 dec[0] = cdec; dec[1] = '\0';
177 return EncodeReal0(x, w, d, e, dec);
178 }
179
EncodeReal0(double x,int w,int d,int e,const char * dec)180 const char *EncodeReal0(double x, int w, int d, int e, const char *dec)
181 {
182 static char buff[NB], buff2[2*NB];
183 char fmt[20], *out = buff;
184
185 /* IEEE allows signed zeros (yuck!) */
186 if (x == 0.0) x = 0.0;
187 if (!R_FINITE(x)) {
188 if(ISNA(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
189 else if(ISNAN(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), "NaN");
190 else if(x > 0) snprintf(buff, NB, "%*s", min(w, (NB-1)), "Inf");
191 else snprintf(buff, NB, "%*s", min(w, (NB-1)), "-Inf");
192 }
193 else if (e) {
194 if(d) {
195 sprintf(fmt,"%%#%d.%de", min(w, (NB-1)), d);
196 snprintf(buff, NB, fmt, x);
197 }
198 else {
199 sprintf(fmt,"%%%d.%de", min(w, (NB-1)), d);
200 snprintf(buff, NB, fmt, x);
201 }
202 }
203 else { /* e = 0 */
204 sprintf(fmt,"%%%d.%df", min(w, (NB-1)), d);
205 snprintf(buff, NB, fmt, x);
206 }
207 buff[NB-1] = '\0';
208
209 if(strcmp(dec, ".")) {
210 char *p, *q;
211 for(p = buff, q = buff2; *p; p++) {
212 if(*p == '.') for(const char *r = dec; *r; r++) *q++ = *r;
213 else *q++ = *p;
214 }
215 *q = '\0';
216 out = buff2;
217 }
218
219 return out;
220 }
221
222 static const char
EncodeRealDrop0(double x,int w,int d,int e,const char * dec)223 *EncodeRealDrop0(double x, int w, int d, int e, const char *dec)
224 {
225 static char buff[NB], buff2[2*NB];
226 char fmt[20], *out = buff;
227
228 /* IEEE allows signed zeros (yuck!) */
229 if (x == 0.0) x = 0.0;
230 if (!R_FINITE(x)) {
231 if(ISNA(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
232 else if(ISNAN(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), "NaN");
233 else if(x > 0) snprintf(buff, NB, "%*s", min(w, (NB-1)), "Inf");
234 else snprintf(buff, NB, "%*s", min(w, (NB-1)), "-Inf");
235 }
236 else if (e) {
237 if(d) {
238 sprintf(fmt,"%%#%d.%de", min(w, (NB-1)), d);
239 snprintf(buff, NB, fmt, x);
240 }
241 else {
242 sprintf(fmt,"%%%d.%de", min(w, (NB-1)), d);
243 snprintf(buff, NB, fmt, x);
244 }
245 }
246 else { /* e = 0 */
247 sprintf(fmt,"%%%d.%df", min(w, (NB-1)), d);
248 snprintf(buff, NB, fmt, x);
249 }
250 buff[NB-1] = '\0';
251
252 // Drop trailing zeroes
253 for (char *p = buff; *p; p++) {
254 if(*p == '.') {
255 char *replace = p++;
256 while ('0' <= *p && *p <= '9')
257 if(*(p++) != '0')
258 replace = p;
259 if(replace != p)
260 while((*(replace++) = *(p++)))
261 ;
262 break;
263 }
264 }
265
266 if(strcmp(dec, ".")) {
267 char *p, *q;
268 for(p = buff, q = buff2; *p; p++) {
269 if(*p == '.') for(const char *r = dec; *r; r++) *q++ = *r;
270 else *q++ = *p;
271 }
272 *q = '\0';
273 out = buff2;
274 }
275
276 return out;
277 }
278
StringFromReal(double x,int * warn)279 SEXP attribute_hidden StringFromReal(double x, int *warn)
280 {
281 int w, d, e;
282 formatReal(&x, 1, &w, &d, &e, 0);
283 if (ISNA(x)) return NA_STRING;
284 else return mkChar(EncodeRealDrop0(x, w, d, e, OutDec));
285 }
286
287
288 attribute_hidden
EncodeReal2(double x,int w,int d,int e)289 const char *EncodeReal2(double x, int w, int d, int e)
290 {
291 static char buff[NB];
292 char fmt[20];
293
294 /* IEEE allows signed zeros (yuck!) */
295 if (x == 0.0) x = 0.0;
296 if (!R_FINITE(x)) {
297 if(ISNA(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), CHAR(R_print.na_string));
298 else if(ISNAN(x)) snprintf(buff, NB, "%*s", min(w, (NB-1)), "NaN");
299 else if(x > 0) snprintf(buff, NB, "%*s", min(w, (NB-1)), "Inf");
300 else snprintf(buff, NB, "%*s", min(w, (NB-1)), "-Inf");
301 }
302 else if (e) {
303 if(d) {
304 sprintf(fmt,"%%#%d.%de", min(w, (NB-1)), d);
305 snprintf(buff, NB, fmt, x);
306 }
307 else {
308 sprintf(fmt,"%%%d.%de", min(w, (NB-1)), d);
309 snprintf(buff, NB, fmt, x);
310 }
311 }
312 else { /* e = 0 */
313 sprintf(fmt,"%%#%d.%df", min(w, (NB-1)), d);
314 snprintf(buff, NB, fmt, x);
315 }
316 buff[NB-1] = '\0';
317 return buff;
318 }
319
320 #ifdef formatComplex_USING_signif
321 void z_prec_r(Rcomplex *r, Rcomplex *x, double digits);
322 #endif
323
324 #define NB3 NB+3
325 const char
EncodeComplex(Rcomplex x,int wr,int dr,int er,int wi,int di,int ei,const char * dec)326 *EncodeComplex(Rcomplex x, int wr, int dr, int er, int wi, int di, int ei,
327 const char *dec)
328 {
329 static char buff[NB3];
330
331 /* IEEE allows signed zeros; strip these here */
332 if (x.r == 0.0) x.r = 0.0;
333 if (x.i == 0.0) x.i = 0.0;
334
335 if (ISNA(x.r) || ISNA(x.i)) {
336 snprintf(buff, NB,
337 "%*s", /* was "%*s%*s", R_print.gap, "", */
338 min(wr+wi+2, (NB-1)), CHAR(R_print.na_string));
339 } else {
340 char Re[NB];
341 const char *Im, *tmp;
342 int flagNegIm = 0;
343 Rcomplex y;
344 #ifdef formatComplex_USING_signif
345 /* formatComplex rounded, but this does not, and we need to
346 keep it that way so we don't get strange trailing zeros.
347 But we do want to avoid printing small exponentials that
348 are probably garbage.
349 */
350 z_prec_r(&y, &x, R_print.digits);
351 #endif
352 /* EncodeReal has static buffer, so copy */
353 tmp = EncodeReal0(y.r == 0. ? y.r : x.r, wr, dr, er, dec);
354 strcpy(Re, tmp);
355 if ( (flagNegIm = (x.i < 0)) ) x.i = -x.i;
356 Im = EncodeReal0(y.i == 0. ? y.i : x.i, wi, di, ei, dec);
357 snprintf(buff, NB3, "%s%s%si", Re, flagNegIm ? "-" : "+", Im);
358 }
359 buff[NB3-1] = '\0';
360 return buff;
361 }
362
363 /* <FIXME>
364 encodeString and Rstrwid assume that the wchar_t representation
365 used to hold multibyte chars is Unicode. This is usually true, and
366 we warn if it is not known to be true. Potentially looking at
367 wchar_t ranges as we do is incorrect, but that is even less likely to
368 be problematic.
369
370 On Windows with surrogate pairs it will not be canonical, but AFAIK
371 they do not occur in any MBCS (so it would only matter if we implement
372 UTF-8, and then only if Windows has surrogate pairs switched on.).
373 */
374
375 #include <rlocale.h> /* redefines isw* functions */
376
377 #ifdef Win32
378 #include "rgui_UTF8.h"
379 #endif
380
381 /* strlen() using escaped rather than literal form.
382 In MBCS locales it works in characters, and reports in display width.
383 Rstrwid is also used in printarray.c.
384
385 This supported embedded nuls when we had those.
386 */
387 attribute_hidden
Rstrwid(const char * str,int slen,cetype_t ienc,int quote)388 int Rstrwid(const char *str, int slen, cetype_t ienc, int quote)
389 {
390 const char *p = str;
391 int len = 0, i;
392
393 if(ienc == CE_BYTES) { // not currently used for that encoding
394 for (i = 0; i < slen; i++) {
395 unsigned char k = str[i];
396 if (k >= 0x20 && k < 0x80) len += 1;
397 else len += 4;
398 }
399 return len;
400 }
401 /* Future-proof: currently that is all Rstrlen calls it with,
402 and printarray has CE_NATIVE explicitly */
403 if(ienc > 2) // CE_NATIVE, CE_UTF8, CE_BYTES are supported
404 warning("unsupported encoding (%d) in Rstrwid", ienc);
405 if(mbcslocale || ienc == CE_UTF8) {
406 Rboolean useUTF8 = (ienc == CE_UTF8);
407 mbstate_t mb_st;
408
409 if(!useUTF8) mbs_init(&mb_st);
410 for (i = 0; i < slen; i++) {
411 unsigned int k; /* not wint_t as it might be signed */
412 wchar_t wc;
413 int res = useUTF8 ? (int) utf8toucs(&wc, p):
414 (int) mbrtowc(&wc, p, R_MB_CUR_MAX, NULL);
415 if(res >= 0) {
416 if (useUTF8 && IS_HIGH_SURROGATE(wc))
417 k = utf8toucs32(wc, p);
418 else
419 k = wc;
420 if(0x20 <= k && k < 0x7f && iswprint(k)) {
421 switch(wc) {
422 case L'\\':
423 len += 2;
424 break;
425 case L'\'':
426 case L'"':
427 case L'`':
428 len += (quote == *p) ? 2 : 1;
429 break;
430 default:
431 len++; /* assumes these are all width 1 */
432 break;
433 }
434 p++;
435 } else if (k < 0x80) {
436 switch(wc) {
437 case L'\a':
438 case L'\b':
439 case L'\f':
440 case L'\n':
441 case L'\r':
442 case L'\t':
443 case L'\v':
444 case L'\0':
445 len += 2; break;
446 default:
447 /* print in octal */
448 len += 4; break;
449 }
450 p++;
451 } else {
452 /* no need to worry about truncation as iswprint
453 * and wcwidth get replaced on Windows */
454 // conceivably an invalid \U escape could use 11 or 12
455 len += iswprint(k) ?
456 #ifdef USE_RI18N_WIDTH
457 Ri18n_wcwidth(k) :
458 #else
459 /* this is expected to return -1 for
460 non-printable (including unassigned)
461 characters: that is unlikely to occur,
462 although the system's idea of 'printing'
463 may differ from the internal tables */
464 imax2(wcwidth((wchar_t) k), 0) :
465 #endif
466 (k > 0xffff ? 10 : 6);
467 i += (res - 1);
468 p += res;
469 }
470 } else { /* invalid char */
471 len += 4;
472 p++;
473 }
474 }
475 } else // not MBCS nor marked as UTF-8
476 for (i = 0; i < slen; i++) {
477 if((unsigned char) *p < 0x80) {
478 /* ASCII */
479 if(isprint((int)*p)) {
480 switch(*p) {
481 case '\\':
482 len += 2; break;
483 case '\'':
484 case '"':
485 case '`':
486 len += (quote == *p)? 2 : 1; break;
487 default:
488 len++; break;
489 }
490 } else switch(*p) {
491 case '\a':
492 case '\b':
493 case '\f':
494 case '\n':
495 case '\r':
496 case '\t':
497 case '\v':
498 case '\0':
499 len += 2; break;
500 default:
501 /* print in octal */
502 len += 4; break;
503 }
504 p++;
505 } else { /* 8 bit char */
506 #ifdef Win32 /* It seems Windows does not know what is printable! */
507 len++;
508 #else
509 len += isprint((int)*p) ? 1 : 4;
510 #endif
511 p++;
512 }
513 }
514
515 return len;
516 }
517
518 /* Match what EncodeString does with encodings */
519 attribute_hidden
Rstrlen(SEXP s,int quote)520 int Rstrlen(SEXP s, int quote)
521 {
522 cetype_t ienc = getCharCE(s);
523 if (ienc == CE_UTF8 || ienc == CE_BYTES)
524 return Rstrwid(CHAR(s), LENGTH(s), ienc, quote);
525 const void *vmax = vmaxget();
526 const char *p = translateChar(s);
527 int len = Rstrwid(p, (int)strlen(p), CE_NATIVE, quote);
528 vmaxset(vmax);
529 return len;
530 }
531
532 /* Here w is the minimum field width
533 If 'quote' is non-zero the result should be quoted (and internal quotes
534 escaped and NA strings handled differently).
535
536 EncodeString is called from EncodeElement, EncodeChar, cat() (for labels
537 when filling), to (auto)print character vectors, arrays, names and
538 CHARSXPs. It is also called by do_encodeString, but not from
539 format().
540 */
541
542 attribute_hidden
EncodeString(SEXP s,int w,int quote,Rprt_adj justify)543 const char *EncodeString(SEXP s, int w, int quote, Rprt_adj justify)
544 {
545 int i, cnt;
546 const char *p; char *q, buf[13];
547 cetype_t ienc = getCharCE(s);
548 Rboolean useUTF8 = w < 0;
549 const void *vmax = vmaxget();
550
551 if (w < 0) w = w + 1000000;
552
553 /* We have to do something like this as the result is returned, and
554 passed on by EncodeElement -- so no way could be end user be
555 responsible for freeing it. However, this is not thread-safe. */
556
557 static R_StringBuffer gBuffer = {NULL, 0, BUFSIZE};
558 R_StringBuffer *buffer = &gBuffer;
559
560 if (s == NA_STRING) {
561 p = quote ? CHAR(R_print.na_string) : CHAR(R_print.na_string_noquote);
562 cnt = (int)(quote ? strlen(CHAR(R_print.na_string)) :
563 strlen(CHAR(R_print.na_string_noquote)));
564 i = (quote ? Rstrlen(R_print.na_string, 0) :
565 Rstrlen(R_print.na_string_noquote, 0));
566 quote = 0;
567 } else {
568 if(IS_BYTES(s)) {
569 ienc = CE_NATIVE;
570 #ifdef Win32
571 if (WinUTF8out)
572 ienc = CE_UTF8;
573 #endif
574 p = CHAR(s);
575 cnt = (int) strlen(p);
576 const char *q;
577 char *pp = R_alloc(4*cnt+1, 1), *qq = pp, buf[5];
578 for (q = p; *q; q++) {
579 unsigned char k = (unsigned char) *q;
580 if (k >= 0x20 && k < 0x80) {
581 *qq++ = *q;
582 if (quote && *q == '"') cnt++;
583 } else {
584 snprintf(buf, 5, "\\x%02x", k);
585 for(int j = 0; j < 4; j++) *qq++ = buf[j];
586 cnt += 3;
587 }
588 }
589 *qq = '\0';
590 p = pp;
591 i = cnt;
592 #ifdef Win32
593 } else if(WinUTF8out) {
594 if(ienc == CE_UTF8) {
595 p = CHAR(s);
596 i = Rstrlen(s, quote);
597 cnt = LENGTH(s);
598 } else {
599 p = translateCharUTF8(s);
600 if(p == CHAR(s)) {
601 i = Rstrlen(s, quote);
602 cnt = LENGTH(s);
603 } else {
604 cnt = (int) strlen(p);
605 i = Rstrwid(p, cnt, CE_UTF8, quote);
606 }
607 ienc = CE_UTF8;
608 }
609 #endif
610 } else {
611 if (useUTF8 && ienc == CE_UTF8) {
612 p = CHAR(s);
613 i = Rstrlen(s, quote);
614 cnt = LENGTH(s);
615 } else {
616 ienc = CE_NATIVE;
617 p = translateChar(s);
618 if(p == CHAR(s)) {
619 i = Rstrlen(s, quote);
620 cnt = LENGTH(s);
621 } else {
622 cnt = (int) strlen(p);
623 i = Rstrwid(p, cnt, CE_NATIVE, quote);
624 }
625 }
626 }
627 }
628
629 /* We need enough space for the encoded string, including escapes.
630 Octal encoding turns one byte into four.
631 \u encoding can turn a multibyte into six or ten,
632 but it turns 2/3 into 6, and 4 (and perhaps 5/6) into 10.
633 Let's be wasteful here (the worst case appears to be an MBCS with
634 one byte for an upper-plane Unicode point output as ten bytes,
635 but I doubt that such an MBCS exists: two bytes is plausible).
636
637 +2 allows for quotes, +6 for UTF_8 escapes.
638 */
639 if(5.*cnt + 8 > (double) SIZE_MAX)
640 error(_("too large string (nchar=%d) => 5*nchar + 8 > SIZE_MAX"));
641 size_t q_len = 5*(size_t)cnt + 8;
642 if(q_len < w) q_len = (size_t) w;
643 q = R_AllocStringBuffer(q_len, buffer);
644
645 int b = w - i - (quote ? 2 : 0); /* total amount of padding */
646 if(justify == Rprt_adj_none) b = 0;
647 if(b > 0 && justify != Rprt_adj_left) {
648 int b0 = (justify == Rprt_adj_centre) ? b/2 : b;
649 for(i = 0 ; i < b0 ; i++) *q++ = ' ';
650 b -= b0;
651 }
652 if(quote) *q++ = (char) quote;
653 if(mbcslocale || ienc == CE_UTF8) {
654 Rboolean useUTF8 = (ienc == CE_UTF8);
655 mbstate_t mb_st;
656 #ifndef __STDC_ISO_10646__
657 Rboolean Unicode_warning = FALSE;
658 #endif
659 if(!useUTF8) mbs_init(&mb_st);
660 #ifdef Win32
661 else if(WinUTF8out) { memcpy(q, UTF8in, 3); q += 3; }
662 #endif
663 for (i = 0; i < cnt; i++) {
664 wchar_t wc;
665 int res = (int)(useUTF8 ? utf8toucs(&wc, p):
666 mbrtowc(&wc, p, R_MB_CUR_MAX, NULL));
667 if(res >= 0) { /* res = 0 is a terminator */
668 unsigned int k; /* not wint_t as it might be signed */
669 if (useUTF8 && IS_HIGH_SURROGATE(wc))
670 k = utf8toucs32(wc, p);
671 else
672 k = wc;
673 /* To be portable, treat \0 explicitly */
674 if(res == 0) {k = 0; wc = L'\0';}
675 if(0x20 <= k && k < 0x7f && iswprint(k)) {
676 switch(wc) {
677 case L'\\': *q++ = '\\'; *q++ = '\\'; p++; break;
678 case L'\'':
679 case L'"':
680 case L'`':
681 {
682 if(quote == *p) *q++ = '\\';
683 *q++ = *p++;
684 break;
685 }
686 default:
687 for(int j = 0; j < res; j++) *q++ = *p++;
688 break;
689 }
690 } else if (k < 0x80) {
691 /* ANSI Escapes */
692 switch(wc) {
693 case L'\a': *q++ = '\\'; *q++ = 'a'; break;
694 case L'\b': *q++ = '\\'; *q++ = 'b'; break;
695 case L'\f': *q++ = '\\'; *q++ = 'f'; break;
696 case L'\n': *q++ = '\\'; *q++ = 'n'; break;
697 case L'\r': *q++ = '\\'; *q++ = 'r'; break;
698 case L'\t': *q++ = '\\'; *q++ = 't'; break;
699 case L'\v': *q++ = '\\'; *q++ = 'v'; break;
700 case L'\0': *q++ = '\\'; *q++ = '0'; break;
701
702 default:
703 /* print in octal */
704 // gcc 7 requires cast here
705 snprintf(buf, 5, "\\%03o", (unsigned char)k);
706 for(int j = 0; j < 4; j++) *q++ = buf[j];
707 break;
708 }
709 p++;
710 } else {
711 /* wc could be an unpaired surrogate and this does
712 * not do the same as Rstrwid */
713 /* no need to worry about truncation as iswprint
714 * gets replaced on Windows */
715 if(iswprint(k)) {
716 /* The problem here is that wc may be
717 printable according to the Unicode tables,
718 but it may not be printable on the output
719 device concerned.
720
721 And the system iswprintf may not correspond
722 to the latest Unicode tables.
723 */
724 for(int j = 0; j < res; j++) *q++ = *p++;
725 } else {
726 # if !defined (__STDC_ISO_10646__) && !defined (Win32)
727 Unicode_warning = TRUE;
728 # endif
729 if(k > 0xffff)
730 snprintf(buf, 13, "\\U{%06x}", k);
731 else
732 snprintf(buf, 11, "\\u%04x", k);
733 int j = (int) strlen(buf);
734 memcpy(q, buf, j);
735 q += j;
736 p += res;
737 }
738 i += (res - 1);
739 }
740 } else { /* invalid char */
741 snprintf(q, 5, "\\x%02x", *((unsigned char *)p));
742 q += 4; p++;
743 }
744 }
745 #ifndef __STDC_ISO_10646__
746 if(Unicode_warning)
747 warning(_("it is not known that wchar_t is Unicode on this platform"));
748 #endif
749
750 } else
751 for (i = 0; i < cnt; i++) {
752
753 /* ASCII */
754 if((unsigned char) *p < 0x80) {
755 if(*p != '\t' && isprint((int)*p)) { /* Windows has \t as printable */
756 switch(*p) {
757 case '\\': *q++ = '\\'; *q++ = '\\'; break;
758 case '\'':
759 case '"':
760 case '`':
761 {
762 if(quote == *p) *q++ = '\\';
763 *q++ = *p;
764 break;
765 }
766 default: *q++ = *p; break;
767 }
768 } else switch(*p) {
769 /* ANSI Escapes */
770 case '\a': *q++ = '\\'; *q++ = 'a'; break;
771 case '\b': *q++ = '\\'; *q++ = 'b'; break;
772 case '\f': *q++ = '\\'; *q++ = 'f'; break;
773 case '\n': *q++ = '\\'; *q++ = 'n'; break;
774 case '\r': *q++ = '\\'; *q++ = 'r'; break;
775 case '\t': *q++ = '\\'; *q++ = 't'; break;
776 case '\v': *q++ = '\\'; *q++ = 'v'; break;
777 case '\0': *q++ = '\\'; *q++ = '0'; break;
778
779 default:
780 /* print in octal */
781 snprintf(buf, 5, "\\%03o", (unsigned char) *p);
782 for(int j = 0; j < 4; j++) *q++ = buf[j];
783 break;
784 }
785 p++;
786 } else { /* 8 bit char */
787 if(!isprint((int)*p & 0xff)) {
788 /* print in octal */
789 snprintf(buf, 5, "\\%03o", (unsigned char) *p);
790 for(int j = 0; j < 4; j++) *q++ = buf[j];
791 p++;
792 } else *q++ = *p++;
793 }
794 }
795
796 #ifdef Win32
797 if(WinUTF8out && ienc == CE_UTF8) { memcpy(q, UTF8out, 3); q += 3; }
798 #endif
799 if(quote) *q++ = (char) quote;
800 if(b > 0 && justify != Rprt_adj_right) {
801 for(i = 0 ; i < b ; i++) *q++ = ' ';
802 }
803 *q = '\0';
804
805 vmaxset(vmax);
806 return buffer->data;
807 }
808
809 /* EncodeElement is called by cat(), write.table() and deparsing. */
810
811 /* NB this is called by R.app even though it is in no public header, so
812 alter there if you alter this */
EncodeElement(SEXP x,int indx,int quote,char cdec)813 const char *EncodeElement(SEXP x, int indx, int quote, char cdec)
814 {
815 char dec[2];
816 dec[0] = cdec; dec[1] = '\0';
817 return EncodeElement0(x, indx, quote, dec);
818 }
819
EncodeElement0(SEXP x,R_xlen_t indx,int quote,const char * dec)820 const char *EncodeElement0(SEXP x, R_xlen_t indx, int quote, const char *dec)
821 {
822 int w, d, e, wi, di, ei;
823 const char *res;
824
825 switch(TYPEOF(x)) {
826 case LGLSXP:
827 formatLogical(&LOGICAL_RO(x)[indx], 1, &w);
828 res = EncodeLogical(LOGICAL_RO(x)[indx], w);
829 break;
830 case INTSXP:
831 formatInteger(&INTEGER_RO(x)[indx], 1, &w);
832 res = EncodeInteger(INTEGER_RO(x)[indx], w);
833 break;
834 case REALSXP:
835 formatReal(&REAL_RO(x)[indx], 1, &w, &d, &e, 0);
836 res = EncodeReal0(REAL_RO(x)[indx], w, d, e, dec);
837 break;
838 case STRSXP:
839 formatString(&STRING_PTR_RO(x)[indx], 1, &w, quote);
840 res = EncodeString(STRING_ELT(x, indx), w, quote, Rprt_adj_left);
841 break;
842 case CPLXSXP:
843 formatComplex(&COMPLEX_RO(x)[indx], 1, &w, &d, &e, &wi, &di, &ei, 0);
844 res = EncodeComplex(COMPLEX_RO(x)[indx], w, d, e, wi, di, ei, dec);
845 break;
846 case RAWSXP:
847 res = EncodeRaw(RAW_RO(x)[indx], "");
848 break;
849 default:
850 res = NULL; /* -Wall */
851 UNIMPLEMENTED_TYPE("EncodeElement", x);
852 }
853 return res;
854 }
855
856 /* EncodeChar is a simple wrapper for EncodeString
857 called by error messages to display CHARSXP values.
858
859 The pointer returned by EncodeChar points into an internal buffer
860 which is overwritten by subsequent calls to EncodeChar/EncodeString.
861 It is the responsibility of the caller to copy the result before
862 any subsequent call to EncodeChar/EncodeString may happen. Note that
863 particularly it is NOT safe to pass the result of EncodeChar as 3rd
864 argument to errorcall (errorcall_cpy can be used instead). */
865 //attribute_hidden
EncodeChar(SEXP x)866 const char *EncodeChar(SEXP x)
867 {
868 return EncodeString(x, 0, 0, Rprt_adj_left);
869 }
870
871
Rprintf(const char * format,...)872 void Rprintf(const char *format, ...)
873 {
874 va_list(ap);
875
876 va_start(ap, format);
877 Rvprintf(format, ap);
878 va_end(ap);
879 }
880
881 /*
882 REprintf is used by the error handler do not add
883 anything unless you're sure it won't
884 cause problems
885 */
REprintf(const char * format,...)886 void REprintf(const char *format, ...)
887 {
888 va_list(ap);
889 va_start(ap, format);
890 REvprintf(format, ap);
891 va_end(ap);
892 }
893
894 #if defined(HAVE_VASPRINTF) && !HAVE_DECL_VASPRINTF
895 int vasprintf(char **strp, const char *fmt, va_list ap)
896 #ifdef __cplusplus
897 throw ()
898 #endif
899 ;
900 #endif
901
902 # define R_BUFSIZE BUFSIZE
903 // similar to dummy_vfprintf in connections.c
904 attribute_hidden
Rcons_vprintf(const char * format,va_list arg)905 void Rcons_vprintf(const char *format, va_list arg)
906 {
907 char buf[R_BUFSIZE], *p = buf;
908 int res;
909 const void *vmax = vmaxget();
910 int usedRalloc = FALSE, usedVasprintf = FALSE;
911 va_list aq;
912
913 va_copy(aq, arg);
914 res = Rvsnprintf_mbcs(buf, R_BUFSIZE, format, aq);
915 va_end(aq);
916 #ifdef HAVE_VASPRINTF
917 if(res >= R_BUFSIZE || res < 0) {
918 res = vasprintf(&p, format, arg);
919 if (res < 0) {
920 p = buf;
921 warning(_("printing of extremely long output is truncated"));
922 } else usedVasprintf = TRUE;
923 }
924 #else
925 if(res >= R_BUFSIZE) { /* res is the desired output length */
926 usedRalloc = TRUE;
927 /* dummy_vfprintf protects against `res` being counted short; we do not
928 do that here */
929 p = R_alloc(res+1, sizeof(char));
930 vsprintf(p, format, arg);
931 } else if(res < 0) {
932 /* Some non-C99 conforming vsnprintf implementations return -1 on
933 truncation instead of only on error. */
934 usedRalloc = TRUE;
935 p = R_alloc(10*R_BUFSIZE, sizeof(char));
936 res = Rvsnprintf_mbcs(p, 10*R_BUFSIZE, format, arg);
937 if (res < 0 || res >= 10*R_BUFSIZE)
938 warning(_("printing of extremely long output is truncated"));
939 }
940 #endif /* HAVE_VASPRINTF */
941 R_WriteConsole(p, (int) strlen(p));
942 if(usedRalloc) vmaxset(vmax);
943 if(usedVasprintf) free(p);
944 }
945
Rvprintf(const char * format,va_list arg)946 void Rvprintf(const char *format, va_list arg)
947 {
948 int i=0, con_num=R_OutputCon;
949 Rconnection con;
950 va_list argcopy;
951 static int printcount = 0;
952
953 if (++printcount > 100) {
954 R_CheckUserInterrupt();
955 printcount = 0 ;
956 }
957
958 do{
959 con = getConnection(con_num);
960 va_copy(argcopy, arg);
961 /* Parentheses added for Fedora with -D_FORTIFY_SOURCE=2 */
962 (con->vfprintf)(con, format, argcopy);
963 va_end(argcopy);
964 con->fflush(con);
965 con_num = getActiveSink(i++);
966 } while(con_num>0);
967
968
969 }
970
971 /*
972 REvprintf is part of the error handler.
973 Do not change it unless you are SURE that
974 your changes are compatible with the
975 error handling mechanism.
976
977 It is also used in R_Suicide on Unix.
978 */
979
REvprintf(const char * format,va_list arg)980 void REvprintf(const char *format, va_list arg)
981 {
982 static char *malloc_buf = NULL;
983
984 if (malloc_buf) {
985 char *tmp = malloc_buf;
986 malloc_buf = NULL;
987 free(tmp);
988 }
989 if(R_ErrorCon != 2) {
990 Rconnection con = getConnection_no_err(R_ErrorCon);
991 if(con == NULL) {
992 /* should never happen, but in case of corruption... */
993 R_ErrorCon = 2;
994 } else {
995 /* Parentheses added for FC4 with gcc4 and -D_FORTIFY_SOURCE=2 */
996 (con->vfprintf)(con, format, arg);
997 con->fflush(con);
998 return;
999 }
1000 }
1001 if(R_Consolefile) {
1002 /* try to interleave stdout and stderr carefully */
1003 if(R_Outputfile && (R_Outputfile != R_Consolefile)) {
1004 fflush(R_Outputfile);
1005 vfprintf(R_Consolefile, format, arg);
1006 /* normally R_Consolefile is stderr and so unbuffered, but
1007 it can be something else (e.g. stdout on Win9x) */
1008 fflush(R_Consolefile);
1009 } else vfprintf(R_Consolefile, format, arg);
1010 } else {
1011 char buf[BUFSIZE];
1012 Rboolean printed = FALSE;
1013 va_list aq;
1014
1015 va_copy(aq, arg);
1016 int res = Rvsnprintf_mbcs(buf, BUFSIZE, format, aq);
1017 va_end(aq);
1018 if (res >= BUFSIZE) {
1019 /* A very long string has been truncated. Try to allocate a large
1020 buffer for it to print it in full. Do not use R_alloc() as this
1021 can be run due to memory allocation error from the R heap.
1022 Do not use contexts and do not throw any errors nor warnings
1023 as this may be run from error handling. */
1024 int size = res + 1;
1025 malloc_buf = (char *)malloc(size * sizeof(char));
1026 if (malloc_buf) {
1027 res = vsnprintf(malloc_buf, size, format, arg);
1028 if (res == size - 1) {
1029 R_WriteConsoleEx(malloc_buf, res, 1);
1030 printed = TRUE;
1031 }
1032 char *tmp = malloc_buf;
1033 malloc_buf = NULL;
1034 free(tmp);
1035 }
1036 }
1037 if (!printed)
1038 R_WriteConsoleEx(buf, (int) strlen(buf), 1);
1039 }
1040 }
1041
IndexWidth(R_xlen_t n)1042 int attribute_hidden IndexWidth(R_xlen_t n)
1043 {
1044 return (int) (log10(n + 0.5) + 1);
1045 }
1046
VectorIndex(R_xlen_t i,int w)1047 void attribute_hidden VectorIndex(R_xlen_t i, int w)
1048 {
1049 /* print index label "[`i']" , using total width `w' (left filling blanks) */
1050 Rprintf("%*s[%ld]", w-IndexWidth(i)-2, "", i);
1051 }
1052