1 //========================================================================
2 //
3 // GooString.cc
4 //
5 // Simple variable-length string type.
6 //
7 // Copyright 1996-2003 Glyph & Cog, LLC
8 //
9 //========================================================================
10 
11 //========================================================================
12 //
13 // Modified under the Poppler project - http://poppler.freedesktop.org
14 //
15 // All changes made under the Poppler project to this file are licensed
16 // under GPL version 2 or later
17 //
18 // Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com>
19 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
20 // Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net>
21 // Copyright (C) 2008-2011, 2016-2018, 2021 Albert Astals Cid <aacid@kde.org>
22 // Copyright (C) 2011 Kenji Uno <ku@digitaldolphins.jp>
23 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
24 // Copyright (C) 2012, 2017 Adrian Johnson <ajohnson@redneon.com>
25 // Copyright (C) 2012 Pino Toscano <pino@kde.org>
26 // Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
27 // Copyright (C) 2015 William Bader <williambader@hotmail.com>
28 // Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
29 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
30 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
31 // Copyright (C) 2018 Greg Knight <lyngvi@gmail.com>
32 // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
33 //
34 // To see a description of the changes please see the Changelog file that
35 // came with your tarball or type make ChangeLog if you are building from git
36 //
37 //========================================================================
38 
39 #include <config.h>
40 
41 #include <cassert>
42 #include <cctype>
43 #include <cmath>
44 #include <cstddef>
45 #include <cstdlib>
46 #include <cstring>
47 
48 #include "gmem.h"
49 #include "GooString.h"
50 
51 //------------------------------------------------------------------------
52 
namespace {

// Storage for one argument pulled from the va_list by GooString::appendfv.
// Only the member matching the format type used when the argument was first
// fetched holds a meaningful value.
union GooStringFormatArg {
    int i;
    unsigned int ui;
    long l;
    unsigned long ul;
    long long ll;
    unsigned long long ull;
    double f;
    char c;
    char *s;
    GooString *gs;
};

// Format type codes recognised inside a "{idx:...}" specifier.
// The enumerators are used as indices into formatStrings below, so the two
// lists must be kept in exactly the same order.
enum GooStringFormatType
{
    fmtIntDecimal,
    fmtIntHex,
    fmtIntHexUpper,
    fmtIntOctal,
    fmtIntBinary,
    fmtUIntDecimal,
    fmtUIntHex,
    fmtUIntHexUpper,
    fmtUIntOctal,
    fmtUIntBinary,
    fmtLongDecimal,
    fmtLongHex,
    fmtLongHexUpper,
    fmtLongOctal,
    fmtLongBinary,
    fmtULongDecimal,
    fmtULongHex,
    fmtULongHexUpper,
    fmtULongOctal,
    fmtULongBinary,
    fmtLongLongDecimal,
    fmtLongLongHex,
    fmtLongLongHexUpper,
    fmtLongLongOctal,
    fmtLongLongBinary,
    fmtULongLongDecimal,
    fmtULongLongHex,
    fmtULongLongHexUpper,
    fmtULongLongOctal,
    fmtULongLongBinary,
    fmtDouble,
    fmtDoubleTrimSmallAware,
    fmtDoubleTrim,
    fmtChar,
    fmtString,
    fmtGooString,
    fmtSpace
};

// Type suffixes, indexed by GooStringFormatType and terminated by nullptr.
// appendfv picks the first entry that is a prefix of the remaining format
// text, so an entry that is itself a prefix of another one has to come
// later in the table (this is why "gs" is listed before "g").
const char *const formatStrings[] = { "d",   "x",   "X",   "o",   "b",   "ud",  "ux",   "uX",   "uo",   "ub",   "ld",   "lx", "lX", "lo", "lb", "uld", "ulx", "ulX", "ulo",
                                      "ulb", "lld", "llx", "llX", "llo", "llb", "ulld", "ullx", "ullX", "ullo", "ullb", "f",  "gs", "g",  "c",  "s",   "t",   "w",   nullptr };

// The helpers below are defined further down in this file. Each one writes
// its text into the tail of buf and reports where it starts through *p and
// how long it is through *len.

// Signed integer in the given base; upperCase selects 'A'-'F' over 'a'-'f'.
void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase = false);

// Unsigned counterpart of formatInt.
void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase = false);

// Fixed-point rendering with prec fractional digits; trim drops trailing zeros.
void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len);

// Like formatDouble, but raises prec for magnitudes below 0.1.
void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len);

}
121 
122 //------------------------------------------------------------------------
123 
format(const char * fmt,...)124 GooString *GooString::format(const char *fmt, ...)
125 {
126     auto *s = new GooString();
127 
128     va_list argList;
129     va_start(argList, fmt);
130     s->appendfv(fmt, argList);
131     va_end(argList);
132 
133     return s;
134 }
135 
formatv(const char * fmt,va_list argList)136 GooString *GooString::formatv(const char *fmt, va_list argList)
137 {
138     auto *s = new GooString();
139 
140     s->appendfv(fmt, argList);
141 
142     return s;
143 }
144 
appendf(const char * fmt,...)145 GooString *GooString::appendf(const char *fmt, ...)
146 {
147     va_list argList;
148     va_start(argList, fmt);
149     appendfv(fmt, argList);
150     va_end(argList);
151 
152     return this;
153 }
154 
appendfv(const char * fmt,va_list argList)155 GooString *GooString::appendfv(const char *fmt, va_list argList)
156 {
157     GooStringFormatArg *args;
158     int argsLen, argsSize;
159     GooStringFormatArg arg;
160     int idx, width, prec;
161     bool reverseAlign, zeroFill;
162     GooStringFormatType ft;
163     char buf[65];
164     int len, i;
165     const char *p0, *p1;
166     const char *str;
167     GooStringFormatArg argsBuf[8];
168 
169     argsLen = 0;
170     argsSize = sizeof(argsBuf) / sizeof(argsBuf[0]);
171     args = argsBuf;
172 
173     p0 = fmt;
174     while (*p0) {
175         if (*p0 == '{') {
176             ++p0;
177             if (*p0 == '{') {
178                 ++p0;
179                 append('{');
180             } else {
181 
182                 // parse the format string
183                 if (!(*p0 >= '0' && *p0 <= '9')) {
184                     break;
185                 }
186                 idx = *p0 - '0';
187                 for (++p0; *p0 >= '0' && *p0 <= '9'; ++p0) {
188                     idx = 10 * idx + (*p0 - '0');
189                 }
190                 if (*p0 != ':') {
191                     break;
192                 }
193                 ++p0;
194                 if (*p0 == '-') {
195                     reverseAlign = true;
196                     ++p0;
197                 } else {
198                     reverseAlign = false;
199                 }
200                 width = 0;
201                 zeroFill = *p0 == '0';
202                 for (; *p0 >= '0' && *p0 <= '9'; ++p0) {
203                     width = 10 * width + (*p0 - '0');
204                 }
205                 if (width < 0) {
206                     width = 0;
207                 }
208                 if (*p0 == '.') {
209                     ++p0;
210                     prec = 0;
211                     for (; *p0 >= '0' && *p0 <= '9'; ++p0) {
212                         prec = 10 * prec + (*p0 - '0');
213                     }
214                 } else {
215                     prec = 0;
216                 }
217                 for (ft = (GooStringFormatType)0; formatStrings[ft]; ft = (GooStringFormatType)(ft + 1)) {
218                     if (!strncmp(p0, formatStrings[ft], strlen(formatStrings[ft]))) {
219                         break;
220                     }
221                 }
222                 if (!formatStrings[ft]) {
223                     break;
224                 }
225                 p0 += strlen(formatStrings[ft]);
226                 if (*p0 != '}') {
227                     break;
228                 }
229                 ++p0;
230 
231                 // fetch the argument
232                 if (idx > argsLen) {
233                     break;
234                 }
235                 if (idx == argsLen) {
236                     if (argsLen == argsSize) {
237                         argsSize *= 2;
238                         if (args == argsBuf) {
239                             args = (GooStringFormatArg *)gmallocn(argsSize, sizeof(GooStringFormatArg));
240                             memcpy(args, argsBuf, argsLen * sizeof(GooStringFormatArg));
241                         } else {
242                             args = (GooStringFormatArg *)greallocn(args, argsSize, sizeof(GooStringFormatArg));
243                         }
244                     }
245                     switch (ft) {
246                     case fmtIntDecimal:
247                     case fmtIntHex:
248                     case fmtIntHexUpper:
249                     case fmtIntOctal:
250                     case fmtIntBinary:
251                     case fmtSpace:
252                         args[argsLen].i = va_arg(argList, int);
253                         break;
254                     case fmtUIntDecimal:
255                     case fmtUIntHex:
256                     case fmtUIntHexUpper:
257                     case fmtUIntOctal:
258                     case fmtUIntBinary:
259                         args[argsLen].ui = va_arg(argList, unsigned int);
260                         break;
261                     case fmtLongDecimal:
262                     case fmtLongHex:
263                     case fmtLongHexUpper:
264                     case fmtLongOctal:
265                     case fmtLongBinary:
266                         args[argsLen].l = va_arg(argList, long);
267                         break;
268                     case fmtULongDecimal:
269                     case fmtULongHex:
270                     case fmtULongHexUpper:
271                     case fmtULongOctal:
272                     case fmtULongBinary:
273                         args[argsLen].ul = va_arg(argList, unsigned long);
274                         break;
275                     case fmtLongLongDecimal:
276                     case fmtLongLongHex:
277                     case fmtLongLongHexUpper:
278                     case fmtLongLongOctal:
279                     case fmtLongLongBinary:
280                         args[argsLen].ll = va_arg(argList, long long);
281                         break;
282                     case fmtULongLongDecimal:
283                     case fmtULongLongHex:
284                     case fmtULongLongHexUpper:
285                     case fmtULongLongOctal:
286                     case fmtULongLongBinary:
287                         args[argsLen].ull = va_arg(argList, unsigned long long);
288                         break;
289                     case fmtDouble:
290                     case fmtDoubleTrim:
291                     case fmtDoubleTrimSmallAware:
292                         args[argsLen].f = va_arg(argList, double);
293                         break;
294                     case fmtChar:
295                         args[argsLen].c = (char)va_arg(argList, int);
296                         break;
297                     case fmtString:
298                         args[argsLen].s = va_arg(argList, char *);
299                         break;
300                     case fmtGooString:
301                         args[argsLen].gs = va_arg(argList, GooString *);
302                         break;
303                     }
304                     ++argsLen;
305                 }
306 
307                 // format the argument
308                 arg = args[idx];
309                 switch (ft) {
310                 case fmtIntDecimal:
311                     formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
312                     break;
313                 case fmtIntHex:
314                     formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
315                     break;
316                 case fmtIntHexUpper:
317                     formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
318                     break;
319                 case fmtIntOctal:
320                     formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
321                     break;
322                 case fmtIntBinary:
323                     formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
324                     break;
325                 case fmtUIntDecimal:
326                     formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
327                     break;
328                 case fmtUIntHex:
329                     formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
330                     break;
331                 case fmtUIntHexUpper:
332                     formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
333                     break;
334                 case fmtUIntOctal:
335                     formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
336                     break;
337                 case fmtUIntBinary:
338                     formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
339                     break;
340                 case fmtLongDecimal:
341                     formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
342                     break;
343                 case fmtLongHex:
344                     formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
345                     break;
346                 case fmtLongHexUpper:
347                     formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
348                     break;
349                 case fmtLongOctal:
350                     formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
351                     break;
352                 case fmtLongBinary:
353                     formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
354                     break;
355                 case fmtULongDecimal:
356                     formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
357                     break;
358                 case fmtULongHex:
359                     formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
360                     break;
361                 case fmtULongHexUpper:
362                     formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
363                     break;
364                 case fmtULongOctal:
365                     formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
366                     break;
367                 case fmtULongBinary:
368                     formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
369                     break;
370                 case fmtLongLongDecimal:
371                     formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
372                     break;
373                 case fmtLongLongHex:
374                     formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
375                     break;
376                 case fmtLongLongHexUpper:
377                     formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
378                     break;
379                 case fmtLongLongOctal:
380                     formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
381                     break;
382                 case fmtLongLongBinary:
383                     formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
384                     break;
385                 case fmtULongLongDecimal:
386                     formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
387                     break;
388                 case fmtULongLongHex:
389                     formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
390                     break;
391                 case fmtULongLongHexUpper:
392                     formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
393                     break;
394                 case fmtULongLongOctal:
395                     formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
396                     break;
397                 case fmtULongLongBinary:
398                     formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
399                     break;
400                 case fmtDouble:
401                     formatDouble(arg.f, buf, sizeof(buf), prec, false, &str, &len);
402                     break;
403                 case fmtDoubleTrim:
404                     formatDouble(arg.f, buf, sizeof(buf), prec, true, &str, &len);
405                     break;
406                 case fmtDoubleTrimSmallAware:
407                     formatDoubleSmallAware(arg.f, buf, sizeof(buf), prec, true, &str, &len);
408                     break;
409                 case fmtChar:
410                     buf[0] = arg.c;
411                     str = buf;
412                     len = 1;
413                     reverseAlign = !reverseAlign;
414                     break;
415                 case fmtString:
416                     str = arg.s;
417                     len = strlen(str);
418                     reverseAlign = !reverseAlign;
419                     break;
420                 case fmtGooString:
421                     if (arg.gs) {
422                         str = arg.gs->c_str();
423                         len = arg.gs->getLength();
424                     } else {
425                         str = "(null)";
426                         len = 6;
427                     }
428                     reverseAlign = !reverseAlign;
429                     break;
430                 case fmtSpace:
431                     str = buf;
432                     len = 0;
433                     width = arg.i;
434                     break;
435                 }
436 
437                 // append the formatted arg, handling width and alignment
438                 if (!reverseAlign && len < width) {
439                     for (i = len; i < width; ++i) {
440                         append(' ');
441                     }
442                 }
443                 append(str, len);
444                 if (reverseAlign && len < width) {
445                     for (i = len; i < width; ++i) {
446                         append(' ');
447                     }
448                 }
449             }
450 
451         } else if (*p0 == '}') {
452             ++p0;
453             if (*p0 == '}') {
454                 ++p0;
455             }
456             append('}');
457 
458         } else {
459             for (p1 = p0 + 1; *p1 && *p1 != '{' && *p1 != '}'; ++p1)
460                 ;
461             append(p0, p1 - p0);
462             p0 = p1;
463         }
464     }
465 
466     if (args != argsBuf) {
467         gfree(args);
468     }
469 
470     return this;
471 }
472 
namespace {

// Digit characters indexed by digit value; enough for bases up to 16.
const char lowerCaseDigits[17] = "0123456789abcdef";
const char upperCaseDigits[17] = "0123456789ABCDEF";

// Renders the signed integer x in the given base.
//
// The text is built backwards from the end of buf (bufSize bytes) and is NOT
// NUL-terminated; *p receives a pointer to its first character and *len its
// length. With zeroFill set, zeros are inserted after the sign until the
// text, sign included, is width characters long (or buf is exhausted).
// upperCase selects 'A'-'F' instead of 'a'-'f' for digits above 9.
void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
    const bool neg = x < 0;
    // buf[0] stays reserved for the '-' of a negative value
    const int start = neg ? 1 : 0;

    // Take the magnitude in unsigned arithmetic: evaluating -x on the signed
    // value overflows (undefined behaviour) when x is LLONG_MIN.
    unsigned long long abs_x = static_cast<unsigned long long>(x);
    if (neg) {
        abs_x = 0ULL - abs_x;
    }

    int i = bufSize;
    if (abs_x == 0) {
        buf[--i] = '0';
    } else {
        while (i > start && abs_x) {
            buf[--i] = vals[abs_x % base];
            abs_x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > start && j < width - start; ++j) {
            buf[--i] = '0';
        }
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Unsigned counterpart of formatInt: same buffer convention (text at the end
// of buf, no terminating NUL, start and length reported via *p and *len).
void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
    int i, j;

    i = bufSize;
    if (x == 0) {
        buf[--i] = '0';
    } else {
        while (i > 0 && x) {
            buf[--i] = vals[x % base];
            x /= base;
        }
    }
    if (zeroFill) {
        for (j = bufSize - i; i > 0 && j < width; ++j) {
            buf[--i] = '0';
        }
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Renders x in fixed-point notation with prec digits after the decimal
// point, rounding half away from zero.
//
// With trim set, trailing zeros of the fraction are dropped, and the decimal
// point too if no fractional digit remains. The text is built backwards from
// the end of buf and is not NUL-terminated; *p / *len report where it starts
// and how long it is. Digits that do not fit in buf are silently dropped
// from the most significant end.
// NOTE(review): there is no check for NaN or infinity here; callers should
// pass finite values only.
void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    bool neg, started;
    double x2;
    int d, i, j;

    if ((neg = x < 0)) {
        x = -x;
    }
    // scale so that every requested fractional digit becomes an integer digit
    x = floor(x * pow(10.0, prec) + 0.5);
    i = bufSize;
    // when trimming, output starts at the first non-zero fractional digit
    started = !trim;
    for (j = 0; j < prec && i > 1; ++j) {
        x2 = floor(0.1 * (x + 0.5));
        d = (int)floor(x - 10 * x2 + 0.5);
        if (started || d != 0) {
            buf[--i] = '0' + d;
            started = true;
        }
        x = x2;
    }
    if (i > 1 && started) {
        buf[--i] = '.';
    }
    // integer part: at least one digit, and buf[0] is kept free for the sign
    if (i > 1) {
        do {
            x2 = floor(0.1 * (x + 0.5));
            d = (int)floor(x - 10 * x2 + 0.5);
            buf[--i] = '0' + d;
            x = x2;
        } while (i > 1 && x);
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// formatDouble variant for small magnitudes: while |x| is below 0.1, prec is
// raised by one per leading fractional zero (never beyond 16) so that the
// requested number of significant digits is kept. Values with |x| >= 0.1 are
// formatted with prec unchanged.
void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    // the loop body never runs for |x| >= 0.1, so no separate branch is needed
    for (double absX = fabs(x); absX < 0.1 && prec < 16; absX *= 10) {
        ++prec;
    }
    formatDouble(x, buf, bufSize, prec, trim, p, len);
}

}
589 
lowerCase()590 GooString *GooString::lowerCase()
591 {
592     for (auto &c : *this) {
593         if (std::isupper(c)) {
594             c = std::tolower(c);
595         }
596     }
597 
598     return this;
599 }
600 
prependUnicodeMarker()601 void GooString::prependUnicodeMarker()
602 {
603     insert(0, "\xFE\xFF", 2);
604 }
605 
startsWith(const char * prefix) const606 bool GooString::startsWith(const char *prefix) const
607 {
608     const auto len = size();
609     const auto prefixLen = std::strlen(prefix);
610 
611     if (len < prefixLen)
612         return false;
613 
614     return static_cast<const std::string &>(*this).compare(0, prefixLen, prefix) == 0;
615 }
616 
endsWith(const char * suffix) const617 bool GooString::endsWith(const char *suffix) const
618 {
619     const auto len = size();
620     const auto suffixLen = std::strlen(suffix);
621 
622     if (len < suffixLen)
623         return false;
624 
625     return static_cast<const std::string &>(*this).compare(len - suffixLen, suffixLen, suffix) == 0;
626 }
627 
sanitizedName(bool psmode) const628 GooString *GooString::sanitizedName(bool psmode) const
629 {
630     auto *name = new GooString();
631 
632     if (psmode) {
633         // ghostscript chokes on names that begin with out-of-limits
634         // numbers, e.g., 1e4foo is handled correctly (as a name), but
635         // 1e999foo generates a limitcheck error
636         const auto c = getChar(0);
637         if (c >= '0' && c <= '9') {
638             name->append('f');
639         }
640     }
641 
642     for (const auto c : *this) {
643         if (c <= (char)0x20 || c >= (char)0x7f || c == ' ' || c == '(' || c == ')' || c == '<' || c == '>' || c == '[' || c == ']' || c == '{' || c == '}' || c == '/' || c == '%' || c == '#') {
644             char buf[8];
645             sprintf(buf, "#%02x", c & 0xff);
646             name->append(buf);
647         } else {
648             name->append(c);
649         }
650     }
651 
652     return name;
653 }
654