1 //========================================================================
2 //
3 // GooString.cc
4 //
5 // Simple variable-length string type.
6 //
7 // Copyright 1996-2003 Glyph & Cog, LLC
8 //
9 //========================================================================
10
11 //========================================================================
12 //
13 // Modified under the Poppler project - http://poppler.freedesktop.org
14 //
15 // All changes made under the Poppler project to this file are licensed
16 // under GPL version 2 or later
17 //
18 // Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com>
19 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
20 // Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net>
21 // Copyright (C) 2008-2011, 2016-2018, 2021 Albert Astals Cid <aacid@kde.org>
22 // Copyright (C) 2011 Kenji Uno <ku@digitaldolphins.jp>
23 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
24 // Copyright (C) 2012, 2017 Adrian Johnson <ajohnson@redneon.com>
25 // Copyright (C) 2012 Pino Toscano <pino@kde.org>
26 // Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
27 // Copyright (C) 2015 William Bader <williambader@hotmail.com>
28 // Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
29 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
30 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
31 // Copyright (C) 2018 Greg Knight <lyngvi@gmail.com>
32 // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
33 //
34 // To see a description of the changes please see the Changelog file that
35 // came with your tarball or type make ChangeLog if you are building from git
36 //
37 //========================================================================
38
39 #include <config.h>
40
41 #include <cassert>
42 #include <cctype>
43 #include <cmath>
44 #include <cstddef>
45 #include <cstdlib>
46 #include <cstring>
47
48 #include "gmem.h"
49 #include "GooString.h"
50
51 //------------------------------------------------------------------------
52
53 namespace {
54
55 union GooStringFormatArg {
56 int i;
57 unsigned int ui;
58 long l;
59 unsigned long ul;
60 long long ll;
61 unsigned long long ull;
62 double f;
63 char c;
64 char *s;
65 GooString *gs;
66 };
67
68 enum GooStringFormatType
69 {
70 fmtIntDecimal,
71 fmtIntHex,
72 fmtIntHexUpper,
73 fmtIntOctal,
74 fmtIntBinary,
75 fmtUIntDecimal,
76 fmtUIntHex,
77 fmtUIntHexUpper,
78 fmtUIntOctal,
79 fmtUIntBinary,
80 fmtLongDecimal,
81 fmtLongHex,
82 fmtLongHexUpper,
83 fmtLongOctal,
84 fmtLongBinary,
85 fmtULongDecimal,
86 fmtULongHex,
87 fmtULongHexUpper,
88 fmtULongOctal,
89 fmtULongBinary,
90 fmtLongLongDecimal,
91 fmtLongLongHex,
92 fmtLongLongHexUpper,
93 fmtLongLongOctal,
94 fmtLongLongBinary,
95 fmtULongLongDecimal,
96 fmtULongLongHex,
97 fmtULongLongHexUpper,
98 fmtULongLongOctal,
99 fmtULongLongBinary,
100 fmtDouble,
101 fmtDoubleTrimSmallAware,
102 fmtDoubleTrim,
103 fmtChar,
104 fmtString,
105 fmtGooString,
106 fmtSpace
107 };
108
109 const char *const formatStrings[] = { "d", "x", "X", "o", "b", "ud", "ux", "uX", "uo", "ub", "ld", "lx", "lX", "lo", "lb", "uld", "ulx", "ulX", "ulo",
110 "ulb", "lld", "llx", "llX", "llo", "llb", "ulld", "ullx", "ullX", "ullo", "ullb", "f", "gs", "g", "c", "s", "t", "w", nullptr };
111
112 void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase = false);
113
114 void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase = false);
115
116 void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len);
117
118 void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len);
119
120 }
121
122 //------------------------------------------------------------------------
123
format(const char * fmt,...)124 GooString *GooString::format(const char *fmt, ...)
125 {
126 auto *s = new GooString();
127
128 va_list argList;
129 va_start(argList, fmt);
130 s->appendfv(fmt, argList);
131 va_end(argList);
132
133 return s;
134 }
135
formatv(const char * fmt,va_list argList)136 GooString *GooString::formatv(const char *fmt, va_list argList)
137 {
138 auto *s = new GooString();
139
140 s->appendfv(fmt, argList);
141
142 return s;
143 }
144
appendf(const char * fmt,...)145 GooString *GooString::appendf(const char *fmt, ...)
146 {
147 va_list argList;
148 va_start(argList, fmt);
149 appendfv(fmt, argList);
150 va_end(argList);
151
152 return this;
153 }
154
appendfv(const char * fmt,va_list argList)155 GooString *GooString::appendfv(const char *fmt, va_list argList)
156 {
157 GooStringFormatArg *args;
158 int argsLen, argsSize;
159 GooStringFormatArg arg;
160 int idx, width, prec;
161 bool reverseAlign, zeroFill;
162 GooStringFormatType ft;
163 char buf[65];
164 int len, i;
165 const char *p0, *p1;
166 const char *str;
167 GooStringFormatArg argsBuf[8];
168
169 argsLen = 0;
170 argsSize = sizeof(argsBuf) / sizeof(argsBuf[0]);
171 args = argsBuf;
172
173 p0 = fmt;
174 while (*p0) {
175 if (*p0 == '{') {
176 ++p0;
177 if (*p0 == '{') {
178 ++p0;
179 append('{');
180 } else {
181
182 // parse the format string
183 if (!(*p0 >= '0' && *p0 <= '9')) {
184 break;
185 }
186 idx = *p0 - '0';
187 for (++p0; *p0 >= '0' && *p0 <= '9'; ++p0) {
188 idx = 10 * idx + (*p0 - '0');
189 }
190 if (*p0 != ':') {
191 break;
192 }
193 ++p0;
194 if (*p0 == '-') {
195 reverseAlign = true;
196 ++p0;
197 } else {
198 reverseAlign = false;
199 }
200 width = 0;
201 zeroFill = *p0 == '0';
202 for (; *p0 >= '0' && *p0 <= '9'; ++p0) {
203 width = 10 * width + (*p0 - '0');
204 }
205 if (width < 0) {
206 width = 0;
207 }
208 if (*p0 == '.') {
209 ++p0;
210 prec = 0;
211 for (; *p0 >= '0' && *p0 <= '9'; ++p0) {
212 prec = 10 * prec + (*p0 - '0');
213 }
214 } else {
215 prec = 0;
216 }
217 for (ft = (GooStringFormatType)0; formatStrings[ft]; ft = (GooStringFormatType)(ft + 1)) {
218 if (!strncmp(p0, formatStrings[ft], strlen(formatStrings[ft]))) {
219 break;
220 }
221 }
222 if (!formatStrings[ft]) {
223 break;
224 }
225 p0 += strlen(formatStrings[ft]);
226 if (*p0 != '}') {
227 break;
228 }
229 ++p0;
230
231 // fetch the argument
232 if (idx > argsLen) {
233 break;
234 }
235 if (idx == argsLen) {
236 if (argsLen == argsSize) {
237 argsSize *= 2;
238 if (args == argsBuf) {
239 args = (GooStringFormatArg *)gmallocn(argsSize, sizeof(GooStringFormatArg));
240 memcpy(args, argsBuf, argsLen * sizeof(GooStringFormatArg));
241 } else {
242 args = (GooStringFormatArg *)greallocn(args, argsSize, sizeof(GooStringFormatArg));
243 }
244 }
245 switch (ft) {
246 case fmtIntDecimal:
247 case fmtIntHex:
248 case fmtIntHexUpper:
249 case fmtIntOctal:
250 case fmtIntBinary:
251 case fmtSpace:
252 args[argsLen].i = va_arg(argList, int);
253 break;
254 case fmtUIntDecimal:
255 case fmtUIntHex:
256 case fmtUIntHexUpper:
257 case fmtUIntOctal:
258 case fmtUIntBinary:
259 args[argsLen].ui = va_arg(argList, unsigned int);
260 break;
261 case fmtLongDecimal:
262 case fmtLongHex:
263 case fmtLongHexUpper:
264 case fmtLongOctal:
265 case fmtLongBinary:
266 args[argsLen].l = va_arg(argList, long);
267 break;
268 case fmtULongDecimal:
269 case fmtULongHex:
270 case fmtULongHexUpper:
271 case fmtULongOctal:
272 case fmtULongBinary:
273 args[argsLen].ul = va_arg(argList, unsigned long);
274 break;
275 case fmtLongLongDecimal:
276 case fmtLongLongHex:
277 case fmtLongLongHexUpper:
278 case fmtLongLongOctal:
279 case fmtLongLongBinary:
280 args[argsLen].ll = va_arg(argList, long long);
281 break;
282 case fmtULongLongDecimal:
283 case fmtULongLongHex:
284 case fmtULongLongHexUpper:
285 case fmtULongLongOctal:
286 case fmtULongLongBinary:
287 args[argsLen].ull = va_arg(argList, unsigned long long);
288 break;
289 case fmtDouble:
290 case fmtDoubleTrim:
291 case fmtDoubleTrimSmallAware:
292 args[argsLen].f = va_arg(argList, double);
293 break;
294 case fmtChar:
295 args[argsLen].c = (char)va_arg(argList, int);
296 break;
297 case fmtString:
298 args[argsLen].s = va_arg(argList, char *);
299 break;
300 case fmtGooString:
301 args[argsLen].gs = va_arg(argList, GooString *);
302 break;
303 }
304 ++argsLen;
305 }
306
307 // format the argument
308 arg = args[idx];
309 switch (ft) {
310 case fmtIntDecimal:
311 formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
312 break;
313 case fmtIntHex:
314 formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
315 break;
316 case fmtIntHexUpper:
317 formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
318 break;
319 case fmtIntOctal:
320 formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
321 break;
322 case fmtIntBinary:
323 formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
324 break;
325 case fmtUIntDecimal:
326 formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
327 break;
328 case fmtUIntHex:
329 formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
330 break;
331 case fmtUIntHexUpper:
332 formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
333 break;
334 case fmtUIntOctal:
335 formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
336 break;
337 case fmtUIntBinary:
338 formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
339 break;
340 case fmtLongDecimal:
341 formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
342 break;
343 case fmtLongHex:
344 formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
345 break;
346 case fmtLongHexUpper:
347 formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
348 break;
349 case fmtLongOctal:
350 formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
351 break;
352 case fmtLongBinary:
353 formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
354 break;
355 case fmtULongDecimal:
356 formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
357 break;
358 case fmtULongHex:
359 formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
360 break;
361 case fmtULongHexUpper:
362 formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
363 break;
364 case fmtULongOctal:
365 formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
366 break;
367 case fmtULongBinary:
368 formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
369 break;
370 case fmtLongLongDecimal:
371 formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
372 break;
373 case fmtLongLongHex:
374 formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
375 break;
376 case fmtLongLongHexUpper:
377 formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
378 break;
379 case fmtLongLongOctal:
380 formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
381 break;
382 case fmtLongLongBinary:
383 formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
384 break;
385 case fmtULongLongDecimal:
386 formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 10, &str, &len);
387 break;
388 case fmtULongLongHex:
389 formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
390 break;
391 case fmtULongLongHexUpper:
392 formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16, &str, &len, true);
393 break;
394 case fmtULongLongOctal:
395 formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
396 break;
397 case fmtULongLongBinary:
398 formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 2, &str, &len);
399 break;
400 case fmtDouble:
401 formatDouble(arg.f, buf, sizeof(buf), prec, false, &str, &len);
402 break;
403 case fmtDoubleTrim:
404 formatDouble(arg.f, buf, sizeof(buf), prec, true, &str, &len);
405 break;
406 case fmtDoubleTrimSmallAware:
407 formatDoubleSmallAware(arg.f, buf, sizeof(buf), prec, true, &str, &len);
408 break;
409 case fmtChar:
410 buf[0] = arg.c;
411 str = buf;
412 len = 1;
413 reverseAlign = !reverseAlign;
414 break;
415 case fmtString:
416 str = arg.s;
417 len = strlen(str);
418 reverseAlign = !reverseAlign;
419 break;
420 case fmtGooString:
421 if (arg.gs) {
422 str = arg.gs->c_str();
423 len = arg.gs->getLength();
424 } else {
425 str = "(null)";
426 len = 6;
427 }
428 reverseAlign = !reverseAlign;
429 break;
430 case fmtSpace:
431 str = buf;
432 len = 0;
433 width = arg.i;
434 break;
435 }
436
437 // append the formatted arg, handling width and alignment
438 if (!reverseAlign && len < width) {
439 for (i = len; i < width; ++i) {
440 append(' ');
441 }
442 }
443 append(str, len);
444 if (reverseAlign && len < width) {
445 for (i = len; i < width; ++i) {
446 append(' ');
447 }
448 }
449 }
450
451 } else if (*p0 == '}') {
452 ++p0;
453 if (*p0 == '}') {
454 ++p0;
455 }
456 append('}');
457
458 } else {
459 for (p1 = p0 + 1; *p1 && *p1 != '{' && *p1 != '}'; ++p1)
460 ;
461 append(p0, p1 - p0);
462 p0 = p1;
463 }
464 }
465
466 if (args != argsBuf) {
467 gfree(args);
468 }
469
470 return this;
471 }
472
namespace {

// Digit characters indexed by digit value, for bases up to 16.
const char lowerCaseDigits[17] = "0123456789abcdef";
const char upperCaseDigits[17] = "0123456789ABCDEF";

// Writes the signed integer x in the given base, right-aligned at the end of
// buf (bufSize bytes).  On return *p points at the first character and *len
// is the number of characters; the text is NOT NUL-terminated.
//   zeroFill / width  when zeroFill is set, the text is padded with '0' up
//                     to width characters, counting the sign
//   upperCase         selects "ABCDEF" instead of "abcdef" as digits
// One byte is always kept free for the '-' of a negative value.
void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
    const bool neg = x < 0;

    // Take the magnitude in unsigned arithmetic: negating the most negative
    // long long as a signed value would overflow, while the unsigned
    // subtraction is well defined and yields the right magnitude.
    unsigned long long abs_x = static_cast<unsigned long long>(x);
    if (neg) {
        abs_x = 0ULL - abs_x;
    }

    const int start = neg ? 1 : 0;
    int i = bufSize;
    if (abs_x == 0) {
        buf[--i] = '0';
    } else {
        while (i > start && abs_x) {
            buf[--i] = vals[abs_x % base];
            abs_x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > start && j < width - start; ++j) {
            buf[--i] = '0';
        }
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Unsigned counterpart of formatInt: writes x in the given base,
// right-aligned at the end of buf, optionally zero-filled up to width
// characters.  *p / *len describe the resulting text (not NUL-terminated).
void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;

    int i = bufSize;
    if (x == 0) {
        buf[--i] = '0';
    } else {
        while (i > 0 && x) {
            buf[--i] = vals[x % base];
            x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > 0 && j < width; ++j) {
            buf[--i] = '0';
        }
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Writes x in fixed-point notation with prec decimals, right-aligned at the
// end of buf.  The value is rounded to prec decimals first.  With trim set,
// trailing zero decimals are dropped, and so is the decimal point when no
// decimals remain.  *p / *len describe the resulting text (not
// NUL-terminated).  Digits that do not fit into buf are dropped; one byte is
// kept free for the sign.
void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    const bool neg = x < 0;
    if (neg) {
        x = -x;
    }

    // scale so that every wanted decimal becomes part of an integral value
    x = std::floor(x * std::pow(10.0, prec) + 0.5);

    int i = bufSize;

    // fractional digits, least significant first; while trimming, nothing is
    // emitted until the first non-zero digit shows up
    bool started = !trim;
    for (int j = 0; j < prec && i > 1; ++j) {
        const double x2 = std::floor(0.1 * (x + 0.5));
        const int d = static_cast<int>(std::floor(x - 10 * x2 + 0.5));
        if (started || d != 0) {
            buf[--i] = static_cast<char>('0' + d);
            started = true;
        }
        x = x2;
    }
    if (i > 1 && started) {
        buf[--i] = '.';
    }

    // integer digits; at least one is always written
    if (i > 1) {
        do {
            const double x2 = std::floor(0.1 * (x + 0.5));
            const int d = static_cast<int>(std::floor(x - 10 * x2 + 0.5));
            buf[--i] = static_cast<char>('0' + d);
            x = x2;
        } while (i > 1 && x);
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Like formatDouble, but for |x| < 0.1 the precision is raised by one for
// every factor of ten that |x| lies below 0.1 (stopping once prec reaches
// 16), so that small values keep some significant digits.
void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    double absX = std::fabs(x);
    while (absX < 0.1 && prec < 16) {
        absX = absX * 10;
        prec++;
    }
    formatDouble(x, buf, bufSize, prec, trim, p, len);
}

} // namespace
589
lowerCase()590 GooString *GooString::lowerCase()
591 {
592 for (auto &c : *this) {
593 if (std::isupper(c)) {
594 c = std::tolower(c);
595 }
596 }
597
598 return this;
599 }
600
prependUnicodeMarker()601 void GooString::prependUnicodeMarker()
602 {
603 insert(0, "\xFE\xFF", 2);
604 }
605
startsWith(const char * prefix) const606 bool GooString::startsWith(const char *prefix) const
607 {
608 const auto len = size();
609 const auto prefixLen = std::strlen(prefix);
610
611 if (len < prefixLen)
612 return false;
613
614 return static_cast<const std::string &>(*this).compare(0, prefixLen, prefix) == 0;
615 }
616
endsWith(const char * suffix) const617 bool GooString::endsWith(const char *suffix) const
618 {
619 const auto len = size();
620 const auto suffixLen = std::strlen(suffix);
621
622 if (len < suffixLen)
623 return false;
624
625 return static_cast<const std::string &>(*this).compare(len - suffixLen, suffixLen, suffix) == 0;
626 }
627
sanitizedName(bool psmode) const628 GooString *GooString::sanitizedName(bool psmode) const
629 {
630 auto *name = new GooString();
631
632 if (psmode) {
633 // ghostscript chokes on names that begin with out-of-limits
634 // numbers, e.g., 1e4foo is handled correctly (as a name), but
635 // 1e999foo generates a limitcheck error
636 const auto c = getChar(0);
637 if (c >= '0' && c <= '9') {
638 name->append('f');
639 }
640 }
641
642 for (const auto c : *this) {
643 if (c <= (char)0x20 || c >= (char)0x7f || c == ' ' || c == '(' || c == ')' || c == '<' || c == '>' || c == '[' || c == ']' || c == '{' || c == '}' || c == '/' || c == '%' || c == '#') {
644 char buf[8];
645 sprintf(buf, "#%02x", c & 0xff);
646 name->append(buf);
647 } else {
648 name->append(c);
649 }
650 }
651
652 return name;
653 }
654