1 #include "mupdf/fitz.h"
2
3 #include <string.h>
4 #include <errno.h>
5 #include <math.h>
6 #include <float.h>
7 #include <stdlib.h>
8
9 #ifndef PATH_MAX
10 #define PATH_MAX 4096
11 #endif
12
13 #ifdef _WIN32
14 #include <windows.h> /* for MultiByteToWideChar etc. */
15 #endif
16
17 static inline int
fz_tolower(int c)18 fz_tolower(int c)
19 {
20 if (c >= 'A' && c <= 'Z')
21 return c + 32;
22 return c;
23 }
24
25 size_t
fz_strnlen(const char * s,size_t n)26 fz_strnlen(const char *s, size_t n)
27 {
28 const char *p = memchr(s, 0, n);
29 return p ? (size_t) (p - s) : n;
30 }
31
32 int
fz_strncasecmp(const char * a,const char * b,size_t n)33 fz_strncasecmp(const char *a, const char *b, size_t n)
34 {
35 if (!n--)
36 return 0;
37 for (; *a && *b && n && (*a == *b || fz_tolower(*a) == fz_tolower(*b)); a++, b++, n--)
38 ;
39 return fz_tolower(*a) - fz_tolower(*b);
40 }
41
42 int
fz_strcasecmp(const char * a,const char * b)43 fz_strcasecmp(const char *a, const char *b)
44 {
45 while (fz_tolower(*a) == fz_tolower(*b))
46 {
47 if (*a++ == 0)
48 return 0;
49 b++;
50 }
51 return fz_tolower(*a) - fz_tolower(*b);
52 }
53
54 char *
fz_strsep(char ** stringp,const char * delim)55 fz_strsep(char **stringp, const char *delim)
56 {
57 char *ret = *stringp;
58 if (!ret) return NULL;
59 if ((*stringp = strpbrk(*stringp, delim)) != NULL)
60 *((*stringp)++) = '\0';
61 return ret;
62 }
63
64 size_t
fz_strlcpy(char * dst,const char * src,size_t siz)65 fz_strlcpy(char *dst, const char *src, size_t siz)
66 {
67 register char *d = dst;
68 register const char *s = src;
69 register size_t n = siz;
70
71 /* Copy as many bytes as will fit */
72 if (n != 0 && --n != 0) {
73 do {
74 if ((*d++ = *s++) == 0)
75 break;
76 } while (--n != 0);
77 }
78
79 /* Not enough room in dst, add NUL and traverse rest of src */
80 if (n == 0) {
81 if (siz != 0)
82 *d = '\0'; /* NUL-terminate dst */
83 while (*s++)
84 ;
85 }
86
87 return(s - src - 1); /* count does not include NUL */
88 }
89
90 size_t
fz_strlcat(char * dst,const char * src,size_t siz)91 fz_strlcat(char *dst, const char *src, size_t siz)
92 {
93 register char *d = dst;
94 register const char *s = src;
95 register size_t n = siz;
96 size_t dlen;
97
98 /* Find the end of dst and adjust bytes left but don't go past end */
99 while (*d != '\0' && n-- != 0)
100 d++;
101 dlen = d - dst;
102 n = siz - dlen;
103
104 if (n == 0)
105 return dlen + strlen(s);
106 while (*s != '\0') {
107 if (n != 1) {
108 *d++ = *s;
109 n--;
110 }
111 s++;
112 }
113 *d = '\0';
114
115 return dlen + (s - src); /* count does not include NUL */
116 }
117
118 void
fz_dirname(char * dir,const char * path,size_t n)119 fz_dirname(char *dir, const char *path, size_t n)
120 {
121 size_t i;
122
123 if (!path || !path[0])
124 {
125 fz_strlcpy(dir, ".", n);
126 return;
127 }
128
129 fz_strlcpy(dir, path, n);
130
131 i = strlen(dir);
132 for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
133 for(; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, ".", n); return; }
134 for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
135 dir[i+1] = 0;
136 }
137
138 #ifdef _WIN32
139
fz_realpath(const char * path,char buf[PATH_MAX])140 char *fz_realpath(const char *path, char buf[PATH_MAX])
141 {
142 wchar_t wpath[PATH_MAX];
143 wchar_t wbuf[PATH_MAX];
144 int i;
145 if (!MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, PATH_MAX))
146 return NULL;
147 if (!GetFullPathNameW(wpath, PATH_MAX, wbuf, NULL))
148 return NULL;
149 if (!WideCharToMultiByte(CP_UTF8, 0, wbuf, -1, buf, PATH_MAX, NULL, NULL))
150 return NULL;
151 for (i=0; buf[i]; ++i)
152 if (buf[i] == '\\')
153 buf[i] = '/';
154 return buf;
155 }
156
157 #else
158
fz_realpath(const char * path,char buf[PATH_MAX])159 char *fz_realpath(const char *path, char buf[PATH_MAX])
160 {
161 return realpath(path, buf);
162 }
163
164 #endif
165
ishex(int a)166 static inline int ishex(int a)
167 {
168 return (a >= 'A' && a <= 'F') ||
169 (a >= 'a' && a <= 'f') ||
170 (a >= '0' && a <= '9');
171 }
172
tohex(int c)173 static inline int tohex(int c)
174 {
175 if (c >= '0' && c <= '9') return c - '0';
176 if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
177 if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
178 return 0;
179 }
180
181 char *
fz_urldecode(char * url)182 fz_urldecode(char *url)
183 {
184 char *s = url;
185 char *p = url;
186 while (*s)
187 {
188 int c = (unsigned char) *s++;
189 if (c == '%' && ishex(s[0]) && ishex(s[1]))
190 {
191 int a = tohex(*s++);
192 int b = tohex(*s++);
193 *p++ = a << 4 | b;
194 }
195 else
196 {
197 *p++ = c;
198 }
199 }
200 *p = 0;
201 return url;
202 }
203
204 void
fz_format_output_path(fz_context * ctx,char * path,size_t size,const char * fmt,int page)205 fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page)
206 {
207 const char *s, *p;
208 char num[40];
209 int i, n;
210 int z = 0;
211
212 for (i = 0; page; page /= 10)
213 num[i++] = '0' + page % 10;
214 num[i] = 0;
215
216 s = p = strchr(fmt, '%');
217 if (p)
218 {
219 ++p;
220 while (*p >= '0' && *p <= '9')
221 z = z * 10 + (*p++ - '0');
222 }
223 if (p && *p == 'd')
224 {
225 ++p;
226 }
227 else
228 {
229 s = p = strrchr(fmt, '.');
230 if (!p)
231 s = p = fmt + strlen(fmt);
232 }
233
234 if (z < 1)
235 z = 1;
236 while (i < z && i < (int)sizeof num)
237 num[i++] = '0';
238 n = s - fmt;
239 if (n + i + strlen(p) >= size)
240 fz_throw(ctx, FZ_ERROR_GENERIC, "path name buffer overflow");
241 memcpy(path, fmt, n);
242 while (i > 0)
243 path[n++] = num[--i];
244 fz_strlcpy(path + n, p, size - n);
245 }
246
247 #define SEP(x) ((x)=='/' || (x) == 0)
248
249 char *
fz_cleanname(char * name)250 fz_cleanname(char *name)
251 {
252 char *p, *q, *dotdot;
253 int rooted;
254
255 rooted = name[0] == '/';
256
257 /*
258 * invariants:
259 * p points at beginning of path element we're considering.
260 * q points just past the last path element we wrote (no slash).
261 * dotdot points just past the point where .. cannot backtrack
262 * any further (no slash).
263 */
264 p = q = dotdot = name + rooted;
265 while (*p)
266 {
267 if(p[0] == '/') /* null element */
268 p++;
269 else if (p[0] == '.' && SEP(p[1]))
270 p += 1; /* don't count the separator in case it is nul */
271 else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
272 {
273 p += 2;
274 if (q > dotdot) /* can backtrack */
275 {
276 while(--q > dotdot && *q != '/')
277 ;
278 }
279 else if (!rooted) /* /.. is / but ./../ is .. */
280 {
281 if (q != name)
282 *q++ = '/';
283 *q++ = '.';
284 *q++ = '.';
285 dotdot = q;
286 }
287 }
288 else /* real path element */
289 {
290 if (q != name+rooted)
291 *q++ = '/';
292 while ((*q = *p) != '/' && *q != 0)
293 p++, q++;
294 }
295 }
296
297 if (q == name) /* empty string is really "." */
298 *q++ = '.';
299 *q = '\0';
300 return name;
301 }
302
303 enum
304 {
305 UTFmax = 4, /* maximum bytes per rune */
306 Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
307 Runeself = 0x80, /* rune and UTF sequences are the same (<) */
308 Runeerror = 0xFFFD, /* decoding error in UTF */
309 Runemax = 0x10FFFF, /* maximum rune value */
310 };
311
312 enum
313 {
314 Bit1 = 7,
315 Bitx = 6,
316 Bit2 = 5,
317 Bit3 = 4,
318 Bit4 = 3,
319 Bit5 = 2,
320
321 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
322 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
323 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
324 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
325 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
326 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
327
328 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
329 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
330 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
331 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
332
333 Maskx = (1<<Bitx)-1, /* 0011 1111 */
334 Testx = Maskx ^ 0xFF, /* 1100 0000 */
335
336 Bad = Runeerror,
337 };
338
339 int
fz_chartorune(int * rune,const char * str)340 fz_chartorune(int *rune, const char *str)
341 {
342 int c, c1, c2, c3;
343 int l;
344
345 /*
346 * one character sequence
347 * 00000-0007F => T1
348 */
349 c = *(const unsigned char*)str;
350 if(c < Tx) {
351 *rune = c;
352 return 1;
353 }
354
355 /*
356 * two character sequence
357 * 0080-07FF => T2 Tx
358 */
359 c1 = *(const unsigned char*)(str+1) ^ Tx;
360 if(c1 & Testx)
361 goto bad;
362 if(c < T3) {
363 if(c < T2)
364 goto bad;
365 l = ((c << Bitx) | c1) & Rune2;
366 if(l <= Rune1)
367 goto bad;
368 *rune = l;
369 return 2;
370 }
371
372 /*
373 * three character sequence
374 * 0800-FFFF => T3 Tx Tx
375 */
376 c2 = *(const unsigned char*)(str+2) ^ Tx;
377 if(c2 & Testx)
378 goto bad;
379 if(c < T4) {
380 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
381 if(l <= Rune2)
382 goto bad;
383 *rune = l;
384 return 3;
385 }
386
387 /*
388 * four character sequence (21-bit value)
389 * 10000-1FFFFF => T4 Tx Tx Tx
390 */
391 c3 = *(const unsigned char*)(str+3) ^ Tx;
392 if (c3 & Testx)
393 goto bad;
394 if (c < T5) {
395 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
396 if (l <= Rune3)
397 goto bad;
398 *rune = l;
399 return 4;
400 }
401 /*
402 * Support for 5-byte or longer UTF-8 would go here, but
403 * since we don't have that, we'll just fall through to bad.
404 */
405
406 /*
407 * bad decoding
408 */
409 bad:
410 *rune = Bad;
411 return 1;
412 }
413
414 int
fz_runetochar(char * str,int rune)415 fz_runetochar(char *str, int rune)
416 {
417 /* Runes are signed, so convert to unsigned for range check. */
418 unsigned int c = (unsigned int)rune;
419
420 /*
421 * one character sequence
422 * 00000-0007F => 00-7F
423 */
424 if(c <= Rune1) {
425 str[0] = c;
426 return 1;
427 }
428
429 /*
430 * two character sequence
431 * 0080-07FF => T2 Tx
432 */
433 if(c <= Rune2) {
434 str[0] = T2 | (c >> 1*Bitx);
435 str[1] = Tx | (c & Maskx);
436 return 2;
437 }
438
439 /*
440 * If the Rune is out of range, convert it to the error rune.
441 * Do this test here because the error rune encodes to three bytes.
442 * Doing it earlier would duplicate work, since an out of range
443 * Rune wouldn't have fit in one or two bytes.
444 */
445 if (c > Runemax)
446 c = Runeerror;
447
448 /*
449 * three character sequence
450 * 0800-FFFF => T3 Tx Tx
451 */
452 if (c <= Rune3) {
453 str[0] = T3 | (c >> 2*Bitx);
454 str[1] = Tx | ((c >> 1*Bitx) & Maskx);
455 str[2] = Tx | (c & Maskx);
456 return 3;
457 }
458
459 /*
460 * four character sequence (21-bit value)
461 * 10000-1FFFFF => T4 Tx Tx Tx
462 */
463 str[0] = T4 | (c >> 3*Bitx);
464 str[1] = Tx | ((c >> 2*Bitx) & Maskx);
465 str[2] = Tx | ((c >> 1*Bitx) & Maskx);
466 str[3] = Tx | (c & Maskx);
467 return 4;
468 }
469
470 int
fz_runelen(int c)471 fz_runelen(int c)
472 {
473 char str[10];
474 return fz_runetochar(str, c);
475 }
476
477 int
fz_utflen(const char * s)478 fz_utflen(const char *s)
479 {
480 int c, n, rune;
481 n = 0;
482 for(;;) {
483 c = *(const unsigned char*)s;
484 if(c < Runeself) {
485 if(c == 0)
486 return n;
487 s++;
488 } else
489 s += fz_chartorune(&rune, s);
490 n++;
491 }
492 return 0;
493 }
494
fz_atof(const char * s)495 float fz_atof(const char *s)
496 {
497 float result;
498
499 if (s == NULL)
500 return 0;
501
502 errno = 0;
503 result = fz_strtof(s, NULL);
504 if ((errno == ERANGE && result == 0) || isnan(result))
505 /* Return 1.0 on underflow, as it's a small known value that won't cause a divide by 0. */
506 return 1;
507 result = fz_clamp(result, -FLT_MAX, FLT_MAX);
508 return result;
509 }
510
fz_atoi(const char * s)511 int fz_atoi(const char *s)
512 {
513 if (s == NULL)
514 return 0;
515 return atoi(s);
516 }
517
fz_atoi64(const char * s)518 int64_t fz_atoi64(const char *s)
519 {
520 if (s == NULL)
521 return 0;
522 return atoll(s);
523 }
524
fz_is_page_range(fz_context * ctx,const char * s)525 int fz_is_page_range(fz_context *ctx, const char *s)
526 {
527 /* TODO: check the actual syntax... */
528 while (*s)
529 {
530 if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',')
531 return 0;
532 s++;
533 }
534 return 1;
535 }
536
fz_parse_page_range(fz_context * ctx,const char * s,int * a,int * b,int n)537 const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n)
538 {
539 if (!s || !s[0])
540 return NULL;
541
542 if (s[0] == ',')
543 s += 1;
544
545 if (s[0] == 'N')
546 {
547 *a = n;
548 s += 1;
549 }
550 else
551 *a = strtol(s, (char**)&s, 10);
552
553 if (s[0] == '-')
554 {
555 if (s[1] == 'N')
556 {
557 *b = n;
558 s += 2;
559 }
560 else
561 *b = strtol(s+1, (char**)&s, 10);
562 }
563 else
564 *b = *a;
565
566 *a = fz_clampi(*a, 1, n);
567 *b = fz_clampi(*b, 1, n);
568
569 return s;
570 }
571
572 /* memmem from musl */
573
574 #define MAX(a,b) ((a)>(b)?(a):(b))
575
576 #define BITOP(a,b,op) \
577 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
578
twobyte_memmem(const unsigned char * h,size_t k,const unsigned char * n)579 static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
580 {
581 uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
582 for (h++, k--; k; k--, hw = hw<<8 | *++h)
583 if (hw == nw) return (char *)h-1;
584 return 0;
585 }
586
threebyte_memmem(const unsigned char * h,size_t k,const unsigned char * n)587 static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
588 {
589 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
590 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
591 for (h+=2, k-=2; k; k--, hw = (hw|*++h)<<8)
592 if (hw == nw) return (char *)h-2;
593 return 0;
594 }
595
fourbyte_memmem(const unsigned char * h,size_t k,const unsigned char * n)596 static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
597 {
598 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
599 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
600 for (h+=3, k-=3; k; k--, hw = hw<<8 | *++h)
601 if (hw == nw) return (char *)h-3;
602 return 0;
603 }
604
twoway_memmem(const unsigned char * h,const unsigned char * z,const unsigned char * n,size_t l)605 static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
606 {
607 size_t i, ip, jp, k, p, ms, p0, mem, mem0;
608 size_t byteset[32 / sizeof(size_t)] = { 0 };
609 size_t shift[256];
610
611 /* Computing length of needle and fill shift table */
612 for (i=0; i<l; i++)
613 BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
614
615 /* Compute maximal suffix */
616 ip = -1; jp = 0; k = p = 1;
617 while (jp+k<l) {
618 if (n[ip+k] == n[jp+k]) {
619 if (k == p) {
620 jp += p;
621 k = 1;
622 } else k++;
623 } else if (n[ip+k] > n[jp+k]) {
624 jp += k;
625 k = 1;
626 p = jp - ip;
627 } else {
628 ip = jp++;
629 k = p = 1;
630 }
631 }
632 ms = ip;
633 p0 = p;
634
635 /* And with the opposite comparison */
636 ip = -1; jp = 0; k = p = 1;
637 while (jp+k<l) {
638 if (n[ip+k] == n[jp+k]) {
639 if (k == p) {
640 jp += p;
641 k = 1;
642 } else k++;
643 } else if (n[ip+k] < n[jp+k]) {
644 jp += k;
645 k = 1;
646 p = jp - ip;
647 } else {
648 ip = jp++;
649 k = p = 1;
650 }
651 }
652 if (ip+1 > ms+1) ms = ip;
653 else p = p0;
654
655 /* Periodic needle? */
656 if (memcmp(n, n+p, ms+1)) {
657 mem0 = 0;
658 p = MAX(ms, l-ms-1) + 1;
659 } else mem0 = l-p;
660 mem = 0;
661
662 /* Search loop */
663 for (;;) {
664 /* If remainder of haystack is shorter than needle, done */
665 if ((size_t)(z-h) < l) return 0;
666
667 /* Check last byte first; advance by shift on mismatch */
668 if (BITOP(byteset, h[l-1], &)) {
669 k = l-shift[h[l-1]];
670 if (k) {
671 if (mem0 && mem && k < p) k = l-p;
672 h += k;
673 mem = 0;
674 continue;
675 }
676 } else {
677 h += l;
678 mem = 0;
679 continue;
680 }
681
682 /* Compare right half */
683 for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
684 if (k < l) {
685 h += k-ms;
686 mem = 0;
687 continue;
688 }
689 /* Compare left half */
690 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
691 if (k <= mem) return (char *)h;
692 h += p;
693 mem = mem0;
694 }
695 }
696
fz_memmem(const void * h0,size_t k,const void * n0,size_t l)697 void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
698 {
699 const unsigned char *h = h0, *n = n0;
700
701 /* Return immediately on empty needle */
702 if (!l) return (void *)h;
703
704 /* Return immediately when needle is longer than haystack */
705 if (k<l) return 0;
706
707 /* Use faster algorithms for short needles */
708 h = memchr(h0, *n, k);
709 if (!h || l==1) return (void *)h;
710 k -= h - (const unsigned char *)h0;
711 if (k<l) return 0;
712 if (l==2) return twobyte_memmem(h, k, n);
713 if (l==3) return threebyte_memmem(h, k, n);
714 if (l==4) return fourbyte_memmem(h, k, n);
715
716 return twoway_memmem(h, h+k, n, l);
717 }
718