1 /******************************************************************************
2 * $Id$
3 *
4 * Project: MapServer
5 * Purpose: Various string handling functions.
6 * Author: Steve Lime and the MapServer team.
7 *
8 * Notes: A couple of string handling functions (strrstr, strlcat) were taken from
9 * other sources. Copyright notices accompany those functions below.
10 *
11 ******************************************************************************
12 * Copyright (c) 1996-2005 Regents of the University of Minnesota.
13 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a
16 * copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies of this Software or works derived from this Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
26 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
28 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
31 * DEALINGS IN THE SOFTWARE.
32 ****************************************************************************/
33
34 #include "mapserver.h"
35 #include "mapthread.h"
36
37 #include "cpl_vsi.h"
38
39 #include <ctype.h>
40 #include <string.h>
41 #include <errno.h>
42
43 /*
44 * Find the first occurrence of find in s, ignore case.
45 */
46
47 #ifdef USE_FRIBIDI
48 #if (defined(_WIN32) && !defined(__CYGWIN__)) || defined(HAVE_FRIBIDI2)
49 #include "fribidi.h"
50 #else
51 #include <fribidi/fribidi.h>
52 #endif
53 #define MAX_STR_LEN 65000
54 #endif
55
56 #ifdef USE_ICONV
57 #include <iconv.h>
58 #include <wchar.h>
59 #endif
60
61 #include "mapentities.h"
62
63 #ifndef HAVE_STRRSTR
64 /*
65 ** Copyright (c) 2000-2004 University of Illinois Board of Trustees
66 ** Copyright (c) 2000-2005 Mark D. Roth
67 ** All rights reserved.
68 **
69 ** Developed by: Campus Information Technologies and Educational Services,
70 ** University of Illinois at Urbana-Champaign
71 **
72 ** Permission is hereby granted, free of charge, to any person obtaining
73 ** a copy of this software and associated documentation files (the
74 ** ``Software''), to deal with the Software without restriction, including
75 ** without limitation the rights to use, copy, modify, merge, publish,
76 ** distribute, sublicense, and/or sell copies of the Software, and to
77 ** permit persons to whom the Software is furnished to do so, subject to
78 ** the following conditions:
79 **
80 ** * Redistributions of source code must retain the above copyright
81 ** notice, this list of conditions and the following disclaimers.
82 **
83 ** * Redistributions in binary form must reproduce the above copyright
84 ** notice, this list of conditions and the following disclaimers in the
85 ** documentation and/or other materials provided with the distribution.
86 **
87 ** * Neither the names of Campus Information Technologies and Educational
88 ** Services, University of Illinois at Urbana-Champaign, nor the names
89 ** of its contributors may be used to endorse or promote products derived
90 ** from this Software without specific prior written permission.
91 **
92 ** THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
93 ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
94 ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
95 ** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR
96 ** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
97 ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
98 ** OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
99 */
/*
 * Find the last occurrence of find in string.
 *
 * Returns a pointer to the first character of the last match, or NULL when
 * find does not occur in string. An empty find matches at the terminating
 * NUL of string.
 */
char *strrstr(const char *string, const char *find)
{
  size_t stringlen, findlen, pos;

  findlen = strlen(find);
  stringlen = strlen(string);
  if (findlen > stringlen)
    return NULL;

  /*
   * Try every candidate start offset, last one first. An offset is used
   * rather than a decrementing pointer so that no address before the start
   * of string is ever formed (that would be undefined behaviour).
   */
  pos = stringlen - findlen + 1;
  while (pos-- > 0) {
    if (strncmp(string + pos, find, findlen) == 0)
      return (char *) (string + pos);
  }

  return NULL;
}
116 #endif
117
118 #ifndef HAVE_STRLCAT
119 /*
120 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
121 *
122 * Permission to use, copy, modify, and distribute this software for any
123 * purpose with or without fee is hereby granted, provided that the above
124 * copyright notice and this permission notice appear in all copies.
125 *
126 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
127 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
128 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
129 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
130 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
131 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
132 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
133 */
134
/*
 * Concatenate src onto the NUL-terminated string held in dst, where siz is
 * the total capacity of dst (not the space remaining, as with strncat).
 * The result is NUL-terminated whenever a terminator was found within the
 * first siz bytes of dst; otherwise dst is left untouched.
 * Returns strlen(src) + MS_MIN(siz, strlen(initial dst)).
 * A return value >= siz means the output was truncated.
 */
size_t strlcat(char *dst, const char *src, size_t siz)
{
  size_t dlen = 0;
  size_t room;
  const char *in = src;
  char *out;

  /* Measure dst, but never look beyond its siz bytes. */
  while (dlen < siz && dst[dlen] != '\0')
    dlen++;

  /* No terminator inside the buffer: nothing can be appended. */
  if (dlen == siz)
    return(dlen + strlen(src));

  /* Payload bytes that still fit; one slot is reserved for the NUL. */
  room = siz - dlen - 1;
  out = dst + dlen;

  /* Copy what fits, but keep walking src so its full length is known. */
  for (; *in != '\0'; in++) {
    if (room > 0) {
      *out++ = *in;
      room--;
    }
  }
  *out = '\0';

  return(dlen + (size_t)(in - src)); /* count does not include NUL */
}
168 #endif
169
170 #ifndef HAVE_STRLCPY
171 /*
172 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
173 * All rights reserved.
174 *
175 * Redistribution and use in source and binary forms, with or without
176 * modification, are permitted provided that the following conditions
177 * are met:
178 * 1. Redistributions of source code must retain the above copyright
179 * notice, this list of conditions and the following disclaimer.
180 * 2. Redistributions in binary form must reproduce the above copyright
181 * notice, this list of conditions and the following disclaimer in the
182 * documentation and/or other materials provided with the distribution.
183 * 3. The name of the author may not be used to endorse or promote products
184 * derived from this software without specific prior written permission.
185 *
186 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
187 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
188 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
189 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
190 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
191 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
192 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
193 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
194 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
195 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
196 */
197
/*
 * Copy the string src into dst, whose total capacity is siz bytes. No more
 * than siz-1 characters are stored, and the result is always NUL-terminated
 * when siz is non-zero.
 * Returns strlen(src); a return value >= siz means truncation occurred.
 */
size_t
strlcpy(char *dst, const char *src, size_t siz)
{
  const char *in = src;
  size_t left;

  if (siz > 0) {
    /* Store at most siz-1 characters, then terminate. */
    for (left = siz - 1; left > 0 && *in != '\0'; left--)
      *dst++ = *in++;
    *dst = '\0';
  }

  /* Walk whatever did not fit so the full source length can be reported. */
  while (*in != '\0')
    in++;

  return((size_t)(in - src)); /* count does not include NUL */
}
228 #endif
229
230 #ifndef HAVE_STRCASESTR
231 /*-
232 * Copyright (c) 1990, 1993
233 * The Regents of the University of California. All rights reserved.
234 *
235 * This code is derived from software contributed to Berkeley by
236 * Chris Torek.
237 *
238 * Redistribution and use in source and binary forms, with or without
239 * modification, are permitted provided that the following conditions
240 * are met:
241 * 1. Redistributions of source code must retain the above copyright
242 * notice, this list of conditions and the following disclaimer.
243 * 2. Redistributions in binary form must reproduce the above copyright
244 * notice, this list of conditions and the following disclaimer in the
245 * documentation and/or other materials provided with the distribution.
246 * 3. Neither the name of the University nor the names of its contributors
247 * may be used to endorse or promote products derived from this software
248 * without specific prior written permission.
249 *
250 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
252 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
253 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
254 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
255 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
256 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
257 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
258 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
259 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
260 * SUCH DAMAGE.
261 */
/*
 * Case-insensitive strstr(): locate the first occurrence of find in s.
 * Returns a pointer to the start of the match, s itself when find is
 * empty, or NULL when find does not occur.
 */
char *strcasestr(const char *s, const char *find)
{
  char first;
  const char *rest;
  size_t restlen;

  /* An empty needle matches at the very start. */
  if (*find == '\0')
    return ((char *) s);

  first = tolower((unsigned char) *find);
  rest = find + 1;
  restlen = strlen(rest);

  for (; *s != '\0'; s++) {
    /* Cheap test on the first character before comparing the remainder. */
    if ((char) tolower((unsigned char) *s) != first)
      continue;
    if (strncasecmp(s + 1, rest, restlen) == 0)
      return ((char *) s);
  }

  return (NULL);
}
280 #endif
281
282 #ifndef HAVE_STRNCASECMP
/*
 * Compare at most len characters of s1 and s2, ignoring case.
 *
 * Returns 0 when the compared prefixes are equal (including when len is 0
 * or both strings are empty), the difference of the first mismatching
 * upper-cased characters otherwise, or +1 / -1 when one string ends before
 * len characters were compared (the longer string sorts after).
 */
int strncasecmp(const char *s1, const char *s2, int len)
{
  int diff;

  while (len != 0 && *s1 != '\0' && *s2 != '\0') {
    /* <ctype.h> functions require a value representable as unsigned char;
       passing a negative plain char is undefined behaviour. */
    diff = toupper((unsigned char) *s1) - toupper((unsigned char) *s2);
    if (diff != 0)
      return(diff);
    s1++;
    s2++;
    len--;
  }

  if (len == 0)
    return(0);

  /* One (or both) of the strings ended before len characters were seen. */
  if (*s1 != '\0')
    return(1);
  if (*s2 != '\0')
    return(-1);

  return(0);
}
318 #endif
319
320 #ifndef HAVE_STRCASECMP
/*
 * Compare s1 and s2, ignoring case.
 *
 * Returns 0 when the strings are equal (or when either pointer is NULL),
 * the difference of the first mismatching upper-cased characters
 * otherwise, or +1 / -1 when one string is a proper prefix of the other
 * (the longer string sorts after).
 */
int strcasecmp(const char *s1, const char *s2)
{
  int diff;

  if (s1 == NULL || s2 == NULL)
    return (0);

  while (*s1 != '\0' && *s2 != '\0') {
    /* <ctype.h> functions require a value representable as unsigned char;
       passing a negative plain char is undefined behaviour. */
    diff = toupper((unsigned char) *s1) - toupper((unsigned char) *s2);
    if (diff != 0)
      return(diff);
    s1++;
    s2++;
  }

  if (*s1 != '\0')
    return(1);
  if (*s2 != '\0')
    return(-1);

  return(0);
}
346 #endif
347
/*
 * Format a long as decimal text.
 * Returns a 256-byte buffer obtained from msSmallMalloc() holding the
 * text; the caller owns the buffer.
 */
char *msLongToString(long value)
{
  const size_t capacity = 256;
  char *text = (char *) msSmallMalloc(capacity);

  snprintf(text, capacity, "%ld", value);

  return text;
}
356
msDoubleToString(double value,int force_f)357 char *msDoubleToString(double value, int force_f)
358 {
359 size_t bufferSize = 256;
360 char *buffer = (char*)msSmallMalloc(bufferSize);
361
362 if (force_f == MS_TRUE)
363 snprintf(buffer, bufferSize, "%f", value);
364 else
365 snprintf(buffer, bufferSize, "%g", value);
366 return(buffer);
367 }
368
/*
 * Format an int as decimal text.
 * Returns a 256-byte buffer obtained from msSmallMalloc() holding the
 * text; the caller owns the buffer.
 */
char *msIntToString(int value)
{
  const size_t capacity = 256;
  char *text = (char *) msSmallMalloc(capacity);

  snprintf(text, capacity, "%i", value);

  return text;
}
377
/*
 * Convert every character of string to upper case, in place.
 * A NULL string is ignored.
 */
void msStringToUpper(char *string)
{
  char *p;

  if (string == NULL)
    return;

  /* Single pass: no strlen() per iteration, and the unsigned char cast
     keeps toupper() defined for bytes with the high bit set. */
  for (p = string; *p != '\0'; p++)
    *p = (char) toupper((unsigned char) *p);
}
389
/*
 * Convert every character of string to lower case, in place.
 * A NULL string is ignored.
 */
void msStringToLower(char *string)
{
  char *p;

  if (string == NULL)
    return;

  /* Single pass: no strlen() per iteration, and the unsigned char cast
     keeps tolower() defined for bytes with the high bit set. */
  for (p = string; *p != '\0'; p++)
    *p = (char) tolower((unsigned char) *p);
}
401
/**
 * Force the first character to uppercase and the rest of the characters to
 * lower case for EACH word in the string. Words are separated by the space
 * character only. The string is modified in place; NULL is ignored.
 */
void msStringInitCap(char *string)
{
  char *p;
  int start = 1; /* non-zero while the next non-space starts a word */

  if (string == NULL)
    return;

  /* Single pass: no strlen() per iteration, and the unsigned char casts
     keep the <ctype.h> calls defined for bytes with the high bit set. */
  for (p = string; *p != '\0'; p++) {
    if (*p == ' ') {
      start = 1;
    } else if (start) {
      *p = (char) toupper((unsigned char) *p);
      start = 0;
    } else {
      *p = (char) tolower((unsigned char) *p);
    }
  }
}
424
/**
 * Force the first character to uppercase for the FIRST word in the string
 * and the rest of the characters to lower case. Space characters are left
 * untouched. The string is modified in place; NULL is ignored.
 */
void msStringFirstCap(char *string)
{
  char *p;
  int start = 1; /* non-zero until the first non-space has been seen */

  if (string == NULL)
    return;

  /* Single pass: no strlen() per iteration, and the unsigned char casts
     keep the <ctype.h> calls defined for bytes with the high bit set. */
  for (p = string; *p != '\0'; p++) {
    if (*p == ' ')
      continue;

    if (start) {
      *p = (char) toupper((unsigned char) *p);
      start = 0;
    } else {
      *p = (char) tolower((unsigned char) *p);
    }
  }
}
446
/*
 * Remove the last character of string, in place.
 * An empty or NULL string is returned unchanged. Returns string.
 */
char *msStringChop(char *string)
{
  size_t n;

  if (string == NULL)
    return(string);

  /* size_t avoids truncating the length of very long strings. */
  n = strlen(string);
  if (n > 0)
    string[n-1] = '\0';

  return(string);
}
457
/*
** Strip leading and trailing space characters (' ' only) from str, in
** place. A NULL str is ignored.
*/
void msStringTrim(char *str)
{
  size_t lead, len;

  if (str == NULL)
    return;

  /* Slide the text left over any leading spaces (terminator included). */
  lead = strspn(str, " ");
  len = strlen(str) - lead;
  if (lead > 0)
    memmove(str, str + lead, len + 1);

  /* Back up over trailing spaces and terminate after the last kept byte. */
  while (len > 0 && str[len - 1] == ' ')
    len--;
  str[len] = '\0';
}
486
/*
** Remove leading white space (as reported by isspace()) and shift
** everything to the left, in place. Returns string; NULL is passed through.
*/
char *msStringTrimLeft(char *string)
{
  char *first;

  if (string == NULL)
    return string;

  /* isspace('\0') is false, so the scan stops at the terminator. The cast
     keeps isspace() defined for bytes with the high bit set. */
  first = string;
  while (isspace((unsigned char) *first))
    first++;

  /* Overlapping move of the remaining text plus its terminator. */
  if (first != string)
    memmove(string, first, strlen(first) + 1);

  return string;
}
518
/* ------------------------------------------------------------------------------- */
/*       Trims trailing blanks (' ' only) from a string, in place.                 */
/*       A string made up entirely of blanks becomes the empty string.             */
/*       A NULL string is ignored.                                                 */
/* ------------------------------------------------------------------------------- */
void msStringTrimBlanks(char *string)
{
  size_t len;

  if (string == NULL)
    return;

  /* Back up over trailing blanks. Counting down to zero also handles the
     all-blank case, which a "find the last non-blank" scan would miss. */
  len = strlen(string);
  while (len > 0 && string[len - 1] == ' ')
    len--;

  string[len] = '\0';
}
534
/* ------------------------------------------------------------------------------- */
/*       Cuts a string at its first newline character, in place.                   */
/*       Useful in conjunction with fgets() calls.                                 */
/* ------------------------------------------------------------------------------- */
void msStringTrimEOL(char *string)
{
  char *newline = strchr(string, '\n');

  /* Terminate the string at the newline, if there is one. */
  if (newline != NULL)
    *newline = '\0';
}
550
/* ------------------------------------------------------------------------------- */
/*       Replace all occurrences of old with new in str.                           */
/*       It is assumed that str was dynamically created using malloc, because      */
/*       it is grown with msSmallRealloc() when new is longer than old. Always     */
/*       use the returned pointer: str may have moved.                             */
/*       A NULL new is treated as "". A NULL str, or a NULL or empty old,          */
/*       leaves str untouched.                                                     */
/* ------------------------------------------------------------------------------- */
char *msReplaceSubstring(char *str, const char *old, const char *new)
{
  size_t cur_len, old_len, new_len, match_offset, tail_len;
  char *match;

  /*
  ** Nothing sensible can be done without a subject or a pattern. An empty
  ** pattern matches everywhere and would otherwise loop forever.
  */
  if(str == NULL || old == NULL || *old == '\0')
    return(str);

  if(new == NULL)
    new = "";

  /*
  ** If old is not found then leave str alone
  */
  if( (match = strstr(str, old)) == NULL)
    return(str);

  /*
  ** Grab some info about incoming strings
  */
  cur_len = strlen(str);
  old_len = strlen(old);
  new_len = strlen(new);

  /*
  ** Now loop until old is NOT found in the remainder of str
  */
  while( match != NULL ) {
    match_offset = (size_t)(match - str);

    /* characters following the match, terminator not included */
    tail_len = cur_len - match_offset - old_len;

    /*
    ** Re-allocate for exactly one replacement of old with new
    ** (don't bother reallocating if the string does not grow)
    */
    if (old_len < new_len) {
      str = (char *)msSmallRealloc(str, (cur_len - old_len + new_len + 1));
      match = str + match_offset; /* str may have moved */
    }

    /*
    ** Move the trailing part of str (and its terminator) to make or close
    ** the gap, unless old_len == new_len
    */
    if (old_len != new_len) {
      memmove(match+new_len, match+old_len, tail_len+1);
    }

    /*
    ** Now copy new over old
    */
    memcpy(match, new, new_len);
    cur_len = cur_len - old_len + new_len;

    /*
    ** And look for more matches in the rest of the string; text that was
    ** just inserted is never rescanned
    */
    match = strstr(match + new_len, old);
  }

  return(str);
}
612
/*
 * Same goal as msReplaceSubstring, but for the known case when we won't
 * have to do reallocs etc.: every occurrence of the character old in str is
 * overwritten with new, in place. Used to replace the wrap character by a
 * newline for labels.
 *
 * The terminating NUL is never examined or replaced; NULL str is ignored.
 */
void msReplaceChar(char *str, char old, char new)
{
  if(str == NULL)
    return;

  /* Test the current character BEFORE advancing, so that the first
     character is checked and the terminator is not. */
  for( ; *str != '\0'; str++) {
    if(*str == old)
      *str = new;
  }
}
624
/*
** Count how many characters of str are equal to ch.
*/
int msCountChars(char *str, char ch)
{
  const char *p;
  int hits = 0;

  for(p = str; *p != '\0'; p++) {
    if(*p == ch)
      hits++;
  }

  return(hits);
}
638
/* ------------------------------------------------------------------------------- */
/*       Strip the directory part from a full path.                                */
/*       Returns a pointer into fn, just past the last '/' or '\\' (whichever      */
/*       comes later), or fn itself when it contains neither.                      */
/* ------------------------------------------------------------------------------- */
char *msStripPath(char *fn)
{
  char *pLast = strrchr(fn, '/');
  char *pBackslash = strrchr(fn, '\\');

  /* keep whichever separator occurs further to the right */
  if( pBackslash != NULL && (pLast == NULL || pLast < pBackslash) )
    pLast = pBackslash;

  if( pLast == NULL )
    return(fn);

  return pLast + 1; /* skip past the separator */
}
663
/*
** Returns the *path* portion of the filename fn, i.e. everything up to and
** including the last '/' or '\\'. When fn contains no separator the current
** directory ("./", or ".\\" on native Windows) is returned instead.
** The result is a copy made with msStrdup(); the caller owns it.
*/
char *msGetPath(const char *fn)
{
  char *str;
  int i, length;
  int found = 0; /* set once a path separator has been located */

  length = strlen(fn);
  if((str = msStrdup(fn)) == NULL)
    return(NULL);

  for(i=length-1; i>=0; i--) { /* step backwards through the string */
    if((str[i] == '/') || (str[i] == '\\')) {
      str[i+1] = '\0';
      found = 1;
      break;
    }
  }

  /*
  ** Decide on the flag rather than by comparing str with fn: a name that
  ** already ends in a separator (e.g. "dir/") is unchanged by the
  ** truncation above, yet it does have a path portion.
  */
  if(!found) {
    msFree(str);
#if defined(_WIN32) && !defined(__CYGWIN__)
    str = msStrdup(".\\");
#else
    str= msStrdup("./");
#endif
  }

  return(str);
}
694
695 /*
696 ** Returns a *path* built from abs_path and path.
697 ** The pszReturnPath must be declared by the caller function as an array
698 ** of MS_MAXPATHLEN char
699 */
msBuildPath(char * pszReturnPath,const char * abs_path,const char * path)700 char *msBuildPath(char *pszReturnPath, const char *abs_path, const char *path)
701 {
702 int abslen = 0;
703 int pathlen = 0;
704
705
706 if(path == NULL) {
707 msSetError(MS_IOERR, NULL, "msBuildPath");
708 return NULL;
709 }
710
711 pathlen = strlen(path);
712 if (abs_path)
713 abslen = strlen(abs_path);
714
715 if((pathlen + abslen + 2) > MS_MAXPATHLEN) {
716 msSetError(MS_IOERR, "Path is too long. Check server logs.",
717 "msBuildPath()");
718 msDebug("msBuildPath(): (%s%s): path is too long.\n", abs_path, path);
719 return NULL;
720 }
721
722 /* Check if path is absolute */
723 if((abs_path == NULL) || (abslen == 0) ||
724 (path[0] == '\\') || (path[0] == '/') ||
725 (pathlen > 1 && (path[1] == ':'))) {
726 strlcpy(pszReturnPath, path, MS_MAXPATHLEN);
727 return(pszReturnPath);
728 }
729
730 /* else return abs_path/path */
731 if((abs_path[abslen-1] == '/') || (abs_path[abslen-1] == '\\')) {
732 snprintf(pszReturnPath, MS_MAXPATHLEN, "%s%s", abs_path, path);
733 } else {
734 snprintf(pszReturnPath, MS_MAXPATHLEN, "%s/%s", abs_path, path);
735 }
736
737 return(pszReturnPath);
738 }
739
740 /*
741 ** Returns a *path* built from abs_path, path1 and path2.
742 ** abs_path/path1/path2
743 ** The pszReturnPath must be declared by the caller function as an array
744 ** of MS_MAXPATHLEN char
745 */
msBuildPath3(char * pszReturnPath,const char * abs_path,const char * path1,const char * path2)746 char *msBuildPath3(char *pszReturnPath, const char *abs_path, const char *path1,const char *path2)
747 {
748 char szPath[MS_MAXPATHLEN];
749
750 return msBuildPath(pszReturnPath, abs_path,
751 msBuildPath(szPath, path1, path2));
752 }
753
754 /*
755 ** Similar to msBuildPath(), but the input path is only qualified by the
756 ** absolute path if this will result in it pointing to a readable file.
757 **
758 ** Returns NULL if the resulting path doesn't point to a readable file.
759 */
760
msTryBuildPath(char * szReturnPath,const char * abs_path,const char * path)761 char *msTryBuildPath(char *szReturnPath, const char *abs_path, const char *path)
762
763 {
764 VSILFILE *fp;
765
766 if( msBuildPath( szReturnPath, abs_path, path ) == NULL )
767 return NULL;
768
769 fp = VSIFOpenL( szReturnPath, "r" );
770 if( fp == NULL ) {
771 strlcpy( szReturnPath, path, MS_MAXPATHLEN);
772 return NULL;
773 } else
774 VSIFCloseL( fp );
775
776 return szReturnPath;
777 }
778
779 /*
780 ** Similar to msBuildPath3(), but the input path is only qualified by the
781 ** absolute path if this will result in it pointing to a readable file.
782 **
783 ** Returns NULL if the resulting path doesn't point to a readable file.
784 */
785
msTryBuildPath3(char * szReturnPath,const char * abs_path,const char * path1,const char * path2)786 char *msTryBuildPath3(char *szReturnPath, const char *abs_path, const char *path1, const char *path2)
787
788 {
789 VSILFILE *fp;
790
791 if( msBuildPath3( szReturnPath, abs_path, path1, path2 ) == NULL )
792 return NULL;
793
794 fp = VSIFOpenL( szReturnPath, "r" );
795 if( fp == NULL ) {
796 strlcpy( szReturnPath, path2, MS_MAXPATHLEN);
797 return NULL;
798 } else
799 VSIFCloseL( fp );
800
801 return szReturnPath;
802 }
803
/*
** Splits a string into multiple strings based on ch. Consecutive ch's are
** treated as a single delimiter.
**
** A delimiter run at the very start or end of the string yields an empty
** first or last token, and an empty string yields one empty token.
** The number of tokens is stored in *num_tokens. The returned array and
** each token are allocated with msSmallMalloc(); the caller owns them.
*/
char **msStringSplit(const char *string, char ch, int *num_tokens)
{
  int i, k, n;
  int length, start, toklen;
  char **token;

  length = strlen(string);

  /* one token, plus one more for every run of delimiters */
  n = 1;
  for(i=0; i<length; i++) {
    if(string[i] == ch && (i == 0 || string[i-1] != ch))
      n++;
  }

  token = (char **) msSmallMalloc(sizeof(char *)*n);

  k = 0;
  start = 0; /* offset where the current token begins */
  i = 0;
  while(i < length) {
    if(string[i] != ch) {
      i++;
      continue;
    }

    /* a delimiter run starts here: emit the token that just ended,
       sized exactly rather than to the length of the whole input */
    toklen = i - start;
    token[k] = (char *)msSmallMalloc(sizeof(char)*(toklen+1));
    memcpy(token[k], string + start, toklen);
    token[k][toklen] = '\0';
    k++;

    /* skip the rest of the run */
    while(i < length && string[i] == ch)
      i++;
    start = i;
  }

  /* whatever follows the last delimiter run (possibly nothing) */
  toklen = length - start;
  token[k] = (char *)msSmallMalloc(sizeof(char)*(toklen+1));
  memcpy(token[k], string + start, toklen);
  token[k][toklen] = '\0';

  *num_tokens = n;

  return(token);
}
855
856 /*
857 This function is a copy of CSLTokenizeString2() function of the CPL component.
858 See the port/cpl_string.cpp file in gdal source for the complete documentation.
859 Available Flags:
860 * - MS_ALLOWEMPTYTOKENS: allow the return of empty tokens when two
861 * delimiters in a row occur with no other text between them. If not set,
862 * empty tokens will be discarded;
863 * - MS_STRIPLEADSPACES: strip leading space characters from the token (as
864 * reported by isspace());
865 * - MS_STRIPENDSPACES: strip ending space characters from the token (as
866 * reported by isspace());
867 * - MS_HONOURSTRINGS: double quotes can be used to hold values that should
868 * not be broken into multiple tokens;
869 * - MS_PRESERVEQUOTES: string quotes are carried into the tokens when this
870 * is set, otherwise they are removed;
871 * - MS_PRESERVEESCAPES: if set backslash escapes (for backslash itself,
872 * and for literal double quotes) will be preserved in the tokens, otherwise
873 * the backslashes will be removed in processing.
874 */
msStringSplitComplex(const char * pszString,const char * pszDelimiters,int * num_tokens,int nFlags)875 char ** msStringSplitComplex( const char * pszString,
876 const char * pszDelimiters,
877 int *num_tokens,
878 int nFlags )
879
880 {
881 char **papszRetList = NULL;
882 int nRetMax = 0, nRetLen = 0;
883 char *pszToken;
884 int nTokenMax, nTokenLen;
885 int bHonourStrings = (nFlags & MS_HONOURSTRINGS);
886 int bAllowEmptyTokens = (nFlags & MS_ALLOWEMPTYTOKENS);
887 int bStripLeadSpaces = (nFlags & MS_STRIPLEADSPACES);
888 int bStripEndSpaces = (nFlags & MS_STRIPENDSPACES);
889
890 pszToken = (char *) msSmallMalloc(sizeof(char)*10);;
891 nTokenMax = 10;
892
893 while( pszString != NULL && *pszString != '\0' ) {
894 int bInString = MS_FALSE;
895 int bStartString = MS_TRUE;
896
897 nTokenLen = 0;
898
899 /* Try to find the next delimeter, marking end of token */
900 for( ; *pszString != '\0'; pszString++ ) {
901
902 /* End if this is a delimeter skip it and break. */
903 if( !bInString && strchr(pszDelimiters, *pszString) != NULL ) {
904 pszString++;
905 break;
906 }
907
908 /* If this is a quote, and we are honouring constant
909 strings, then process the constant strings, with out delim
910 but don't copy over the quotes */
911 if( bHonourStrings && *pszString == '"' ) {
912 if( nFlags & MS_PRESERVEQUOTES ) {
913 pszToken[nTokenLen] = *pszString;
914 nTokenLen++;
915 }
916
917 if( bInString ) {
918 bInString = MS_FALSE;
919 continue;
920 } else {
921 bInString = MS_TRUE;
922 continue;
923 }
924 }
925
926 /*
927 * Within string constants we allow for escaped quotes, but in
928 * processing them we will unescape the quotes and \\ sequence
929 * reduces to \
930 */
931 if( bInString && pszString[0] == '\\' ) {
932 if ( pszString[1] == '"' || pszString[1] == '\\' ) {
933 if( nFlags & MS_PRESERVEESCAPES ) {
934 pszToken[nTokenLen] = *pszString;
935 nTokenLen++;
936 }
937
938 pszString++;
939 }
940 }
941
942 /*
943 * Strip spaces at the token start if requested.
944 */
945 if ( !bInString && bStripLeadSpaces
946 && bStartString && isspace((unsigned char)*pszString) )
947 continue;
948
949 bStartString = MS_FALSE;
950
951 /*
952 * Extend token buffer if we are running close to its end.
953 */
954 if( nTokenLen >= nTokenMax-3 ) {
955 nTokenMax = nTokenMax * 2 + 10;
956 pszToken = (char *) msSmallRealloc(pszToken, sizeof(char)*nTokenMax);
957 }
958
959 pszToken[nTokenLen] = *pszString;
960 nTokenLen++;
961 }
962
963 /*
964 * Strip spaces at the token end if requested.
965 */
966 if ( !bInString && bStripEndSpaces ) {
967 while ( nTokenLen && isspace((unsigned char)pszToken[nTokenLen - 1]) )
968 nTokenLen--;
969 }
970
971 pszToken[nTokenLen] = '\0';
972
973 /*
974 * Add the token.
975 */
976 if( pszToken[0] != '\0' || bAllowEmptyTokens ) {
977 if( nRetLen >= nRetMax - 1 ) {
978 nRetMax = nRetMax * 2 + 10;
979 papszRetList = (char **) msSmallRealloc(papszRetList, sizeof(char*)*nRetMax);
980 }
981
982 papszRetList[nRetLen++] = msStrdup( pszToken );
983 papszRetList[nRetLen] = NULL;
984 }
985 }
986
987 /*
988 * If the last token was empty, then we need to capture
989 * it now, as the loop would skip it.
990 */
991 if( *pszString == '\0' && bAllowEmptyTokens && nRetLen > 0
992 && strchr(pszDelimiters,*(pszString-1)) != NULL ) {
993 if( nRetLen >= nRetMax - 1 ) {
994 nRetMax = nRetMax * 2 + 10;
995 papszRetList = (char **) msSmallRealloc(papszRetList, sizeof(char*)*nRetMax);
996 }
997
998 papszRetList[nRetLen++] = msStrdup("");
999 papszRetList[nRetLen] = NULL;
1000 }
1001
1002 if( papszRetList == NULL )
1003 papszRetList = (char **) msSmallMalloc(sizeof(char *)*1);
1004
1005 *num_tokens = nRetLen;
1006 free(pszToken);
1007
1008 return papszRetList;
1009 }
1010
1011 /* This method is similar to msStringSplit but support quoted strings.
1012 It also support multi-characters delimiter and allows to preserve quotes */
msStringTokenize(const char * pszLine,const char * pszDelim,int * num_tokens,int preserve_quote)1013 char **msStringTokenize( const char *pszLine, const char *pszDelim,
1014 int *num_tokens, int preserve_quote )
1015 {
1016 char **papszResult = NULL;
1017 int n = 1, iChar, nLength = strlen(pszLine), iTokenChar = 0, bInQuotes = MS_FALSE;
1018 char *pszToken = (char *) msSmallMalloc(sizeof(char)*(nLength+1));
1019 int nDelimLen = strlen(pszDelim);
1020
1021 /* Compute the number of tokens */
1022 for( iChar = 0; pszLine[iChar] != '\0'; iChar++ ) {
1023 if( bInQuotes && pszLine[iChar] == '"' && pszLine[iChar+1] == '"' ) {
1024 iChar++;
1025 } else if( pszLine[iChar] == '"' ) {
1026 bInQuotes = !bInQuotes;
1027 } else if ( !bInQuotes && strncmp(pszLine+iChar,pszDelim,nDelimLen) == 0 ) {
1028 iChar += nDelimLen - 1;
1029 n++;
1030 }
1031 }
1032
1033 papszResult = (char **) msSmallMalloc(sizeof(char *)*n);
1034 n = iTokenChar = bInQuotes = 0;
1035 for( iChar = 0; pszLine[iChar] != '\0'; iChar++ ) {
1036 if( bInQuotes && pszLine[iChar] == '"' && pszLine[iChar+1] == '"' ) {
1037 if (preserve_quote == MS_TRUE)
1038 pszToken[iTokenChar++] = '"';
1039 pszToken[iTokenChar++] = '"';
1040 iChar++;
1041 } else if( pszLine[iChar] == '"' ) {
1042 if (preserve_quote == MS_TRUE)
1043 pszToken[iTokenChar++] = '"';
1044 bInQuotes = !bInQuotes;
1045 } else if( !bInQuotes && strncmp(pszLine+iChar,pszDelim,nDelimLen) == 0 ) {
1046 pszToken[iTokenChar++] = '\0';
1047 papszResult[n] = pszToken;
1048 pszToken = (char *) msSmallMalloc(sizeof(char)*(nLength+1));
1049 iChar += nDelimLen - 1;
1050 iTokenChar = 0;
1051 n++;
1052 } else {
1053 pszToken[iTokenChar++] = pszLine[iChar];
1054 }
1055 }
1056
1057 pszToken[iTokenChar++] = '\0';
1058 papszResult[n] = pszToken;
1059
1060 *num_tokens = n+1;
1061
1062 return papszResult;
1063 }
1064
/**********************************************************************
 *                       msEncodeChar()
 *
 * Tell whether a character has to be encoded before being used in a
 * URL. Returns 0 for the characters that are kept as is (a-z, A-Z, 0-9
 * and the marks ' ( ) * - . _ ! ~) and 1 for every other character.
 * The character map is taken from http://www.ietf.org/rfc/rfc2396.txt
 *
 **********************************************************************/

int msEncodeChar(const char c)
{
  /* alphanumeric characters are always safe */
  if (c >= 0x61 && c <= 0x7A) /* Letters a-z */
    return(0);
  if (c >= 0x41 && c <= 0x5A) /* Letters A-Z */
    return(0);
  if (c >= 0x30 && c <= 0x39) /* Numbers 0-9 */
    return(0);

  /* a handful of punctuation marks is safe too */
  switch (c) {
    case 0x27: /* ' */
    case 0x28: /* ( */
    case 0x29: /* ) */
    case 0x2A: /* * */
    case 0x2D: /* - */
    case 0x2E: /* . */
    case 0x5F: /* _ */
    case 0x21: /* ! */
    case 0x7E: /* ~ */
      return(0);
    default:
      break;
  }

  return(1);
}
1090
/* URL-encode data without sparing any character: this simply forwards to
   msEncodeUrlExcept() with '\0' as exception character, which turns the
   exception handling off. */
char *msEncodeUrl(const char *data)
{
  const char no_exception = '\0';

  return msEncodeUrlExcept(data, no_exception);
}
1099
/**********************************************************************
 *                       msEncodeUrlExcept()
 *
 * URL encoding: every character flagged by msEncodeChar() (RFC 2396
 * character map) is replaced by '%' followed by its two uppercase
 * hexadecimal digits, except for the one exception character which is
 * copied unchanged. An exception character of '\0' implies no exception
 * handling. The result is allocated with msSmallMalloc().
 *
 **********************************************************************/

char *msEncodeUrlExcept(const char *data, const char except)
{
  static const char *hex = "0123456789ABCDEF";
  const char *src;
  char *encoded, *dst;
  int extra = 0;

  /* every encoded character takes two more bytes than the original one */
  for (src = data; *src != '\0'; src++) {
    if (msEncodeChar(*src))
      extra += 2;
  }

  encoded = (char*)msSmallMalloc(strlen(data)+extra+1);

  dst = encoded;
  for (src = data; *src != '\0'; src++) {
    const int is_exception = (except != '\0' && *src == except);

    if (is_exception || !msEncodeChar(*src)) {
      *dst++ = *src;
    } else {
      /* go through unsigned char so that bytes >= 0x80 index hex[] safely */
      const unsigned char ch = (unsigned char) *src;
      *dst++ = '%';
      *dst++ = hex[ch/16];
      *dst++ = hex[ch%16];
    }
  }
  *dst = '\0';

  return encoded;
}
1138
/************************************************************************/
/*                         msEscapeJSonString()                         */
/************************************************************************/

/* Return a copy of pszJSonString, allocated with msSmallMalloc(), in    */
/* which the characters that cannot appear as is in a JSON string are    */
/* escaped (see http://www.json.org/).                                   */
/* The input (and output) string are not supposed to start/end with      */
/* double quote characters. It is the responsibility of the caller to do */
/* that.                                                                 */
char* msEscapeJSonString(const char* pszJSonString)
{
  static const char* pszHex = "0123456789ABCDEF";
  const unsigned char* pszIn = (const unsigned char*) pszJSonString;
  char* pszRet;
  char* pszOut;

  /* Worst case is one character to become \uABCD so 6 characters */
  pszRet = (char*) msSmallMalloc(strlen(pszJSonString) * 6 + 1);
  pszOut = pszRet;

  for( ; *pszIn != '\0'; pszIn++ ) {
    /* second character of a two character escape, '\0' when none applies */
    char chEscape = '\0';

    switch( *pszIn ) {
      case '\b':
        chEscape = 'b';
        break;
      case '\f':
        chEscape = 'f';
        break;
      case '\n':
        chEscape = 'n';
        break;
      case '\r':
        chEscape = 'r';
        break;
      case '\t':
        chEscape = 't';
        break;
      case '"':
        chEscape = '"';
        break;
      case '\\':
        chEscape = '\\';
        break;
      default:
        break;
    }

    if( chEscape != '\0' ) {
      *pszOut++ = '\\';
      *pszOut++ = chEscape;
    } else if( *pszIn < 32 ) {
      /* remaining control characters use the \u00XX form */
      *pszOut++ = '\\';
      *pszOut++ = 'u';
      *pszOut++ = '0';
      *pszOut++ = '0';
      *pszOut++ = pszHex[*pszIn / 16];
      *pszOut++ = pszHex[*pszIn % 16];
    } else {
      *pszOut++ = (char) *pszIn;
    }
  }
  *pszOut = '\0';

  return pszRet;
}
1209
/* msEncodeHTMLEntities()
**
** Return a copy of string after replacing some problematic chars with their
** HTML entity equivalents.
**
** The replacements performed are:
**  '&' -> "&amp;", '"' -> "&quot;", '<' -> "&lt;", '>' -> "&gt;" and
**  '\'' -> "&#39;"
**
** Returns NULL when string is NULL or when the buffer cannot be allocated.
**/
char *msEncodeHTMLEntities(const char *string)
{
  int buflen, i;
  char *newstring;
  const char *c;

  if(string == NULL)
    return NULL;

  /* Start with 100 extra chars for replacements... */
  /* should be good enough for most cases */
  buflen = strlen(string) + 100;
  newstring = (char*)malloc(buflen+1);
  MS_CHECK_ALLOC(newstring, buflen+1, NULL);

  for(i=0, c=string; *c != '\0'; c++) {
    const char *entity = NULL;

    /* Need to realloc buffer? The longest entity takes 6 chars. */
    if (i+6 > buflen) {
      char *grown;

      /* If we had to realloc then this string must contain several */
      /* entities... so let's go with twice the previous buffer size */
      buflen *= 2;
      grown = (char*)realloc(newstring, buflen+1);
      if (grown == NULL)
        free(newstring); /* realloc() keeps the old block on failure */
      newstring = grown;
      MS_CHECK_ALLOC(newstring, buflen+1, NULL);
    }

    switch(*c) {
      case '&':
        entity = "&amp;";
        break;
      case '<':
        entity = "&lt;";
        break;
      case '>':
        entity = "&gt;";
        break;
      case '"':
        entity = "&quot;";
        break;
      case '\'':
        entity = "&#39;"; /* changed from &apos; (bug 1040) */
        break;
      default:
        break;
    }

    if (entity != NULL) {
      /* advance by the real length of what was copied so that the text and
         the write position can never get out of step */
      const size_t entitylen = strlen(entity);
      memcpy(newstring+i, entity, entitylen);
      i += (int) entitylen;
    } else {
      newstring[i++] = *c;
    }
  }

  newstring[i] = '\0';

  return newstring;
}
1273
1274
/* Return the length of entity when text starts with it (case insensitive
   comparison), 0 otherwise. The comparison stops at the first difference,
   so text is never read past its terminating '\0'. */
static size_t msMatchHTMLEntityPrefix(const char *text, const char *entity)
{
  size_t n;

  for(n = 0; entity[n] != '\0'; n++) {
    if(tolower((unsigned char) text[n]) != tolower((unsigned char) entity[n]))
      return 0;
  }

  return n;
}

/* msDecodeHTMLEntities()
**
** Modify the string in place to replace encoded characters by their true
** value. Note that the buffer is written to even though the parameter is
** declared const: it must be writable.
**
** The replacements performed (entity names are case insensitive) are:
**  "&amp;" -> '&', "&quot;" -> '"', "&lt;" -> '<', "&gt;" -> '>' and
**  "&#39;" -> '\''
**
** Any other "&...;" sequence is left untouched. Nothing is done for a NULL
** string.
**/
void msDecodeHTMLEntities(const char *string)
{
  static const struct {
    const char *entity;
    char value;
  } entities[] = {
    { "&amp;", '&' },
    { "&lt;", '<' },
    { "&gt;", '>' },
    { "&quot;", '"' },
    { "&#39;", '\'' }
  };
  const char *pszRead;
  char *pszWrite;
  size_t iEntity, nMatched;

  if(string == NULL)
    return;

  /* Decoding only ever shrinks the text, so the result is compacted in the
     caller's buffer in a single pass: the write position never overtakes
     the read position. */
  pszRead = string;
  pszWrite = (char*)string;

  while(*pszRead != '\0') {
    nMatched = 0;

    if(*pszRead == '&') {
      for(iEntity = 0; iEntity < sizeof(entities)/sizeof(entities[0]); iEntity++) {
        nMatched = msMatchHTMLEntityPrefix(pszRead, entities[iEntity].entity);
        if(nMatched > 0) {
          *pszWrite++ = entities[iEntity].value;
          pszRead += nMatched;
          break;
        }
      }
    }

    /* plain character, or '&' that does not start a known entity */
    if(nMatched == 0)
      *pszWrite++ = *pszRead++;
  }
  *pszWrite = '\0';

  return;
}
1342
1343 /*
1344 ** msIsXMLValid
1345 **
1346 ** Check if the string is an XML valid string. It should contains only
1347 ** A-Z, a-z, 0-9, '_', '-', '.', and ':'
1348 ** Return MS_TRUE or MS_FALSE
1349 */
msIsXMLTagValid(const char * string)1350 int msIsXMLTagValid(const char *string)
1351 {
1352 int i, nLen;
1353
1354 nLen = strlen(string);
1355
1356 for(i=0; i<nLen; i++) {
1357 if( !( string[i] >= 'A' && string[i] <= 'Z' ) &&
1358 !( string[i] >= 'a' && string[i] <= 'z' ) &&
1359 !( string[i] >= '0' && string[i] <= '9' ) &&
1360 string[i] != '-' && string[i] != '.' &&
1361 string[i] != ':' && string[i] != '_' )
1362 return MS_FALSE;
1363 }
1364
1365 return MS_TRUE;
1366 }
1367
1368
1369 /*
1370 * Concatenate pszSrc to pszDest and reallocate memory if necessary.
1371 */
msStringConcatenate(char * pszDest,const char * pszSrc)1372 char *msStringConcatenate(char *pszDest, const char *pszSrc)
1373 {
1374 int nLen;
1375
1376 if (pszSrc == NULL)
1377 return pszDest;
1378
1379 /* if destination is null, allocate memory */
1380 if (pszDest == NULL) {
1381 pszDest = msStrdup(pszSrc);
1382 } else { /* if dest is not null, reallocate memory */
1383 char *pszTemp;
1384
1385 nLen = strlen(pszDest) + strlen(pszSrc);
1386
1387 pszTemp = (char*)realloc(pszDest, nLen + 1);
1388 if (pszTemp) {
1389 pszDest = pszTemp;
1390 strcat(pszDest, pszSrc);
1391 pszDest[nLen] = '\0';
1392 } else {
1393 msSetError(MS_MEMERR, "Error while reallocating memory.", "msStringConcatenate()");
1394 return NULL;
1395 }
1396 }
1397
1398 return pszDest;
1399 }
1400
/*
 * Join the arrayLength strings of array into one newly allocated string,
 * inserting delimeter between consecutive elements (none after the last).
 *
 * Returns NULL when array or delimeter is NULL, when arrayLength is not
 * positive, or when the result cannot be allocated.
 */
char *msJoinStrings(char **array, int arrayLength, const char *delimeter)
{
  char *string, *cursor;
  size_t stringLength=0;
  size_t delimeterLength, elementLength;
  int i;

  if(!array || arrayLength <= 0 || !delimeter) return NULL;

  delimeterLength = strlen(delimeter);

  for(i=0; i<arrayLength; i++)
    stringLength += strlen(array[i]) + delimeterLength;
  stringLength -= delimeterLength; /* no delimiter after the last element */

  string = (char *)calloc(stringLength+1, sizeof(char));
  MS_CHECK_ALLOC(string, (stringLength+1)* sizeof(char), NULL);

  /* Append at a running position: the exact size is known, so nothing can
     be truncated (an empty delimiter used to lose the last character) and
     the result is not rescanned for every element. */
  cursor = string;
  for(i=0; i<arrayLength; i++) {
    if(i > 0) {
      memcpy(cursor, delimeter, delimeterLength);
      cursor += delimeterLength;
    }
    elementLength = strlen(array[i]);
    memcpy(cursor, array[i], elementLength);
    cursor += elementLength;
  }
  *cursor = '\0';

  return string;
}
1427
#define HASH_SIZE 16
/*
 * Return a hashed string for a given input string: the bytes of pszStr are
 * summed (modulo 256) into HASH_SIZE buckets, byte i going to bucket
 * i % HASH_SIZE, and each bucket is printed as two lowercase hexadecimal
 * digits. A NULL pszStr hashes like an empty string.
 * The caller should free the return value.
 */
char *msHashString(const char *pszStr)
{
  static const char *pszHexDigits = "0123456789abcdef";
  unsigned char sums[HASH_SIZE];
  char *pszOutBuf, *pszOut;
  int i;

  memset(sums, 0, sizeof(sums));

  if(pszStr != NULL) {
    for(i=0; pszStr[i] != '\0'; i++)
      sums[i%HASH_SIZE] += (unsigned char)(pszStr[i]);
  }

  /* two digits per bucket plus the terminating '\0' */
  pszOutBuf = (char*)msSmallMalloc(HASH_SIZE*2+1);

  pszOut = pszOutBuf;
  for(i=0; i<HASH_SIZE; i++) {
    *pszOut++ = pszHexDigits[sums[i] / 16];
    *pszOut++ = pszHexDigits[sums[i] % 16];
  }
  *pszOut = '\0';

  return pszOutBuf;
}
1453
/*
** Insert a ',' every three characters in the integer part of the number held
** in str, e.g. "1234567.89" becomes "1,234,567.89".
**
** str is resized with msSmallRealloc() when commas have to be added and the
** (possibly moved) string is returned. It is returned untouched when it
** holds more than one decimal point or when its integer part is too short to
** need a comma. A leading '-' or '+' is not counted as a digit, so "-123"
** stays as is. NULL is returned for a NULL str.
*/
char *msCommifyString(char *str)
{
  const char comma=',', decimal_point='.';
  const char *dot;
  size_t old_length, int_length, sign_length, num_digits, num_commas;
  size_t src, dst;
  int group;

  if(!str) return NULL;

  dot = strchr(str, decimal_point);
  if(dot && strchr(dot+1, decimal_point)) return str; /* more than one decimal point */

  old_length = strlen(str);
  int_length = dot ? (size_t)(dot - str) : old_length; /* everything left of the decimal point */
  sign_length = (str[0] == '-' || str[0] == '+') ? 1 : 0;

  /* a sign, when present, is always part of the integer part */
  num_digits = int_length - sign_length;
  if(num_digits < 4) return str; /* nothing to add */

  num_commas = (num_digits - 1)/3;

  str = (char *) msSmallRealloc(str, old_length+num_commas+1);

  /* make room: move the decimal part (and the terminating '\0') to its
     final position in one go */
  memmove(str+int_length+num_commas, str+int_length, old_length-int_length+1);

  /* then walk the integer part backwards, dropping a comma after every
     group of three characters */
  src = int_length;
  dst = int_length + num_commas;
  group = 0;
  while(num_commas > 0) {
    if(group == 3) {
      str[--dst] = comma;
      num_commas--; /* need one fewer now */
      group = 0;
    } else {
      str[--dst] = str[--src]; /* shift to the right */
      group++;
    }
  }
  /* once the last comma is placed the rest of the string is ok "as is" */

  return str;
}
1502
1503
/* ------------------------------------------------------------------------------- */
/*       Replace all occurrences of old with new in str.                           */
/*       It is assumed that str was dynamically created using malloc.              */
/*       Same function as msReplaceSubstring but this is case insensitive          */
/*                                                                                 */
/*       A NULL new is treated as an empty string. str is returned unchanged       */
/*       when str or old is NULL, when old is empty or when old is not found;      */
/*       otherwise the (possibly reallocated) string is returned.                  */
/* ------------------------------------------------------------------------------- */
char *msCaseReplaceSubstring(char *str, const char *old, const char *new)
{
  size_t str_len, old_len, new_len, tmp_offset;
  char *tmp_ptr;

  /*
  ** Nothing sensible can be done without a string or a pattern. An empty
  ** pattern matches at every position, which would make the loop below
  ** run (and grow str) forever.
  */
  if(str == NULL || old == NULL || *old == '\0')
    return(str);

  /*
  ** If old is not found then leave str alone
  */
  if( (tmp_ptr = (char *) strcasestr(str, old)) == NULL)
    return(str);

  if(new == NULL)
    new = "";


  /*
  ** Grab some info about incoming strings
  */
  str_len = strlen(str);
  old_len = strlen(old);
  new_len = strlen(new);

  /*
  ** Now loop until old is NOT found in the remainder of str
  */
  while( tmp_ptr != NULL ) {

    /*
    ** re-allocate memory for buf assuming 1 replacement of old with new
    ** (don't bother reallocating if old is larger than new)
    */
    if (old_len < new_len) {
      /* realloc may move the block: remember the match as an offset */
      tmp_offset = tmp_ptr - str;
      str_len = str_len - old_len + new_len;
      str = (char *)msSmallRealloc(str, (str_len + 1)); /* make new space for a copy */
      tmp_ptr = str + tmp_offset;
    }

    /*
    ** Move the trailing part of str to make some room unless old_len == new_len
    */
    if (old_len != new_len) {
      memmove(tmp_ptr+new_len, tmp_ptr+old_len, strlen(tmp_ptr)-old_len+1);
    }

    /*
    ** Now copy new over old
    */
    memcpy(tmp_ptr, new, new_len);

    /*
    ** And look for more matches in the rest of the string (the text just
    ** inserted is skipped, so it is never matched again)
    */
    tmp_ptr = (char *) strcasestr(tmp_ptr + new_len, old);
  }

  return(str);
}
1567
/*
** Value of one hexadecimal digit. Characters from 'A' upwards are folded to
** upper case by clearing bit 0x20 before being mapped to 10 and above; the
** other characters are taken as an offset from '0'. The input is not
** validated.
*/
static int msHexDigitToInt(char digit)
{
  if(digit >= 'A')
    return ((digit & 0xdf) - 'A') + 10;

  return digit - '0';
}

/*
** Converts a 2 character hexadecimal string to an integer: the first
** character is the high nibble, the second one the low nibble.
*/
int msHexToInt(char *hex)
{
  const int high = msHexDigitToInt(hex[0]);
  const int low = msHexDigitToInt(hex[1]);

  return(high * 16 + low);
}
1581
1582
/*
** Use FRIBIDI to encode the string: string is converted from encoding to
** unicode, reordered from logical to visual order and returned as a newly
** allocated UTF-8 string.
** Returns NULL, with an error set, when the encoding is not supported or
** when the reordering fails.
** The return value must be freed by the caller.
**
** NOTE(review): logical and outstring are fixed MAX_STR_LEN buffers and
** nothing here checks the input against that size - confirm callers cannot
** pass longer strings. The iconv descriptors of the FRIBIDI_NO_CHARSETS
** variant are also never closed.
*/
#ifdef USE_FRIBIDI
char *msGetFriBidiEncodedString(const char *string, const char *encoding)
{
  FriBidiChar logical[MAX_STR_LEN];
  /* in/out parameter of fribidi_log2vis(): ask for the base direction to be
     detected from the text instead of handing over an indeterminate value */
  FriBidiParType base = FRIBIDI_PAR_ON;
  size_t len;

#ifdef FRIBIDI_NO_CHARSETS
  iconv_t to_ucs4, from_ucs4;
#else
  int to_char_set_num;
  int from_char_set_num;
#endif

  len = strlen(string);

#ifdef FRIBIDI_NO_CHARSETS
  to_ucs4 = iconv_open ("WCHAR_T", encoding);
  from_ucs4 = iconv_open ("UTF-8", "WCHAR_T");
#else
  to_char_set_num = fribidi_parse_charset ((char*)encoding);
  from_char_set_num = fribidi_parse_charset ("UTF-8");
#endif

#ifdef FRIBIDI_NO_CHARSETS
  if (to_ucs4 == (iconv_t) (-1) || from_ucs4 == (iconv_t) (-1))
#else
  if (!to_char_set_num || !from_char_set_num)
#endif
  {
    msSetError(MS_IDENTERR, "Encoding not supported (%s).",
               "msGetFriBidiEncodedString()", encoding);
    return NULL;
  }

  /* Convert the input to unicode; len becomes a number of characters. */
#ifdef FRIBIDI_NO_CHARSETS
  {
    char *st = string, *ust = (char *) logical;
    int in_len = (int) len;
    len = sizeof logical;
    iconv (to_ucs4, &st, &in_len, &ust, (int *) &len);
    len = (FriBidiChar *) ust - logical;
  }
#else
  len = fribidi_charset_to_unicode (to_char_set_num, (char*)string, len, logical);
#endif

  {
    FriBidiChar *visual;
    char outstring[MAX_STR_LEN];
    FriBidiStrIndex *ltov, *vtol;
    FriBidiLevel *levels;
    FriBidiStrIndex new_len;
    fribidi_boolean log2vis;
    int i, j;

    visual = (FriBidiChar *) msSmallMalloc (sizeof (FriBidiChar) * (len + 1));
    ltov = NULL;
    vtol = NULL;
    levels = NULL;

    /* Create a bidi string. */
    log2vis = fribidi_log2vis (logical, len, &base,
                               /* output */
                               visual, ltov, vtol, levels);

    if (!log2vis) {
      msSetError(MS_IDENTERR, "Failed to create bidi string.",
                 "msGetFriBidiEncodedString()");
      free(visual); /* do not leak the visual buffer on failure */
      return NULL;
    }

    new_len = len;

    /* Convert it to utf-8 for display. */
#ifdef FRIBIDI_NO_CHARSETS
    {
      char *str = outstring, *ust = (char *) visual;
      int in_len = len * sizeof visual[0];
      new_len = sizeof outstring;
      iconv (from_ucs4, &ust, &in_len, &str, (int *) &new_len);
      *str = '\0';
      new_len = str - outstring;
    }
#else
    new_len =
      fribidi_unicode_to_charset (from_char_set_num,
                                  visual, len, outstring);

    /* scan str and compress out FRIBIDI_CHAR_FILL UTF8 characters */
    /* (the 3 byte sequence EF BB BF is skipped, the rest is moved down) */

    for (i=0, j=0; i<new_len; i++, j++) {
      if (outstring[i] == '\xef' && outstring[i+1] == '\xbb' && outstring[i+2] == '\xbf') {
        i += 3;
      }
      if (i != j) {
        outstring[j] = outstring[i];
      }
    }
    outstring[j] = '\0';

#endif

    free(visual);
    return msStrdup(outstring);
  }
}
#endif
1695
1696 /*
1697 ** Simple charset converter. Converts string from specified encoding to UTF-8.
1698 ** The return value must be freed by the caller.
1699 */
msGetEncodedString(const char * string,const char * encoding)1700 char *msGetEncodedString(const char *string, const char *encoding)
1701 {
1702 #ifdef USE_ICONV
1703 iconv_t cd = NULL;
1704 const char *inp;
1705 char *outp, *out = NULL;
1706 size_t len, bufsize, bufleft, iconv_status;
1707 assert(encoding);
1708
1709 #ifdef USE_FRIBIDI
1710 msAcquireLock(TLOCK_FRIBIDI);
1711 if(fribidi_parse_charset ((char*)encoding)) {
1712 char *ret = msGetFriBidiEncodedString(string, encoding);
1713 msReleaseLock(TLOCK_FRIBIDI);
1714 return ret;
1715 }
1716 msReleaseLock(TLOCK_FRIBIDI);
1717 #endif
1718 len = strlen(string);
1719
1720 if (len == 0 || strcasecmp(encoding, "UTF-8")==0)
1721 return msStrdup(string); /* Nothing to do: string already in UTF-8 */
1722
1723 cd = iconv_open("UTF-8", encoding);
1724 if(cd == (iconv_t)-1) {
1725 msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).",
1726 "msGetEncodedString()", encoding);
1727 return NULL;
1728 }
1729
1730 bufsize = len * 6 + 1; /* Each UTF-8 char can be up to 6 bytes */
1731 inp = string;
1732 out = (char*) malloc(bufsize);
1733 if(out == NULL) {
1734 msSetError(MS_MEMERR, NULL, "msGetEncodedString()");
1735 iconv_close(cd);
1736 return NULL;
1737 }
1738 strlcpy(out, string, bufsize);
1739 outp = out;
1740
1741 bufleft = bufsize;
1742 iconv_status = -1;
1743
1744 while (len > 0) {
1745 iconv_status = iconv(cd, (char**)&inp, &len, &outp, &bufleft);
1746 if(iconv_status == -1) {
1747 msFree(out);
1748 iconv_close(cd);
1749 return msStrdup(string);
1750 }
1751 }
1752 out[bufsize - bufleft] = '\0';
1753
1754 iconv_close(cd);
1755
1756 return out;
1757 #else
1758 if (*string == '\0' || (encoding && strcasecmp(encoding, "UTF-8")==0))
1759 return msStrdup(string); /* Nothing to do: string already in UTF-8 */
1760
1761 msSetError(MS_MISCERR, "Not implemeted since Iconv is not enabled.", "msGetEncodedString()");
1762 return NULL;
1763 #endif
1764 }
1765
1766
msConvertWideStringToUTF8(const wchar_t * string,const char * encoding)1767 char* msConvertWideStringToUTF8 (const wchar_t* string, const char* encoding)
1768 {
1769 #ifdef USE_ICONV
1770
1771 char* output = NULL;
1772 char* errormessage = NULL;
1773 iconv_t cd = NULL;
1774 size_t nStr;
1775 size_t nInSize;
1776 size_t nOutSize;
1777 size_t iconv_status = -1;
1778 size_t nBufferSize;
1779
1780 char* pszUTF8 = NULL;
1781 const wchar_t* pwszWide = NULL;
1782
1783 if (string != NULL) {
1784 nStr = wcslen (string);
1785 nBufferSize = ((nStr * 6) + 1);
1786 output = (char*) msSmallMalloc (nBufferSize);
1787
1788 if (nStr == 0) {
1789 /* return an empty 8 byte string */
1790 output[0] = '\0';
1791 return output;
1792 }
1793
1794 cd = iconv_open("UTF-8", encoding);
1795
1796 nOutSize = nBufferSize;
1797 if ((iconv_t)-1 != cd) {
1798 nInSize = sizeof (wchar_t)*nStr;
1799 pszUTF8 = output;
1800 pwszWide = string;
1801 iconv_status = iconv(cd, (char **)&pwszWide, &nInSize, &pszUTF8, &nOutSize);
1802 if ((size_t)-1 == iconv_status) {
1803 switch (errno) {
1804 case E2BIG:
1805 errormessage = "There is not sufficient room in buffer";
1806 break;
1807 case EILSEQ:
1808 errormessage = "An invalid multibyte sequence has been encountered in the input";
1809 break;
1810 case EINVAL:
1811 errormessage = "An incomplete multibyte sequence has been encountered in the input";
1812 break;
1813 default:
1814 errormessage = "Unknown";
1815 break;
1816 }
1817 msSetError(MS_MISCERR, "Unable to convert string in encoding '%s' to UTF8 %s",
1818 "msConvertWideStringToUTF8()",
1819 encoding,errormessage);
1820 iconv_close(cd);
1821 msFree(output);
1822 return NULL;
1823 }
1824 iconv_close(cd);
1825 } else {
1826 msSetError(MS_MISCERR, "Encoding not supported by libiconv (%s).",
1827 "msConvertWideStringToUTF8()",
1828 encoding);
1829 msFree(output);
1830 return NULL;
1831 }
1832
1833 } else {
1834 /* we were given a NULL wide string, nothing we can do here */
1835 return NULL;
1836 }
1837
1838 /* NULL-terminate the output string */
1839 output[nBufferSize - nOutSize] = '\0';
1840 return output;
1841 #else
1842 msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", "msConvertWideStringToUTF8()");
1843 return NULL;
1844 #endif
1845 }
1846
/*
** Returns the next glyph in string and advances *in_ptr to the next
** character.
**
** If out_string is not NULL then the character (bytes) is copied to this
** buffer and null-terminated. out_string must be a pre-allocated buffer of
** at least 11 bytes.
**
** The function returns the number of bytes in this glyph, or -1 when
** *in_ptr points to the end of the string.
**
** This function treats 3 types of glyph encodings:
*   - as an html entity, for example &#123; , &#x1af; , or &eacute;
*   - as an utf8 encoded character
*   - if utf8 decoding fails, as a raw character
*
** This function mimics the character decoding function used in gdft.c of
* libGD. It is necessary to have the same behaviour, as input strings must be
* split into the same glyphs as what gd does.
**
** In UTF-8, the number of leading 1 bits in the first byte specifies the
** number of bytes in the entire sequence.
** Source: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
**
** U-00000000 U-0000007F: 0xxxxxxx
** U-00000080 U-000007FF: 110xxxxx 10xxxxxx
** U-00000800 U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
** U-00010000 U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
** U-00200000 U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
** U-04000000 U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
int msGetNextGlyph(const char **in_ptr, char *out_string)
{
  const char *glyph = *in_ptr;
  const unsigned char lead = (unsigned char) glyph[0];
  unsigned int unicode;
  int nbytes, expected, i;

  if (lead == 0)
    return -1;  /* Empty string */

  /* html entities take precedence over any other decoding */
  nbytes = msGetUnicodeEntity(glyph, &unicode);

  if (nbytes <= 0) {
    /*
     * Sequence length announced by the lead byte. Bytes below 0xC0 (plain
     * ASCII and naked trail bytes 0x80 to 0xBF) and the bytes 0xFE and 0xFF
     * stand for themselves.
     */
    if (lead < 0xC0)
      expected = 1;
    else if (lead < 0xE0)
      expected = 2; /* 110xxxxx 10xxxxxx */
    else if (lead < 0xF0)
      expected = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
    else if (lead < 0xF8)
      expected = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    else if (lead < 0xFC)
      expected = 5; /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    else if (lead < 0xFE)
      expected = 6; /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    else
      expected = 1;

    /*
     * Every following byte must be a 10xxxxxx trail byte. The scan stops at
     * the first byte that is not one, which includes the terminating '\0',
     * so the string is never read past its end.
     */
    for (i = 1; i < expected; i++) {
      if ((glyph[i] & 0xC0) != 0x80)
        break;
    }

    /* malformed sequence: fall back to the lead byte taken as a raw char */
    nbytes = (i == expected) ? expected : 1;
  }

  if (out_string) {
    for (i = 0; i < nbytes; i++)
      out_string[i] = glyph[i];
    out_string[nbytes] = '\0';
  }

  *in_ptr += nbytes;
  return nbytes;
}
1980
1981 /*
1982 ** Returns the number of glyphs in string
1983 */
msGetNumGlyphs(const char * in_ptr)1984 int msGetNumGlyphs(const char *in_ptr)
1985 {
1986 int numchars=0;
1987
1988 while( msGetNextGlyph(&in_ptr, NULL) != -1 )
1989 numchars++;
1990
1991 return numchars;
1992 }
1993
cmp_entities(const void * e1,const void * e2)1994 static int cmp_entities(const void *e1, const void *e2)
1995 {
1996 struct mapentities_s *en1 = (struct mapentities_s *) e1;
1997 struct mapentities_s *en2 = (struct mapentities_s *) e2;
1998 return strcmp(en1->name, en2->name);
1999 }
2000 /*
2001 * this function tests if the string pointed by inptr represents
2002 * an HTML entity, in decimal form ( e.g. Å), in hexadecimal
2003 * form ( e.g. 水 ), or from html 4.0 spec ( e.g. é )
2004 * - returns returns 0 if the string doesn't represent such an entity.
2005 * - if the string does start with such entity,it returns the number of
2006 * bytes occupied by said entity, and stores the unicode value in *unicode
2007 */
msGetUnicodeEntity(const char * inptr,unsigned int * unicode)2008 int msGetUnicodeEntity(const char *inptr, unsigned int *unicode)
2009 {
2010 unsigned char *in = (unsigned char*)inptr;
2011 int l,val=0;
2012 if(*in=='&') {
2013 in++;
2014 if(*in=='#') {
2015 in++;
2016 if(*in=='x'||*in=='X') {
2017 in++;
2018 for(l=3; l<8; l++) {
2019 char byte;
2020 if(*in>='0'&&*in<='9')
2021 byte = *in - '0';
2022 else if(*in>='a'&&*in<='f')
2023 byte = *in - 'a' + 10;
2024 else if(*in>='A'&&*in<='F')
2025 byte = *in - 'A' + 10;
2026 else
2027 break;
2028 in++;
2029 val = (val * 16) + byte;
2030 }
2031 if(*in==';' && l>3 ) {
2032 *unicode=val;
2033 return ++l;
2034 }
2035 } else {
2036 for(l=2; l<8; l++) {
2037 if(*in>='0'&&*in<='9') {
2038 val = val*10+*in-'0';
2039 in++;
2040 } else
2041 break;
2042 }
2043 if(*in==';' && l>2 ) {
2044 *unicode=val;
2045 return ++l;
2046 }
2047 }
2048 } else {
2049 char entity_name_buf[MAP_ENTITY_NAME_LENGTH_MAX+1];
2050 char *p;
2051 struct mapentities_s key, *res;
2052 key.name = p = entity_name_buf;
2053 for (l = 1; l <= MAP_ENTITY_NAME_LENGTH_MAX+1; l++) {
2054 if (*in == '\0') /*end of string before possible entity: return*/
2055 break;
2056 if (*in == ';') { /*possible end of entity: do a lookup*/
2057 *p++ = '\0';
2058 res = bsearch(&key, mapentities, MAP_NR_OF_ENTITIES,
2059 sizeof(mapentities[0]), *cmp_entities);
2060 if (res) {
2061 *unicode = res->value;
2062 return ++l;
2063 }
2064 break; /*the string was of the form of an entity but didn't correspond to an existing one: return*/
2065 }
2066 *p++ = *in;
2067 in++;
2068 }
2069 }
2070 }
2071 return 0;
2072 }
2073
2074 /**
2075 * msStringIsInteger()
2076 *
2077 * determines whether a given string is an integer
2078 *
2079 * @param string the string to be tested
2080 *
2081 * @return MS_SUCCESS or MS_FAILURE
2082 */
2083
msStringIsInteger(const char * string)2084 int msStringIsInteger(const char *string)
2085 {
2086 int length, i;
2087
2088 length = strlen(string);
2089
2090 if (length == 0)
2091 return MS_FAILURE;
2092
2093 for(i=0; i<length; i++) {
2094 if (!isdigit(string[i]))
2095 return MS_FAILURE;
2096 }
2097
2098 return MS_SUCCESS;
2099 }
2100
/************************************************************************/
/*                             msStrdup()                               */
/************************************************************************/

/* Safe version of strdup(). This function is taken from gdal/cpl.
   A NULL pszString is duplicated as an empty string, and the process is
   terminated with exit(1) when the copy cannot be allocated, so the
   function never returns NULL. */

char *msStrdup(const char * pszString)
{
  size_t nStringLength;
  char *pszReturn;

  if (pszString == NULL)
    pszString = "";

  nStringLength = strlen(pszString) + 1; /* null terminated byte */
  pszReturn = (char *) malloc(nStringLength);

  if (pszReturn == NULL) {
    /* report this function and the size that was actually requested */
    fprintf(stderr, "msStrdup(): Out of memory allocating %ld bytes.\n",
            (long)nStringLength);
    exit(1);
  }

  memcpy(pszReturn, pszString, nStringLength);

  return pszReturn;
}
2128
2129
/************************************************************************/
/*                           msStringEscape()                           */
/************************************************************************/

/* Checks if a string contains single or double quotes and escapes them
   with a backslash.
   A NULL or empty input yields a newly allocated empty string. When there
   is nothing to escape the input pointer itself is returned.
   NOTE: the user must free the returned char* if it is different than the
   one passed in */

char* msStringEscape( const char * pszString )
{
  const char *src;
  char *escaped, *dst;
  int nquotes = 0;

  if (pszString == NULL || *pszString == '\0')
    return msStrdup("");

  for (src = pszString; *src != '\0'; src++) {
    if (*src == '\"' || *src == '\'')
      nquotes++;
  }

  if (nquotes == 0)
    return (char*)pszString;

  /* one extra byte per quote for the backslash in front of it */
  escaped = (char*)msSmallMalloc(strlen(pszString)+nquotes+1);

  dst = escaped;
  for (src = pszString; *src != '\0'; src++) {
    if (*src == '\"' || *src == '\'')
      *dst++ = '\\';
    *dst++ = *src;
  }
  *dst = '\0';

  return escaped;
}
2165
2166 /************************************************************************/
2167 /* msStringInArray() */
2168 /************************************************************************/
2169
/* Check if a string is in an array (case-insensitive comparison) */
msStringInArray(const char * pszString,char ** array,int numelements)2171 int msStringInArray( const char * pszString, char **array, int numelements)
2172 {
2173 int i;
2174 for (i=0; i<numelements; ++i) {
2175 if (strcasecmp(pszString, array[i])==0)
2176 return MS_TRUE;
2177 }
2178 return MS_FALSE;
2179 }
2180
/*
 * Convert the attribute values of a shape, in place, from the layer's
 * declared encoding to UTF-8.
 *
 * layer: supplies the source encoding through layer->encoding. A NULL, empty
 *        or "UTF-8" (compared case-insensitively) encoding means there is
 *        nothing to do.
 * shape: each non-empty string in values[0 .. numvalues-1] is replaced by a
 *        newly allocated UTF-8 string and the previous string is freed.
 *        Values that are NULL or empty, and values that iconv cannot
 *        convert, are left exactly as they were.
 *
 * Returns MS_SUCCESS, or MS_FAILURE after calling msSetError() when the
 * encoding cannot be opened by iconv, or when a non-UTF-8 encoding is
 * requested in a build without USE_ICONV.
 */
int msLayerEncodeShapeAttributes( layerObj *layer, shapeObj *shape) {

#ifdef USE_ICONV
  iconv_t cd;
  int i;

  if( !layer->encoding || !*layer->encoding || !strcasecmp(layer->encoding, "UTF-8"))
    return MS_SUCCESS;

  cd = iconv_open("UTF-8", layer->encoding);
  if(cd == (iconv_t)-1) {
    msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).",
               "msLayerEncodeShapeAttributes()", layer->encoding);
    return MS_FAILURE;
  }

  for(i=0; i<shape->numvalues; i++) {
    const char *inp;
    char *outp, *out;
    size_t len, bufsize, bufleft;
    int failedIconv = FALSE;

    if(!shape->values[i] || (len = strlen(shape->values[i]))==0) {
      continue; /* Nothing to do */
    }

    bufsize = len * 6 + 1; /* Each UTF-8 char can be up to 6 bytes */
    inp = shape->values[i];
    out = (char*) msSmallMalloc(bufsize);
    outp = out;

    /* keep the last byte out of iconv's reach so the terminator always fits */
    bufleft = bufsize - 1;

    while (len > 0) {
      const size_t iconv_status = iconv(cd, (char**)&inp, &len, &outp, &bufleft);
      if(iconv_status == (size_t)(-1)) {
        failedIconv = TRUE;
        break;
      }
    }
    if( failedIconv ) {
      /* put the descriptor back into its initial state so that a broken
         value cannot affect the conversion of the following ones */
      iconv(cd, NULL, NULL, NULL, NULL);
      msFree(out);
      continue; /* silently ignore failed conversions */
    }

    /* outp points just past the last converted byte */
    *outp = '\0';
    msFree(shape->values[i]);
    shape->values[i] = out;
  }
  iconv_close(cd);

  return MS_SUCCESS;
#else
  if( !layer->encoding || !*layer->encoding || !strcasecmp(layer->encoding, "UTF-8"))
    return MS_SUCCESS;
  msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", "msLayerEncodeShapeAttributes()");
  return MS_FAILURE;
#endif
}
2240
2241 /************************************************************************/
2242 /* msStringBuffer */
2243 /************************************************************************/
2244
/* Growable, NUL-terminated string accumulator filled through
   msStringBufferAppend(). */
struct msStringBuffer
{
  size_t alloc_size; /* number of bytes currently allocated for str */
  size_t length;     /* length of the text held in str, terminator excluded */
  char *str;         /* heap buffer with the accumulated text; zeroed by
                        msStringBufferAlloc(), so it is only allocated by the
                        first append */
};
2251
2252 /************************************************************************/
2253 /* msStringBufferAlloc() */
2254 /************************************************************************/
2255
msStringBufferAlloc(void)2256 msStringBuffer* msStringBufferAlloc(void)
2257 {
2258 return (msStringBuffer*)msSmallCalloc(sizeof(msStringBuffer), 1);
2259 }
2260
2261 /************************************************************************/
2262 /* msStringBufferFree() */
2263 /************************************************************************/
2264
/*
 * Destroy a string buffer together with the text it holds.
 *
 * sb: buffer to release; when it is NULL only msFree(NULL) is called.
 */
void msStringBufferFree(msStringBuffer* sb)
{
  if( sb != NULL ) {
    /* the text is owned by the buffer, so it goes first */
    msFree(sb->str);
  }
  msFree(sb);
}
2271
2272 /************************************************************************/
2273 /* msStringBufferGetString() */
2274 /************************************************************************/
2275
/*
 * Give read-only access to the text accumulated so far.
 *
 * The returned pointer is the buffer's own storage: it still belongs to sb
 * and msStringBufferAppend() may reallocate it.
 */
const char* msStringBufferGetString(msStringBuffer* sb)
{
  const char *contents = sb->str;

  return contents;
}
2280
2281 /************************************************************************/
2282 /* msStringBufferReleaseStringAndFree() */
2283 /************************************************************************/
2284
/*
 * Take ownership of the accumulated text and destroy the buffer object.
 *
 * Returns the string previously held by sb; the caller is now responsible
 * for freeing it. sb must not be used after this call.
 */
char* msStringBufferReleaseStringAndFree(msStringBuffer* sb)
{
  char *detached = sb->str;

  /* unhook the text first so that msStringBufferFree() leaves it alone */
  sb->str = NULL;
  sb->length = 0;
  sb->alloc_size = 0;
  msStringBufferFree(sb);

  return detached;
}
2294
2295 /************************************************************************/
2296 /* msStringBufferAppend() */
2297 /************************************************************************/
2298
/*
 * Append a NUL-terminated string to the buffer, growing its storage on
 * demand.
 *
 * When more room is needed the storage grows to the larger of 4/3 of its
 * current size and the exact size required.
 *
 * Returns MS_SUCCESS, or MS_FAILURE (after msSetError()) when realloc()
 * fails; in that case the buffer is left unchanged.
 */
int msStringBufferAppend(msStringBuffer* sb, const char* pszAppendedString)
{
  const size_t addedLen = strlen(pszAppendedString);
  const size_t required = sb->length + addedLen + 1; /* terminator included */

  if( required > sb->alloc_size )
  {
    const size_t grown = sb->alloc_size + sb->alloc_size / 3;
    const size_t newSize = (grown > required) ? grown : required;
    char *resized = (char*) realloc(sb->str, newSize);

    if( resized == NULL ) {
      msSetError(MS_MEMERR, "Not enough memory", "msStringBufferAppend()");
      return MS_FAILURE;
    }
    sb->str = resized;
    sb->alloc_size = newSize;
  }

  /* copy the terminator as well so the text stays a valid C string */
  memcpy(sb->str + sb->length, pszAppendedString, addedLen + 1);
  sb->length += addedLen;

  return MS_SUCCESS;
}
2319