1 //C- -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001 AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software. Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C- ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE." Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License. This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55
56 // From: Leon Bottou, 1/31/2002
57 // This file has very little to do with my initial implementation.
58 // It has been practically rewritten by Lizardtech for i18n changes.
59 // My original implementation was very small in comparison
60 // <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
61 // In my opinion, the duplication of the string classes is a failed
62 // attempt to use the type system to enforce coding policies.
63 // This could be fixed. But there are better things to do in djvulibre.
64
65 #ifdef HAVE_CONFIG_H
66 # include "config.h"
67 #endif
68 #if NEED_GNUG_PRAGMAS
69 # pragma implementation
70 #endif
71
72 #include "GString.h"
73 #include "GThreads.h"
74 #include "debug.h"
75
76 #include <stddef.h>
77 #include <stdlib.h>
78 #include <stdio.h>
79 #include <string.h>
80 #if HAS_WCHAR
81 # include <locale.h>
82 # if !defined(AUTOCONF) || HAVE_WCHAR_H
83 # include <wchar.h>
84 # endif
85 # if HAS_WCTYPE
86 # include <wctype.h>
87 # endif
88 #endif
89 #include <ctype.h>
90
91 #ifndef LC_NUMERIC //MingW
92 # undef DO_CHANGELOCALE
93 # define LC_NUMERIC 0
94 #endif
95 #ifndef DO_CHANGELOCALE
96 # define DO_CHANGELOCALE 0
97 #endif
98
99
100 #ifdef HAVE_NAMESPACES
101 namespace DJVU {
102 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
103 }
104 #endif
105 #endif
106
107
~GBaseString()108 GBaseString::~GBaseString() {}
~GNativeString()109 GNativeString::~GNativeString() {}
~GUTF8String()110 GUTF8String::~GUTF8String() {}
111
112 #if !HAS_MBSTATE && HAS_WCHAR
113 // Under some systems, wctomb() and mbtowc() are not thread
114 // safe. In those cases, wcrtomb and mbrtowc are preferred.
115 // For Solaris, wctomb() and mbtowc() are thread safe, and
116 // wcrtomb() and mbrtowc() don't exist.
117
118 #define wcrtomb MYwcrtomb
119 #define mbrtowc MYmbrtowc
120 #define mbrlen MYmbrlen
121
122 static inline int
wcrtomb(char * bytes,wchar_t w,mbstate_t *)123 wcrtomb(char *bytes,wchar_t w,mbstate_t *)
124 {
125 return wctomb(bytes,w);
126 }
127
128 static inline int
mbrtowc(wchar_t * w,const char * source,size_t n,mbstate_t *)129 mbrtowc(wchar_t *w,const char *source, size_t n, mbstate_t *)
130 {
131 return mbtowc(w,source,n);
132 }
133
134 static inline size_t
mbrlen(const char * s,size_t n,mbstate_t *)135 mbrlen(const char *s, size_t n, mbstate_t *)
136 {
137 return mblen(s,n);
138 }
#endif // !HAS_MBSTATE && HAS_WCHAR
140
141
142 GP<GStringRep>
upcase(void) const143 GStringRep::upcase(void) const
144 { return tocase(giswupper,gtowupper); }
145
146 GP<GStringRep>
downcase(void) const147 GStringRep::downcase(void) const
148 { return tocase(giswlower,gtowlower); }
149
150 GP<GStringRep>
create(const unsigned int sz)151 GStringRep::UTF8::create(const unsigned int sz)
152 {
153 return GStringRep::create(sz,(GStringRep::UTF8 *)0);
154 }
155
156 GP<GStringRep>
create(const char * s)157 GStringRep::UTF8::create(const char *s)
158 {
159 GStringRep::UTF8 dummy;
160 return dummy.strdup(s);
161 }
162
163 GP<GStringRep>
create(const GP<GStringRep> & s1,const GP<GStringRep> & s2)164 GStringRep::UTF8::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
165 {
166 GStringRep::UTF8 dummy;
167 return dummy.concat(s1,s2);
168 }
169
170 GP<GStringRep>
create(const GP<GStringRep> & s1,const char * s2)171 GStringRep::UTF8::create( const GP<GStringRep> &s1,const char *s2)
172 {
173 GStringRep::UTF8 dummy;
174 return dummy.concat(s1,s2);
175 }
176
177 GP<GStringRep>
create(const char * s1,const GP<GStringRep> & s2)178 GStringRep::UTF8::create( const char *s1, const GP<GStringRep> &s2)
179 {
180 GStringRep::UTF8 dummy;
181 return dummy.concat(s1,s2);
182 }
183
184 GP<GStringRep>
create(const char * s1,const char * s2)185 GStringRep::UTF8::create( const char *s1,const char *s2)
186 {
187 GStringRep::UTF8 dummy;
188 return dummy.concat(s1,s2);
189 }
190
191 GP<GStringRep>
create(const char * s,const int start,const int length)192 GStringRep::UTF8::create(const char *s,const int start,const int length)
193 {
194 GStringRep::UTF8 dummy;
195 return dummy.substr(s,start,length);
196 }
197
198 GP<GStringRep>
create(const uint16_t * s,const int start,const int length)199 GStringRep::UTF8::create(
200 const uint16_t *s,const int start,const int length)
201 {
202 GStringRep::UTF8 dummy;
203 return dummy.substr(s,start,length);
204 }
205
206 GP<GStringRep>
create(const uint32_t * s,const int start,const int length)207 GStringRep::UTF8::create(
208 const uint32_t *s,const int start,const int length)
209 {
210 GStringRep::UTF8 dummy;
211 return dummy.substr(s,start,length);
212 }
213
214 GP<GStringRep>
blank(const unsigned int sz) const215 GStringRep::UTF8::blank(const unsigned int sz) const
216 {
217 return GStringRep::create(sz,(GStringRep::UTF8 *)0);
218 }
219
220 bool
isUTF8(void) const221 GStringRep::UTF8::isUTF8(void) const
222 {
223 return true;
224 }
225
226 GP<GStringRep>
toThis(const GP<GStringRep> & rep,const GP<GStringRep> &) const227 GStringRep::UTF8::toThis(
228 const GP<GStringRep> &rep,const GP<GStringRep> &) const
229 {
230 return rep?(rep->toUTF8(true)):rep;
231 }
232
233 GP<GStringRep>
create(const char fmt[],va_list & args)234 GStringRep::UTF8::create(const char fmt[],va_list& args)
235 {
236 const GP<GStringRep> s(create(fmt));
237 return (s?(s->vformat(args)):s);
238 }
239
240 #if !HAS_WCHAR
241
242 #define NATIVE_CREATE(x) UTF8::create( x );
243
244 #ifdef LC_ALL
245 #undef LC_ALL
246 #endif
247 #define LC_ALL 0
248
249 class GStringRep::ChangeLocale
250 {
251 public:
ChangeLocale(const int,const char *)252 ChangeLocale(const int,const char *) {}
~ChangeLocale()253 ~ChangeLocale() {};
254 };
255
256 GP<GStringRep>
NativeToUTF8(const char * s)257 GStringRep::NativeToUTF8( const char *s )
258 {
259 return GStringRep::UTF8::create(s);
260 }
261
262 #else
263
264 #define NATIVE_CREATE(x) Native::create( x );
265
266 // The declaration and implementation of GStringRep::ChangeLocale
267 // Not used in WinCE
268
269 class GStringRep::ChangeLocale
270 {
271 public:
272 ChangeLocale(const int category,const char locale[]);
273 ~ChangeLocale();
274 private:
275 GUTF8String locale;
276 #if DO_CHANGELOCALE
277 int category;
278 #endif
279 };
280
281 class GStringRep::Native : public GStringRep
282 {
283 public:
284 // default constructor
285 Native(void);
286 // virtual destructor
287 virtual ~Native();
288
289 // Other virtual methods.
290 // Create an empty string.
291 virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
292 // Append a string.
293 virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
294 // Test if Native.
295 virtual bool isNative(void) const;
296 // Convert to Native.
297 virtual GP<GStringRep> toNative(
298 const EscapeMode escape=UNKNOWN_ESCAPED) const;
299 // Convert to UTF8.
300 virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
301 // Convert to UTF8.
302 virtual GP<GStringRep> toThis(
303 const GP<GStringRep> &rep,const GP<GStringRep> &) const;
304 // Compare with #s2#.
305 virtual int cmp(const GP<GStringRep> &s2, const int len=(-1)) const;
306
307 // Convert strings to numbers.
308 virtual int toInt(void) const;
309 virtual long toLong(
310 const int pos, int &endpos, const int base=10) const;
311 virtual unsigned long toULong(
312 const int pos, int &endpos, const int base=10) const;
313 virtual double toDouble(
314 const int pos, int &endpos) const;
315
316 // Create an empty string
317 static GP<GStringRep> create(const unsigned int sz = 0);
318
319 // Create a strdup string.
320 static GP<GStringRep> create(const char *s);
321
322 // Creates by appending to the current string
323
324 // Creates with a concat operation.
325 static GP<GStringRep> create(
326 const GP<GStringRep> &s1,const GP<GStringRep> &s2);
327 static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
328 static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
329 static GP<GStringRep> create(const char *s1,const char *s2);
330
331 // Create with a strdup and substr operation.
332 static GP<GStringRep> create(
333 const char *s,const int start,const int length=(-1));
334 static GP<GStringRep> create(
335 const uint16_t *s,const int start,const int length=(-1));
336 static GP<GStringRep> create(
337 const uint32_t *s,const int start,const int length=(-1));
338
339 // Create with an sprintf()
340 static GP<GStringRep> create_format(const char fmt[],...);
341 static GP<GStringRep> create(const char fmt[],va_list &args);
342
343 virtual unsigned char *UCS4toString(
344 const uint32_t w,unsigned char *ptr, mbstate_t *ps=0) const;
345
346 // Tests if a string is legally encoded in the current character set.
347 virtual bool is_valid(void) const;
348
349 virtual int ncopy(wchar_t * const buf, const int buflen) const;
350
351 friend class GBaseString;
352 protected:
353 // Return the next character and increment the source pointer.
354 virtual uint32_t getValidUCS4(const char *&source) const;
355 };
356
357 GP<GStringRep>
create(const unsigned int sz)358 GStringRep::Native::create(const unsigned int sz)
359 {
360 return GStringRep::create(sz,(GStringRep::Native *)0);
361 }
362
363 // Create a strdup string.
364 GP<GStringRep>
create(const char * s)365 GStringRep::Native::create(const char *s)
366 {
367 GStringRep::Native dummy;
368 return dummy.strdup(s);
369 }
370
371 GP<GStringRep>
create(const GP<GStringRep> & s1,const GP<GStringRep> & s2)372 GStringRep::Native::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
373 {
374 GStringRep::Native dummy;
375 return dummy.concat(s1,s2);
376 }
377
378 GP<GStringRep>
create(const GP<GStringRep> & s1,const char * s2)379 GStringRep::Native::create( const GP<GStringRep> &s1,const char *s2)
380 {
381 GStringRep::Native dummy;
382 return dummy.concat(s1,s2);
383 }
384
385 GP<GStringRep>
create(const char * s1,const GP<GStringRep> & s2)386 GStringRep::Native::create( const char *s1, const GP<GStringRep> &s2)
387 {
388 GStringRep::Native dummy;
389 return dummy.concat(s1,s2);
390 }
391
392 GP<GStringRep>
create(const char * s1,const char * s2)393 GStringRep::Native::create(const char *s1,const char *s2)
394 {
395 GStringRep::Native dummy;
396 return dummy.concat(s1,s2);
397 }
398
399 GP<GStringRep>
create(const char * s,const int start,const int length)400 GStringRep::Native::create(
401 const char *s,const int start,const int length)
402 {
403 GStringRep::Native dummy;
404 return dummy.substr(s,start,length);
405 }
406
407 GP<GStringRep>
create(const uint16_t * s,const int start,const int length)408 GStringRep::Native::create(
409 const uint16_t *s,const int start,const int length)
410 {
411 GStringRep::Native dummy;
412 return dummy.substr(s,start,length);
413 }
414
415 GP<GStringRep>
create(const uint32_t * s,const int start,const int length)416 GStringRep::Native::create(
417 const uint32_t *s,const int start,const int length)
418 {
419 GStringRep::Native dummy;
420 return dummy.substr(s,start,length);
421 }
422
423 GP<GStringRep>
blank(const unsigned int sz) const424 GStringRep::Native::blank(const unsigned int sz) const
425 {
426 return GStringRep::create(sz,(GStringRep::Native *)0);
427 }
428
429 bool
isNative(void) const430 GStringRep::Native::isNative(void) const
431 {
432 return true;
433 }
434
435 GP<GStringRep>
toThis(const GP<GStringRep> & rep,const GP<GStringRep> &) const436 GStringRep::Native::toThis(
437 const GP<GStringRep> &rep,const GP<GStringRep> &) const
438 {
439 return rep?(rep->toNative(NOT_ESCAPED)):rep;
440 }
441
442 GP<GStringRep>
create(const char fmt[],va_list & args)443 GStringRep::Native::create(const char fmt[],va_list &args)
444 {
445 const GP<GStringRep> s(create(fmt));
446 return (s?(s->vformat(args)):s);
447 }
448
449 int
ncopy(wchar_t * const buf,const int buflen) const450 GStringRep::Native::ncopy(
451 wchar_t * const buf, const int buflen ) const
452 {
453 return toUTF8()->ncopy(buf,buflen);
454 }
455
ChangeLocale(const int xcategory,const char xlocale[])456 GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] )
457 #if DO_CHANGELOCALE
458 : category(xcategory)
459 #endif
460 {
461 #if DO_CHANGELOCALE
462 // This is disabled under UNIX because
463 // it does not play nice with MT.
464 if(xlocale)
465 {
466 locale=setlocale(xcategory,0);
467 if(locale.length() &&(locale!=xlocale))
468 {
469 if(locale == setlocale(category,xlocale))
470 {
471 locale.empty();
472 }
473 }
474 else
475 {
476 locale.empty();
477 }
478 }
479 #endif
480 }
481
~ChangeLocale()482 GStringRep::ChangeLocale::~ChangeLocale()
483 {
484 #if DO_CHANGELOCALE
485 if(locale.length())
486 {
487 setlocale(category,(const char *)locale);
488 }
489 #endif
490 }
491
492 GNativeString &
format(const char fmt[],...)493 GNativeString::format(const char fmt[], ... )
494 {
495 va_list args;
496 va_start(args, fmt);
497 return init(GStringRep::Native::create(fmt,args));
498 }
499
500 // Gather the native implementations here. Not used in WinCE.
501
Native(void)502 GStringRep::Native::Native(void) {}
~Native()503 GStringRep::Native::~Native() {}
504
505 GP<GStringRep>
append(const GP<GStringRep> & s2) const506 GStringRep::Native::append(const GP<GStringRep> &s2) const
507 {
508 GP<GStringRep> retval;
509 if(s2)
510 {
511 if(s2->isUTF8())
512 {
513 G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") );
514 }
515 retval=concat(data,s2->data);
516 }else
517 {
518 retval=const_cast<GStringRep::Native *>(this);
519 }
520 return retval;
521 }
522
523 GP<GStringRep>
create_format(const char fmt[],...)524 GStringRep::Native::create_format(const char fmt[],...)
525 {
526 va_list args;
527 va_start(args, fmt);
528 return create(fmt,args);
529 }
530
531 unsigned char *
UCS4toString(const uint32_t w0,unsigned char * ptr,mbstate_t * ps) const532 GStringRep::Native::UCS4toString(
533 const uint32_t w0,unsigned char *ptr, mbstate_t *ps) const
534 {
535 return UCS4toNative(w0,ptr,ps);
536 }
537
538 // Convert a UCS4 to a multibyte string in the value bytes.
539 // The data pointed to by ptr should be long enough to contain
540 // the results with a nill termination. (Normally 7 characters
541 // is enough.)
542 unsigned char *
UCS4toNative(const uint32_t w0,unsigned char * ptr,mbstate_t * ps)543 GStringRep::UCS4toNative(const uint32_t w0,unsigned char *ptr, mbstate_t *ps)
544 {
545 uint16_t w1;
546 uint16_t w2=1;
547 for(int count=(sizeof(wchar_t)==sizeof(w1))
548 ? UCS4toUTF16(w0,w1,w2) : 1;
549 count;
550 --count,w1=w2)
551 {
552 // wchar_t can be either UCS4 or UCS2
553 const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
554 int i=wcrtomb((char *)ptr,w,ps);
555 if(i<0)
556 break;
557 ptr[i]=0;
558 ptr += i;
559 }
560 ptr[0]=0;
561 return ptr;
562 }
563
564 GP<GStringRep>
toNative(const EscapeMode escape) const565 GStringRep::Native::toNative(const EscapeMode escape) const
566 {
567 if(escape == UNKNOWN_ESCAPED)
568 G_THROW( ERR_MSG("GStringRep.NativeToNative") );
569 return const_cast<GStringRep::Native *>(this);
570 }
571
572 GP<GStringRep>
toUTF8(const bool) const573 GStringRep::Native::toUTF8(const bool) const
574 {
575 unsigned char *buf;
576 GPBuffer<unsigned char> gbuf(buf,size*6+1);
577 buf[0]=0;
578 if(data && size)
579 {
580 size_t n=size;
581 const char *source=data;
582 mbstate_t ps;
583 unsigned char *ptr=buf;
584 //(void)mbrlen(source, n, &ps);
585 memset(&ps,0,sizeof(mbstate_t));
586 int i=0;
587 if(sizeof(wchar_t) == sizeof(uint32_t))
588 {
589 wchar_t w = 0;
590 for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i)
591 {
592 ptr=UCS4toUTF8((uint32_t)w,ptr);
593 }
594 }
595 else
596 {
597 wchar_t w = 0;
598 for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i)
599 {
600 uint16_t s[2];
601 s[0]=w;
602 uint32_t w0;
603 if(UTF16toUCS4(w0,s,s+1)<=0)
604 {
605 source+=i;
606 n-=i;
607 if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0))
608 {
609 s[1]=w;
610 if(UTF16toUCS4(w0,s,s+2)<=0)
611 {
612 i=(-1);
613 break;
614 }
615 }
616 else
617 {
618 i=(-1);
619 break;
620 }
621 }
622 ptr=UCS4toUTF8(w0,ptr);
623 }
624 }
625 if(i<0)
626 {
627 gbuf.resize(0);
628 }
629 else
630 {
631 ptr[0]=0;
632 }
633 }
634 return GStringRep::UTF8::create((const char *)buf);
635 }
636
637 GNativeString
UTF8ToNative(const bool currentlocale,const EscapeMode escape) const638 GBaseString::UTF8ToNative(
639 const bool currentlocale,const EscapeMode escape) const
640 {
641 const char *source=(*this);
642 GP<GStringRep> retval;
643 if(source && source[0])
644 {
645 #if DO_CHANGELOCALE
646 GUTF8String lc_ctype(setlocale(LC_CTYPE,0));
647 bool repeat;
648 for(repeat=!currentlocale;;repeat=false)
649 {
650 #endif
651 retval=(*this)->toNative((GStringRep::EscapeMode)escape);
652 #if DO_CHANGELOCALE
653 if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
654 break;
655 }
656 if(!repeat)
657 setlocale(LC_CTYPE,(const char *)lc_ctype);
658 #endif
659 }
660 return GNativeString(retval);
661 }
662
663 /*MBCS*/
664 GNativeString
getUTF82Native(EscapeMode escape) const665 GBaseString::getUTF82Native( EscapeMode escape ) const
666 { //MBCS cvt
667 GNativeString retval;
668
669 // We don't want to convert this if it
670 // already is known to be native...
671 // if (isNative()) return *this;
672
673 const size_t slen=length()+1;
674 if(slen>1)
675 {
676 retval=UTF8ToNative(false,escape) ;
677 if(!retval.length())
678 {
679 retval=(const char*)*this;
680 }
681 }
682 return retval;
683 }
684
685 GUTF8String
NativeToUTF8(void) const686 GBaseString::NativeToUTF8(void) const
687 {
688 GP<GStringRep> retval;
689 if(length())
690 {
691 const char *source=(*this);
692 #if DO_CHANGELOCALE
693 GUTF8String lc_ctype=setlocale(LC_CTYPE,0);
694 bool repeat;
695 for(repeat=true;;repeat=false)
696 {
697 #endif
698 if( (retval=GStringRep::NativeToUTF8(source)) )
699 if(GStringRep::cmp(retval->toNative(),source))
700 retval=GStringRep::UTF8::create((unsigned int)0);
701 #if DO_CHANGELOCALE
702 if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
703 break;
704 }
705 if(!repeat)
706 setlocale(LC_CTYPE,(const char *)lc_ctype);
707 #endif
708 }
709 return GUTF8String(retval);
710 }
711
712 GUTF8String
getNative2UTF8(void) const713 GBaseString::getNative2UTF8(void) const
714 { //MBCS cvt
715
716 // We don't want to do a transform this
717 // if we already are in the given type.
718 // if (isUTF8()) return *this;
719
720 const size_t slen=length()+1;
721 GUTF8String retval;
722 if(slen > 1)
723 {
724 retval=NativeToUTF8();
725 if(!retval.length())
726 {
727 retval=(const char *)(*this);
728 }
729 }
730 return retval;
731 } /*MBCS*/
732
733 int
cmp(const GP<GStringRep> & s2,const int len) const734 GStringRep::Native::cmp(const GP<GStringRep> &s2,const int len) const
735 {
736 int retval;
737 if(s2)
738 {
739 if(s2->isUTF8())
740 {
741 const GP<GStringRep> r(toUTF8(true));
742 if(r)
743 {
744 retval=GStringRep::cmp(r->data,s2->data,len);
745 }else
746 {
747 retval=cmp(s2->toNative(NOT_ESCAPED),len);
748 }
749 }else
750 {
751 retval=GStringRep::cmp(data,s2->data,len);
752 }
753 }else
754 {
755 retval=GStringRep::cmp(data,0,len);
756 }
757 return retval;
758 }
759
760 int
toInt() const761 GStringRep::Native::toInt() const
762 {
763 return atoi(data);
764 }
765
766 long
toLong(const int pos,int & endpos,const int base) const767 GStringRep::Native::toLong(
768 const int pos, int &endpos, const int base) const
769 {
770 char *edata=0;
771 const long retval=strtol(data+pos, &edata, base);
772 if(edata)
773 {
774 endpos=(int)((size_t)edata-(size_t)data);
775 }else
776 {
777 endpos=(-1);
778 }
779 return retval;
780 }
781
782 unsigned long
toULong(const int pos,int & endpos,const int base) const783 GStringRep::Native::toULong(
784 const int pos, int &endpos, const int base) const
785 {
786 char *edata=0;
787 const unsigned long retval=strtoul(data+pos, &edata, base);
788 if(edata)
789 {
790 endpos=(int)((size_t)edata-(size_t)data);
791 }else
792 {
793 endpos=(-1);
794 }
795 return retval;
796 }
797
798 double
toDouble(const int pos,int & endpos) const799 GStringRep::Native::toDouble(
800 const int pos, int &endpos) const
801 {
802 char *edata=0;
803 const double retval=strtod(data+pos, &edata);
804 if(edata)
805 {
806 endpos=(int)((size_t)edata-(size_t)data);
807 }else
808 {
809 endpos=(-1);
810 }
811 return retval;
812 }
813
814 uint32_t
getValidUCS4(const char * & source) const815 GStringRep::Native::getValidUCS4(const char *&source) const
816 {
817 uint32_t retval=0;
818 int n=(int)((size_t)size+(size_t)data-(size_t)source);
819 if(source && (n > 0))
820 {
821 mbstate_t ps;
822 //(void)mbrlen(source, n, &ps);
823 memset(&ps,0,sizeof(mbstate_t));
824 wchar_t wt;
825 const int len=mbrtowc(&wt,source,n,&ps);
826 if(len>=0)
827 {
828 if(sizeof(wchar_t) == sizeof(uint16_t))
829 {
830 source+=len;
831 uint16_t s[2];
832 s[0]=(uint16_t)wt;
833 if(UTF16toUCS4(retval,s,s+1)<=0)
834 {
835 if((n-=len)>0)
836 {
837 const int len=mbrtowc(&wt,source,n,&ps);
838 if(len>=0)
839 {
840 s[1]=(uint16_t)wt;
841 uint32_t w;
842 if(UTF16toUCS4(w,s,s+2)>0)
843 {
844 source+=len;
845 retval=w;
846 }
847 }
848 }
849 }
850 }else
851 {
852 retval=(uint32_t)wt;
853 source++;
854 }
855 }else
856 {
857 source++;
858 }
859 }
860 return retval;
861 }
862
863 // Tests if a string is legally encoded in the current character set.
864 bool
is_valid(void) const865 GStringRep::Native::is_valid(void) const
866 {
867 bool retval=true;
868 if(data && size)
869 {
870 size_t n=size;
871 const char *s=data;
872 mbstate_t ps;
873 //(void)mbrlen(s, n, &ps);
874 memset(&ps,0,sizeof(mbstate_t));
875 do
876 {
877 size_t m=mbrlen(s,n,&ps);
878 if(m > n)
879 {
880 retval=false;
881 break;
882 }else if(m)
883 {
884 s+=m;
885 n-=m;
886 }else
887 {
888 break;
889 }
890 } while(n);
891 }
892 return retval;
893 }
894
895 // These are dummy functions.
896 void
set_remainder(void const * const,const unsigned int,const EncodeType)897 GStringRep::set_remainder(void const * const, const unsigned int,
898 const EncodeType) {}
899 void
set_remainder(void const * const,const unsigned int,const GP<GStringRep> & encoding)900 GStringRep::set_remainder(void const * const, const unsigned int,
901 const GP<GStringRep> &encoding) {}
902 void
set_remainder(const GP<GStringRep::Unicode> &)903 GStringRep::set_remainder( const GP<GStringRep::Unicode> &) {}
904
905 GP<GStringRep::Unicode>
get_remainder(void) const906 GStringRep::get_remainder( void ) const
907 {
908 return 0;
909 }
910
GNativeString(const char dat)911 GNativeString::GNativeString(const char dat)
912 {
913 init(GStringRep::Native::create(&dat,0,1));
914 }
915
GNativeString(const char * str)916 GNativeString::GNativeString(const char *str)
917 {
918 init(GStringRep::Native::create(str));
919 }
920
GNativeString(const unsigned char * str)921 GNativeString::GNativeString(const unsigned char *str)
922 {
923 init(GStringRep::Native::create((const char *)str));
924 }
925
GNativeString(const uint16_t * str)926 GNativeString::GNativeString(const uint16_t *str)
927 {
928 init(GStringRep::Native::create(str,0,-1));
929 }
930
GNativeString(const uint32_t * str)931 GNativeString::GNativeString(const uint32_t *str)
932 {
933 init(GStringRep::Native::create(str,0,-1));
934 }
935
GNativeString(const char * dat,unsigned int len)936 GNativeString::GNativeString(const char *dat, unsigned int len)
937 {
938 init(
939 GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
940 }
941
GNativeString(const uint16_t * dat,unsigned int len)942 GNativeString::GNativeString(const uint16_t *dat, unsigned int len)
943 {
944 init(
945 GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
946 }
947
GNativeString(const uint32_t * dat,unsigned int len)948 GNativeString::GNativeString(const uint32_t *dat, unsigned int len)
949 {
950 init(
951 GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
952 }
953
// Copy constructor: initialize this string from #str#.
GNativeString::GNativeString(const GNativeString &str)
{
  // Initialization is delegated to init().
  init(str);
}
958
GNativeString(const GBaseString & gs,int from,int len)959 GNativeString::GNativeString(const GBaseString &gs, int from, int len)
960 {
961 init(
962 GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len));
963 }
964
GNativeString(const int number)965 GNativeString::GNativeString(const int number)
966 {
967 init(GStringRep::Native::create_format("%d",number));
968 }
969
GNativeString(const double number)970 GNativeString::GNativeString(const double number)
971 {
972 init(GStringRep::Native::create_format("%f",number));
973 }
974
975 GNativeString&
operator =(const char str)976 GNativeString::operator= (const char str)
977 { return init(GStringRep::Native::create(&str,0,1)); }
978
979 GNativeString&
operator =(const char * str)980 GNativeString::operator= (const char *str)
981 { return init(GStringRep::Native::create(str)); }
982
983 GNativeString
operator +(const GNativeString & s2) const984 GBaseString::operator+(const GNativeString &s2) const
985 {
986 return GStringRep::Native::create(*this,s2);
987 }
988
989 GP<GStringRep>
NativeToUTF8(const char * s)990 GStringRep::NativeToUTF8( const char *s )
991 {
992 return GStringRep::Native::create(s)->toUTF8();
993 }
994
995 #endif // HAS_WCHAR
996
997 template <class TYPE>
998 GP<GStringRep>
create(const unsigned int sz,TYPE *)999 GStringRep::create(const unsigned int sz, TYPE *)
1000 {
1001 GP<GStringRep> gaddr;
1002 if (sz > 0)
1003 {
1004 GStringRep *addr;
1005 gaddr=(addr=new TYPE);
1006 addr->data=(char *)(::operator new(sz+1));
1007 addr->size = sz;
1008 addr->data[sz] = 0;
1009 }
1010 return gaddr;
1011 }
1012
1013 GP<GStringRep>
strdup(const char * s) const1014 GStringRep::strdup(const char *s) const
1015 {
1016 GP<GStringRep> retval;
1017 const int length=s?strlen(s):0;
1018 if(length>0)
1019 {
1020 retval=blank(length);
1021 char const * const end=s+length;
1022 char *ptr=retval->data;
1023 for(;*s&&(s!=end);ptr++)
1024 {
1025 ptr[0]=s++[0];
1026 }
1027 ptr[0]=0;
1028 }
1029 return retval;
1030 }
1031
1032 GP<GStringRep>
substr(const char * s,const int start,const int len) const1033 GStringRep::substr(const char *s,const int start,const int len) const
1034 {
1035 GP<GStringRep> retval;
1036 if(s && s[0])
1037 {
1038 const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1);
1039 const char *startptr, *endptr;
1040 if(start<0)
1041 {
1042 startptr=s+length+start;
1043 if(startptr<s)
1044 startptr=s;
1045 }else
1046 {
1047 startptr=s;
1048 for(const char * const ptr=s+start;(startptr<ptr)&&*startptr;++startptr)
1049 EMPTY_LOOP;
1050 }
1051 if(len<0)
1052 {
1053 if(s+length+1 < startptr+len)
1054 {
1055 endptr=startptr;
1056 }else
1057 {
1058 endptr=s+length+1+len;
1059 }
1060 }else
1061 {
1062 endptr=startptr;
1063 for(const char * const ptr=startptr+len;(endptr<ptr)&&*endptr;++endptr)
1064 EMPTY_LOOP;
1065 }
1066 if(endptr>startptr)
1067 {
1068 retval=blank((size_t)(endptr-startptr));
1069 char *data=retval->data;
1070 for(; (startptr<endptr) && *startptr; ++startptr,++data)
1071 {
1072 data[0]=startptr[0];
1073 }
1074 data[0]=0;
1075 }
1076 }
1077 return retval;
1078 }
1079
1080 GP<GStringRep>
substr(const uint16_t * s,const int start,const int len) const1081 GStringRep::substr(const uint16_t *s,const int start,const int len) const
1082 {
1083 GP<GStringRep> retval;
1084 if(s && s[0])
1085 {
1086 uint16_t const *eptr;
1087 if(len<0)
1088 {
1089 for(eptr=s;eptr[0];++eptr)
1090 EMPTY_LOOP;
1091 }else
1092 {
1093 eptr=&(s[len]);
1094 }
1095 s=&s[start];
1096 if((size_t)s<(size_t)eptr)
1097 {
1098 mbstate_t ps;
1099 memset(&ps,0,sizeof(mbstate_t));
1100 unsigned char *buf,*ptr;
1101 GPBuffer<unsigned char> gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7);
1102 for(ptr=buf;s[0];)
1103 {
1104 uint32_t w;
1105 int i=UTF16toUCS4(w,s,eptr);
1106 if(i<=0)
1107 break;
1108 s+=i;
1109 ptr=UCS4toString(w,ptr,&ps);
1110 }
1111 ptr[0]=0;
1112 retval = strdup( (const char *)buf );
1113 }
1114 }
1115 return retval;
1116 }
1117
1118 GP<GStringRep>
substr(const uint32_t * s,const int start,const int len) const1119 GStringRep::substr(const uint32_t *s,const int start,const int len) const
1120 {
1121 GP<GStringRep> retval;
1122 if(s && s[0])
1123 {
1124 uint32_t const *eptr;
1125 if(len<0)
1126 {
1127 for(eptr=s;eptr[0];++eptr)
1128 EMPTY_LOOP;
1129 }else
1130 {
1131 eptr=&(s[len]);
1132 }
1133 s=&s[start];
1134 if((size_t)s<(size_t)eptr)
1135 {
1136 mbstate_t ps;
1137 memset(&ps,0,sizeof(mbstate_t));
1138 unsigned char *buf,*ptr;
1139 GPBuffer<unsigned char> gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7);
1140 for(ptr=buf;s[0];++s)
1141 {
1142 ptr=UCS4toString(s[0],ptr,&ps);
1143 }
1144 ptr[0]=0;
1145 retval = strdup( (const char *)buf );
1146 }
1147 }
1148 return retval;
1149 }
1150
1151 GP<GStringRep>
append(const char * s2) const1152 GStringRep::append(const char *s2) const
1153 {
1154 GP<GStringRep> retval;
1155 if(s2)
1156 {
1157 retval=concat(data,s2);
1158 }else
1159 {
1160 retval=const_cast<GStringRep *>(this);
1161 }
1162 return retval;
1163 }
1164
1165 GP<GStringRep>
append(const GP<GStringRep> & s2) const1166 GStringRep::UTF8::append(const GP<GStringRep> &s2) const
1167 {
1168 GP<GStringRep> retval;
1169 if(s2)
1170 {
1171 if(s2->isNative())
1172 {
1173 G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") );
1174 }
1175 retval=concat(data,s2->data);
1176 }else
1177 {
1178 retval=const_cast<GStringRep::UTF8 *>(this);
1179 }
1180 return retval;
1181 }
1182
1183 GP<GStringRep>
concat(const char * s1,const char * s2) const1184 GStringRep::concat(const char *s1,const char *s2) const
1185 {
1186 const int length1=(s1?strlen(s1):0);
1187 const int length2=(s2?strlen(s2):0);
1188 const int length=length1+length2;
1189 GP<GStringRep> retval;
1190 if(length>0)
1191 {
1192 retval=blank(length);
1193 GStringRep &r=*retval;
1194 if(length1)
1195 {
1196 strcpy(r.data,s1);
1197 if(length2)
1198 strcat(r.data,s2);
1199 }else
1200 {
1201 strcpy(r.data,s2);
1202 }
1203 }
1204 return retval;
1205 }
1206
1207 const char *GBaseString::nullstr = "";
1208
1209 void
empty(void)1210 GBaseString::empty( void )
1211 {
1212 init(0);
1213 }
1214
1215 GP<GStringRep>
getbuf(int n) const1216 GStringRep::getbuf(int n) const
1217 {
1218 GP<GStringRep> retval;
1219 if(n < 0)
1220 n=strlen(data);
1221 if(n >= 0)
1222 {
1223 retval=blank((n>0) ? n : 1);
1224 char *ndata=retval->data;
1225 strncpy(ndata,data,n);
1226 ndata[n]=0;
1227 }
1228 return retval;
1229 }
1230
1231 const char *
isCharType(bool (* xiswtest)(const unsigned long wc),const char * ptr,const bool reverse) const1232 GStringRep::isCharType(bool (*xiswtest)(const unsigned long wc),
1233 const char *ptr,
1234 const bool reverse) const
1235 {
1236 const char *xptr = ptr;
1237 unsigned long w=getValidUCS4(xptr);
1238 if(ptr != xptr)
1239 {
1240 if (sizeof(wchar_t) == 2)
1241 w &= 0xffff;
1242 if (reverse ^ xiswtest(w))
1243 ptr = xptr;
1244 }
1245 return ptr;
1246 }
1247
1248 int
nextCharType(bool (* xiswtest)(const unsigned long wc),const int from,const int len,const bool reverse) const1249 GStringRep::nextCharType(
1250 bool (*xiswtest)(const unsigned long wc), const int from, const int len,
1251 const bool reverse) const
1252 {
1253 // We want to return the position of the next
1254 // non white space starting from the #from#
1255 // location. isspace should work in any locale
1256 // so we should only need to do this for the non-
1257 // native locales (UTF8)
1258 int retval;
1259 if(from<size)
1260 {
1261 retval=from;
1262 const char * ptr = data+from;
1263 for( const char * const eptr=ptr+((len<0)?(size-from):len);
1264 (ptr<eptr) && *ptr;)
1265 {
1266 // Skip characters that fail the isCharType test
1267 char const * const xptr=isCharType(xiswtest,ptr,!reverse);
1268 if(xptr == ptr)
1269 break;
1270 ptr=xptr;
1271 }
1272 retval=(int)((size_t)ptr-(size_t)data);
1273 }else
1274 {
1275 retval=size;
1276 }
1277 return retval;
1278 }
1279
1280 bool
giswspace(const unsigned long w)1281 GStringRep::giswspace(const unsigned long w)
1282 {
1283 #if HAS_WCTYPE
1284 return !!iswspace((wchar_t)w);
1285 #else
1286 return (w & ~0xff) ? false : !!isspace((int)(w & 0xff));
1287 #endif
1288 }
1289
1290 bool
giswupper(const unsigned long w)1291 GStringRep::giswupper(const unsigned long w)
1292 {
1293 #if HAS_WCTYPE
1294 return !!iswupper((wchar_t)w);
1295 #else
1296 return (w & ~0xff) ? false : !!isupper((int)(w & 0xff));
1297 #endif
1298 }
1299
1300 bool
giswlower(const unsigned long w)1301 GStringRep::giswlower(const unsigned long w)
1302 {
1303 #if HAS_WCTYPE
1304 return !!iswlower((wchar_t)w);
1305 #else
1306 return (w & ~0xff) ? false : !!islower((int)(w & 0xff));
1307 #endif
1308 }
1309
1310 unsigned long
gtowupper(const unsigned long w)1311 GStringRep::gtowupper(const unsigned long w)
1312 {
1313 #if HAS_WCTYPE
1314 return (unsigned long)towupper((wchar_t)w);
1315 #else
1316 return (w&~0xff) ? w : (unsigned long)toupper(w & 0xff);
1317 #endif
1318 }
1319
1320 unsigned long
gtowlower(const unsigned long w)1321 GStringRep::gtowlower(const unsigned long w)
1322 {
1323 #if HAS_WCTYPE
1324 return (unsigned long)towlower((wchar_t)w);
1325 #else
1326 return (w&~0xff) ? w : (unsigned long)tolower(w & 0xff);
1327 #endif
1328 }
1329
1330 GP<GStringRep>
tocase(bool (* xiswcase)(const unsigned long wc),unsigned long (* xtowcase)(const unsigned long wc)) const1331 GStringRep::tocase(
1332 bool (*xiswcase)(const unsigned long wc),
1333 unsigned long (*xtowcase)(const unsigned long wc)) const
1334 {
1335 GP<GStringRep> retval;
1336 char const * const eptr=data+size;
1337 char const *ptr=data;
1338 while(ptr<eptr)
1339 {
1340 char const * const xptr=isCharType(xiswcase,ptr,false);
1341 if(ptr == xptr)
1342 break;
1343 ptr=xptr;
1344 }
1345 if(ptr<eptr)
1346 {
1347 const int n=(int)((size_t)ptr-(size_t)data);
1348 unsigned char *buf;
1349 GPBuffer<unsigned char> gbuf(buf,n+(1+size-n)*6);
1350 if(n>0)
1351 {
1352 strncpy((char *)buf,data,n);
1353 }
1354 unsigned char *buf_ptr=buf+n;
1355 for(char const *ptr=data+n;ptr<eptr;)
1356 {
1357 char const * const xptr=ptr;
1358 const unsigned long w=getValidUCS4(ptr);
1359 if(ptr == xptr)
1360 break;
1361 if(xiswcase(w))
1362 {
1363 const int len=(int)((size_t)ptr-(size_t)xptr);
1364 strncpy((char *)buf_ptr,xptr,len);
1365 buf_ptr+=len;
1366 }else
1367 {
1368 mbstate_t ps;
1369 memset(&ps,0,sizeof(mbstate_t));
1370 buf_ptr=UCS4toString(xtowcase(w),buf_ptr,&ps);
1371 }
1372 }
1373 buf_ptr[0]=0;
1374 retval=substr((const char *)buf,0,(int)((size_t)buf_ptr-(size_t)buf));
1375 }else
1376 {
1377 retval=const_cast<GStringRep *>(this);
1378 }
1379 return retval;
1380 }
1381
1382 // Returns a copy of this string with characters used in XML escaped as follows:
1383 // '<' --> "<"
1384 // '>' --> ">"
1385 // '&' --> "&"
1386 // '\'' --> "'"
1387 // '\"' --> """
1388 // Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f.
1389 GP<GStringRep>
toEscaped(const bool tosevenbit) const1390 GStringRep::toEscaped( const bool tosevenbit ) const
1391 {
1392 bool modified=false;
1393 char *ret;
1394 GPBuffer<char> gret(ret,size*7);
1395 ret[0]=0;
1396 char *retptr=ret;
1397 char const *start=data;
1398 char const *s=start;
1399 char const *last=s;
1400 GP<GStringRep> special;
1401 for(unsigned long w;(w=getValidUCS4(s));last=s)
1402 // Whoever wrote this for statement should be __complete_here__
1403 {
1404 char const *ss=0;
1405 switch(w)
1406 {
1407 case '<':
1408 ss="<";
1409 break;
1410 case '>':
1411 ss=">";
1412 break;
1413 case '&':
1414 ss="&";
1415 break;
1416 case '\47':
1417 ss="'";
1418 break;
1419 case '\42':
1420 ss=""";
1421 break;
1422 default:
1423 if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80))))
1424 {
1425 special=toThis(UTF8::create_format("&#%lu;",w));
1426 ss=special->data;
1427 }
1428 break;
1429 }
1430 if(ss)
1431 {
1432 modified=true;
1433 if(s!=start)
1434 {
1435 size_t len=(size_t)last-(size_t)start;
1436 strncpy(retptr,start,len);
1437 retptr+=len;
1438 start=s;
1439 }
1440 if(ss[0])
1441 {
1442 size_t len=strlen(ss);
1443 strcpy(retptr,ss);
1444 retptr+=len;
1445 }
1446 }
1447 }
1448 GP<GStringRep> retval;
1449 if(modified)
1450 {
1451 strcpy(retptr,start);
1452 retval=strdup( ret );
1453 }else
1454 {
1455 retval=const_cast<GStringRep *>(this);
1456 }
1457 // DEBUG_MSG( "Escaped string is '" << ret << "'\n" );
1458 return retval;
1459 }
1460
1461
1462 static const GMap<GUTF8String,GUTF8String> &
BasicMap(void)1463 BasicMap( void )
1464 {
1465 static GMap<GUTF8String,GUTF8String> Basic;
1466 if (! Basic.size())
1467 {
1468 Basic["lt"] = GUTF8String('<');
1469 Basic["gt"] = GUTF8String('>');
1470 Basic["amp"] = GUTF8String('&');
1471 Basic["apos"] = GUTF8String('\47');
1472 Basic["quot"] = GUTF8String('\42');
1473 }
1474 return Basic;
1475 }
1476
1477 GUTF8String
fromEscaped(const GMap<GUTF8String,GUTF8String> ConvMap) const1478 GUTF8String::fromEscaped( const GMap<GUTF8String,GUTF8String> ConvMap ) const
1479 {
1480 GUTF8String ret; // Build output string here
1481 int start_locn = 0; // Beginning of substring to skip
1482 int amp_locn; // Location of a found ampersand
1483
1484 while( (amp_locn = search( '&', start_locn )) > -1 )
1485 {
1486 // Found the next apostrophe
1487 // Locate the closing semicolon
1488 const int semi_locn = search( ';', amp_locn );
1489 // No closing semicolon, exit and copy
1490 // the rest of the string.
1491 if( semi_locn < 0 )
1492 break;
1493 ret += substr( start_locn, amp_locn - start_locn );
1494 int const len = semi_locn - amp_locn - 1;
1495 if(len)
1496 {
1497 GUTF8String key = substr( amp_locn+1, len);
1498 //DEBUG_MSG( "key = '" << key << "'\n" );
1499 char const * s=key;
1500 if( s[0] == '#')
1501 {
1502 unsigned long value;
1503 char *ptr=0;
1504 if(s[1] == 'x' || s[1] == 'X')
1505 {
1506 value=strtoul((char const *)(s+2),&ptr,16);
1507 }else
1508 {
1509 value=strtoul((char const *)(s+1),&ptr,10);
1510 }
1511 if(ptr)
1512 {
1513 unsigned char utf8char[7];
1514 unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char);
1515 ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char);
1516 }else
1517 {
1518 ret += substr( amp_locn, semi_locn - amp_locn + 1 );
1519 }
1520 }else
1521 {
1522 GPosition map_entry = ConvMap.contains( key );
1523 if( map_entry )
1524 { // Found in the conversion map, substitute
1525 ret += ConvMap[map_entry];
1526 } else
1527 {
1528 static const GMap<GUTF8String,GUTF8String> &Basic = BasicMap();
1529 GPosition map_entry = Basic.contains( key );
1530 if ( map_entry )
1531 {
1532 ret += Basic[map_entry];
1533 }else
1534 {
1535 ret += substr( amp_locn, len+2 );
1536 }
1537 }
1538 }
1539 }else
1540 {
1541 ret += substr( amp_locn, len+2 );
1542 }
1543 start_locn = semi_locn + 1;
1544 // DEBUG_MSG( "ret = '" << ret << "'\n" );
1545 }
1546
1547 // Copy the end of the string to the output
1548 ret += substr( start_locn, length()-start_locn );
1549
1550 // DEBUG_MSG( "Unescaped string is '" << ret << "'\n" );
1551 return (ret == *this)?(*this):ret;
1552 }
1553
1554 GUTF8String
fromEscaped(void) const1555 GUTF8String::fromEscaped(void) const
1556 {
1557 const GMap<GUTF8String,GUTF8String> nill;
1558 return fromEscaped(nill);
1559 }
1560
1561 GP<GStringRep>
setat(int n,char ch) const1562 GStringRep::setat(int n, char ch) const
1563 {
1564 GP<GStringRep> retval;
1565 if(n<0)
1566 n+=size;
1567 if (n < 0 || n>size)
1568 GBaseString::throw_illegal_subscript();
1569 if(ch == data[n])
1570 {
1571 retval=const_cast<GStringRep *>(this);
1572 }else if(!ch)
1573 {
1574 retval=getbuf(n);
1575 }else
1576 {
1577 retval=getbuf((n<size)?size:n);
1578 retval->data[n]=ch;
1579 if(n == size)
1580 retval->data[n+1]=0;
1581 }
1582 return retval;
1583 }
1584
// Select the bounded formatting function used by GStringRep::vformat.
// The legacy macro `linux` is not predefined by compilers running in
// strict ISO mode, so `__linux__` is tested as well.
#if defined(AUTOCONF) && defined(HAVE_VSNPRINTF)
# define USE_VSNPRINTF vsnprintf
#elif defined(_WIN32) && !defined(__CYGWIN32__)
# define USE_VSNPRINTF _vsnprintf
#elif defined(linux) || defined(__linux__)
# define USE_VSNPRINTF vsnprintf
#endif
1592
1593 GUTF8String &
format(const char fmt[],...)1594 GUTF8String::format(const char fmt[], ... )
1595 {
1596 va_list args;
1597 va_start(args, fmt);
1598 return init(GStringRep::UTF8::create(fmt,args));
1599 }
1600
1601 GP<GStringRep>
create_format(const char fmt[],...)1602 GStringRep::UTF8::create_format(const char fmt[],...)
1603 {
1604 va_list args;
1605 va_start(args, fmt);
1606 return create(fmt,args);
1607 }
1608
1609 GP<GStringRep>
vformat(va_list args) const1610 GStringRep::vformat(va_list args) const
1611 {
1612 GP<GStringRep> retval;
1613 if(size)
1614 {
1615 char const * const fmt=data;
1616 int buflen=32768;
1617 char *buffer;
1618 GPBuffer<char> gbuffer(buffer,buflen);
1619 ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C"));
1620 // Format string
1621 #ifdef USE_VSNPRINTF
1622 while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0)
1623 {
1624 gbuffer.resize(0);
1625 gbuffer.resize(buflen+32768);
1626 }
1627 va_end(args);
1628 #else
1629 buffer[buflen-1] = 0;
1630 vsprintf(buffer, fmt, args);
1631 va_end(args);
1632 if (buffer[buflen-1])
1633 {
1634 // This isn't as fatal since it is on the stack, but we
1635 // definitely should stop the current operation.
1636 G_THROW( ERR_MSG("GString.overwrite") );
1637 }
1638 #endif
1639 retval=strdup((const char *)buffer);
1640 }
1641 // Go altering the string
1642 return retval;
1643 }
1644
1645 int
search(char c,int from) const1646 GStringRep::search(char c, int from) const
1647 {
1648 if (from<0)
1649 from += size;
1650 int retval=(-1);
1651 if (from>=0 && from<size)
1652 {
1653 char const *const s = strchr(data+from,c);
1654 if(s)
1655 retval=(int)((size_t)s-(size_t)data);
1656 }
1657 return retval;
1658 }
1659
1660 int
search(char const * ptr,int from) const1661 GStringRep::search(char const *ptr, int from) const
1662 {
1663 if(from<0)
1664 {
1665 from+=size;
1666 if(from<0)
1667 G_THROW( ERR_MSG("GString.bad_subscript") );
1668 }
1669 int retval=(-1);
1670 if (from>=0 && from<size)
1671 {
1672 char const *const s = strstr(data+from,ptr);
1673 if(s)
1674 retval=(int)((size_t)s-(size_t)data);
1675 }
1676 return retval;
1677 }
1678
1679 int
rsearch(char c,int from) const1680 GStringRep::rsearch(char c, int from) const
1681 {
1682 if(from<0)
1683 {
1684 from+=size;
1685 if(from<0)
1686 G_THROW( ERR_MSG("GString.bad_subscript") );
1687 }
1688 int retval=(-1);
1689 if ((from>=0) && (from<size))
1690 {
1691 char const *const s = strrchr(data+from,c);
1692 if(s)
1693 retval=(int)((size_t)s-(size_t)data);
1694 }
1695 return retval;
1696 }
1697
1698 int
rsearch(char const * ptr,int from) const1699 GStringRep::rsearch(char const *ptr, int from) const
1700 {
1701 if(from<0)
1702 {
1703 from+=size;
1704 if(from<0)
1705 G_THROW( ERR_MSG("GString.bad_subscript") );
1706 }
1707 int retval=(-1);
1708 for(int loc=from;(loc=search(ptr,loc)) >= 0;++loc)
1709 retval=loc;
1710 return retval;
1711 }
1712
1713 int
contains(const char accept[],int from) const1714 GStringRep::contains(const char accept[],int from) const
1715 {
1716 if(from<0)
1717 {
1718 from+=size;
1719 if(from<0)
1720 G_THROW( ERR_MSG("GString.bad_subscript") );
1721 }
1722 int retval=(-1);
1723 if (accept && accept[0] && from>=0 && from<size)
1724 {
1725 char const * const src = data+from;
1726 char const *ptr=strpbrk(src,accept);
1727 if(ptr)
1728 {
1729 retval=(int)(ptr-src)+from;
1730 }
1731 }
1732 return retval;
1733 }
1734
1735 int
rcontains(const char accept[],int from) const1736 GStringRep::rcontains(const char accept[],int from) const
1737 {
1738 int retval=(-1);
1739 while((from=contains(accept,from)) >= 0)
1740 {
1741 retval=from++;
1742 }
1743 return retval;
1744 }
1745
1746 bool
is_int(void) const1747 GBaseString::is_int(void) const
1748 {
1749 bool isLong=!!ptr;
1750 if(isLong)
1751 {
1752 int endpos;
1753 (*this)->toLong(0,endpos);
1754 if(endpos>=0)
1755 {
1756 isLong=((*this)->nextNonSpace(endpos) == (int)length());
1757 }
1758 }
1759 return isLong;
1760 }
1761
1762 bool
is_float(void) const1763 GBaseString::is_float(void) const
1764 {
1765 bool isDouble=!!ptr;
1766 if(isDouble)
1767 {
1768 int endpos;
1769 (*this)->toDouble(0,endpos);
1770 if(endpos>=0)
1771 {
1772 isDouble=((*this)->nextNonSpace(endpos) == (int)length());
1773 }
1774 }
1775 return isDouble;
1776 }
1777
1778 unsigned int
hash(const GBaseString & str)1779 hash(const GBaseString &str)
1780 {
1781 unsigned int x = 0;
1782 const char *s = (const char*)str;
1783 while (*s)
1784 x = x ^ (x<<6) ^ (unsigned char)(*s++);
1785 return x;
1786 }
1787
1788 void
throw_illegal_subscript()1789 GBaseString::throw_illegal_subscript()
1790 {
1791 G_THROW( ERR_MSG("GString.bad_subscript") );
1792 }
1793
1794 unsigned char *
UCS4toString(const uint32_t w0,unsigned char * ptr,mbstate_t *) const1795 GStringRep::UTF8::UCS4toString(
1796 const uint32_t w0,unsigned char *ptr, mbstate_t *) const
1797 {
1798 return UCS4toUTF8(w0,ptr);
1799 }
1800
1801 int
ncopy(wchar_t * const buf,const int buflen) const1802 GStringRep::UTF8::ncopy(wchar_t * const buf, const int buflen ) const
1803 {
1804 int retval=(-1);
1805 if(buf && buflen)
1806 {
1807 buf[0]=0;
1808 if(data[0])
1809 {
1810 const size_t length=strlen(data);
1811 const unsigned char * const eptr=(const unsigned char *)(data+length);
1812 wchar_t *r=buf;
1813 wchar_t const * const rend=buf+buflen;
1814 for(const unsigned char *s=(const unsigned char *)data;
1815 (r<rend)&&(s<eptr)&&*s;)
1816 {
1817 const uint32_t w0=UTF8toUCS4(s,eptr);
1818 uint16_t w1;
1819 uint16_t w2=1;
1820 for(int count=(sizeof(wchar_t)==sizeof(w1))
1821 ?UCS4toUTF16(w0,w1,w2):1;
1822 count&&(r<rend);
1823 --count,w1=w2,++r)
1824 {
1825 r[0]=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
1826 }
1827 }
1828 if(r<rend)
1829 {
1830 r[0]=0;
1831 retval=((size_t)r-(size_t)buf)/sizeof(wchar_t);
1832 }
1833 }
1834 else
1835 {
1836 retval=0;
1837 }
1838 }
1839 return retval;
1840 }
1841
1842 GP<GStringRep>
toNative(const EscapeMode escape) const1843 GStringRep::UTF8::toNative(const EscapeMode escape) const
1844 {
1845 GP<GStringRep> retval;
1846 if(data[0])
1847 {
1848 const size_t length=strlen(data);
1849 const unsigned char * const eptr=(const unsigned char *)(data+length);
1850 unsigned char *buf;
1851 GPBuffer<unsigned char> gbuf(buf,12*length+12);
1852 unsigned char *r=buf;
1853 mbstate_t ps;
1854 memset(&ps,0,sizeof(mbstate_t));
1855 for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
1856 {
1857 const unsigned char * const s0 = s;
1858 const uint32_t w0=UTF8toUCS4(s,eptr);
1859 if (s == s0)
1860 {
1861 s += 1;
1862 *r++ = '?';
1863 }
1864 else
1865 {
1866 const unsigned char * const r0 = r;
1867 r=UCS4toNative(w0,r,&ps);
1868 if(r == r0)
1869 {
1870 if (escape == IS_ESCAPED)
1871 {
1872 sprintf((char *)r,"&#%lu;",(unsigned long)w0);
1873 r += strlen((char *)r);
1874 }
1875 else
1876 {
1877 *r++ = '?';
1878 }
1879 }
1880 }
1881 }
1882 r[0]=0;
1883 retval = NATIVE_CREATE( (const char *)buf );
1884 }
1885 else
1886 {
1887 retval = NATIVE_CREATE( (unsigned int)0 );
1888 }
1889 return retval;
1890 }
1891
1892 GP<GStringRep>
toUTF8(const bool nothrow) const1893 GStringRep::UTF8::toUTF8(const bool nothrow) const
1894 {
1895 if(!nothrow)
1896 G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") );
1897 return const_cast<GStringRep::UTF8 *>(this);
1898 }
1899
1900 // Tests if a string is legally encoded in the current character set.
1901 bool
is_valid(void) const1902 GStringRep::UTF8::is_valid(void) const
1903 {
1904 bool retval=true;
1905 if(data && size)
1906 {
1907 const unsigned char * const eptr=(const unsigned char *)(data+size);
1908 for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
1909 {
1910 const unsigned char * const r=s;
1911 (void)UTF8toUCS4(s,eptr);
1912 if(r == s)
1913 {
1914 retval=false;
1915 break;
1916 }
1917 }
1918 }
1919 return retval;
1920 }
1921
// Appends the six payload bits of the continuation byte at r to the
// accumulator U.  Returns 0 when the byte is not of the form 10xxxxxx.
static inline uint32_t
add_char(uint32_t const U, unsigned char const * const r)
{
  uint32_t const C=r[0];
  if((C&0xC0) != 0x80)
    return 0;
  return (U<<6)|(C&0x3f);
}
1928
1929 uint32_t
UTF8toUCS4(unsigned char const * & s,void const * const eptr)1930 GStringRep::UTF8toUCS4(
1931 unsigned char const *&s,void const * const eptr)
1932 {
1933 uint32_t U=0;
1934 unsigned char const *r=s;
1935 if(r < eptr)
1936 {
1937 uint32_t const C1=r++[0];
1938 if(C1&0x80)
1939 {
1940 if(r < eptr)
1941 {
1942 U=C1;
1943 if((U=((C1&0x40)?add_char(U,r++):0)))
1944 {
1945 if(C1&0x20)
1946 {
1947 if(r < eptr)
1948 {
1949 if((U=add_char(U,r++)))
1950 {
1951 if(C1&0x10)
1952 {
1953 if(r < eptr)
1954 {
1955 if((U=add_char(U,r++)))
1956 {
1957 if(C1&0x8)
1958 {
1959 if(r < eptr)
1960 {
1961 if((U=add_char(U,r++)))
1962 {
1963 if(C1&0x4)
1964 {
1965 if(r < eptr)
1966 {
1967 if((U=((!(C1&0x2))?(add_char(U,r++)&0x7fffffff):0)))
1968 {
1969 s=r;
1970 }else
1971 {
1972 U=(unsigned int)(-1)-s++[0];
1973 }
1974 }else
1975 {
1976 U=0;
1977 }
1978 }else if((U=((U&0x4000000)?0:(U&0x3ffffff))))
1979 {
1980 s=r;
1981 }
1982 }else
1983 {
1984 U=(unsigned int)(-1)-s++[0];
1985 }
1986 }else
1987 {
1988 U=0;
1989 }
1990 }else if((U=((U&0x200000)?0:(U&0x1fffff))))
1991 {
1992 s=r;
1993 }
1994 }else
1995 {
1996 U=(unsigned int)(-1)-s++[0];
1997 }
1998 }else
1999 {
2000 U=0;
2001 }
2002 }else if((U=((U&0x10000)?0:(U&0xffff))))
2003 {
2004 s=r;
2005 }
2006 }else
2007 {
2008 U=(unsigned int)(-1)-s++[0];
2009 }
2010 }else
2011 {
2012 U=0;
2013 }
2014 }else if((U=((U&0x800)?0:(U&0x7ff))))
2015 {
2016 s=r;
2017 }
2018 }else
2019 {
2020 U=(unsigned int)(-1)-s++[0];
2021 }
2022 }else
2023 {
2024 U=0;
2025 }
2026 }else if((U=C1))
2027 {
2028 s=r;
2029 }
2030 }
2031 return U;
2032 }
2033
2034 unsigned char *
UCS4toUTF8(const uint32_t w,unsigned char * ptr)2035 GStringRep::UCS4toUTF8(const uint32_t w,unsigned char *ptr)
2036 {
2037 if(w <= 0x7f)
2038 {
2039 *ptr++ = (unsigned char)w;
2040 }
2041 else if(w <= 0x7ff)
2042 {
2043 *ptr++ = (unsigned char)((w>>6)|0xC0);
2044 *ptr++ = (unsigned char)((w|0x80)&0xBF);
2045 }
2046 else if(w <= 0xFFFF)
2047 {
2048 *ptr++ = (unsigned char)((w>>12)|0xE0);
2049 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2050 *ptr++ = (unsigned char)((w|0x80)&0xBF);
2051 }
2052 else if(w <= 0x1FFFFF)
2053 {
2054 *ptr++ = (unsigned char)((w>>18)|0xF0);
2055 *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
2056 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2057 *ptr++ = (unsigned char)((w|0x80)&0xBF);
2058 }
2059 else if(w <= 0x3FFFFFF)
2060 {
2061 *ptr++ = (unsigned char)((w>>24)|0xF8);
2062 *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
2063 *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
2064 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2065 *ptr++ = (unsigned char)((w|0x80)&0xBF);
2066 }
2067 else if(w <= 0x7FFFFFFF)
2068 {
2069 *ptr++ = (unsigned char)((w>>30)|0xFC);
2070 *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF);
2071 *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
2072 *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
2073 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2074 *ptr++ = (unsigned char)((w|0x80)&0xBF);
2075 }
2076 else
2077 {
2078 *ptr++ = '?';
2079 }
2080 return ptr;
2081 }
2082
2083 // Creates with a concat operation.
2084 GP<GStringRep>
concat(const char * s1,const GP<GStringRep> & s2) const2085 GStringRep::concat( const char *s1, const GP<GStringRep> &s2) const
2086 {
2087 GP<GStringRep> retval;
2088 if(s2)
2089 {
2090 retval=toThis(s2);
2091 if(s1 && s1[0])
2092 {
2093 if(retval)
2094 {
2095 retval=concat(s1,retval->data);
2096 }else
2097 {
2098 retval=strdup(s1);
2099 }
2100 }
2101 }else if(s1 && s1[0])
2102 {
2103 retval=strdup(s1);
2104 }
2105 return retval;
2106 }
2107
2108 // Creates with a concat operation.
2109
2110 GP<GStringRep>
concat(const GP<GStringRep> & s1,const char * s2) const2111 GStringRep::concat( const GP<GStringRep> &s1,const char *s2) const
2112 {
2113 GP<GStringRep> retval;
2114 if(s1)
2115 {
2116 retval=toThis(s1);
2117 if(s2 && s2[0])
2118 {
2119 if(retval)
2120 {
2121 retval=retval->append(s2);
2122 }else
2123 {
2124 retval=strdup(s2);
2125 }
2126 }
2127 }else if(s2 && s2[0])
2128 {
2129 retval=strdup(s2);
2130 }
2131 return retval;
2132 }
2133
2134 GP<GStringRep>
concat(const GP<GStringRep> & s1,const GP<GStringRep> & s2) const2135 GStringRep::concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const
2136 {
2137 GP<GStringRep> retval;
2138 if(s1)
2139 {
2140 retval=toThis(s1,s2);
2141 if(retval && s2)
2142 {
2143 retval=retval->append(toThis(s2));
2144 }
2145 }else if(s2)
2146 {
2147 retval=toThis(s2);
2148 }
2149 return retval;
2150 }
2151
GStringRep(void)2152 GStringRep::GStringRep(void)
2153 {
2154 size=0;
2155 data=0;
2156 }
2157
~GStringRep()2158 GStringRep::~GStringRep()
2159 {
2160 if(data)
2161 {
2162 data[0]=0;
2163 ::operator delete(data);
2164 }
2165 data=0;
2166 }
2167
UTF8(void)2168 GStringRep::UTF8::UTF8(void) {}
2169
~UTF8()2170 GStringRep::UTF8::~UTF8() {}
2171
2172 int
cmp(const char * s1,const int len) const2173 GStringRep::cmp(const char *s1,const int len) const
2174 {
2175 return cmp(data,s1,len);
2176 }
2177
2178 int
cmp(const char * s1,const char * s2,const int len)2179 GStringRep::cmp(const char *s1, const char *s2,const int len)
2180 {
2181 return (len
2182 ?((s1&&s1[0])
2183 ?((s2&&s2[0])
2184 ?((len>0)
2185 ?strncmp(s1,s2,len)
2186 :strcmp(s1,s2))
2187 :1)
2188 :((s2&&s2[0])?(-1):0))
2189 :0);
2190 }
2191
2192 int
cmp(const GP<GStringRep> & s1,const GP<GStringRep> & s2,const int len)2193 GStringRep::cmp(const GP<GStringRep> &s1, const GP<GStringRep> &s2,
2194 const int len )
2195 {
2196 return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len));
2197 }
2198
2199 int
cmp(const GP<GStringRep> & s1,const char * s2,const int len)2200 GStringRep::cmp(const GP<GStringRep> &s1, const char *s2,
2201 const int len )
2202 {
2203 return cmp((s1?s1->data:0),s2,len);
2204 }
2205
2206 int
cmp(const char * s1,const GP<GStringRep> & s2,const int len)2207 GStringRep::cmp(const char *s1, const GP<GStringRep> &s2,
2208 const int len )
2209 {
2210 return cmp(s1,(s2?(s2->data):0),len);
2211 }
2212
2213 int
cmp(const GP<GStringRep> & s2,const int len) const2214 GStringRep::UTF8::cmp(const GP<GStringRep> &s2,const int len) const
2215 {
2216 int retval;
2217 if(s2)
2218 {
2219 if(s2->isNative())
2220 {
2221 GP<GStringRep> r(s2->toUTF8(true));
2222 if(r)
2223 {
2224 retval=GStringRep::cmp(data,r->data,len);
2225 }else
2226 {
2227 retval=-(s2->cmp(toNative(NOT_ESCAPED),len));
2228 }
2229 }else
2230 {
2231 retval=GStringRep::cmp(data,s2->data,len);
2232 }
2233 }else
2234 {
2235 retval=GStringRep::cmp(data,0,len);
2236 }
2237 return retval;
2238 }
2239
2240 int
toInt() const2241 GStringRep::UTF8::toInt() const
2242 {
2243 int endpos;
2244 return (int)toLong(0,endpos);
2245 }
2246
2247 static inline long
Cstrtol(char * data,char ** edata,const int base)2248 Cstrtol(char *data,char **edata, const int base)
2249 {
2250 GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
2251 while (data && *data==' ') data++;
2252 return strtol(data,edata,base);
2253 }
2254
2255 long
toLong(const int pos,int & endpos,const int base) const2256 GStringRep::UTF8::toLong(
2257 const int pos, int &endpos, const int base) const
2258 {
2259 char *edata=0;
2260 long retval=Cstrtol(data+pos,&edata, base);
2261 if(edata)
2262 {
2263 endpos=edata-data;
2264 }else
2265 {
2266 GP<GStringRep> ptr = GStringRep::UTF8::create();
2267 endpos=(-1);
2268 ptr=ptr->strdup(data+pos);
2269 if(ptr)
2270 ptr=ptr->toNative(NOT_ESCAPED);
2271 if(ptr)
2272 {
2273 int xendpos;
2274 retval=ptr->toLong(0,xendpos,base);
2275 if(xendpos> 0)
2276 {
2277 endpos=(int)size;
2278 ptr=ptr->strdup(data+xendpos);
2279 if(ptr)
2280 {
2281 ptr=ptr->toUTF8(true);
2282 if(ptr)
2283 {
2284 endpos-=(int)(ptr->size);
2285 }
2286 }
2287 }
2288 }
2289 }
2290 return retval;
2291 }
2292
2293 static inline unsigned long
Cstrtoul(char * data,char ** edata,const int base)2294 Cstrtoul(char *data,char **edata, const int base)
2295 {
2296 GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
2297 while (data && *data==' ') data++;
2298 return strtoul(data,edata,base);
2299 }
2300
2301 unsigned long
toULong(const int pos,int & endpos,const int base) const2302 GStringRep::UTF8::toULong(
2303 const int pos, int &endpos, const int base) const
2304 {
2305 char *edata=0;
2306 unsigned long retval=Cstrtoul(data+pos,&edata, base);
2307 if(edata)
2308 {
2309 endpos=edata-data;
2310 }else
2311 {
2312 GP<GStringRep> ptr = GStringRep::UTF8::create();
2313 endpos=(-1);
2314 ptr=ptr->strdup(data+pos);
2315 if(ptr)
2316 ptr=ptr->toNative(NOT_ESCAPED);
2317 if(ptr)
2318 {
2319 int xendpos;
2320 retval=ptr->toULong(0,xendpos,base);
2321 if(xendpos> 0)
2322 {
2323 endpos=(int)size;
2324 ptr=ptr->strdup(data+xendpos);
2325 if(ptr)
2326 {
2327 ptr=ptr->toUTF8(true);
2328 if(ptr)
2329 {
2330 endpos-=(int)(ptr->size);
2331 }
2332 }
2333 }
2334 }
2335 }
2336 return retval;
2337 }
2338
2339 static inline double
Cstrtod(char * data,char ** edata)2340 Cstrtod(char *data,char **edata)
2341 {
2342 GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
2343 while (data && *data==' ') data++;
2344 return strtod(data,edata);
2345 }
2346
2347 double
toDouble(const int pos,int & endpos) const2348 GStringRep::UTF8::toDouble(const int pos, int &endpos) const
2349 {
2350 char *edata=0;
2351 double retval=Cstrtod(data+pos,&edata);
2352 if(edata)
2353 {
2354 endpos=edata-data;
2355 }else
2356 {
2357 GP<GStringRep> ptr = GStringRep::UTF8::create();
2358 endpos=(-1);
2359 ptr=ptr->strdup(data+pos);
2360 if(ptr)
2361 ptr=ptr->toNative(NOT_ESCAPED);
2362 if(ptr)
2363 {
2364 int xendpos;
2365 retval=ptr->toDouble(0,xendpos);
2366 if(xendpos >= 0)
2367 {
2368 endpos=(int)size;
2369 ptr=ptr->strdup(data+xendpos);
2370 if(ptr)
2371 {
2372 ptr=ptr->toUTF8(true);
2373 if(ptr)
2374 {
2375 endpos-=(int)(ptr->size);
2376 }
2377 }
2378 }
2379 }
2380 }
2381 return retval;
2382 }
2383
2384 int
getUCS4(uint32_t & w,const int from) const2385 GStringRep::getUCS4(uint32_t &w, const int from) const
2386 {
2387 int retval;
2388 if(from>=size)
2389 {
2390 w=0;
2391 retval=size;
2392 }else if(from<0)
2393 {
2394 w=(unsigned int)(-1);
2395 retval=(-1);
2396 }else
2397 {
2398 const char *source=data+from;
2399 w=getValidUCS4(source);
2400 retval=(int)((size_t)source-(size_t)data);
2401 }
2402 return retval;
2403 }
2404
2405
2406 uint32_t
getValidUCS4(const char * & source) const2407 GStringRep::UTF8::getValidUCS4(const char *&source) const
2408 {
2409 return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size);
2410 }
2411
2412 int
nextNonSpace(const int from,const int len) const2413 GStringRep::nextNonSpace(const int from,const int len) const
2414 {
2415 return nextCharType(giswspace,from,len,true);
2416 }
2417
2418 int
nextSpace(const int from,const int len) const2419 GStringRep::nextSpace(const int from,const int len) const
2420 {
2421 return nextCharType(giswspace,from,len,false);
2422 }
2423
2424 int
nextChar(const int from) const2425 GStringRep::nextChar(const int from) const
2426 {
2427 char const * xptr=data+from;
2428 (void)getValidUCS4(xptr);
2429 return (int)((size_t)xptr-(size_t)data);
2430 }
2431
2432 int
firstEndSpace(int from,const int len) const2433 GStringRep::firstEndSpace(int from,const int len) const
2434 {
2435 const int xsize=(len<0)?size:(from+len);
2436 const int ysize=(size<xsize)?size:xsize;
2437 int retval=ysize;
2438 while(from<ysize)
2439 {
2440 from=nextNonSpace(from,ysize-from);
2441 if(from < size)
2442 {
2443 const int r=nextSpace(from,ysize-from);
2444 // If a character isn't legal, then it will return
2445 // tru for both nextSpace and nextNonSpace.
2446 if(r == from)
2447 {
2448 from++;
2449 }else
2450 {
2451 from=retval=r;
2452 }
2453 }
2454 }
2455 return retval;
2456 }
2457
2458 int
UCS4toUTF16(const uint32_t w,uint16_t & w1,uint16_t & w2)2459 GStringRep::UCS4toUTF16(
2460 const uint32_t w,uint16_t &w1, uint16_t &w2)
2461 {
2462 int retval;
2463 if(w<0x10000)
2464 {
2465 w1=(uint16_t)w;
2466 w2=0;
2467 retval=1;
2468 }else
2469 {
2470 w1=(uint16_t)((((w-0x10000)>>10)&0x3ff)+0xD800);
2471 w2=(uint16_t)((w&0x3ff)+0xDC00);
2472 retval=2;
2473 }
2474 return retval;
2475 }
2476
2477 int
UTF16toUCS4(uint32_t & U,uint16_t const * const s,void const * const eptr)2478 GStringRep::UTF16toUCS4(
2479 uint32_t &U,uint16_t const * const s,void const * const eptr)
2480 {
2481 int retval=0;
2482 U=0;
2483 uint16_t const * const r=s+1;
2484 if(r <= eptr)
2485 {
2486 uint32_t const W1=s[0];
2487 if((W1<0xD800)||(W1>0xDFFF))
2488 {
2489 if((U=W1))
2490 {
2491 retval=1;
2492 }
2493 }else if(W1<=0xDBFF)
2494 {
2495 uint16_t const * const rr=r+1;
2496 if(rr <= eptr)
2497 {
2498 uint32_t const W2=s[1];
2499 if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff))))
2500 {
2501 retval=2;
2502 }else
2503 {
2504 retval=(-1);
2505 }
2506 }
2507 }
2508 }
2509 return retval;
2510 }
2511
2512
2513 //bcr
2514
2515 GUTF8String&
operator +=(char ch)2516 GUTF8String::operator+= (char ch)
2517 {
2518 return init(
2519 GStringRep::UTF8::create((const char*)*this,
2520 GStringRep::UTF8::create(&ch,0,1)));
2521 }
2522
2523 GUTF8String&
operator +=(const char * str)2524 GUTF8String::operator+= (const char *str)
2525 {
2526 return init(GStringRep::UTF8::create(*this,str));
2527 }
2528
2529 GUTF8String&
operator +=(const GBaseString & str)2530 GUTF8String::operator+= (const GBaseString &str)
2531 {
2532 return init(GStringRep::UTF8::create(*this,str));
2533 }
2534
2535 GUTF8String
substr(int from,int len) const2536 GUTF8String::substr(int from, int len) const
2537 { return GUTF8String(*this, from, len); }
2538
2539 GUTF8String
operator +(const GBaseString & s2) const2540 GUTF8String::operator+(const GBaseString &s2) const
2541 { return GStringRep::UTF8::create(*this,s2); }
2542
2543 GUTF8String
operator +(const GUTF8String & s2) const2544 GUTF8String::operator+(const GUTF8String &s2) const
2545 { return GStringRep::UTF8::create(*this,s2); }
2546
2547 GUTF8String
operator +(const char * s2) const2548 GUTF8String::operator+(const char *s2) const
2549 { return GStringRep::UTF8::create(*this,s2); }
2550
2551 char *
getbuf(int n)2552 GUTF8String::getbuf(int n)
2553 {
2554 if(ptr)
2555 init((*this)->getbuf(n));
2556 else if(n>0)
2557 init(GStringRep::UTF8::create(n));
2558 else
2559 init(0);
2560 return ptr?((*this)->data):0;
2561 }
2562
2563 void
setat(const int n,const char ch)2564 GUTF8String::setat(const int n, const char ch)
2565 {
2566 if((!n)&&(!ptr))
2567 {
2568 init(GStringRep::UTF8::create(&ch,0,1));
2569 }else
2570 {
2571 init((*this)->setat(CheckSubscript(n),ch));
2572 }
2573 }
2574
2575 GP<GStringRep>
UTF8ToNative(const char * s,const EscapeMode escape)2576 GStringRep::UTF8ToNative( const char *s, const EscapeMode escape )
2577 {
2578 return GStringRep::UTF8::create(s)->toNative(escape);
2579 }
2580
GUTF8String(const char dat)2581 GUTF8String::GUTF8String(const char dat)
2582 { init(GStringRep::UTF8::create(&dat,0,1)); }
2583
// Constructs a string from the format `fmt` and the argument list `args`.
// A format without representation is simply copied; otherwise the result
// of the representation's vformat(args) is used.
GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args)
{
  if (!fmt.ptr)
    init(fmt);
  else
    init(fmt->vformat(args));
}
2591
GUTF8String(const char * str)2592 GUTF8String::GUTF8String(const char *str)
2593 { init(GStringRep::UTF8::create(str)); }
2594
GUTF8String(const unsigned char * str)2595 GUTF8String::GUTF8String(const unsigned char *str)
2596 { init(GStringRep::UTF8::create((const char *)str)); }
2597
GUTF8String(const uint16_t * str)2598 GUTF8String::GUTF8String(const uint16_t *str)
2599 { init(GStringRep::UTF8::create(str,0,-1)); }
2600
GUTF8String(const uint32_t * str)2601 GUTF8String::GUTF8String(const uint32_t *str)
2602 { init(GStringRep::UTF8::create(str,0,-1)); }
2603
GUTF8String(const char * dat,unsigned int len)2604 GUTF8String::GUTF8String(const char *dat, unsigned int len)
2605 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
2606
GUTF8String(const uint16_t * dat,unsigned int len)2607 GUTF8String::GUTF8String(const uint16_t *dat, unsigned int len)
2608 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
2609
GUTF8String(const uint32_t * dat,unsigned int len)2610 GUTF8String::GUTF8String(const uint32_t *dat, unsigned int len)
2611 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
2612
GUTF8String(const GBaseString & gs,int from,int len)2613 GUTF8String::GUTF8String(const GBaseString &gs, int from, int len)
2614 { init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); }
2615
GUTF8String(const int number)2616 GUTF8String::GUTF8String(const int number)
2617 { init(GStringRep::UTF8::create_format("%d",number)); }
2618
GUTF8String(const double number)2619 GUTF8String::GUTF8String(const double number)
2620 { init(GStringRep::UTF8::create_format("%f",number)); }
2621
operator =(const char str)2622 GUTF8String& GUTF8String::operator= (const char str)
2623 { return init(GStringRep::UTF8::create(&str,0,1)); }
2624
operator =(const char * str)2625 GUTF8String& GUTF8String::operator= (const char *str)
2626 { return init(GStringRep::UTF8::create(str)); }
2627
// Concatenation of a generic string with a UTF8 string; the result is a
// UTF8 string created by GStringRep::UTF8::create(*this, s2).
GUTF8String
GBaseString::operator+(const GUTF8String &s2) const
{
  return GStringRep::UTF8::create(*this, s2);
}
2630
2631 #if HAS_WCHAR
2632 GUTF8String
operator +(const GUTF8String & s2) const2633 GNativeString::operator+(const GUTF8String &s2) const
2634 {
2635 if (ptr)
2636 return GStringRep::UTF8::create((*this)->toUTF8(true),s2);
2637 else
2638 return GStringRep::UTF8::create((*this),s2);
2639 }
2640 #endif
2641
2642 GUTF8String
operator +(const GNativeString & s2) const2643 GUTF8String::operator+(const GNativeString &s2) const
2644 {
2645 GP<GStringRep> g = s2;
2646 if (s2.ptr)
2647 g = s2->toUTF8(true);
2648 return GStringRep::UTF8::create(*this,g);
2649 }
2650
2651 GUTF8String
operator +(const char * s1,const GUTF8String & s2)2652 operator+(const char *s1, const GUTF8String &s2)
2653 { return GStringRep::UTF8::create(s1,s2); }
2654
2655 #if HAS_WCHAR
2656 GNativeString
operator +(const char * s1,const GNativeString & s2)2657 operator+(const char *s1, const GNativeString &s2)
2658 { return GStringRep::Native::create(s1,s2); }
2659
2660 GNativeString&
operator +=(char ch)2661 GNativeString::operator+= (char ch)
2662 {
2663 char s[2]; s[0]=ch; s[1]=0;
2664 return init(GStringRep::Native::create((const char*)*this, s));
2665 }
2666
2667 GNativeString&
operator +=(const char * str)2668 GNativeString::operator+= (const char *str)
2669 {
2670 return init(GStringRep::Native::create(*this,str));
2671 }
2672
2673 GNativeString&
operator +=(const GBaseString & str)2674 GNativeString::operator+= (const GBaseString &str)
2675 {
2676 return init(GStringRep::Native::create(*this,str));
2677 }
2678
2679 GNativeString
operator +(const GBaseString & s2) const2680 GNativeString::operator+(const GBaseString &s2) const
2681 { return GStringRep::Native::create(*this,s2); }
2682
2683 GNativeString
operator +(const GNativeString & s2) const2684 GNativeString::operator+(const GNativeString &s2) const
2685 { return GStringRep::Native::create(*this,s2); }
2686
2687 GNativeString
operator +(const char * s2) const2688 GNativeString::operator+(const char *s2) const
2689 { return GStringRep::Native::create(*this,s2); }
2690
2691 char *
getbuf(int n)2692 GNativeString::getbuf(int n)
2693 {
2694 if(ptr)
2695 init((*this)->getbuf(n));
2696 else if(n>0)
2697 init(GStringRep::Native::create(n));
2698 else
2699 init(0);
2700 return ptr?((*this)->data):0;
2701 }
2702
2703 void
setat(const int n,const char ch)2704 GNativeString::setat(const int n, const char ch)
2705 {
2706 if((!n)&&(!ptr))
2707 {
2708 init(GStringRep::Native::create(&ch,0,1));
2709 }else
2710 {
2711 init((*this)->setat(CheckSubscript(n),ch));
2712 }
2713 }
2714
2715 #endif
2716
2717
2718 #ifdef HAVE_NAMESPACES
2719 }
2720 # ifndef NOT_USING_DJVU_NAMESPACE
2721 using namespace DJVU;
2722 # endif
2723 #endif
2724