1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 // From: Leon Bottou, 1/31/2002
57 // This file has very little to do with my initial implementation.
58 // It has been practically rewritten by Lizardtech for i18n changes.
59 // My original implementation was very small in comparison
60 // <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
61 // In my opinion, the duplication of the string classes is a failed
62 // attempt to use the type system to enforce coding policies.
63 // This could be fixed.  But there are better things to do in djvulibre.
64 
65 #ifdef HAVE_CONFIG_H
66 # include "config.h"
67 #endif
68 #if NEED_GNUG_PRAGMAS
69 # pragma implementation
70 #endif
71 
72 #include "GString.h"
73 #include "GThreads.h"
74 #include "debug.h"
75 
76 #include <stddef.h>
77 #include <stdlib.h>
78 #include <stdio.h>
79 #include <string.h>
80 #if HAS_WCHAR
81 # include <locale.h>
82 # if !defined(AUTOCONF) || HAVE_WCHAR_H
83 #  include <wchar.h>
84 # endif
85 # if HAS_WCTYPE
86 #  include <wctype.h>
87 # endif
88 #endif
89 #include <ctype.h>
90 
91 #ifndef LC_NUMERIC          //MingW
92 # undef DO_CHANGELOCALE
93 # define LC_NUMERIC 0
94 #endif
95 #ifndef DO_CHANGELOCALE
96 # define DO_CHANGELOCALE 0
97 #endif
98 
99 
100 #ifdef HAVE_NAMESPACES
101 namespace DJVU {
102 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
103 }
104 #endif
105 #endif
106 
107 
~GBaseString()108 GBaseString::~GBaseString() {}
~GNativeString()109 GNativeString::~GNativeString() {}
~GUTF8String()110 GUTF8String::~GUTF8String() {}
111 
112 #if !HAS_MBSTATE && HAS_WCHAR
113 // Under some systems, wctomb() and mbtowc() are not thread
114 // safe.  In those cases, wcrtomb and mbrtowc are preferred.
115 // For Solaris, wctomb() and mbtowc() are thread safe, and
116 // wcrtomb() and mbrtowc() don't exist.
117 
// Redirect the restartable names to the local stand-ins defined below.
#define wcrtomb MYwcrtomb
#define mbrtowc MYmbrtowc
#define mbrlen  MYmbrlen

// Stand-in for wcrtomb(): the conversion state is ignored and the
// work is handed to wctomb().
static inline int
wcrtomb(char *out, wchar_t wc, mbstate_t *)
{
  const int written = wctomb(out, wc);
  return written;
}

// Stand-in for mbrtowc(): the conversion state is ignored and the
// work is handed to mbtowc().
static inline int
mbrtowc(wchar_t *out, const char *in, size_t avail, mbstate_t *)
{
  const int consumed = mbtowc(out, in, avail);
  return consumed;
}

// Stand-in for mbrlen(): the conversion state is ignored and the
// work is handed to mblen().  The int result of mblen() is converted
// to size_t on return.
static inline size_t
mbrlen(const char *in, size_t avail, mbstate_t *)
{
  return mblen(in, avail);
}
#endif // !HAS_MBSTATE && HAS_WCHAR
140 
141 
142 GP<GStringRep>
upcase(void) const143 GStringRep::upcase(void) const
144 { return tocase(giswupper,gtowupper); }
145 
146 GP<GStringRep>
downcase(void) const147 GStringRep::downcase(void) const
148 { return tocase(giswlower,gtowlower); }
149 
150 GP<GStringRep>
create(const unsigned int sz)151 GStringRep::UTF8::create(const unsigned int sz)
152 {
153   return GStringRep::create(sz,(GStringRep::UTF8 *)0);
154 }
155 
156 GP<GStringRep>
create(const char * s)157 GStringRep::UTF8::create(const char *s)
158 {
159   GStringRep::UTF8 dummy;
160   return dummy.strdup(s);
161 }
162 
163 GP<GStringRep>
create(const GP<GStringRep> & s1,const GP<GStringRep> & s2)164 GStringRep::UTF8::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
165 {
166   GStringRep::UTF8 dummy;
167   return dummy.concat(s1,s2);
168 }
169 
170 GP<GStringRep>
create(const GP<GStringRep> & s1,const char * s2)171 GStringRep::UTF8::create( const GP<GStringRep> &s1,const char *s2)
172 {
173   GStringRep::UTF8 dummy;
174   return dummy.concat(s1,s2);
175 }
176 
177 GP<GStringRep>
create(const char * s1,const GP<GStringRep> & s2)178 GStringRep::UTF8::create( const char *s1, const GP<GStringRep> &s2)
179 {
180   GStringRep::UTF8 dummy;
181   return dummy.concat(s1,s2);
182 }
183 
184 GP<GStringRep>
create(const char * s1,const char * s2)185 GStringRep::UTF8::create( const char *s1,const char *s2)
186 {
187   GStringRep::UTF8 dummy;
188   return dummy.concat(s1,s2);
189 }
190 
191 GP<GStringRep>
create(const char * s,const int start,const int length)192 GStringRep::UTF8::create(const char *s,const int start,const int length)
193 {
194   GStringRep::UTF8 dummy;
195   return dummy.substr(s,start,length);
196 }
197 
198 GP<GStringRep>
create(const uint16_t * s,const int start,const int length)199 GStringRep::UTF8::create(
200   const uint16_t *s,const int start,const int length)
201 {
202   GStringRep::UTF8 dummy;
203   return dummy.substr(s,start,length);
204 }
205 
206 GP<GStringRep>
create(const uint32_t * s,const int start,const int length)207 GStringRep::UTF8::create(
208   const uint32_t *s,const int start,const int length)
209 {
210   GStringRep::UTF8 dummy;
211   return dummy.substr(s,start,length);
212 }
213 
214 GP<GStringRep>
blank(const unsigned int sz) const215 GStringRep::UTF8::blank(const unsigned int sz) const
216 {
217    return GStringRep::create(sz,(GStringRep::UTF8 *)0);
218 }
219 
220 bool
isUTF8(void) const221 GStringRep::UTF8::isUTF8(void) const
222 {
223   return true;
224 }
225 
226 GP<GStringRep>
toThis(const GP<GStringRep> & rep,const GP<GStringRep> &) const227 GStringRep::UTF8::toThis(
228     const GP<GStringRep> &rep,const GP<GStringRep> &) const
229 {
230   return rep?(rep->toUTF8(true)):rep;
231 }
232 
233 GP<GStringRep>
create(const char fmt[],va_list & args)234 GStringRep::UTF8::create(const char fmt[],va_list& args)
235 {
236   const GP<GStringRep> s(create(fmt));
237   return (s?(s->vformat(args)):s);
238 }
239 
240 #if !HAS_WCHAR
241 
242 #define NATIVE_CREATE(x) UTF8::create( x );
243 
244 #ifdef LC_ALL
245 #undef LC_ALL
246 #endif
247 #define LC_ALL 0
248 
249 class GStringRep::ChangeLocale
250 {
251 public:
ChangeLocale(const int,const char *)252   ChangeLocale(const int,const char *) {}
~ChangeLocale()253   ~ChangeLocale() {};
254 };
255 
256 GP<GStringRep>
NativeToUTF8(const char * s)257 GStringRep::NativeToUTF8( const char *s )
258 {
259   return GStringRep::UTF8::create(s);
260 }
261 
262 #else
263 
264 #define NATIVE_CREATE(x) Native::create( x );
265 
266 // The declaration and implementation of GStringRep::ChangeLocale
267 // Not used in WinCE
268 
269 class GStringRep::ChangeLocale
270 {
271 public:
272   ChangeLocale(const int category,const char locale[]);
273   ~ChangeLocale();
274 private:
275   GUTF8String locale;
276 #if DO_CHANGELOCALE
277   int category;
278 #endif
279 };
280 
281 class GStringRep::Native : public GStringRep
282 {
283 public:
284   // default constructor
285   Native(void);
286   // virtual destructor
287   virtual ~Native();
288 
289     // Other virtual methods.
290       // Create an empty string.
291   virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
292       // Append a string.
293   virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
294       // Test if Native.
295   virtual bool isNative(void) const;
296       // Convert to Native.
297   virtual GP<GStringRep> toNative(
298     const EscapeMode escape=UNKNOWN_ESCAPED) const;
299       // Convert to UTF8.
300   virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
301       // Convert to UTF8.
302   virtual GP<GStringRep> toThis(
303      const GP<GStringRep> &rep,const GP<GStringRep> &) const;
304       // Compare with #s2#.
305   virtual int cmp(const GP<GStringRep> &s2, const int len=(-1)) const;
306 
307   // Convert strings to numbers.
308   virtual int toInt(void) const;
309   virtual long toLong(
310     const int pos, int &endpos, const int base=10) const;
311   virtual unsigned long toULong(
312     const int pos, int &endpos, const int base=10) const;
313   virtual double toDouble(
314     const int pos, int &endpos) const;
315 
316     // Create an empty string
317   static GP<GStringRep> create(const unsigned int sz = 0);
318 
319     // Create a strdup string.
320   static GP<GStringRep> create(const char *s);
321 
322   // Creates by appending to the current string
323 
324    // Creates with a concat operation.
325   static GP<GStringRep> create(
326     const GP<GStringRep> &s1,const GP<GStringRep> &s2);
327   static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
328   static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
329   static GP<GStringRep> create(const char *s1,const char *s2);
330 
331     // Create with a strdup and substr operation.
332   static GP<GStringRep> create(
333     const char *s,const int start,const int length=(-1));
334   static GP<GStringRep> create(
335     const uint16_t *s,const int start,const int length=(-1));
336   static GP<GStringRep> create(
337     const uint32_t *s,const int start,const int length=(-1));
338 
339     // Create with an sprintf()
340   static GP<GStringRep> create_format(const char fmt[],...);
341   static GP<GStringRep> create(const char fmt[],va_list &args);
342 
343   virtual unsigned char *UCS4toString(
344     const uint32_t w,unsigned char *ptr, mbstate_t *ps=0) const;
345 
346   // Tests if a string is legally encoded in the current character set.
347   virtual bool is_valid(void) const;
348 
349   virtual int ncopy(wchar_t * const buf, const int buflen) const;
350 
351   friend class GBaseString;
352 protected:
353   // Return the next character and increment the source pointer.
354   virtual uint32_t getValidUCS4(const char *&source) const;
355 };
356 
357 GP<GStringRep>
create(const unsigned int sz)358 GStringRep::Native::create(const unsigned int sz)
359 {
360   return GStringRep::create(sz,(GStringRep::Native *)0);
361 }
362 
363     // Create a strdup string.
364 GP<GStringRep>
create(const char * s)365 GStringRep::Native::create(const char *s)
366 {
367   GStringRep::Native dummy;
368   return dummy.strdup(s);
369 }
370 
371 GP<GStringRep>
create(const GP<GStringRep> & s1,const GP<GStringRep> & s2)372 GStringRep::Native::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
373 {
374   GStringRep::Native dummy;
375   return dummy.concat(s1,s2);
376 }
377 
378 GP<GStringRep>
create(const GP<GStringRep> & s1,const char * s2)379 GStringRep::Native::create( const GP<GStringRep> &s1,const char *s2)
380 {
381   GStringRep::Native dummy;
382   return dummy.concat(s1,s2);
383 }
384 
385 GP<GStringRep>
create(const char * s1,const GP<GStringRep> & s2)386 GStringRep::Native::create( const char *s1, const GP<GStringRep> &s2)
387 {
388   GStringRep::Native dummy;
389   return dummy.concat(s1,s2);
390 }
391 
392 GP<GStringRep>
create(const char * s1,const char * s2)393 GStringRep::Native::create(const char *s1,const char *s2)
394 {
395   GStringRep::Native dummy;
396   return dummy.concat(s1,s2);
397 }
398 
399 GP<GStringRep>
create(const char * s,const int start,const int length)400 GStringRep::Native::create(
401   const char *s,const int start,const int length)
402 {
403   GStringRep::Native dummy;
404   return dummy.substr(s,start,length);
405 }
406 
407 GP<GStringRep>
create(const uint16_t * s,const int start,const int length)408 GStringRep::Native::create(
409     const uint16_t *s,const int start,const int length)
410 {
411   GStringRep::Native dummy;
412   return dummy.substr(s,start,length);
413 }
414 
415 GP<GStringRep>
create(const uint32_t * s,const int start,const int length)416 GStringRep::Native::create(
417   const uint32_t *s,const int start,const int length)
418 {
419   GStringRep::Native dummy;
420   return dummy.substr(s,start,length);
421 }
422 
423 GP<GStringRep>
blank(const unsigned int sz) const424 GStringRep::Native::blank(const unsigned int sz) const
425 {
426   return GStringRep::create(sz,(GStringRep::Native *)0);
427 }
428 
429 bool
isNative(void) const430 GStringRep::Native::isNative(void) const
431 {
432   return true;
433 }
434 
435 GP<GStringRep>
toThis(const GP<GStringRep> & rep,const GP<GStringRep> &) const436 GStringRep::Native::toThis(
437      const GP<GStringRep> &rep,const GP<GStringRep> &) const
438 {
439   return rep?(rep->toNative(NOT_ESCAPED)):rep;
440 }
441 
442 GP<GStringRep>
create(const char fmt[],va_list & args)443 GStringRep::Native::create(const char fmt[],va_list &args)
444 {
445   const GP<GStringRep> s(create(fmt));
446   return (s?(s->vformat(args)):s);
447 }
448 
449 int
ncopy(wchar_t * const buf,const int buflen) const450 GStringRep::Native::ncopy(
451   wchar_t * const buf, const int buflen ) const
452 {
453   return toUTF8()->ncopy(buf,buflen);
454 }
455 
ChangeLocale(const int xcategory,const char xlocale[])456 GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] )
457 #if DO_CHANGELOCALE
458   : category(xcategory)
459 #endif
460 {
461 #if DO_CHANGELOCALE
462   // This is disabled under UNIX because
463   // it does not play nice with MT.
464   if(xlocale)
465     {
466       locale=setlocale(xcategory,0);
467       if(locale.length() &&(locale!=xlocale))
468         {
469           if(locale == setlocale(category,xlocale))
470             {
471               locale.empty();
472             }
473         }
474       else
475         {
476           locale.empty();
477         }
478     }
479 #endif
480 }
481 
~ChangeLocale()482 GStringRep::ChangeLocale::~ChangeLocale()
483 {
484 #if DO_CHANGELOCALE
485   if(locale.length())
486     {
487       setlocale(category,(const char *)locale);
488     }
489 #endif
490 }
491 
492 GNativeString &
format(const char fmt[],...)493 GNativeString::format(const char fmt[], ... )
494 {
495   va_list args;
496   va_start(args, fmt);
497   return init(GStringRep::Native::create(fmt,args));
498 }
499 
500 // Gather the native implementations here. Not used in WinCE.
501 
Native(void)502 GStringRep::Native::Native(void) {}
~Native()503 GStringRep::Native::~Native() {}
504 
505 GP<GStringRep>
append(const GP<GStringRep> & s2) const506 GStringRep::Native::append(const GP<GStringRep> &s2) const
507 {
508   GP<GStringRep> retval;
509   if(s2)
510   {
511     if(s2->isUTF8())
512     {
513       G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") );
514     }
515     retval=concat(data,s2->data);
516   }else
517   {
518     retval=const_cast<GStringRep::Native *>(this);
519   }
520   return retval;
521 }
522 
523 GP<GStringRep>
create_format(const char fmt[],...)524 GStringRep::Native::create_format(const char fmt[],...)
525 {
526   va_list args;
527   va_start(args, fmt);
528   return create(fmt,args);
529 }
530 
531 unsigned char *
UCS4toString(const uint32_t w0,unsigned char * ptr,mbstate_t * ps) const532 GStringRep::Native::UCS4toString(
533   const uint32_t w0,unsigned char *ptr, mbstate_t *ps) const
534 {
535   return UCS4toNative(w0,ptr,ps);
536 }
537 
538 // Convert a UCS4 to a multibyte string in the value bytes.
539 // The data pointed to by ptr should be long enough to contain
540 // the results with a nill termination.  (Normally 7 characters
541 // is enough.)
542 unsigned char *
UCS4toNative(const uint32_t w0,unsigned char * ptr,mbstate_t * ps)543 GStringRep::UCS4toNative(const uint32_t w0,unsigned char *ptr, mbstate_t *ps)
544 {
545   uint16_t w1;
546   uint16_t w2=1;
547   for(int count=(sizeof(wchar_t)==sizeof(w1))
548         ? UCS4toUTF16(w0,w1,w2) : 1;
549       count;
550       --count,w1=w2)
551     {
552       // wchar_t can be either UCS4 or UCS2
553       const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
554       int i=wcrtomb((char *)ptr,w,ps);
555       if(i<0)
556         break;
557       ptr[i]=0;
558       ptr += i;
559     }
560   ptr[0]=0;
561   return ptr;
562 }
563 
564 GP<GStringRep>
toNative(const EscapeMode escape) const565 GStringRep::Native::toNative(const EscapeMode escape) const
566 {
567   if(escape == UNKNOWN_ESCAPED)
568     G_THROW( ERR_MSG("GStringRep.NativeToNative") );
569   return const_cast<GStringRep::Native *>(this);
570 }
571 
572 GP<GStringRep>
toUTF8(const bool) const573 GStringRep::Native::toUTF8(const bool) const
574 {
575   unsigned char *buf;
576   GPBuffer<unsigned char> gbuf(buf,size*6+1);
577   buf[0]=0;
578   if(data && size)
579   {
580     size_t n=size;
581     const char *source=data;
582     mbstate_t ps;
583     unsigned char *ptr=buf;
584     //(void)mbrlen(source, n, &ps);
585     memset(&ps,0,sizeof(mbstate_t));
586     int i=0;
587     if(sizeof(wchar_t) == sizeof(uint32_t))
588       {
589         wchar_t w = 0;
590         for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i)
591           {
592             ptr=UCS4toUTF8((uint32_t)w,ptr);
593           }
594       }
595     else
596       {
597         wchar_t w = 0;
598         for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i)
599           {
600             uint16_t s[2];
601             s[0]=w;
602             uint32_t w0;
603             if(UTF16toUCS4(w0,s,s+1)<=0)
604               {
605                 source+=i;
606                 n-=i;
607                 if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0))
608                   {
609                     s[1]=w;
610                     if(UTF16toUCS4(w0,s,s+2)<=0)
611                       {
612                         i=(-1);
613                         break;
614                       }
615                   }
616                 else
617                   {
618                     i=(-1);
619                     break;
620                   }
621               }
622             ptr=UCS4toUTF8(w0,ptr);
623           }
624       }
625     if(i<0)
626       {
627         gbuf.resize(0);
628       }
629     else
630       {
631         ptr[0]=0;
632       }
633   }
634   return GStringRep::UTF8::create((const char *)buf);
635 }
636 
637 GNativeString
UTF8ToNative(const bool currentlocale,const EscapeMode escape) const638 GBaseString::UTF8ToNative(
639   const bool currentlocale,const EscapeMode escape) const
640 {
641   const char *source=(*this);
642   GP<GStringRep> retval;
643   if(source && source[0])
644     {
645 #if DO_CHANGELOCALE
646       GUTF8String lc_ctype(setlocale(LC_CTYPE,0));
647       bool repeat;
648       for(repeat=!currentlocale;;repeat=false)
649         {
650 #endif
651           retval=(*this)->toNative((GStringRep::EscapeMode)escape);
652 #if DO_CHANGELOCALE
653           if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
654             break;
655         }
656       if(!repeat)
657         setlocale(LC_CTYPE,(const char *)lc_ctype);
658 #endif
659     }
660   return GNativeString(retval);
661 }
662 
663 /*MBCS*/
664 GNativeString
getUTF82Native(EscapeMode escape) const665 GBaseString::getUTF82Native( EscapeMode escape ) const
666 { //MBCS cvt
667   GNativeString retval;
668 
669   // We don't want to convert this if it
670   // already is known to be native...
671 //  if (isNative()) return *this;
672 
673   const size_t slen=length()+1;
674   if(slen>1)
675   {
676     retval=UTF8ToNative(false,escape) ;
677     if(!retval.length())
678     {
679       retval=(const char*)*this;
680     }
681   }
682   return retval;
683 }
684 
685 GUTF8String
NativeToUTF8(void) const686 GBaseString::NativeToUTF8(void) const
687 {
688   GP<GStringRep> retval;
689   if(length())
690   {
691     const char *source=(*this);
692 #if DO_CHANGELOCALE
693     GUTF8String lc_ctype=setlocale(LC_CTYPE,0);
694     bool repeat;
695     for(repeat=true;;repeat=false)
696       {
697 #endif
698         if( (retval=GStringRep::NativeToUTF8(source)) )
699           if(GStringRep::cmp(retval->toNative(),source))
700             retval=GStringRep::UTF8::create((unsigned int)0);
701 #if DO_CHANGELOCALE
702         if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
703           break;
704       }
705     if(!repeat)
706       setlocale(LC_CTYPE,(const char *)lc_ctype);
707 #endif
708   }
709   return GUTF8String(retval);
710 }
711 
712 GUTF8String
getNative2UTF8(void) const713 GBaseString::getNative2UTF8(void) const
714 { //MBCS cvt
715 
716    // We don't want to do a transform this
717    // if we already are in the given type.
718 //   if (isUTF8()) return *this;
719 
720   const size_t slen=length()+1;
721   GUTF8String retval;
722   if(slen > 1)
723   {
724     retval=NativeToUTF8();
725     if(!retval.length())
726     {
727       retval=(const char *)(*this);
728     }
729   }
730   return retval;
731 } /*MBCS*/
732 
733 int
cmp(const GP<GStringRep> & s2,const int len) const734 GStringRep::Native::cmp(const GP<GStringRep> &s2,const int len) const
735 {
736   int retval;
737   if(s2)
738   {
739     if(s2->isUTF8())
740     {
741       const GP<GStringRep> r(toUTF8(true));
742       if(r)
743       {
744         retval=GStringRep::cmp(r->data,s2->data,len);
745       }else
746       {
747         retval=cmp(s2->toNative(NOT_ESCAPED),len);
748       }
749     }else
750     {
751       retval=GStringRep::cmp(data,s2->data,len);
752     }
753   }else
754   {
755     retval=GStringRep::cmp(data,0,len);
756   }
757   return retval;
758 }
759 
760 int
toInt() const761 GStringRep::Native::toInt() const
762 {
763   return atoi(data);
764 }
765 
766 long
toLong(const int pos,int & endpos,const int base) const767 GStringRep::Native::toLong(
768   const int pos, int &endpos, const int base) const
769 {
770    char *edata=0;
771    const long retval=strtol(data+pos, &edata, base);
772    if(edata)
773    {
774      endpos=(int)((size_t)edata-(size_t)data);
775    }else
776    {
777      endpos=(-1);
778    }
779    return retval;
780 }
781 
782 unsigned long
toULong(const int pos,int & endpos,const int base) const783 GStringRep::Native::toULong(
784   const int pos, int &endpos, const int base) const
785 {
786    char *edata=0;
787    const unsigned long retval=strtoul(data+pos, &edata, base);
788    if(edata)
789    {
790      endpos=(int)((size_t)edata-(size_t)data);
791    }else
792    {
793      endpos=(-1);
794    }
795    return retval;
796 }
797 
798 double
toDouble(const int pos,int & endpos) const799 GStringRep::Native::toDouble(
800   const int pos, int &endpos) const
801 {
802    char *edata=0;
803    const double retval=strtod(data+pos, &edata);
804    if(edata)
805    {
806      endpos=(int)((size_t)edata-(size_t)data);
807    }else
808    {
809      endpos=(-1);
810    }
811    return retval;
812 }
813 
814 uint32_t
getValidUCS4(const char * & source) const815 GStringRep::Native::getValidUCS4(const char *&source) const
816 {
817   uint32_t retval=0;
818   int n=(int)((size_t)size+(size_t)data-(size_t)source);
819   if(source && (n > 0))
820   {
821     mbstate_t ps;
822     //(void)mbrlen(source, n, &ps);
823     memset(&ps,0,sizeof(mbstate_t));
824     wchar_t wt;
825     const int len=mbrtowc(&wt,source,n,&ps);
826     if(len>=0)
827     {
828       if(sizeof(wchar_t) == sizeof(uint16_t))
829       {
830         source+=len;
831         uint16_t s[2];
832         s[0]=(uint16_t)wt;
833         if(UTF16toUCS4(retval,s,s+1)<=0)
834         {
835           if((n-=len)>0)
836           {
837             const int len=mbrtowc(&wt,source,n,&ps);
838             if(len>=0)
839             {
840               s[1]=(uint16_t)wt;
841               uint32_t w;
842               if(UTF16toUCS4(w,s,s+2)>0)
843               {
844                 source+=len;
845                 retval=w;
846               }
847             }
848           }
849         }
850       }else
851       {
852         retval=(uint32_t)wt;
853         source++;
854       }
855     }else
856     {
857       source++;
858     }
859   }
860   return retval;
861 }
862 
863 // Tests if a string is legally encoded in the current character set.
864 bool
is_valid(void) const865 GStringRep::Native::is_valid(void) const
866 {
867   bool retval=true;
868   if(data && size)
869   {
870     size_t n=size;
871     const char *s=data;
872     mbstate_t ps;
873     //(void)mbrlen(s, n, &ps);
874     memset(&ps,0,sizeof(mbstate_t));
875     do
876     {
877       size_t m=mbrlen(s,n,&ps);
878       if(m > n)
879       {
880         retval=false;
881         break;
882       }else if(m)
883       {
884         s+=m;
885         n-=m;
886       }else
887       {
888         break;
889       }
890     } while(n);
891   }
892   return retval;
893 }
894 
895 // These are dummy functions.
896 void
set_remainder(void const * const,const unsigned int,const EncodeType)897 GStringRep::set_remainder(void const * const, const unsigned int,
898   const EncodeType) {}
899 void
set_remainder(void const * const,const unsigned int,const GP<GStringRep> & encoding)900 GStringRep::set_remainder(void const * const, const unsigned int,
901   const GP<GStringRep> &encoding) {}
902 void
set_remainder(const GP<GStringRep::Unicode> &)903 GStringRep::set_remainder( const GP<GStringRep::Unicode> &) {}
904 
905 GP<GStringRep::Unicode>
get_remainder(void) const906 GStringRep::get_remainder( void ) const
907 {
908   return 0;
909 }
910 
GNativeString(const char dat)911 GNativeString::GNativeString(const char dat)
912 {
913   init(GStringRep::Native::create(&dat,0,1));
914 }
915 
GNativeString(const char * str)916 GNativeString::GNativeString(const char *str)
917 {
918   init(GStringRep::Native::create(str));
919 }
920 
GNativeString(const unsigned char * str)921 GNativeString::GNativeString(const unsigned char *str)
922 {
923   init(GStringRep::Native::create((const char *)str));
924 }
925 
GNativeString(const uint16_t * str)926 GNativeString::GNativeString(const uint16_t *str)
927 {
928   init(GStringRep::Native::create(str,0,-1));
929 }
930 
GNativeString(const uint32_t * str)931 GNativeString::GNativeString(const uint32_t *str)
932 {
933   init(GStringRep::Native::create(str,0,-1));
934 }
935 
GNativeString(const char * dat,unsigned int len)936 GNativeString::GNativeString(const char *dat, unsigned int len)
937 {
938   init(
939     GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
940 }
941 
GNativeString(const uint16_t * dat,unsigned int len)942 GNativeString::GNativeString(const uint16_t *dat, unsigned int len)
943 {
944   init(
945     GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
946 }
947 
GNativeString(const uint32_t * dat,unsigned int len)948 GNativeString::GNativeString(const uint32_t *dat, unsigned int len)
949 {
950   init(
951     GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
952 }
953 
// Copy constructor: initializes this string from str via init().
GNativeString::GNativeString(const GNativeString &str)
{
  init(str);
}
958 
GNativeString(const GBaseString & gs,int from,int len)959 GNativeString::GNativeString(const GBaseString &gs, int from, int len)
960 {
961   init(
962     GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len));
963 }
964 
GNativeString(const int number)965 GNativeString::GNativeString(const int number)
966 {
967   init(GStringRep::Native::create_format("%d",number));
968 }
969 
GNativeString(const double number)970 GNativeString::GNativeString(const double number)
971 {
972   init(GStringRep::Native::create_format("%f",number));
973 }
974 
975 GNativeString&
operator =(const char str)976 GNativeString::operator= (const char str)
977 { return init(GStringRep::Native::create(&str,0,1)); }
978 
979 GNativeString&
operator =(const char * str)980 GNativeString::operator= (const char *str)
981 { return init(GStringRep::Native::create(str)); }
982 
983 GNativeString
operator +(const GNativeString & s2) const984 GBaseString::operator+(const GNativeString &s2) const
985 {
986   return GStringRep::Native::create(*this,s2);
987 }
988 
989 GP<GStringRep>
NativeToUTF8(const char * s)990 GStringRep::NativeToUTF8( const char *s )
991 {
992   return GStringRep::Native::create(s)->toUTF8();
993 }
994 
995 #endif // HAS_WCHAR
996 
997 template <class TYPE>
998 GP<GStringRep>
create(const unsigned int sz,TYPE *)999 GStringRep::create(const unsigned int sz, TYPE *)
1000 {
1001   GP<GStringRep> gaddr;
1002   if (sz > 0)
1003   {
1004     GStringRep *addr;
1005     gaddr=(addr=new TYPE);
1006     addr->data=(char *)(::operator new(sz+1));
1007     addr->size = sz;
1008     addr->data[sz] = 0;
1009   }
1010   return gaddr;
1011 }
1012 
1013 GP<GStringRep>
strdup(const char * s) const1014 GStringRep::strdup(const char *s) const
1015 {
1016   GP<GStringRep> retval;
1017   const int length=s?strlen(s):0;
1018   if(length>0)
1019   {
1020     retval=blank(length);
1021     char const * const end=s+length;
1022     char *ptr=retval->data;
1023     for(;*s&&(s!=end);ptr++)
1024     {
1025       ptr[0]=s++[0];
1026     }
1027     ptr[0]=0;
1028   }
1029   return retval;
1030 }
1031 
1032 GP<GStringRep>
substr(const char * s,const int start,const int len) const1033 GStringRep::substr(const char *s,const int start,const int len) const
1034 {
1035   GP<GStringRep> retval;
1036   if(s && s[0])
1037   {
1038     const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1);
1039     const char *startptr, *endptr;
1040     if(start<0)
1041     {
1042       startptr=s+length+start;
1043       if(startptr<s)
1044         startptr=s;
1045     }else
1046     {
1047       startptr=s;
1048       for(const char * const ptr=s+start;(startptr<ptr)&&*startptr;++startptr)
1049         EMPTY_LOOP;
1050     }
1051     if(len<0)
1052     {
1053       if(s+length+1 < startptr+len)
1054       {
1055         endptr=startptr;
1056       }else
1057       {
1058         endptr=s+length+1+len;
1059       }
1060     }else
1061     {
1062       endptr=startptr;
1063       for(const char * const ptr=startptr+len;(endptr<ptr)&&*endptr;++endptr)
1064         EMPTY_LOOP;
1065     }
1066     if(endptr>startptr)
1067     {
1068       retval=blank((size_t)(endptr-startptr));
1069       char *data=retval->data;
1070       for(; (startptr<endptr) && *startptr; ++startptr,++data)
1071       {
1072         data[0]=startptr[0];
1073       }
1074       data[0]=0;
1075     }
1076   }
1077   return retval;
1078 }
1079 
1080 GP<GStringRep>
substr(const uint16_t * s,const int start,const int len) const1081 GStringRep::substr(const uint16_t *s,const int start,const int len) const
1082 {
1083   GP<GStringRep> retval;
1084   if(s && s[0])
1085   {
1086     uint16_t const *eptr;
1087     if(len<0)
1088     {
1089       for(eptr=s;eptr[0];++eptr)
1090         EMPTY_LOOP;
1091     }else
1092     {
1093       eptr=&(s[len]);
1094     }
1095     s=&s[start];
1096     if((size_t)s<(size_t)eptr)
1097     {
1098       mbstate_t ps;
1099       memset(&ps,0,sizeof(mbstate_t));
1100       unsigned char *buf,*ptr;
1101       GPBuffer<unsigned char> gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7);
1102       for(ptr=buf;s[0];)
1103       {
1104         uint32_t w;
1105         int i=UTF16toUCS4(w,s,eptr);
1106         if(i<=0)
1107           break;
1108         s+=i;
1109         ptr=UCS4toString(w,ptr,&ps);
1110       }
1111       ptr[0]=0;
1112       retval = strdup( (const char *)buf );
1113     }
1114   }
1115   return retval;
1116 }
1117 
1118 GP<GStringRep>
substr(const uint32_t * s,const int start,const int len) const1119 GStringRep::substr(const uint32_t *s,const int start,const int len) const
1120 {
1121   GP<GStringRep> retval;
1122   if(s && s[0])
1123   {
1124     uint32_t const *eptr;
1125     if(len<0)
1126     {
1127       for(eptr=s;eptr[0];++eptr)
1128         EMPTY_LOOP;
1129     }else
1130     {
1131       eptr=&(s[len]);
1132     }
1133     s=&s[start];
1134     if((size_t)s<(size_t)eptr)
1135     {
1136       mbstate_t ps;
1137       memset(&ps,0,sizeof(mbstate_t));
1138       unsigned char *buf,*ptr;
1139       GPBuffer<unsigned char> gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7);
1140       for(ptr=buf;s[0];++s)
1141       {
1142         ptr=UCS4toString(s[0],ptr,&ps);
1143       }
1144       ptr[0]=0;
1145       retval = strdup( (const char *)buf );
1146     }
1147   }
1148   return retval;
1149 }
1150 
1151 GP<GStringRep>
append(const char * s2) const1152 GStringRep::append(const char *s2) const
1153 {
1154   GP<GStringRep> retval;
1155   if(s2)
1156   {
1157     retval=concat(data,s2);
1158   }else
1159   {
1160     retval=const_cast<GStringRep *>(this);
1161   }
1162   return retval;
1163 }
1164 
1165 GP<GStringRep>
append(const GP<GStringRep> & s2) const1166 GStringRep::UTF8::append(const GP<GStringRep> &s2) const
1167 {
1168   GP<GStringRep> retval;
1169   if(s2)
1170   {
1171     if(s2->isNative())
1172     {
1173       G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") );
1174     }
1175     retval=concat(data,s2->data);
1176   }else
1177   {
1178     retval=const_cast<GStringRep::UTF8 *>(this);
1179   }
1180   return retval;
1181 }
1182 
1183 GP<GStringRep>
concat(const char * s1,const char * s2) const1184 GStringRep::concat(const char *s1,const char *s2) const
1185 {
1186   const int length1=(s1?strlen(s1):0);
1187   const int length2=(s2?strlen(s2):0);
1188   const int length=length1+length2;
1189   GP<GStringRep> retval;
1190   if(length>0)
1191   {
1192     retval=blank(length);
1193     GStringRep &r=*retval;
1194     if(length1)
1195     {
1196       strcpy(r.data,s1);
1197       if(length2)
1198         strcat(r.data,s2);
1199     }else
1200     {
1201       strcpy(r.data,s2);
1202     }
1203   }
1204   return retval;
1205 }
1206 
// Definition of the static member GBaseString::nullstr: an empty C string.
const char *GBaseString::nullstr = "";
1208 
1209 void
empty(void)1210 GBaseString::empty( void )
1211 {
1212   init(0);
1213 }
1214 
1215 GP<GStringRep>
getbuf(int n) const1216 GStringRep::getbuf(int n) const
1217 {
1218   GP<GStringRep> retval;
1219   if(n < 0)
1220     n=strlen(data);
1221   if(n >= 0)
1222   {
1223     retval=blank((n>0) ? n : 1);
1224     char *ndata=retval->data;
1225     strncpy(ndata,data,n);
1226     ndata[n]=0;
1227   }
1228   return retval;
1229 }
1230 
1231 const char *
isCharType(bool (* xiswtest)(const unsigned long wc),const char * ptr,const bool reverse) const1232 GStringRep::isCharType(bool (*xiswtest)(const unsigned long wc),
1233                        const char *ptr,
1234                        const bool reverse) const
1235 {
1236   const char *xptr = ptr;
1237   unsigned long w=getValidUCS4(xptr);
1238   if(ptr != xptr)
1239     {
1240       if (sizeof(wchar_t) == 2)
1241         w &= 0xffff;
1242       if (reverse ^ xiswtest(w))
1243         ptr = xptr;
1244     }
1245   return ptr;
1246 }
1247 
1248 int
nextCharType(bool (* xiswtest)(const unsigned long wc),const int from,const int len,const bool reverse) const1249 GStringRep::nextCharType(
1250   bool (*xiswtest)(const unsigned long wc), const int from, const int len,
1251   const bool reverse) const
1252 {
1253   // We want to return the position of the next
1254   // non white space starting from the #from#
1255   // location.  isspace should work in any locale
1256   // so we should only need to do this for the non-
1257   // native locales (UTF8)
1258   int retval;
1259   if(from<size)
1260   {
1261     retval=from;
1262     const char * ptr = data+from;
1263     for( const char * const eptr=ptr+((len<0)?(size-from):len);
1264       (ptr<eptr) && *ptr;)
1265     {
1266        // Skip characters that fail the isCharType test
1267       char const * const xptr=isCharType(xiswtest,ptr,!reverse);
1268       if(xptr == ptr)
1269         break;
1270       ptr=xptr;
1271     }
1272     retval=(int)((size_t)ptr-(size_t)data);
1273   }else
1274   {
1275     retval=size;
1276   }
1277   return retval;
1278 }
1279 
1280 bool
giswspace(const unsigned long w)1281 GStringRep::giswspace(const unsigned long w)
1282 {
1283 #if HAS_WCTYPE
1284   return !!iswspace((wchar_t)w);
1285 #else
1286   return (w & ~0xff) ? false : !!isspace((int)(w & 0xff));
1287 #endif
1288 }
1289 
1290 bool
giswupper(const unsigned long w)1291 GStringRep::giswupper(const unsigned long w)
1292 {
1293 #if HAS_WCTYPE
1294   return !!iswupper((wchar_t)w);
1295 #else
1296   return (w & ~0xff) ? false : !!isupper((int)(w & 0xff));
1297 #endif
1298 }
1299 
1300 bool
giswlower(const unsigned long w)1301 GStringRep::giswlower(const unsigned long w)
1302 {
1303 #if HAS_WCTYPE
1304   return !!iswlower((wchar_t)w);
1305 #else
1306   return (w & ~0xff) ? false : !!islower((int)(w & 0xff));
1307 #endif
1308 }
1309 
1310 unsigned long
gtowupper(const unsigned long w)1311 GStringRep::gtowupper(const unsigned long w)
1312 {
1313 #if HAS_WCTYPE
1314   return (unsigned long)towupper((wchar_t)w);
1315 #else
1316   return (w&~0xff) ? w : (unsigned long)toupper(w & 0xff);
1317 #endif
1318 }
1319 
1320 unsigned long
gtowlower(const unsigned long w)1321 GStringRep::gtowlower(const unsigned long w)
1322 {
1323 #if HAS_WCTYPE
1324   return (unsigned long)towlower((wchar_t)w);
1325 #else
1326   return (w&~0xff) ? w : (unsigned long)tolower(w & 0xff);
1327 #endif
1328 }
1329 
1330 GP<GStringRep>
tocase(bool (* xiswcase)(const unsigned long wc),unsigned long (* xtowcase)(const unsigned long wc)) const1331 GStringRep::tocase(
1332   bool (*xiswcase)(const unsigned long wc),
1333   unsigned long (*xtowcase)(const unsigned long wc)) const
1334 {
1335   GP<GStringRep> retval;
1336   char const * const eptr=data+size;
1337   char const *ptr=data;
1338   while(ptr<eptr)
1339   {
1340     char const * const xptr=isCharType(xiswcase,ptr,false);
1341     if(ptr == xptr)
1342       break;
1343     ptr=xptr;
1344   }
1345   if(ptr<eptr)
1346   {
1347     const int n=(int)((size_t)ptr-(size_t)data);
1348     unsigned char *buf;
1349     GPBuffer<unsigned char> gbuf(buf,n+(1+size-n)*6);
1350     if(n>0)
1351     {
1352       strncpy((char *)buf,data,n);
1353     }
1354     unsigned char *buf_ptr=buf+n;
1355     for(char const *ptr=data+n;ptr<eptr;)
1356     {
1357       char const * const xptr=ptr;
1358       const unsigned long w=getValidUCS4(ptr);
1359       if(ptr == xptr)
1360         break;
1361       if(xiswcase(w))
1362       {
1363         const int len=(int)((size_t)ptr-(size_t)xptr);
1364         strncpy((char *)buf_ptr,xptr,len);
1365         buf_ptr+=len;
1366       }else
1367       {
1368         mbstate_t ps;
1369         memset(&ps,0,sizeof(mbstate_t));
1370         buf_ptr=UCS4toString(xtowcase(w),buf_ptr,&ps);
1371       }
1372     }
1373     buf_ptr[0]=0;
1374     retval=substr((const char *)buf,0,(int)((size_t)buf_ptr-(size_t)buf));
1375   }else
1376   {
1377     retval=const_cast<GStringRep *>(this);
1378   }
1379   return retval;
1380 }
1381 
1382 // Returns a copy of this string with characters used in XML escaped as follows:
1383 //      '<'  -->  "&lt;"
1384 //      '>'  -->  "&gt;"
1385 //      '&'  -->  "&amp;"
1386 //      '\'' -->  "&apos;"
1387 //      '\"' -->  "&quot;"
1388 //  Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f.
1389 GP<GStringRep>
toEscaped(const bool tosevenbit) const1390 GStringRep::toEscaped( const bool tosevenbit ) const
1391 {
1392   bool modified=false;
1393   char *ret;
1394   GPBuffer<char> gret(ret,size*7);
1395   ret[0]=0;
1396   char *retptr=ret;
1397   char const *start=data;
1398   char const *s=start;
1399   char const *last=s;
1400   GP<GStringRep> special;
1401   for(unsigned long w;(w=getValidUCS4(s));last=s)
1402     // Whoever wrote this for statement should be __complete_here__
1403   {
1404     char const *ss=0;
1405     switch(w)
1406     {
1407     case '<':
1408       ss="&lt;";
1409       break;
1410     case '>':
1411       ss="&gt;";
1412       break;
1413     case '&':
1414       ss="&amp;";
1415       break;
1416     case '\47':
1417       ss="&apos;";
1418       break;
1419     case '\42':
1420       ss="&quot;";
1421       break;
1422     default:
1423       if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80))))
1424       {
1425         special=toThis(UTF8::create_format("&#%lu;",w));
1426         ss=special->data;
1427       }
1428       break;
1429     }
1430     if(ss)
1431     {
1432       modified=true;
1433       if(s!=start)
1434       {
1435         size_t len=(size_t)last-(size_t)start;
1436         strncpy(retptr,start,len);
1437         retptr+=len;
1438         start=s;
1439       }
1440       if(ss[0])
1441       {
1442         size_t len=strlen(ss);
1443         strcpy(retptr,ss);
1444         retptr+=len;
1445       }
1446     }
1447   }
1448   GP<GStringRep> retval;
1449   if(modified)
1450   {
1451     strcpy(retptr,start);
1452     retval=strdup( ret );
1453   }else
1454   {
1455     retval=const_cast<GStringRep *>(this);
1456   }
1457 //  DEBUG_MSG( "Escaped string is '" << ret << "'\n" );
1458   return retval;
1459 }
1460 
1461 
1462 static const GMap<GUTF8String,GUTF8String> &
BasicMap(void)1463 BasicMap( void )
1464 {
1465   static GMap<GUTF8String,GUTF8String> Basic;
1466   if (! Basic.size())
1467     {
1468       Basic["lt"]   = GUTF8String('<');
1469       Basic["gt"]   = GUTF8String('>');
1470       Basic["amp"]  = GUTF8String('&');
1471       Basic["apos"] = GUTF8String('\47');
1472       Basic["quot"] = GUTF8String('\42');
1473     }
1474   return Basic;
1475 }
1476 
1477 GUTF8String
fromEscaped(const GMap<GUTF8String,GUTF8String> ConvMap) const1478 GUTF8String::fromEscaped( const GMap<GUTF8String,GUTF8String> ConvMap ) const
1479 {
1480   GUTF8String ret;                  // Build output string here
1481   int start_locn = 0;           // Beginning of substring to skip
1482   int amp_locn;                 // Location of a found ampersand
1483 
1484   while( (amp_locn = search( '&', start_locn )) > -1 )
1485   {
1486       // Found the next apostrophe
1487       // Locate the closing semicolon
1488     const int semi_locn = search( ';', amp_locn );
1489       // No closing semicolon, exit and copy
1490       //  the rest of the string.
1491     if( semi_locn < 0 )
1492       break;
1493     ret += substr( start_locn, amp_locn - start_locn );
1494     int const len = semi_locn - amp_locn - 1;
1495     if(len)
1496     {
1497       GUTF8String key = substr( amp_locn+1, len);
1498       //DEBUG_MSG( "key = '" << key << "'\n" );
1499       char const * s=key;
1500       if( s[0] == '#')
1501       {
1502         unsigned long value;
1503         char *ptr=0;
1504         if(s[1] == 'x' || s[1] == 'X')
1505         {
1506           value=strtoul((char const *)(s+2),&ptr,16);
1507         }else
1508         {
1509           value=strtoul((char const *)(s+1),&ptr,10);
1510         }
1511         if(ptr)
1512         {
1513           unsigned char utf8char[7];
1514           unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char);
1515           ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char);
1516         }else
1517         {
1518           ret += substr( amp_locn, semi_locn - amp_locn + 1 );
1519         }
1520       }else
1521       {
1522         GPosition map_entry = ConvMap.contains( key );
1523         if( map_entry )
1524         {                           // Found in the conversion map, substitute
1525           ret += ConvMap[map_entry];
1526         } else
1527         {
1528           static const GMap<GUTF8String,GUTF8String> &Basic = BasicMap();
1529           GPosition map_entry = Basic.contains( key );
1530           if ( map_entry )
1531           {
1532             ret += Basic[map_entry];
1533           }else
1534           {
1535             ret += substr( amp_locn, len+2 );
1536           }
1537         }
1538       }
1539     }else
1540     {
1541       ret += substr( amp_locn, len+2 );
1542     }
1543     start_locn = semi_locn + 1;
1544 //    DEBUG_MSG( "ret = '" << ret << "'\n" );
1545   }
1546 
1547                                 // Copy the end of the string to the output
1548   ret += substr( start_locn, length()-start_locn );
1549 
1550 //  DEBUG_MSG( "Unescaped string is '" << ret << "'\n" );
1551   return (ret == *this)?(*this):ret;
1552 }
1553 
1554 GUTF8String
fromEscaped(void) const1555 GUTF8String::fromEscaped(void) const
1556 {
1557   const GMap<GUTF8String,GUTF8String> nill;
1558   return fromEscaped(nill);
1559 }
1560 
1561 GP<GStringRep>
setat(int n,char ch) const1562 GStringRep::setat(int n, char ch) const
1563 {
1564   GP<GStringRep> retval;
1565   if(n<0)
1566     n+=size;
1567   if (n < 0 || n>size)
1568     GBaseString::throw_illegal_subscript();
1569   if(ch == data[n])
1570   {
1571     retval=const_cast<GStringRep *>(this);
1572   }else if(!ch)
1573   {
1574     retval=getbuf(n);
1575   }else
1576   {
1577     retval=getbuf((n<size)?size:n);
1578     retval->data[n]=ch;
1579     if(n == size)
1580       retval->data[n+1]=0;
1581   }
1582   return retval;
1583 }
1584 
// Select a size-bounded formatter when one is known to exist.
// USE_VSNPRINTF stays undefined when none is found, and vformat() then
// falls back to vsprintf().  Both `linux` and `__linux__` are tested:
// compilers stop predefining the bare `linux` macro in strict
// standard-conforming modes, while `__linux__` is always available there.
#if defined(AUTOCONF) && defined(HAVE_VSNPRINTF)
# define USE_VSNPRINTF vsnprintf
#elif defined(_WIN32) && !defined(__CYGWIN32__)
# define USE_VSNPRINTF _vsnprintf
#elif defined(linux) || defined(__linux__)
# define USE_VSNPRINTF vsnprintf
#endif
1592 
1593 GUTF8String &
format(const char fmt[],...)1594 GUTF8String::format(const char fmt[], ... )
1595 {
1596   va_list args;
1597   va_start(args, fmt);
1598   return init(GStringRep::UTF8::create(fmt,args));
1599 }
1600 
1601 GP<GStringRep>
create_format(const char fmt[],...)1602 GStringRep::UTF8::create_format(const char fmt[],...)
1603 {
1604   va_list args;
1605   va_start(args, fmt);
1606   return create(fmt,args);
1607 }
1608 
1609 GP<GStringRep>
vformat(va_list args) const1610 GStringRep::vformat(va_list args) const
1611 {
1612   GP<GStringRep> retval;
1613   if(size)
1614   {
1615     char const * const fmt=data;
1616     int buflen=32768;
1617     char *buffer;
1618     GPBuffer<char> gbuffer(buffer,buflen);
1619     ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C"));
1620     // Format string
1621 #ifdef USE_VSNPRINTF
1622     while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0)
1623       {
1624         gbuffer.resize(0);
1625         gbuffer.resize(buflen+32768);
1626       }
1627     va_end(args);
1628 #else
1629     buffer[buflen-1] = 0;
1630     vsprintf(buffer, fmt, args);
1631     va_end(args);
1632     if (buffer[buflen-1])
1633       {
1634         // This isn't as fatal since it is on the stack, but we
1635         // definitely should stop the current operation.
1636         G_THROW( ERR_MSG("GString.overwrite") );
1637       }
1638 #endif
1639     retval=strdup((const char *)buffer);
1640   }
1641   // Go altering the string
1642   return retval;
1643 }
1644 
1645 int
search(char c,int from) const1646 GStringRep::search(char c, int from) const
1647 {
1648   if (from<0)
1649     from += size;
1650   int retval=(-1);
1651   if (from>=0 && from<size)
1652   {
1653     char const *const s = strchr(data+from,c);
1654     if(s)
1655       retval=(int)((size_t)s-(size_t)data);
1656   }
1657   return retval;
1658 }
1659 
1660 int
search(char const * ptr,int from) const1661 GStringRep::search(char const *ptr, int from) const
1662 {
1663   if(from<0)
1664   {
1665     from+=size;
1666     if(from<0)
1667       G_THROW( ERR_MSG("GString.bad_subscript") );
1668   }
1669   int retval=(-1);
1670   if (from>=0 && from<size)
1671   {
1672     char const *const s = strstr(data+from,ptr);
1673     if(s)
1674       retval=(int)((size_t)s-(size_t)data);
1675   }
1676   return retval;
1677 }
1678 
1679 int
rsearch(char c,int from) const1680 GStringRep::rsearch(char c, int from) const
1681 {
1682   if(from<0)
1683   {
1684     from+=size;
1685     if(from<0)
1686       G_THROW( ERR_MSG("GString.bad_subscript") );
1687   }
1688   int retval=(-1);
1689   if ((from>=0) && (from<size))
1690   {
1691     char const *const s = strrchr(data+from,c);
1692     if(s)
1693       retval=(int)((size_t)s-(size_t)data);
1694   }
1695   return retval;
1696 }
1697 
1698 int
rsearch(char const * ptr,int from) const1699 GStringRep::rsearch(char const *ptr, int from) const
1700 {
1701   if(from<0)
1702   {
1703     from+=size;
1704     if(from<0)
1705       G_THROW( ERR_MSG("GString.bad_subscript") );
1706   }
1707   int retval=(-1);
1708   for(int loc=from;(loc=search(ptr,loc)) >= 0;++loc)
1709     retval=loc;
1710   return retval;
1711 }
1712 
1713 int
contains(const char accept[],int from) const1714 GStringRep::contains(const char accept[],int from) const
1715 {
1716   if(from<0)
1717   {
1718     from+=size;
1719     if(from<0)
1720       G_THROW( ERR_MSG("GString.bad_subscript") );
1721   }
1722   int retval=(-1);
1723   if (accept && accept[0] && from>=0 && from<size)
1724   {
1725     char const * const src = data+from;
1726     char const *ptr=strpbrk(src,accept);
1727     if(ptr)
1728     {
1729       retval=(int)(ptr-src)+from;
1730     }
1731   }
1732   return retval;
1733 }
1734 
1735 int
rcontains(const char accept[],int from) const1736 GStringRep::rcontains(const char accept[],int from) const
1737 {
1738   int retval=(-1);
1739   while((from=contains(accept,from)) >= 0)
1740   {
1741     retval=from++;
1742   }
1743   return retval;
1744 }
1745 
1746 bool
is_int(void) const1747 GBaseString::is_int(void) const
1748 {
1749   bool isLong=!!ptr;
1750   if(isLong)
1751   {
1752     int endpos;
1753     (*this)->toLong(0,endpos);
1754     if(endpos>=0)
1755     {
1756       isLong=((*this)->nextNonSpace(endpos) == (int)length());
1757     }
1758   }
1759   return isLong;
1760 }
1761 
1762 bool
is_float(void) const1763 GBaseString::is_float(void) const
1764 {
1765   bool isDouble=!!ptr;
1766   if(isDouble)
1767   {
1768     int endpos;
1769     (*this)->toDouble(0,endpos);
1770     if(endpos>=0)
1771     {
1772       isDouble=((*this)->nextNonSpace(endpos) == (int)length());
1773     }
1774   }
1775   return isDouble;
1776 }
1777 
1778 unsigned int
hash(const GBaseString & str)1779 hash(const GBaseString &str)
1780 {
1781   unsigned int x = 0;
1782   const char *s = (const char*)str;
1783   while (*s)
1784     x = x ^ (x<<6) ^ (unsigned char)(*s++);
1785   return x;
1786 }
1787 
1788 void
throw_illegal_subscript()1789 GBaseString::throw_illegal_subscript()
1790 {
1791   G_THROW( ERR_MSG("GString.bad_subscript") );
1792 }
1793 
1794 unsigned char *
UCS4toString(const uint32_t w0,unsigned char * ptr,mbstate_t *) const1795 GStringRep::UTF8::UCS4toString(
1796   const uint32_t w0,unsigned char *ptr, mbstate_t *) const
1797 {
1798   return UCS4toUTF8(w0,ptr);
1799 }
1800 
1801 int
ncopy(wchar_t * const buf,const int buflen) const1802 GStringRep::UTF8::ncopy(wchar_t * const buf, const int buflen ) const
1803 {
1804   int retval=(-1);
1805   if(buf && buflen)
1806     {
1807       buf[0]=0;
1808       if(data[0])
1809 	{
1810           const size_t length=strlen(data);
1811           const unsigned char * const eptr=(const unsigned char *)(data+length);
1812 	  wchar_t *r=buf;
1813 	  wchar_t const * const rend=buf+buflen;
1814           for(const unsigned char *s=(const unsigned char *)data;
1815               (r<rend)&&(s<eptr)&&*s;)
1816             {
1817               const uint32_t w0=UTF8toUCS4(s,eptr);
1818               uint16_t w1;
1819               uint16_t w2=1;
1820               for(int count=(sizeof(wchar_t)==sizeof(w1))
1821                     ?UCS4toUTF16(w0,w1,w2):1;
1822                   count&&(r<rend);
1823                   --count,w1=w2,++r)
1824 		{
1825 		  r[0]=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
1826 		}
1827             }
1828 	  if(r<rend)
1829             {
1830               r[0]=0;
1831               retval=((size_t)r-(size_t)buf)/sizeof(wchar_t);
1832             }
1833 	}
1834       else
1835 	{
1836 	  retval=0;
1837 	}
1838     }
1839   return retval;
1840 }
1841 
1842 GP<GStringRep>
toNative(const EscapeMode escape) const1843 GStringRep::UTF8::toNative(const EscapeMode escape) const
1844 {
1845   GP<GStringRep> retval;
1846   if(data[0])
1847   {
1848     const size_t length=strlen(data);
1849     const unsigned char * const eptr=(const unsigned char *)(data+length);
1850     unsigned char *buf;
1851     GPBuffer<unsigned char> gbuf(buf,12*length+12);
1852     unsigned char *r=buf;
1853     mbstate_t ps;
1854     memset(&ps,0,sizeof(mbstate_t));
1855     for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
1856       {
1857         const unsigned char * const s0 = s;
1858         const uint32_t w0=UTF8toUCS4(s,eptr);
1859         if (s == s0)
1860           {
1861             s += 1;
1862             *r++ = '?';
1863           }
1864         else
1865           {
1866             const unsigned char * const r0 = r;
1867             r=UCS4toNative(w0,r,&ps);
1868             if(r == r0)
1869               {
1870                 if (escape == IS_ESCAPED)
1871                   {
1872                     sprintf((char *)r,"&#%lu;",(unsigned long)w0);
1873                     r += strlen((char *)r);
1874                   }
1875                 else
1876                   {
1877                     *r++ = '?';
1878                   }
1879               }
1880           }
1881       }
1882     r[0]=0;
1883     retval = NATIVE_CREATE( (const char *)buf );
1884   }
1885   else
1886   {
1887     retval = NATIVE_CREATE( (unsigned int)0 );
1888   }
1889   return retval;
1890 }
1891 
1892 GP<GStringRep>
toUTF8(const bool nothrow) const1893 GStringRep::UTF8::toUTF8(const bool nothrow) const
1894 {
1895   if(!nothrow)
1896     G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") );
1897   return const_cast<GStringRep::UTF8 *>(this);
1898 }
1899 
1900 // Tests if a string is legally encoded in the current character set.
1901 bool
is_valid(void) const1902 GStringRep::UTF8::is_valid(void) const
1903 {
1904   bool retval=true;
1905   if(data && size)
1906   {
1907     const unsigned char * const eptr=(const unsigned char *)(data+size);
1908     for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
1909     {
1910       const unsigned char * const r=s;
1911       (void)UTF8toUCS4(s,eptr);
1912       if(r == s)
1913       {
1914         retval=false;
1915         break;
1916       }
1917     }
1918   }
1919   return retval;
1920 }
1921 
// Fold one UTF-8 continuation byte into the accumulator U.
// The byte at r must have the form 10xxxxxx; its six payload bits are
// appended below U shifted left by six.  Any other byte yields 0.
static inline uint32_t
add_char(uint32_t const U, unsigned char const * const r)
{
  uint32_t const byte = r[0];
  if ((byte & 0xc0) != 0x80)
    return 0;
  return (U<<6)|(byte&0x3f);
}
1928 
// Decode one UTF-8 sequence (1 to 6 bytes) starting at s.  No byte at or
// beyond eptr is read.  The outcomes, as implemented below:
//
//  * A complete sequence with a non-zero value: the value is returned and
//    s is advanced past the sequence.
//  * A first byte that has bit 0x80 but not bit 0x40, a following byte
//    that is not a continuation byte (see add_char), or a six-byte form
//    that is rejected or evaluates to zero: the return value is
//    (unsigned int)(-1) minus the byte at s, and s is advanced by ONE byte.
//  * s at or beyond eptr, a NUL byte, a sequence cut short by eptr, or a
//    two- to five-byte form whose value is zero: 0 is returned and s is
//    left unchanged.
//
// The flag bits 0x40, 0x20, 0x10, 0x8 and 0x4 of the first byte C1 decide
// how many continuation bytes are consumed.  U accumulates C1 followed by
// six payload bits per continuation byte; the final mask at each level
// removes what is left of C1 from the result.
uint32_t
GStringRep::UTF8toUCS4(
  unsigned char const *&s,void const * const eptr)
{
  uint32_t U=0;
  unsigned char const *r=s;
  if(r < eptr)
  {
    uint32_t const C1=r++[0];
    if(C1&0x80)
    {
      // Multi-byte sequence: a second byte must be available.
      if(r < eptr)
      {
        U=C1;
        // Without bit 0x40 the first byte cannot start a sequence.
        if((U=((C1&0x40)?add_char(U,r++):0)))
        {
          if(C1&0x20)
          {
            // Three bytes or more.
            if(r < eptr)
            {
              if((U=add_char(U,r++)))
              {
                if(C1&0x10)
                {
                  // Four bytes or more.
                  if(r < eptr)
                  {
                    if((U=add_char(U,r++)))
                    {
                      if(C1&0x8)
                      {
                        // Five bytes or more.
                        if(r < eptr)
                        {
                          if((U=add_char(U,r++)))
                          {
                            if(C1&0x4)
                            {
                              // Six bytes; bit 0x2 in C1 is rejected.
                              if(r < eptr)
                              {
                                if((U=((!(C1&0x2))?(add_char(U,r++)&0x7fffffff):0)))
                                {
                                  s=r;
                                }else
                                {
                                  U=(unsigned int)(-1)-s++[0];
                                }
                              }else
                              {
                                U=0;
                              }
                            }else if((U=((U&0x4000000)?0:(U&0x3ffffff))))
                            {
                              // Five-byte form: keep 26 bits.
                              s=r;
                            }
                          }else
                          {
                            U=(unsigned int)(-1)-s++[0];
                          }
                        }else
                        {
                          U=0;
                        }
                      }else if((U=((U&0x200000)?0:(U&0x1fffff))))
                      {
                        // Four-byte form: keep 21 bits.
                        s=r;
                      }
                    }else
                    {
                      U=(unsigned int)(-1)-s++[0];
                    }
                  }else
                  {
                    U=0;
                  }
                }else if((U=((U&0x10000)?0:(U&0xffff))))
                {
                  // Three-byte form: keep 16 bits.
                  s=r;
                }
              }else
              {
                U=(unsigned int)(-1)-s++[0];
              }
            }else
            {
              U=0;
            }
          }else if((U=((U&0x800)?0:(U&0x7ff))))
          {
            // Two-byte form: keep 11 bits.
            s=r;
          }
        }else
        {
          U=(unsigned int)(-1)-s++[0];
        }
      }else
      {
        U=0;
      }
    }else if((U=C1))
    {
      // Single byte below 0x80 (a NUL is not consumed).
      s=r;
    }
  }
  return U;
}
2033 
2034 unsigned char *
UCS4toUTF8(const uint32_t w,unsigned char * ptr)2035 GStringRep::UCS4toUTF8(const uint32_t w,unsigned char *ptr)
2036 {
2037   if(w <= 0x7f)
2038   {
2039     *ptr++ = (unsigned char)w;
2040   }
2041   else if(w <= 0x7ff)
2042   {
2043     *ptr++ = (unsigned char)((w>>6)|0xC0);
2044     *ptr++ = (unsigned char)((w|0x80)&0xBF);
2045   }
2046   else if(w <= 0xFFFF)
2047   {
2048     *ptr++ = (unsigned char)((w>>12)|0xE0);
2049     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2050     *ptr++ = (unsigned char)((w|0x80)&0xBF);
2051   }
2052   else if(w <= 0x1FFFFF)
2053   {
2054     *ptr++ = (unsigned char)((w>>18)|0xF0);
2055     *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
2056     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2057     *ptr++ = (unsigned char)((w|0x80)&0xBF);
2058   }
2059   else if(w <= 0x3FFFFFF)
2060   {
2061     *ptr++ = (unsigned char)((w>>24)|0xF8);
2062     *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
2063     *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
2064     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2065     *ptr++ = (unsigned char)((w|0x80)&0xBF);
2066   }
2067   else if(w <= 0x7FFFFFFF)
2068   {
2069     *ptr++ = (unsigned char)((w>>30)|0xFC);
2070     *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF);
2071     *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
2072     *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
2073     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
2074     *ptr++ = (unsigned char)((w|0x80)&0xBF);
2075   }
2076   else
2077   {
2078     *ptr++ = '?';
2079   }
2080   return ptr;
2081 }
2082 
2083    // Creates with a concat operation.
2084 GP<GStringRep>
concat(const char * s1,const GP<GStringRep> & s2) const2085 GStringRep::concat( const char *s1, const GP<GStringRep> &s2) const
2086 {
2087   GP<GStringRep> retval;
2088   if(s2)
2089   {
2090     retval=toThis(s2);
2091     if(s1 && s1[0])
2092     {
2093       if(retval)
2094       {
2095         retval=concat(s1,retval->data);
2096       }else
2097       {
2098         retval=strdup(s1);
2099       }
2100     }
2101   }else if(s1 && s1[0])
2102   {
2103     retval=strdup(s1);
2104   }
2105   return retval;
2106 }
2107 
2108    // Creates with a concat operation.
2109 
2110 GP<GStringRep>
concat(const GP<GStringRep> & s1,const char * s2) const2111 GStringRep::concat( const GP<GStringRep> &s1,const char *s2) const
2112 {
2113   GP<GStringRep> retval;
2114   if(s1)
2115   {
2116     retval=toThis(s1);
2117     if(s2 && s2[0])
2118     {
2119       if(retval)
2120       {
2121         retval=retval->append(s2);
2122       }else
2123       {
2124         retval=strdup(s2);
2125       }
2126     }
2127   }else if(s2 && s2[0])
2128   {
2129     retval=strdup(s2);
2130   }
2131   return retval;
2132 }
2133 
2134 GP<GStringRep>
concat(const GP<GStringRep> & s1,const GP<GStringRep> & s2) const2135 GStringRep::concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const
2136 {
2137   GP<GStringRep> retval;
2138   if(s1)
2139   {
2140     retval=toThis(s1,s2);
2141     if(retval && s2)
2142     {
2143       retval=retval->append(toThis(s2));
2144     }
2145   }else if(s2)
2146   {
2147     retval=toThis(s2);
2148   }
2149   return retval;
2150 }
2151 
GStringRep(void)2152 GStringRep::GStringRep(void)
2153 {
2154   size=0;
2155   data=0;
2156 }
2157 
~GStringRep()2158 GStringRep::~GStringRep()
2159 {
2160   if(data)
2161   {
2162     data[0]=0;
2163     ::operator delete(data);
2164   }
2165   data=0;
2166 }
2167 
UTF8(void)2168 GStringRep::UTF8::UTF8(void) {}
2169 
~UTF8()2170 GStringRep::UTF8::~UTF8() {}
2171 
2172 int
cmp(const char * s1,const int len) const2173 GStringRep::cmp(const char *s1,const int len) const
2174 {
2175   return cmp(data,s1,len);
2176 }
2177 
2178 int
cmp(const char * s1,const char * s2,const int len)2179 GStringRep::cmp(const char *s1, const char *s2,const int len)
2180 {
2181   return (len
2182    ?((s1&&s1[0])
2183       ?((s2&&s2[0])
2184         ?((len>0)
2185           ?strncmp(s1,s2,len)
2186           :strcmp(s1,s2))
2187         :1)
2188       :((s2&&s2[0])?(-1):0))
2189    :0);
2190 }
2191 
2192 int
cmp(const GP<GStringRep> & s1,const GP<GStringRep> & s2,const int len)2193 GStringRep::cmp(const GP<GStringRep> &s1, const GP<GStringRep> &s2,
2194   const int len )
2195 {
2196   return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len));
2197 }
2198 
2199 int
cmp(const GP<GStringRep> & s1,const char * s2,const int len)2200 GStringRep::cmp(const GP<GStringRep> &s1, const char *s2,
2201   const int len )
2202 {
2203   return cmp((s1?s1->data:0),s2,len);
2204 }
2205 
2206 int
cmp(const char * s1,const GP<GStringRep> & s2,const int len)2207 GStringRep::cmp(const char *s1, const GP<GStringRep> &s2,
2208   const int len )
2209 {
2210   return cmp(s1,(s2?(s2->data):0),len);
2211 }
2212 
2213 int
cmp(const GP<GStringRep> & s2,const int len) const2214 GStringRep::UTF8::cmp(const GP<GStringRep> &s2,const int len) const
2215 {
2216   int retval;
2217   if(s2)
2218   {
2219     if(s2->isNative())
2220     {
2221       GP<GStringRep> r(s2->toUTF8(true));
2222       if(r)
2223       {
2224         retval=GStringRep::cmp(data,r->data,len);
2225       }else
2226       {
2227         retval=-(s2->cmp(toNative(NOT_ESCAPED),len));
2228       }
2229     }else
2230     {
2231       retval=GStringRep::cmp(data,s2->data,len);
2232     }
2233   }else
2234   {
2235     retval=GStringRep::cmp(data,0,len);
2236   }
2237   return retval;
2238 }
2239 
2240 int
toInt() const2241 GStringRep::UTF8::toInt() const
2242 {
2243   int endpos;
2244   return (int)toLong(0,endpos);
2245 }
2246 
2247 static inline long
Cstrtol(char * data,char ** edata,const int base)2248 Cstrtol(char *data,char **edata, const int base)
2249 {
2250   GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
2251   while (data && *data==' ') data++;
2252   return strtol(data,edata,base);
2253 }
2254 
2255 long
toLong(const int pos,int & endpos,const int base) const2256 GStringRep::UTF8::toLong(
2257   const int pos, int &endpos, const int base) const
2258 {
2259   char *edata=0;
2260   long retval=Cstrtol(data+pos,&edata, base);
2261   if(edata)
2262   {
2263     endpos=edata-data;
2264   }else
2265   {
2266     GP<GStringRep> ptr = GStringRep::UTF8::create();
2267     endpos=(-1);
2268     ptr=ptr->strdup(data+pos);
2269     if(ptr)
2270       ptr=ptr->toNative(NOT_ESCAPED);
2271     if(ptr)
2272     {
2273       int xendpos;
2274       retval=ptr->toLong(0,xendpos,base);
2275       if(xendpos> 0)
2276       {
2277         endpos=(int)size;
2278         ptr=ptr->strdup(data+xendpos);
2279         if(ptr)
2280         {
2281           ptr=ptr->toUTF8(true);
2282           if(ptr)
2283           {
2284             endpos-=(int)(ptr->size);
2285           }
2286         }
2287       }
2288     }
2289   }
2290   return retval;
2291 }
2292 
2293 static inline unsigned long
Cstrtoul(char * data,char ** edata,const int base)2294 Cstrtoul(char *data,char **edata, const int base)
2295 {
2296   GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
2297   while (data && *data==' ') data++;
2298   return strtoul(data,edata,base);
2299 }
2300 
2301 unsigned long
toULong(const int pos,int & endpos,const int base) const2302 GStringRep::UTF8::toULong(
2303   const int pos, int &endpos, const int base) const
2304 {
2305   char *edata=0;
2306   unsigned long retval=Cstrtoul(data+pos,&edata, base);
2307   if(edata)
2308   {
2309     endpos=edata-data;
2310   }else
2311   {
2312     GP<GStringRep> ptr = GStringRep::UTF8::create();
2313     endpos=(-1);
2314     ptr=ptr->strdup(data+pos);
2315     if(ptr)
2316       ptr=ptr->toNative(NOT_ESCAPED);
2317     if(ptr)
2318     {
2319       int xendpos;
2320       retval=ptr->toULong(0,xendpos,base);
2321       if(xendpos> 0)
2322       {
2323         endpos=(int)size;
2324         ptr=ptr->strdup(data+xendpos);
2325         if(ptr)
2326         {
2327           ptr=ptr->toUTF8(true);
2328           if(ptr)
2329           {
2330             endpos-=(int)(ptr->size);
2331           }
2332         }
2333       }
2334     }
2335   }
2336   return retval;
2337 }
2338 
2339 static inline double
Cstrtod(char * data,char ** edata)2340 Cstrtod(char *data,char **edata)
2341 {
2342   GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
2343   while (data && *data==' ') data++;
2344   return strtod(data,edata);
2345 }
2346 
2347 double
toDouble(const int pos,int & endpos) const2348 GStringRep::UTF8::toDouble(const int pos, int &endpos) const
2349 {
2350   char *edata=0;
2351   double retval=Cstrtod(data+pos,&edata);
2352   if(edata)
2353   {
2354     endpos=edata-data;
2355   }else
2356   {
2357     GP<GStringRep> ptr = GStringRep::UTF8::create();
2358     endpos=(-1);
2359     ptr=ptr->strdup(data+pos);
2360     if(ptr)
2361       ptr=ptr->toNative(NOT_ESCAPED);
2362     if(ptr)
2363     {
2364       int xendpos;
2365       retval=ptr->toDouble(0,xendpos);
2366       if(xendpos >= 0)
2367       {
2368         endpos=(int)size;
2369         ptr=ptr->strdup(data+xendpos);
2370         if(ptr)
2371         {
2372           ptr=ptr->toUTF8(true);
2373           if(ptr)
2374           {
2375             endpos-=(int)(ptr->size);
2376           }
2377         }
2378       }
2379     }
2380   }
2381   return retval;
2382 }
2383 
2384 int
getUCS4(uint32_t & w,const int from) const2385 GStringRep::getUCS4(uint32_t &w, const int from) const
2386 {
2387   int retval;
2388   if(from>=size)
2389   {
2390     w=0;
2391     retval=size;
2392   }else if(from<0)
2393   {
2394     w=(unsigned int)(-1);
2395     retval=(-1);
2396   }else
2397   {
2398     const char *source=data+from;
2399     w=getValidUCS4(source);
2400     retval=(int)((size_t)source-(size_t)data);
2401   }
2402   return retval;
2403 }
2404 
2405 
2406 uint32_t
getValidUCS4(const char * & source) const2407 GStringRep::UTF8::getValidUCS4(const char *&source) const
2408 {
2409   return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size);
2410 }
2411 
2412 int
nextNonSpace(const int from,const int len) const2413 GStringRep::nextNonSpace(const int from,const int len) const
2414 {
2415   return nextCharType(giswspace,from,len,true);
2416 }
2417 
2418 int
nextSpace(const int from,const int len) const2419 GStringRep::nextSpace(const int from,const int len) const
2420 {
2421   return nextCharType(giswspace,from,len,false);
2422 }
2423 
2424 int
nextChar(const int from) const2425 GStringRep::nextChar(const int from) const
2426 {
2427   char const * xptr=data+from;
2428   (void)getValidUCS4(xptr);
2429   return (int)((size_t)xptr-(size_t)data);
2430 }
2431 
2432 int
firstEndSpace(int from,const int len) const2433 GStringRep::firstEndSpace(int from,const int len) const
2434 {
2435   const int xsize=(len<0)?size:(from+len);
2436   const int ysize=(size<xsize)?size:xsize;
2437   int retval=ysize;
2438   while(from<ysize)
2439   {
2440     from=nextNonSpace(from,ysize-from);
2441     if(from < size)
2442     {
2443       const int r=nextSpace(from,ysize-from);
2444       // If a character isn't legal, then it will return
2445       // tru for both nextSpace and nextNonSpace.
2446       if(r == from)
2447       {
2448         from++;
2449       }else
2450       {
2451         from=retval=r;
2452       }
2453     }
2454   }
2455   return retval;
2456 }
2457 
2458 int
UCS4toUTF16(const uint32_t w,uint16_t & w1,uint16_t & w2)2459 GStringRep::UCS4toUTF16(
2460   const uint32_t w,uint16_t &w1, uint16_t &w2)
2461 {
2462   int retval;
2463   if(w<0x10000)
2464   {
2465     w1=(uint16_t)w;
2466     w2=0;
2467     retval=1;
2468   }else
2469   {
2470     w1=(uint16_t)((((w-0x10000)>>10)&0x3ff)+0xD800);
2471     w2=(uint16_t)((w&0x3ff)+0xDC00);
2472     retval=2;
2473   }
2474   return retval;
2475 }
2476 
2477 int
UTF16toUCS4(uint32_t & U,uint16_t const * const s,void const * const eptr)2478 GStringRep::UTF16toUCS4(
2479   uint32_t &U,uint16_t const * const s,void const * const eptr)
2480 {
2481   int retval=0;
2482   U=0;
2483   uint16_t const * const r=s+1;
2484   if(r <= eptr)
2485   {
2486     uint32_t const W1=s[0];
2487     if((W1<0xD800)||(W1>0xDFFF))
2488     {
2489       if((U=W1))
2490       {
2491         retval=1;
2492       }
2493     }else if(W1<=0xDBFF)
2494     {
2495       uint16_t const * const rr=r+1;
2496       if(rr <= eptr)
2497       {
2498         uint32_t const W2=s[1];
2499         if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff))))
2500         {
2501           retval=2;
2502         }else
2503         {
2504           retval=(-1);
2505         }
2506       }
2507     }
2508   }
2509   return retval;
2510 }
2511 
2512 
2513 //bcr
2514 
2515 GUTF8String&
operator +=(char ch)2516 GUTF8String::operator+= (char ch)
2517 {
2518   return init(
2519     GStringRep::UTF8::create((const char*)*this,
2520     GStringRep::UTF8::create(&ch,0,1)));
2521 }
2522 
2523 GUTF8String&
operator +=(const char * str)2524 GUTF8String::operator+= (const char *str)
2525 {
2526   return init(GStringRep::UTF8::create(*this,str));
2527 }
2528 
2529 GUTF8String&
operator +=(const GBaseString & str)2530 GUTF8String::operator+= (const GBaseString &str)
2531 {
2532   return init(GStringRep::UTF8::create(*this,str));
2533 }
2534 
2535 GUTF8String
substr(int from,int len) const2536 GUTF8String::substr(int from, int len) const
2537 { return GUTF8String(*this, from, len); }
2538 
2539 GUTF8String
operator +(const GBaseString & s2) const2540 GUTF8String::operator+(const GBaseString &s2) const
2541 { return GStringRep::UTF8::create(*this,s2); }
2542 
2543 GUTF8String
operator +(const GUTF8String & s2) const2544 GUTF8String::operator+(const GUTF8String &s2) const
2545 { return GStringRep::UTF8::create(*this,s2); }
2546 
2547 GUTF8String
operator +(const char * s2) const2548 GUTF8String::operator+(const char    *s2) const
2549 { return GStringRep::UTF8::create(*this,s2); }
2550 
2551 char *
getbuf(int n)2552 GUTF8String::getbuf(int n)
2553 {
2554   if(ptr)
2555     init((*this)->getbuf(n));
2556   else if(n>0)
2557     init(GStringRep::UTF8::create(n));
2558   else
2559     init(0);
2560   return ptr?((*this)->data):0;
2561 }
2562 
2563 void
setat(const int n,const char ch)2564 GUTF8String::setat(const int n, const char ch)
2565 {
2566   if((!n)&&(!ptr))
2567   {
2568     init(GStringRep::UTF8::create(&ch,0,1));
2569   }else
2570   {
2571     init((*this)->setat(CheckSubscript(n),ch));
2572   }
2573 }
2574 
2575 GP<GStringRep>
UTF8ToNative(const char * s,const EscapeMode escape)2576 GStringRep::UTF8ToNative( const char *s, const EscapeMode escape )
2577 {
2578   return GStringRep::UTF8::create(s)->toNative(escape);
2579 }
2580 
GUTF8String(const char dat)2581 GUTF8String::GUTF8String(const char dat)
2582 { init(GStringRep::UTF8::create(&dat,0,1)); }
2583 
// Constructs a string by applying vformat(args) to the format string
// fmt.  When fmt holds no rep, the new string is initialized from fmt
// itself.
GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args)
{
  if (!fmt.ptr)
  {
    init(fmt);
    return;
  }
  init(fmt->vformat(args));
}
2591 
GUTF8String(const char * str)2592 GUTF8String::GUTF8String(const char *str)
2593 { init(GStringRep::UTF8::create(str)); }
2594 
GUTF8String(const unsigned char * str)2595 GUTF8String::GUTF8String(const unsigned char *str)
2596 { init(GStringRep::UTF8::create((const char *)str)); }
2597 
GUTF8String(const uint16_t * str)2598 GUTF8String::GUTF8String(const uint16_t *str)
2599 { init(GStringRep::UTF8::create(str,0,-1)); }
2600 
GUTF8String(const uint32_t * str)2601 GUTF8String::GUTF8String(const uint32_t *str)
2602 { init(GStringRep::UTF8::create(str,0,-1)); }
2603 
GUTF8String(const char * dat,unsigned int len)2604 GUTF8String::GUTF8String(const char *dat, unsigned int len)
2605 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
2606 
GUTF8String(const uint16_t * dat,unsigned int len)2607 GUTF8String::GUTF8String(const uint16_t *dat, unsigned int len)
2608 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
2609 
GUTF8String(const uint32_t * dat,unsigned int len)2610 GUTF8String::GUTF8String(const uint32_t *dat, unsigned int len)
2611 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
2612 
GUTF8String(const GBaseString & gs,int from,int len)2613 GUTF8String::GUTF8String(const GBaseString &gs, int from, int len)
2614 { init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); }
2615 
GUTF8String(const int number)2616 GUTF8String::GUTF8String(const int number)
2617 { init(GStringRep::UTF8::create_format("%d",number)); }
2618 
GUTF8String(const double number)2619 GUTF8String::GUTF8String(const double number)
2620 { init(GStringRep::UTF8::create_format("%f",number)); }
2621 
operator =(const char str)2622 GUTF8String& GUTF8String::operator= (const char str)
2623 { return init(GStringRep::UTF8::create(&str,0,1)); }
2624 
operator =(const char * str)2625 GUTF8String& GUTF8String::operator= (const char *str)
2626 { return init(GStringRep::UTF8::create(str)); }
2627 
// Returns the UTF8 concatenation of this base string and s2.
GUTF8String GBaseString::operator+(const GUTF8String &s2) const
{
  const GP<GStringRep> joined=GStringRep::UTF8::create(*this,s2);
  return joined;
}
2630 
2631 #if HAS_WCHAR
2632 GUTF8String
operator +(const GUTF8String & s2) const2633 GNativeString::operator+(const GUTF8String &s2) const
2634 {
2635   if (ptr)
2636     return GStringRep::UTF8::create((*this)->toUTF8(true),s2);
2637   else
2638     return GStringRep::UTF8::create((*this),s2);
2639 }
2640 #endif
2641 
2642 GUTF8String
operator +(const GNativeString & s2) const2643 GUTF8String::operator+(const GNativeString &s2) const
2644 {
2645   GP<GStringRep> g = s2;
2646   if (s2.ptr)
2647     g = s2->toUTF8(true);
2648   return GStringRep::UTF8::create(*this,g);
2649 }
2650 
2651 GUTF8String
operator +(const char * s1,const GUTF8String & s2)2652 operator+(const char    *s1, const GUTF8String &s2)
2653 { return GStringRep::UTF8::create(s1,s2); }
2654 
2655 #if HAS_WCHAR
2656 GNativeString
operator +(const char * s1,const GNativeString & s2)2657 operator+(const char    *s1, const GNativeString &s2)
2658 { return GStringRep::Native::create(s1,s2); }
2659 
2660 GNativeString&
operator +=(char ch)2661 GNativeString::operator+= (char ch)
2662 {
2663   char s[2]; s[0]=ch; s[1]=0;
2664   return init(GStringRep::Native::create((const char*)*this, s));
2665 }
2666 
2667 GNativeString&
operator +=(const char * str)2668 GNativeString::operator+= (const char *str)
2669 {
2670   return init(GStringRep::Native::create(*this,str));
2671 }
2672 
2673 GNativeString&
operator +=(const GBaseString & str)2674 GNativeString::operator+= (const GBaseString &str)
2675 {
2676   return init(GStringRep::Native::create(*this,str));
2677 }
2678 
2679 GNativeString
operator +(const GBaseString & s2) const2680 GNativeString::operator+(const GBaseString &s2) const
2681 { return GStringRep::Native::create(*this,s2); }
2682 
2683 GNativeString
operator +(const GNativeString & s2) const2684 GNativeString::operator+(const GNativeString &s2) const
2685 { return GStringRep::Native::create(*this,s2); }
2686 
2687 GNativeString
operator +(const char * s2) const2688 GNativeString::operator+(const char    *s2) const
2689 { return GStringRep::Native::create(*this,s2); }
2690 
2691 char *
getbuf(int n)2692 GNativeString::getbuf(int n)
2693 {
2694   if(ptr)
2695     init((*this)->getbuf(n));
2696   else if(n>0)
2697     init(GStringRep::Native::create(n));
2698   else
2699     init(0);
2700   return ptr?((*this)->data):0;
2701 }
2702 
2703 void
setat(const int n,const char ch)2704 GNativeString::setat(const int n, const char ch)
2705 {
2706   if((!n)&&(!ptr))
2707   {
2708     init(GStringRep::Native::create(&ch,0,1));
2709   }else
2710   {
2711     init((*this)->setat(CheckSubscript(n),ch));
2712   }
2713 }
2714 
2715 #endif
2716 
2717 
2718 #ifdef HAVE_NAMESPACES
2719 }
2720 # ifndef NOT_USING_DJVU_NAMESPACE
2721 using namespace DJVU;
2722 # endif
2723 #endif
2724