1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifndef _GSTRING_H_
57 #define _GSTRING_H_
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64 
65 /** @name GString.h
66 
67     Files #"GString.h"# and #"GString.cpp"# implement a general
    purpose string class \Ref{GBaseString}, with derived types
69     \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding
70     and the current Native MBS encoding respectively.  This
71     implementation relies on smart pointers (see
72     \Ref{GSmartPointer.h}).
73 
74     {\bf Historical Comments} --- At some point during the DjVu
75     research era, it became clear that C++ compilers rarely provided
76     portable libraries. We then decided to avoid fancy classes (like
77     #iostream# or #string#) and to rely only on the good old C
78     library.  A good string class however is very useful.  We had
79     already randomly picked letter 'G' to prefix class names and we
80     logically derived the new class name.  Native English speakers
81     kept laughing in hiding.  This is ironic because we completely
82     forgot this letter 'G' when creating more challenging things
83     like the ZP Coder or the IW44 wavelets.
84 
85     {\bf Later Changes}
    When converting to I18N, we (Lizardtech) decided that two string classes
    were needed, replacing the original GString with \Ref{GUTF8String} and
    \Ref{GNativeString}.
89 
90     @memo
91     General purpose string class.
92     @author
93     L\'eon Bottou <leonb@research.att.com> -- initial implementation.\\
94 
95 // From: Leon Bottou, 1/31/2002
96 // This file has very little to do with my initial implementation.
97 // It has been practically rewritten by Lizardtech for i18n changes.
98 // My original implementation was very small in comparison
99 // <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
100 // In my opinion, the duplication of the string classes is a failed
101 // attempt to use the type system to enforce coding policies.
102 // This could be fixed.  But there are better things to do in djvulibre.
103 
104 */
105 //@{
106 
107 
108 #include "DjVuGlobal.h"
109 #include "GContainer.h"
110 
111 #include <stddef.h>
112 #include <stdlib.h>
113 #include <stdarg.h>
114 #ifdef _WIN32
115 # include <windows.h>
116 # ifndef AUTOCONF
117 #  define HAS_WCHAR 1
118 #  define HAS_WCTYPE 1
119 #  define HAS_MBSTATE 1
120 # endif
121 #endif
122 
123 #if HAS_WCHAR
124 # include <wchar.h>
125 #elif HAVE_WCHAR_H
126 # include <wchar.h>
127 #endif
128 
// Fixed-width integer types used by the UCS4/UTF16 interfaces below.
// The system headers are preferred.  When neither <stdint.h> nor
// <inttypes.h> is advertised by the build configuration, fall back to
// hand-written typedefs; the non-Windows ones are only a guess and the
// pragma asks whoever builds the library to double check them.
#if HAVE_STDINT_H
# include <stdint.h>
#elif HAVE_INTTYPES_H
# include <inttypes.h>
#else
# ifdef _WIN32
typedef unsigned __int32 uint32_t;
typedef unsigned __int16 uint16_t;
# else
# pragma message("Please verify defs for uint32_t and uint16_t")
typedef unsigned int   uint32_t; // verify
typedef unsigned short uint16_t; // verify
# endif
#endif
143 
144 #ifdef HAVE_NAMESPACES
145 namespace DJVU {
146 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
147 }
148 #endif
149 #endif
150 
// Fallback definition of mbstate_t, supplied only when the build
// configuration reports neither HAS_MBSTATE nor HAVE_MBSTATE_T.
#if !HAS_MBSTATE && !defined(HAVE_MBSTATE_T)
typedef int mbstate_t;
#endif
156 
157 class GBaseString;
158 class GUTF8String;
159 class GNativeString;
160 
161 // Internal string representation.
162 class DJVUAPI GStringRep : public GPEnabled
163 {
164 public:
165   enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412,
166     XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ;
167 
168   enum EscapeMode { UNKNOWN_ESCAPED=0,  IS_ESCAPED=1, NOT_ESCAPED=2 };
169 
170   class UTF8;
171   friend class UTF8;
172   class Unicode;
173   friend class Unicode;
174 
175   class ChangeLocale;
176 #if HAS_WCHAR
177   class Native;
178   friend class Native;
179 #endif // HAS_WCHAR
180   friend class GBaseString;
181   friend class GUTF8String;
182   friend class GNativeString;
183   friend DJVUAPI unsigned int hash(const GBaseString &ref);
184 
185 public:
186   // default constructor
187   GStringRep(void);
188   // virtual destructor
189   virtual ~GStringRep();
190 
191     // Other virtual methods.
192       // Create an empty string.
193   virtual GP<GStringRep> blank(const unsigned int sz) const = 0;
194       // Create a duplicate at the given size.
195   GP<GStringRep>  getbuf(int n) const;
196       // Change the value of one of the bytes.
197   GP<GStringRep> setat(int n, char ch) const;
198       // Append a string.
199   virtual GP<GStringRep> append(const GP<GStringRep> &s2) const = 0;
200       // Test if isUTF8.
isUTF8(void)201   virtual bool isUTF8(void) const { return false; }
202       // Test if Native.
isNative(void)203   virtual bool isNative(void) const { return false; }
204       // Convert to Native.
205   virtual GP<GStringRep> toNative(
206     const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0;
207       // Convert to UTF8.
208   virtual GP<GStringRep> toUTF8(const bool nothrow=false) const = 0;
209       // Convert to same as current class.
210   virtual GP<GStringRep> toThis(
211     const GP<GStringRep> &rep,const GP<GStringRep> &locale=0) const = 0;
212       // Compare with #s2#.
213   virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const = 0;
214 
215   // Convert strings to numbers.
216   virtual int toInt(void) const = 0;
217   virtual long int toLong(
218     const int pos, int &endpos, const int base=10) const = 0;
219   virtual unsigned long toULong(
220     const int pos, int &endpos, const int base=10) const = 0;
221   virtual double toDouble(const int pos, int &endpos) const = 0;
222 
223   // return the position of the next character
224   int nextChar( const int from=0 ) const;
225 
226   // return next non space position
227   int nextNonSpace( const int from=0, const int len=(-1) ) const;
228 
229   // return next white space position
230   int nextSpace( const int from=0, const int len=(-1) ) const;
231 
232   // return the position after the last non-whitespace character.
233   int firstEndSpace( int from=0, const int len=(-1) ) const;
234 
235     // Create an empty string.
236   template <class TYPE> static GP<GStringRep> create(
237     const unsigned int sz,TYPE *);
238     // Creates with a strdup string.
239   GP<GStringRep> strdup(const char *s) const;
240 
241     // Creates by appending to the current string
242   GP<GStringRep> append(const char *s2) const;
243 
244     // Creates with a concat operation.
245   GP<GStringRep> concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const;
246   GP<GStringRep> concat(const char *s1,const GP<GStringRep> &s2) const;
247   GP<GStringRep> concat(const GP<GStringRep> &s1,const char *s2) const;
248   GP<GStringRep> concat(const char *s1,const char *s2) const;
249 
250    /* Creates with a strdup and substr.  Negative values have strlen(s)+1
251       added to them.
252    */
253   GP<GStringRep> substr(
254     const char *s,const int start,const int length=(-1)) const;
255 
256   GP<GStringRep> substr(
257     const uint16_t *s,const int start,const int length=(-1)) const;
258 
259   GP<GStringRep> substr(
260     const uint32_t *s,const int start,const int length=(-1)) const;
261 
262   /** Initializes a string with a formatted string (as in #vprintf#).  The
263       string is re-initialized with the characters generated according to the
264       specified format #fmt# and using the optional arguments.  See the ANSI-C
265       function #vprintf()# for more information. The current implementation
266       will cause a segmentation violation if the resulting string is longer
267       than 32768 characters. */
268   GP<GStringRep> vformat(va_list args) const;
269   // -- SEARCHING
270 
271   static GP<GStringRep> UTF8ToNative( const char *s,
272     const EscapeMode escape=UNKNOWN_ESCAPED );
273   static GP<GStringRep> NativeToUTF8( const char *s );
274 
275   // Creates an uppercase version of the current string.
276   GP<GStringRep> upcase(void) const;
277   // Creates a lowercase version of the current string.
278   GP<GStringRep> downcase(void) const;
279 
280   /** Returns the next UCS4 character, and updates the pointer s. */
281   static uint32_t UTF8toUCS4(
282     unsigned char const *&s, void const * const endptr );
283 
284   /** Returns the number of bytes in next UCS4 character,
285       and sets #w# to the next UCS4 chacter.  */
UTF8toUCS4(uint32_t & w,unsigned char const s[],void const * const endptr)286   static int UTF8toUCS4(
287     uint32_t &w, unsigned char const s[], void const * const endptr )
288   { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); }
289 
290   /** Returns the next UCS4 word from the UTF16 string. */
291   static int UTF16toUCS4(
292      uint32_t &w, uint16_t const * const s,void const * const eptr);
293 
294   static int UCS4toUTF16(
295     uint32_t w, uint16_t &w1, uint16_t &w2);
296 
297   int cmp(const char *s2, const int len=(-1)) const;
298   static int cmp(
299     const GP<GStringRep> &s1, const GP<GStringRep> &s2, const int len=(-1)) ;
300   static int cmp(
301     const GP<GStringRep> &s1, const char *s2, const int len=(-1));
302   static int cmp(
303     const char *s1, const GP<GStringRep> &s2, const int len=(-1));
304   static int cmp(
305     const char *s1, const char *s2, const int len=(-1));
306 
307   // Lookup the next character, and return the position of the next character.
308   int getUCS4(uint32_t &w, const int from) const;
309 
310   virtual unsigned char *UCS4toString(
311     const uint32_t w, unsigned char *ptr, mbstate_t *ps=0) const = 0;
312 
313   static unsigned char *UCS4toUTF8(
314     const uint32_t w,unsigned char *ptr);
315 
316   static unsigned char *UCS4toNative(
317     const uint32_t w,unsigned char *ptr, mbstate_t *ps);
318 
319   int search(char c, int from=0) const;
320 
321   int search(char const *str, int from=0) const;
322 
323   int rsearch(char c, int from=0) const;
324 
325   int rsearch(char const *str, int from=0) const;
326 
327   int contains(char const accept[], int from=0) const;
328 
329   int rcontains(char const accept[], int from=0) const;
330 
331 protected:
332   // Return the next character and increment the source pointer.
333   virtual uint32_t getValidUCS4(const char *&source) const = 0;
334 
335   GP<GStringRep> tocase(
336     bool (*xiswcase)(const unsigned long wc),
337     unsigned long (*xtowcase)(const unsigned long wc)) const;
338 
339   // Tests if the specified character passes the xiswtest.  If so, the
340   // return pointer is incremented to the next character, otherwise the
341   // specified #ptr# is returned.
342   const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr,
343     const bool reverse=false) const;
344 
345   // Find the next character position that passes the isCharType test.
346   int nextCharType(
347     bool (*xiswtest)(const unsigned long wc),const int from,const int len,
348     const bool reverse=false) const;
349 
350   static bool giswspace(const unsigned long w);
351   static bool giswupper(const unsigned long w);
352   static bool giswlower(const unsigned long w);
353   static unsigned long gtowupper(const unsigned long w);
354   static unsigned long gtowlower(const unsigned long w);
355 
356   virtual void set_remainder( void const * const buf, const unsigned int size,
357     const EncodeType encodetype);
358   virtual void set_remainder( void const * const buf, const unsigned int size,
359     const GP<GStringRep> &encoding );
360   virtual void set_remainder ( const GP<Unicode> &remainder );
361 
362   virtual GP<Unicode> get_remainder( void ) const;
363 
364 public:
365   /* Returns a copy of this string with characters used in XML with
366       '<'  to "&lt;", '>'  to "&gt;",  '&' to "&amp;" '\'' to
367       "&apos;", and  '\"' to  "&quot;".   Characters 0x01 through
368       0x1f are also escaped. */
369   GP<GStringRep> toEscaped( const bool tosevenbit ) const;
370 
371   // Tests if a string is legally encoded in the current character set.
372   virtual bool is_valid(void) const = 0;
373 #if HAS_WCHAR
374   virtual int ncopy(wchar_t * const buf, const int buflen) const = 0;
375 #endif
376 protected:
377 
378 // Actual string data.
379   int  size;
380   char *data;
381 };
382 
383 class DJVUAPI GStringRep::UTF8 : public GStringRep
384 {
385 public:
386   // default constructor
387   UTF8(void);
388   // virtual destructor
389   virtual ~UTF8();
390 
391     // Other virtual methods.
392   virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
393   virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
394       // Test if Native.
395   virtual bool isUTF8(void) const;
396       // Convert to Native.
397   virtual GP<GStringRep> toNative(
398     const EscapeMode escape=UNKNOWN_ESCAPED) const;
399       // Convert to UTF8.
400   virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
401       // Convert to same as current class.
402   virtual GP<GStringRep> toThis(
403     const GP<GStringRep> &rep,const GP<GStringRep> &) const;
404       // Compare with #s2#.
405   virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const;
406 
407   static GP<GStringRep> create(const unsigned int sz = 0);
408 
409   // Convert strings to numbers.
410   virtual int toInt(void) const;
411   virtual long int toLong(
412     const int pos, int &endpos, const int base=10) const;
413   virtual unsigned long toULong(
414     const int pos, int &endpos, const int base=10) const;
415   virtual double toDouble(
416     const int pos, int &endpos) const;
417 
418     // Create a strdup string.
419   static GP<GStringRep> create(const char *s);
420 
421    // Creates with a concat operation.
422   static GP<GStringRep> create(
423     const GP<GStringRep> &s1,const GP<GStringRep> &s2);
424   static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
425   static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
426   static GP<GStringRep> create( const char *s1,const char *s2);
427 
428     // Create with a strdup and substr operation.
429   static GP<GStringRep> create(
430     const char *s,const int start,const int length=(-1));
431 
432   static GP<GStringRep> create(
433     const uint16_t *s,const int start,const int length=(-1));
434 
435   static GP<GStringRep> create(
436     const uint32_t *s,const int start,const int length=(-1));
437 
438   static GP<GStringRep> create_format(const char fmt[],...);
439   static GP<GStringRep> create(const char fmt[],va_list& args);
440 
441   virtual unsigned char *UCS4toString(
442     const uint32_t w,unsigned char *ptr, mbstate_t *ps=0) const;
443 
444   // Tests if a string is legally encoded in the current character set.
445   virtual bool is_valid(void) const;
446 #if HAS_WCHAR
447   virtual int ncopy(wchar_t * const buf, const int buflen) const;
448 #endif
449   friend class GBaseString;
450 
451 protected:
452   // Return the next character and increment the source pointer.
453   virtual uint32_t getValidUCS4(const char *&source) const;
454 };
455 
456 
457 /** General purpose character string.
458     Each dirivied instance of class #GBaseString# represents a
459     character string.  Overloaded operators provide a value semantic
460     to #GBaseString# objects.  Conversion operators and constructors
461     transparently convert between #GBaseString# objects and
462     #const char*# pointers.  The #GBaseString# class has no public
463     constructors, since a dirived type should always be used
464     to specify the desired multibyte character encoding.
465 
466     Functions taking strings as arguments should declare their
467     arguments as "#const char*#".  Such functions will work equally
468     well with dirived #GBaseString# objects since there is a fast
469     conversion operator from the dirivied #GBaseString# objects
470     to "#const char*#".  Functions returning strings should return
471     #GUTF8String# or #GNativeString# objects because the class will
472     automatically manage the necessary memory.
473 
474     Characters in the string can be identified by their position.  The
475     first character of a string is numbered zero. Negative positions
476     represent characters relative to the end of the string (i.e.
477     position #-1# accesses the last character of the string,
478     position #-2# represents the second last character, etc.)  */
479 
480 class DJVUAPI GBaseString : protected GP<GStringRep>
481 {
482 public:
483   enum EscapeMode {
484     UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED,
485     IS_ESCAPED=GStringRep::IS_ESCAPED,
486     NOT_ESCAPED=GStringRep::NOT_ESCAPED };
487 
488   friend class GUTF8String;
489   friend class GNativeString;
490 protected:
491   // Sets the gstr pointer;
492   inline void init(void);
493 
494   ~GBaseString();
495   inline GBaseString &init(const GP<GStringRep> &rep);
496 
497   // -- CONSTRUCTORS
498   /** Null constructor. Constructs an empty string. */
499   GBaseString( void );
500 
501 public:
502   // -- ACCESS
503   /** Converts a string into a constant null terminated character
504       array.  This conversion operator is very efficient because
505       it simply returns a pointer to the internal string data. The
506       returned pointer remains valid as long as the string is
507       unmodified. */
508   operator const char* ( void ) const  ;
509   /// Returns the string length.
510   unsigned int length( void ) const;
511   /** Returns true if and only if the string contains zero characters.
512       This operator is useful for conditional expression in control
513       structures.
514       \begin{verbatim}
515          if (! str) { ... }
516          while (!! str) { ... }  -- Note the double operator!
517       \end{verbatim}
518       Class #GBaseString# does not to support syntax
519       "#if# #(str)# #{}#" because the required conversion operator
520       introduces dangerous ambiguities with certain compilers. */
521   bool operator! ( void ) const;
522 
523   // -- INDEXING
524   /** Returns the character at position #n#. An exception
525       \Ref{GException} is thrown if number #n# is not in range #-len#
526       to #len-1#, where #len# is the length of the string.  The first
527       character of a string is numbered zero.  Negative positions
528       represent characters relative to the end of the string. */
529   char operator[] (int n) const;
530   /// Returns #TRUE# if the string contains an integer number.
531   bool is_int(void) const;
532   /// Returns #TRUE# if the string contains a float number.
533   bool is_float(void) const;
534 
535   /** Converts strings between native & UTF8 **/
536   GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const;
537   GUTF8String getNative2UTF8( void ) const;
538 
539   // -- ALTERING
540   /// Reinitializes a string with the null string.
541   void empty( void );
542   // -- SEARCHING
543   /** Searches character #c# in the string, starting at position
544       #from# and scanning forward until reaching the end of the
545       string.  This function returns the position of the matching
546       character.  It returns #-1# if character #c# cannot be found. */
547   int search(char c, int from=0) const;
548 
549   /** Searches sub-string #str# in the string, starting at position
550       #from# and scanning forward until reaching the end of the
551       string.  This function returns the position of the first
552       matching character of the sub-string.  It returns #-1# if
553       string #str# cannot be found. */
554   int search(const char *str, int from=0) const;
555 
556   /** Searches character #c# in the string, starting at position
557       #from# and scanning backwards until reaching the beginning of
558       the string.  This function returns the position of the matching
559       character.  It returns #-1# if character #c# cannot be found. */
560   int rsearch(char c, const int from=0) const;
561   /** Searches sub-string #str# in the string, starting at position
562       #from# and scanning backwards until reaching the beginning of
563       the string.  This function returns the position of the first
564       matching character of the sub-string. It returns #-1# if
565       string #str# cannot be found. */
566   int rsearch(const char *str, const int from=0) const;
567   /** Searches for any of the specified characters in the accept
568       string.  It returns #-1# if the none of the characters and
569       be found, otherwise the position of the first match. */
570   int contains(const char accept[], const int from=0) const;
571   /** Searches for any of the specified characters in the accept
572       string.  It returns #-1# if the none of the characters and be
573       found, otherwise the position of the last match. */
574   int rcontains(const char accept[], const int from=0) const;
575 
576   /** Concatenates strings. Returns a string composed by concatenating
577       the characters of strings #s1# and #s2#. */
578   GUTF8String operator+(const GUTF8String &s2) const;
579   GNativeString operator+(const GNativeString &s2) const;
580 
581   /** Returns an integer.  Implements i18n atoi.  */
582   int toInt(void) const;
583 
584   /** Returns a long intenger.  Implments i18n strtol.  */
585   long toLong(const int pos, int &endpos, const int base=10) const;
586 
587   /** Returns a unsigned long integer.  Implements i18n strtoul. */
588   unsigned long toULong(
589     const int pos, int &endpos, const int base=10) const;
590 
591   /** Returns a double.  Implements the i18n strtod.  */
592   double toDouble(
593     const int pos, int &endpos ) const;
594 
595   /** Returns a long intenger.  Implments i18n strtol.  */
596   static long toLong(
597     const GUTF8String& src, const int pos, int &endpos, const int base=10);
598 
599   static unsigned long toULong(
600     const GUTF8String& src, const int pos, int &endpos, const int base=10);
601 
602   static double toDouble(
603     const GUTF8String& src, const int pos, int &endpos);
604 
605   /** Returns a long intenger.  Implments i18n strtol.  */
606   static long toLong(
607     const GNativeString& src, const int pos, int &endpos, const int base=10);
608 
609   static unsigned long toULong(
610     const GNativeString& src, const int pos, int &endpos, const int base=10);
611 
612   static double toDouble(
613     const GNativeString& src, const int pos, int &endpos);
614 
615   // -- HASHING
616 
617   // -- COMPARISONS
618     /** Returns an #int#.  Compares string with #s2# and returns
619         sorting order. */
620   int cmp(const GBaseString &s2, const int len=(-1)) const;
621     /** Returns an #int#.  Compares string with #s2# and returns
622         sorting order. */
623   int cmp(const char *s2, const int len=(-1)) const;
624     /** Returns an #int#.  Compares string with #s2# and returns
625         sorting order. */
626   int cmp(const char s2) const;
627     /** Returns an #int#.  Compares #s2# with #s2# and returns
628         sorting order. */
629   static int cmp(const char *s1, const char *s2, const int len=(-1));
630   /** Returns a boolean. The Standard C strncmp takes two string and
631       compares the first N characters.  static bool GBaseString::ncmp
632       will compare #s1# with #s2# with the #len# characters starting
633       from the beginning of the string. */
634   /** String comparison. Returns true if and only if character
635       strings #s1# and #s2# are equal (as with #strcmp#.)
636     */
637   bool operator==(const GBaseString &s2) const;
638   bool operator==(const char *s2) const;
639   friend bool operator==(const char    *s1, const GBaseString &s2);
640 
641   /** String comparison. Returns true if and only if character
642       strings #s1# and #s2# are not equal (as with #strcmp#.)
643     */
644   bool operator!=(const GBaseString &s2) const;
645   bool operator!=(const char *s2) const;
646   friend bool operator!=(const char *s1, const GBaseString &s2);
647 
648   /** String comparison. Returns true if and only if character
649       strings #s1# is lexicographically greater than or equal to
650       string #s2# (as with #strcmp#.) */
651   bool operator>=(const GBaseString &s2) const;
652   bool operator>=(const char *s2) const;
653   bool operator>=(const char s2) const;
654   friend bool operator>=(const char    *s1, const GBaseString &s2);
655   friend bool operator>=(const char s1, const GBaseString &s2);
656 
657   /** String comparison. Returns true if and only if character
658       strings #s1# is lexicographically less than string #s2#
659       (as with #strcmp#.)
660    */
661   bool operator<(const GBaseString &s2) const;
662   bool operator<(const char *s2) const;
663   bool operator<(const char s2) const;
664   friend bool operator<(const char *s1, const GBaseString &s2);
665   friend bool operator<(const char s1, const GBaseString &s2);
666 
667   /** String comparison. Returns true if and only if character
668       strings #s1# is lexicographically greater than string #s2#
669       (as with #strcmp#.)
670    */
671   bool operator> (const GBaseString &s2) const;
672   bool operator> (const char *s2) const;
673   bool operator> (const char s2) const;
674   friend bool operator> (const char    *s1, const GBaseString &s2);
675   friend bool operator> (const char s1, const GBaseString &s2);
676 
677   /** String comparison. Returns true if and only if character
678       strings #s1# is lexicographically less than or equal to string
679       #s2# (as with #strcmp#.)
680    */
681   bool operator<=(const GBaseString &s2) const;
682   bool operator<=(const char *s2) const;
683   bool operator<=(const char s2) const;
684   friend bool operator<=(const char    *s1, const GBaseString &s2);
685   friend bool operator<=(const char    s1, const GBaseString &s2);
686 
687    /** Returns an integer.  Implements a functional i18n atoi. Note
688        that if you pass a GBaseString that is not in Native format
689        the results may be disparaging. */
690 
691   /** Returns a hash code for the string.  This hashing function
692       helps when creating associative maps with string keys (see
693       \Ref{GMap}).  This hash code may be reduced to an arbitrary
694       range by computing its remainder modulo the upper bound of
695       the range. */
696   friend DJVUAPI unsigned int hash(const GBaseString &ref);
697   // -- HELPERS
698   friend class GStringRep;
699 
700   /// Returns next non space position.
701   int nextNonSpace( const int from=0, const int len=(-1) ) const;
702 
703   /// Returns next character position.
704   int nextChar( const int from=0 ) const;
705 
706   /// Returns next non space position.
707   int nextSpace( const int from=0, const int len=(-1) ) const;
708 
709   /// return the position after the last non-whitespace character.
710   int firstEndSpace( const int from=0,const int len=(-1) ) const;
711 
712   /// Tests if the string is legally encoded in the current codepage.
713   bool is_valid(void) const;
714 
715   /// copy to a wchar_t buffer
716 #if HAS_WCHAR
717   int ncopy(wchar_t * const buf, const int buflen) const;
718 #endif
719 protected:
720   const char *gstr;
721   static void throw_illegal_subscript() no_return;
722   static const char *nullstr;
723 public:
724   GNativeString UTF8ToNative(
725     const bool currentlocale=false,
726     const EscapeMode escape=UNKNOWN_ESCAPED) const;
727   GUTF8String NativeToUTF8(void) const;
728 protected:
729   inline int CheckSubscript(int n) const;
730 };
731 
/** General purpose character string.
    Each instance of class #GUTF8String# represents a character
    string.  Overloaded operators provide value semantics to
    #GUTF8String# objects.  Conversion operators and constructors
    transparently convert between #GUTF8String# objects and
    #const char*# pointers.

    Functions taking strings as arguments should declare their
    arguments as "#const char*#".  Such functions will work equally
    well with #GUTF8String# objects since there is a fast conversion
    operator from #GUTF8String# to "#const char*#".  Functions
    returning strings should return #GUTF8String# or #GNativeString#
    objects because the class will automatically manage the necessary
    memory.

    Characters in the string can be identified by their position.  The
    first character of a string is numbered zero. Negative positions
    represent characters relative to the end of the string (i.e.
    position #-1# accesses the last character of the string,
    position #-2# represents the second last character, etc.)  */

class DJVUAPI GUTF8String : public GBaseString
{
public:
  ~GUTF8String();
  /// Re-initializes the base class state by calling #GBaseString::init()#.
  inline void init(void);

  /** Replaces the representation with #rep# (passed through
      #toUTF8(true)# when it is not null) and returns this string. */
  inline GUTF8String &init(const GP<GStringRep> &rep);

  // -- CONSTRUCTORS
  /** Null constructor. Constructs an empty string. */
  GUTF8String(void);
  /// Constructs a string from a character.
  GUTF8String(const char dat);
  /// Constructs a string from a null terminated character array.
  GUTF8String(const char *str);
  /// Constructs a string from a null terminated character array.
  GUTF8String(const unsigned char *str);
  GUTF8String(const uint16_t *dat);
  GUTF8String(const uint32_t *dat);
  /** Constructs a string from a character array.  Elements of the
      character array #dat# are added into the string until the
      string length reaches #len# or until encountering a null
      character (whichever comes first). */
  GUTF8String(const char *dat, unsigned int len);
  GUTF8String(const uint16_t *dat, unsigned int len);
  GUTF8String(const uint32_t *dat, unsigned int len);

  /// Construct from base class.
  GUTF8String(const GP<GStringRep> &str);
  GUTF8String(const GBaseString &str);
  GUTF8String(const GUTF8String &str);
  GUTF8String(const GNativeString &str);
  /** Constructs a string from a range of string #gs#.  The range is
      described by position #from# and length #len#; presumably these
      arguments select characters the same way as in \Ref{substr}
      (to be confirmed against the implementation). */
  GUTF8String(const GBaseString &gs, int from, int len);

  /** Copy a null terminated character array. Resets this string
      with the character string contained in the null terminated
      character array #str#.  The inline overloads taking a string
      or a representation re-initialize this string by calling
      #init(str)#. */
  GUTF8String& operator= (const char str);
  GUTF8String& operator= (const char *str);
  inline GUTF8String& operator= (const GP<GStringRep> &str);
  inline GUTF8String& operator= (const GBaseString &str);
  inline GUTF8String& operator= (const GUTF8String &str);
  inline GUTF8String& operator= (const GNativeString &str);

  /** Constructs a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer
      than 32768 characters. */
  GUTF8String(const GUTF8String &fmt, va_list &args);

  /** Constructs a string with a human-readable representation of
      integer #number#.  The format is similar to format #"%d"# in
      function #printf#. */
  GUTF8String(const int number);

  /** Constructs a string with a human-readable representation of
      floating point number #number#. The format is similar to
      format #"%f"# in function #printf#.  */
  GUTF8String(const double number);


  /** Initializes a string with a formatted string (as in #printf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #printf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer
      than 32768 characters. */
  GUTF8String &format(const char *fmt, ... );
  /** Initializes a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer
      than 32768 characters. */
  GUTF8String &vformat(const GUTF8String &fmt, va_list &args);

  /** Returns a copy of this string in which the characters with a
      special meaning in XML are escaped: '<' becomes "&lt;", '>'
      becomes "&gt;", '&' becomes "&amp;", '\'' becomes "&apos;",
      and '\"' becomes "&quot;".  Characters 0x01 through 0x1f are
      also escaped. */
  GUTF8String toEscaped( const bool tosevenbit=false ) const;

  /** Converts strings containing HTML/XML escaped characters into
      their unescaped forms. Numeric representations of characters
      (e.g., "&#38;" or "&#x26;" for "&") are the only forms
      converted by this function. */
  GUTF8String fromEscaped( void ) const;

  /** Converts strings containing HTML/XML escaped characters
      (e.g., "&lt;" for "<") into their unescaped forms. The
      conversion is partially defined by the ConvMap argument which
      specifies the conversion strings to be recognized. Numeric
      representations of characters (e.g., "&#38;" or "&#x26;"
      for "&") are always converted. */
  GUTF8String fromEscaped(
    const GMap<GUTF8String,GUTF8String> ConvMap ) const;


  // -- CONCATENATION
  /// Appends character #ch# to the string.
  GUTF8String& operator+= (char ch);

  /// Appends the null terminated character array #str# to the string.
  GUTF8String& operator+= (const char *str);
  /// Appends the specified GBaseString to the string.
  GUTF8String& operator+= (const GBaseString &str);

  /** Returns a sub-string.  The sub-string is composed by copying
      #len# characters starting at position #from# in this string.
      The length of the resulting string may be smaller than #len#
      if the specified range is too large. */
  GUTF8String substr(int from, int len/*=(-1)*/) const;

  /** Returns an upper case copy of this string.  The returned string
      contains a copy of the current string with all letters turned
      into upper case letters. */
  GUTF8String upcase( void ) const;
  /** Returns a lower case copy of this string.  The returned string
      contains a copy of the current string with all letters turned
      into lower case letters. */
  GUTF8String downcase( void ) const;

  /** Concatenates strings. Returns a string composed by concatenating
      the characters of strings #s1# and #s2#.
  */
  GUTF8String operator+(const GBaseString &s2) const;
  GUTF8String operator+(const GUTF8String &s2) const;
  GUTF8String operator+(const GNativeString &s2) const;
  GUTF8String operator+(const char *s2) const;
  friend DJVUAPI GUTF8String operator+(const char *s1, const GUTF8String &s2);

  /** Provides a direct access to the string buffer.  Returns a
      pointer for directly accessing the string buffer.  This pointer
      remains valid as long as the string is not modified by
      other means.  Positive values for argument #n# represent the
      length of the returned buffer.  The returned string buffer will
      be large enough to hold at least #n# characters plus a null
      character.  If #n# is positive but smaller than the string
      length, the string will be truncated to #n# characters. */
  char *getbuf(int n = -1);
  /** Set the character at position #n# to value #ch#.  An exception
      \Ref{GException} is thrown if number #n# is not in range #-len#
      to #len#, where #len# is the length of the string.  If character
      #ch# is zero, the string is truncated at position #n#.  The
      first character of a string is numbered zero. Negative
      positions represent characters relative to the end of the
      string. If position #n# is equal to the length of the string,
      this function appends character #ch# to the end of the string. */
  void setat(const int n, const char ch);
public:
  // -- CREATION HELPERS
  typedef enum GStringRep::EncodeType EncodeType;
  static GUTF8String create(void const * const buf,
    const unsigned int size,
    const EncodeType encodetype, const GUTF8String &encoding);
  static GUTF8String create( void const * const buf,
    unsigned int size, const EncodeType encodetype );
  static GUTF8String create( void const * const buf,
    const unsigned int size, const GUTF8String &encoding );
  static GUTF8String create( void const * const buf,
    const unsigned int size, const GP<GStringRep::Unicode> &remainder);
  /** Returns the remainder object reported by the string
      representation, or a null pointer when this string has no
      representation. */
  GP<GStringRep::Unicode> get_remainder(void) const;
  /** The three overloads below are inline wrappers around the
      constructors taking a buffer and a length.  When #HAS_WCHAR# is
      set, the #const char*# version builds a #GNativeString# first. */
  static GUTF8String create( const char *buf, const unsigned int bufsize );
  static GUTF8String create( const uint16_t *buf, const unsigned int bufsize );
  static GUTF8String create( const uint32_t *buf, const unsigned int bufsize );
};
928 
929 
// Without wide character support, the name GBaseString is temporarily
// redefined so that the class declaration below derives from (and takes
// arguments of) GUTF8String.  The macro is removed again by the #undef
// located inside the class body.
#if !HAS_WCHAR
#define GBaseString GUTF8String
#endif

/** General purpose character string.
    Each instance of class #GNativeString# represents a character
    string.  Overloaded operators provide value semantics to
    #GNativeString# objects.  Conversion operators and constructors
    transparently convert between #GNativeString# objects and
    #const char*# pointers.

    Functions taking strings as arguments should declare their
    arguments as "#const char*#".  Such functions will work equally
    well with #GNativeString# objects since there is a fast conversion
    operator from #GNativeString# to "#const char*#".  Functions
    returning strings should return #GUTF8String# or #GNativeString#
    objects because the class will automatically manage the necessary
    memory.

    Characters in the string can be identified by their position.  The
    first character of a string is numbered zero. Negative positions
    represent characters relative to the end of the string (i.e.
    position #-1# accesses the last character of the string,
    position #-2# represents the second last character, etc.)  */

class DJVUAPI GNativeString : public GBaseString
{
public:
  ~GNativeString();
  // -- CONSTRUCTORS
  /** Null constructor. Constructs an empty string. */
  GNativeString(void);
  /// Constructs a string from a character.
  GNativeString(const char dat);
  /// Constructs a string from a null terminated character array.
  GNativeString(const char *str);
  /// Constructs a string from a null terminated character array.
  GNativeString(const unsigned char *str);
  GNativeString(const uint16_t *str);
  GNativeString(const uint32_t *str);
  /** Constructs a string from a character array.  Elements of the
      character array #dat# are added into the string until the
      string length reaches #len# or until encountering a null
      character (whichever comes first). */
  GNativeString(const char *dat, unsigned int len);
  GNativeString(const uint16_t *dat, unsigned int len);
  GNativeString(const uint32_t *dat, unsigned int len);
  /// Construct from base class.
  GNativeString(const GP<GStringRep> &str);
  GNativeString(const GBaseString &str);
#if HAS_WCHAR
  GNativeString(const GUTF8String &str);
#endif
  GNativeString(const GNativeString &str);
  /** Constructs a string from a range of string #gs#.  The range is
      described by position #from# and length #len#.  Function
      \Ref{substr} is implemented with this constructor when
      #HAS_WCHAR# is set. */
  GNativeString(const GBaseString &gs, int from, int len);

  /** Constructs a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer than
      32768 characters. */
  GNativeString(const GNativeString &fmt, va_list &args);

  /** Constructs a string with a human-readable representation of
      integer #number#.  The format is similar to format #"%d"# in
      function #printf#. */
  GNativeString(const int number);

  /** Constructs a string with a human-readable representation of
      floating point number #number#. The format is similar to
      format #"%f"# in function #printf#.  */
  GNativeString(const double number);

#if !HAS_WCHAR
  // End of the temporary redefinition of GBaseString started before
  // the class.  Nothing else is declared here in this configuration.
#undef GBaseString
#else
  /// Initialize this string class
  void init(void);

  /// Initialize this string class
  GNativeString &init(const GP<GStringRep> &rep);

  /** Copy a null terminated character array. Resets this string with
      the character string contained in the null terminated character
      array #str#. */
  GNativeString& operator= (const char str);
  GNativeString& operator= (const char *str);
  inline GNativeString& operator= (const GP<GStringRep> &str);
  inline GNativeString& operator= (const GBaseString &str);
  inline GNativeString& operator= (const GUTF8String &str);
  inline GNativeString& operator= (const GNativeString &str);
  // -- CONCATENATION
  /// Appends character #ch# to the string.
  GNativeString& operator+= (char ch);
  /// Appends the null terminated character array #str# to the string.
  GNativeString& operator+= (const char *str);
  /// Appends the specified GBaseString to the string.
  GNativeString& operator+= (const GBaseString &str);

  /** Returns a sub-string.  The sub-string is composed by copying
      #len# characters starting at position #from# in this string.
      The length of the resulting string may be smaller than #len#
      if the specified range is too large. */
  GNativeString substr(int from, int len/*=(-1)*/) const;

  /** Returns an upper case copy of this string.  The returned
      string contains a copy of the current string with all letters
      turned into upper case letters. */
  GNativeString upcase( void ) const;
  /** Returns a lower case copy of this string.  The returned
      string contains a copy of the current string with all letters
      turned into lower case letters. */
  GNativeString downcase( void ) const;


  /** Concatenates strings.  Note that the overload taking a
      #GUTF8String# returns a #GUTF8String#. */
  GNativeString operator+(const GBaseString &s2) const;
  GNativeString operator+(const GNativeString &s2) const;
  GUTF8String operator+(const GUTF8String &s2) const;
  GNativeString operator+(const char *s2) const;
  friend DJVUAPI GNativeString operator+(const char *s1, const GNativeString &s2);

  /** Initializes a string with a formatted string (as in #printf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #printf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer than
      32768 characters. */
  GNativeString &format(const char *fmt, ... );
  /** Initializes a string with a formatted string (as in #vprintf#).
      The string is re-initialized with the characters generated
      according to the specified format #fmt# and using the optional
      arguments.  See the ANSI-C function #vprintf()# for more
      information. The current implementation will cause a
      segmentation violation if the resulting string is longer than
      32768 characters. */
  GNativeString &vformat(const GNativeString &fmt, va_list &args);

  /** Returns a copy of this string in which the characters with a
      special meaning in XML are escaped: '<' becomes "&lt;", '>'
      becomes "&gt;", '&' becomes "&amp;", '\'' becomes "&apos;",
      and '\"' becomes "&quot;".  Characters 0x01 through 0x1f are
      also escaped. */
  GNativeString toEscaped( const bool tosevenbit=false ) const;


  /** Provides a direct access to the string buffer.  Returns a
      pointer for directly accessing the string buffer.  This
      pointer remains valid as long as the string is not
      modified by other means.  Positive values for argument #n#
      represent the length of the returned buffer.  The returned
      string buffer will be large enough to hold at least #n#
      characters plus a null character.  If #n# is positive but
      smaller than the string length, the string will be truncated
      to #n# characters. */
  char *getbuf(int n = -1);
  /** Set the character at position #n# to value #ch#.  An exception
      \Ref{GException} is thrown if number #n# is not in range #-len#
      to #len#, where #len# is the length of the string.  If
      character #ch# is zero, the string is truncated at position
      #n#.  The first character of a string is numbered zero.
      Negative positions represent characters relative to the end of
      the string. If position #n# is equal to the length of the
      string, this function appends character #ch# to the end of the
      string. */
  void setat(const int n, const char ch);

  static GNativeString create( const char *buf, const unsigned int bufsize );
  static GNativeString create( const uint16_t *buf, const unsigned int bufsize );
  static GNativeString create( const uint32_t *buf, const unsigned int bufsize );
#endif // HAS_WCHAR
};
1107 
1108 //@}
1109 
1110 inline
1111 GBaseString::operator const char* ( void ) const
1112 {
1113   return ptr?(*this)->data:nullstr;
1114 }
1115 
1116 inline unsigned int
length(void)1117 GBaseString::length( void ) const
1118 {
1119   return ptr ? (*this)->size : 0;
1120 }
1121 
1122 inline bool
1123 GBaseString::operator! ( void ) const
1124 {
1125   return !ptr;
1126 }
1127 
1128 inline GUTF8String
upcase(void)1129 GUTF8String::upcase( void ) const
1130 {
1131   if (ptr) return (*this)->upcase();
1132   return *this;
1133 }
1134 
1135 inline GUTF8String
downcase(void)1136 GUTF8String::downcase( void ) const
1137 {
1138   if (ptr) return (*this)->downcase();
1139   return *this;
1140 }
1141 
1142 inline void
init(void)1143 GUTF8String::init(void)
1144 { GBaseString::init(); }
1145 
1146 inline GUTF8String &
init(const GP<GStringRep> & rep)1147 GUTF8String::init(const GP<GStringRep> &rep)
1148 { GP<GStringRep>::operator=(rep?rep->toUTF8(true):rep); init(); return *this; }
1149 
1150 inline GUTF8String &
vformat(const GUTF8String & fmt,va_list & args)1151 GUTF8String::vformat(const GUTF8String &fmt, va_list &args)
1152 { return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); }
1153 
1154 inline GUTF8String
toEscaped(const bool tosevenbit)1155 GUTF8String::toEscaped( const bool tosevenbit ) const
1156 { return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); }
1157 
1158 inline GP<GStringRep::Unicode>
get_remainder(void)1159 GUTF8String::get_remainder(void) const
1160 {
1161   GP<GStringRep::Unicode> retval;
1162   if(ptr)
1163     retval=((*this)->get_remainder());
1164   return retval;
1165 }
1166 
1167 inline
GUTF8String(const GNativeString & str)1168 GUTF8String::GUTF8String(const GNativeString &str)
1169 { init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1170 
1171 inline
GUTF8String(const GP<GStringRep> & str)1172 GUTF8String::GUTF8String(const GP<GStringRep> &str)
1173 { init(str?(str->toUTF8(true)):str); }
1174 
1175 inline
GUTF8String(const GBaseString & str)1176 GUTF8String::GUTF8String(const GBaseString &str)
1177 { init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1178 
1179 inline void
init(void)1180 GBaseString::init(void)
1181 {
1182   gstr=ptr?((*this)->data):nullstr;
1183 }
1184 /** Returns an integer.  Implements i18n atoi.  */
1185 inline int
toInt(void)1186 GBaseString::toInt(void) const
1187 { return ptr?(*this)->toInt():0; }
1188 
1189 /** Returns a long intenger.  Implments i18n strtol.  */
1190 inline long
toLong(const int pos,int & endpos,const int base)1191 GBaseString::toLong(const int pos, int &endpos, const int base) const
1192 {
1193   long int retval=0;
1194   if(ptr)
1195   {
1196     retval=(*this)->toLong(pos, endpos, base);
1197   }else
1198   {
1199     endpos=(-1);
1200   }
1201   return retval;
1202 }
1203 
1204 inline long
toLong(const GUTF8String & src,const int pos,int & endpos,const int base)1205 GBaseString::toLong(
1206   const GUTF8String& src, const int pos, int &endpos, const int base)
1207 {
1208   return src.toLong(pos,endpos,base);
1209 }
1210 
1211 inline long
toLong(const GNativeString & src,const int pos,int & endpos,const int base)1212 GBaseString::toLong(
1213   const GNativeString& src, const int pos, int &endpos, const int base)
1214 {
1215   return src.toLong(pos,endpos,base);
1216 }
1217 
1218 /** Returns a unsigned long integer.  Implements i18n strtoul. */
1219 inline unsigned long
toULong(const int pos,int & endpos,const int base)1220 GBaseString::toULong(const int pos, int &endpos, const int base) const
1221 {
1222   unsigned long retval=0;
1223   if(ptr)
1224   {
1225     retval=(*this)->toULong(pos, endpos, base);
1226   }else
1227   {
1228     endpos=(-1);
1229   }
1230   return retval;
1231 }
1232 
1233 inline unsigned long
toULong(const GUTF8String & src,const int pos,int & endpos,const int base)1234 GBaseString::toULong(
1235   const GUTF8String& src, const int pos, int &endpos, const int base)
1236 {
1237   return src.toULong(pos,endpos,base);
1238 }
1239 
1240 inline unsigned long
toULong(const GNativeString & src,const int pos,int & endpos,const int base)1241 GBaseString::toULong(
1242   const GNativeString& src, const int pos, int &endpos, const int base)
1243 {
1244   return src.toULong(pos,endpos,base);
1245 }
1246 
1247 /** Returns a double.  Implements the i18n strtod.  */
1248 inline double
toDouble(const int pos,int & endpos)1249 GBaseString::toDouble(
1250   const int pos, int &endpos ) const
1251 {
1252   double retval=(double)0;
1253   if(ptr)
1254   {
1255     retval=(*this)->toDouble(pos, endpos);
1256   }else
1257   {
1258     endpos=(-1);
1259   }
1260   return retval;
1261 }
1262 
1263 inline double
toDouble(const GUTF8String & src,const int pos,int & endpos)1264 GBaseString::toDouble(
1265   const GUTF8String& src, const int pos, int &endpos)
1266 {
1267   return src.toDouble(pos,endpos);
1268 }
1269 
1270 inline double
toDouble(const GNativeString & src,const int pos,int & endpos)1271 GBaseString::toDouble(
1272   const GNativeString& src, const int pos, int &endpos)
1273 {
1274   return src.toDouble(pos,endpos);
1275 }
1276 
1277 inline GBaseString &
init(const GP<GStringRep> & rep)1278 GBaseString::init(const GP<GStringRep> &rep)
1279 { GP<GStringRep>::operator=(rep); init(); return *this;}
1280 
1281 inline char
1282 GBaseString::operator[] (int n) const
1283 { return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); }
1284 
1285 inline int
search(char c,int from)1286 GBaseString::search(char c, int from) const
1287 { return ptr?((*this)->search(c,from)):(-1); }
1288 
1289 inline int
search(const char * str,int from)1290 GBaseString::search(const char *str, int from) const
1291 { return ptr?((*this)->search(str,from)):(-1); }
1292 
1293 inline int
rsearch(char c,const int from)1294 GBaseString::rsearch(char c, const int from) const
1295 { return ptr?((*this)->rsearch(c,from)):(-1); }
1296 
1297 inline int
rsearch(const char * str,const int from)1298 GBaseString::rsearch(const char *str, const int from) const
1299 { return ptr?((*this)->rsearch(str,from)):(-1); }
1300 
1301 inline int
contains(const char accept[],const int from)1302 GBaseString::contains(const char accept[], const int from) const
1303 { return ptr?((*this)->contains(accept,from)):(-1); }
1304 
1305 inline int
rcontains(const char accept[],const int from)1306 GBaseString::rcontains(const char accept[], const int from) const
1307 { return ptr?((*this)->rcontains(accept,from)):(-1); }
1308 
1309 inline int
cmp(const GBaseString & s2,const int len)1310 GBaseString::cmp(const GBaseString &s2, const int len) const
1311 { return GStringRep::cmp(*this,s2,len); }
1312 
1313 inline int
cmp(const char * s2,const int len)1314 GBaseString::cmp(const char *s2, const int len) const
1315 { return GStringRep::cmp(*this,s2,len); }
1316 
1317 inline int
cmp(const char s2)1318 GBaseString::cmp(const char s2) const
1319 { return GStringRep::cmp(*this,&s2,1); }
1320 
1321 inline int
cmp(const char * s1,const char * s2,const int len)1322 GBaseString::cmp(const char *s1, const char *s2, const int len)
1323 { return GStringRep::cmp(s1,s2,len); }
1324 
1325 inline bool
1326 GBaseString::operator==(const GBaseString &s2) const
1327 { return !cmp(s2); }
1328 
1329 inline bool
1330 GBaseString::operator==(const char *s2) const
1331 { return !cmp(s2); }
1332 
1333 inline bool
1334 GBaseString::operator!=(const GBaseString &s2) const
1335 { return !!cmp(s2); }
1336 
1337 inline bool
1338 GBaseString::operator!=(const char *s2) const
1339 { return !!cmp(s2); }
1340 
1341 inline bool
1342 GBaseString::operator>=(const GBaseString &s2) const
1343 { return (cmp(s2)>=0); }
1344 
1345 inline bool
1346 GBaseString::operator>=(const char *s2) const
1347 { return (cmp(s2)>=0); }
1348 
1349 inline bool
1350 GBaseString::operator>=(const char s2) const
1351 { return (cmp(s2)>=0); }
1352 
1353 inline bool
1354 GBaseString::operator<(const GBaseString &s2) const
1355 { return (cmp(s2)<0); }
1356 
1357 inline bool
1358 GBaseString::operator<(const char *s2) const
1359 { return (cmp(s2)<0); }
1360 
1361 inline bool
1362 GBaseString::operator<(const char s2) const
1363 { return (cmp(s2)<0); }
1364 
1365 inline bool
1366 GBaseString::operator> (const GBaseString &s2) const
1367 { return (cmp(s2)>0); }
1368 
1369 inline bool
1370 GBaseString::operator> (const char *s2) const
1371 { return (cmp(s2)>0); }
1372 
1373 inline bool
1374 GBaseString::operator> (const char s2) const
1375 { return (cmp(s2)>0); }
1376 
1377 inline bool
1378 GBaseString::operator<=(const GBaseString &s2) const
1379 { return (cmp(s2)<=0); }
1380 
1381 inline bool
1382 GBaseString::operator<=(const char *s2) const
1383 { return (cmp(s2)<=0); }
1384 
1385 inline bool
1386 GBaseString::operator<=(const char s2) const
1387 { return (cmp(s2)<=0); }
1388 
1389 inline int
nextNonSpace(const int from,const int len)1390 GBaseString::nextNonSpace( const int from, const int len ) const
1391 { return ptr?(*this)->nextNonSpace(from,len):0; }
1392 
1393 inline int
nextChar(const int from)1394 GBaseString::nextChar( const int from ) const
1395 { return ptr?(*this)->nextChar(from):0; }
1396 
1397 inline int
nextSpace(const int from,const int len)1398 GBaseString::nextSpace( const int from, const int len ) const
1399 { return ptr?(*this)->nextSpace(from,len):0; }
1400 
1401 inline int
firstEndSpace(const int from,const int len)1402 GBaseString::firstEndSpace( const int from,const int len ) const
1403 { return ptr?(*this)->firstEndSpace(from,len):0; }
1404 
1405 inline bool
is_valid(void)1406 GBaseString::is_valid(void) const
1407 { return ptr?((*this)->is_valid()):true; }
1408 
#if HAS_WCHAR
/// Copies the string into wide character buffer #buf#.  The buffer is
/// first reset to an empty string when it is usable; zero is returned
/// when the string has no representation.
inline int
GBaseString::ncopy(wchar_t * const buf, const int buflen) const
{
  if (buf && buflen)
    buf[0] = 0;
  if (!ptr)
    return 0;
  return (*this)->ncopy(buf, buflen);
}
#endif
1414 
1415 inline int
CheckSubscript(int n)1416 GBaseString::CheckSubscript(int n) const
1417 {
1418   if(n)
1419   {
1420     if (n<0 && ptr)
1421       n += (*this)->size;
1422     if (n<0 || !ptr || n > (int)(*this)->size)
1423       throw_illegal_subscript();
1424   }
1425   return n;
1426 }
1427 
GBaseString(void)1428 inline GBaseString::GBaseString(void) { init(); }
1429 
GUTF8String(void)1430 inline GUTF8String::GUTF8String(void) { }
1431 
/// Copy constructor: initializes from the representation of #str#.
inline
GUTF8String::GUTF8String(const GUTF8String &str)
{
  init(str);
}
1434 
1435 inline GUTF8String& GUTF8String::operator= (const GP<GStringRep> &str)
1436 { return init(str); }
1437 
1438 inline GUTF8String& GUTF8String::operator= (const GBaseString &str)
1439 { return init(str); }
1440 
1441 inline GUTF8String& GUTF8String::operator= (const GUTF8String &str)
1442 { return init(str); }
1443 
1444 inline GUTF8String& GUTF8String::operator= (const GNativeString &str)
1445 { return init(str); }
1446 
1447 inline GUTF8String
create(const char * buf,const unsigned int bufsize)1448 GUTF8String::create( const char *buf, const unsigned int bufsize )
1449 {
1450 #if HAS_WCHAR
1451   return GNativeString(buf,bufsize);
1452 #else
1453   return GUTF8String(buf,bufsize);
1454 #endif
1455 }
1456 
1457 inline GUTF8String
create(const uint16_t * buf,const unsigned int bufsize)1458 GUTF8String::create( const uint16_t *buf, const unsigned int bufsize )
1459 {
1460   return GUTF8String(buf,bufsize);
1461 }
1462 
1463 inline GUTF8String
create(const uint32_t * buf,const unsigned int bufsize)1464 GUTF8String::create( const uint32_t *buf, const unsigned int bufsize )
1465 {
1466   return GUTF8String(buf,bufsize);
1467 }
1468 
GNativeString(void)1469 inline GNativeString::GNativeString(void) {}
1470 
1471 #if !HAS_WCHAR
1472 // For Windows CE, GNativeString is essentially GUTF8String
1473 
1474 inline
GNativeString(const GUTF8String & str)1475 GNativeString::GNativeString(const GUTF8String &str)
1476 : GUTF8String(str) {}
1477 
1478 inline
GNativeString(const GP<GStringRep> & str)1479 GNativeString::GNativeString(const GP<GStringRep> &str)
1480 : GUTF8String(str) {}
1481 
1482 inline
GNativeString(const char dat)1483 GNativeString::GNativeString(const char dat)
1484 : GUTF8String(dat) {}
1485 
1486 inline
GNativeString(const char * str)1487 GNativeString::GNativeString(const char *str)
1488 : GUTF8String(str) {}
1489 
1490 inline
GNativeString(const unsigned char * str)1491 GNativeString::GNativeString(const unsigned char *str)
1492 : GUTF8String(str) {}
1493 
1494 inline
GNativeString(const uint16_t * str)1495 GNativeString::GNativeString(const uint16_t *str)
1496 : GUTF8String(str) {}
1497 
1498 inline
GNativeString(const uint32_t * str)1499 GNativeString::GNativeString(const uint32_t *str)
1500 : GUTF8String(str) {}
1501 
1502 inline
GNativeString(const char * dat,unsigned int len)1503 GNativeString::GNativeString(const char *dat, unsigned int len)
1504 : GUTF8String(dat,len) {}
1505 
1506 inline
GNativeString(const uint16_t * dat,unsigned int len)1507 GNativeString::GNativeString(const uint16_t *dat, unsigned int len)
1508 : GUTF8String(dat,len) {}
1509 
1510 inline
GNativeString(const uint32_t * dat,unsigned int len)1511 GNativeString::GNativeString(const uint32_t *dat, unsigned int len)
1512 : GUTF8String(dat,len) {}
1513 
1514 inline
GNativeString(const GNativeString & str)1515 GNativeString::GNativeString(const GNativeString &str)
1516 : GUTF8String(str) {}
1517 
1518 inline
GNativeString(const int number)1519 GNativeString::GNativeString(const int number)
1520 : GUTF8String(number) {}
1521 
1522 inline
GNativeString(const double number)1523 GNativeString::GNativeString(const double number)
1524 : GUTF8String(number) {}
1525 
1526 inline
GNativeString(const GNativeString & fmt,va_list & args)1527 GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1528 : GUTF8String(fmt,args) {}
1529 
1530 #else // HAS_WCHAR
1531 
1532 /// Initialize this string class
1533 inline void
init(void)1534 GNativeString::init(void)
1535 { GBaseString::init(); }
1536 
1537 /// Initialize this string class
1538 inline GNativeString &
init(const GP<GStringRep> & rep)1539 GNativeString::init(const GP<GStringRep> &rep)
1540 {
1541   GP<GStringRep>::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep);
1542   init();
1543   return *this;
1544 }
1545 
1546 inline GNativeString
substr(int from,int len)1547 GNativeString::substr(int from, int len) const
1548 { return GNativeString(*this, from, len); }
1549 
1550 inline GNativeString &
vformat(const GNativeString & fmt,va_list & args)1551 GNativeString::vformat(const GNativeString &fmt, va_list &args)
1552 { return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); }
1553 
1554 inline GNativeString
toEscaped(const bool tosevenbit)1555 GNativeString::toEscaped( const bool tosevenbit ) const
1556 { return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); }
1557 
1558 inline
GNativeString(const GUTF8String & str)1559 GNativeString::GNativeString(const GUTF8String &str)
1560 {
1561   if (str.length())
1562     init(str->toNative(GStringRep::NOT_ESCAPED));
1563   else
1564     init((GP<GStringRep>)str);
1565 }
1566 
1567 inline
GNativeString(const GP<GStringRep> & str)1568 GNativeString::GNativeString(const GP<GStringRep> &str)
1569 {
1570   if (str)
1571     init(str->toNative(GStringRep::NOT_ESCAPED));
1572   else
1573     init(str);
1574 }
1575 
1576 inline
GNativeString(const GBaseString & str)1577 GNativeString::GNativeString(const GBaseString &str)
1578 {
1579   if (str.length())
1580     init(str->toNative(GStringRep::NOT_ESCAPED));
1581   else
1582     init((GP<GStringRep>)str);
1583 }
1584 
1585 
1586 inline
GNativeString(const GNativeString & fmt,va_list & args)1587 GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1588 {
1589   if (fmt.ptr)
1590     init(fmt->vformat(args));
1591   else
1592     init(fmt);
1593 }
1594 
1595 inline GNativeString
create(const char * buf,const unsigned int bufsize)1596 GNativeString::create( const char *buf, const unsigned int bufsize )
1597 {
1598   return GNativeString(buf,bufsize);
1599 }
1600 
1601 inline GNativeString
create(const uint16_t * buf,const unsigned int bufsize)1602 GNativeString::create( const uint16_t *buf, const unsigned int bufsize )
1603 {
1604   return GNativeString(buf,bufsize);
1605 }
1606 
1607 inline GNativeString
create(const uint32_t * buf,const unsigned int bufsize)1608 GNativeString::create( const uint32_t *buf, const unsigned int bufsize )
1609 {
1610   return GNativeString(buf,bufsize);
1611 }
1612 
1613 inline GNativeString&
1614 GNativeString::operator= (const GP<GStringRep> &str)
1615 { return init(str); }
1616 
1617 inline GNativeString&
1618 GNativeString::operator= (const GBaseString &str)
1619 { return init(str); }
1620 
1621 inline GNativeString&
1622 GNativeString::operator= (const GUTF8String &str)
1623 { return init(str); }
1624 
1625 inline GNativeString&
1626 GNativeString::operator= (const GNativeString &str)
1627 { return init(str); }
1628 
1629 inline GNativeString
upcase(void)1630 GNativeString::upcase( void ) const
1631 {
1632   if (ptr) return (*this)->upcase();
1633   return *this;
1634 }
1635 
1636 inline GNativeString
downcase(void)1637 GNativeString::downcase( void ) const
1638 {
1639   if (ptr) return (*this)->downcase();
1640   return *this;
1641 }
1642 
1643 #endif // HAS_WCHAR
1644 
1645 inline bool
1646 operator==(const char *s1, const GBaseString &s2)
1647 { return !s2.cmp(s1); }
1648 
1649 inline bool
1650 operator!=(const char *s1, const GBaseString &s2)
1651 { return !!s2.cmp(s1); }
1652 
1653 inline bool
1654 operator>=(const char    *s1, const GBaseString &s2)
1655 { return (s2.cmp(s1)<=0); }
1656 
1657 inline bool
1658 operator>=(const char s1, const GBaseString &s2)
1659 { return (s2.cmp(s1)<=0); }
1660 
1661 inline bool
1662 operator<(const char *s1, const GBaseString &s2)
1663 { return (s2.cmp(s1)>0); }
1664 
1665 inline bool
1666 operator<(const char s1, const GBaseString &s2)
1667 { return (s2.cmp(s1)>0); }
1668 
1669 inline bool
1670 operator> (const char    *s1, const GBaseString &s2)
1671 { return (s2.cmp(s1)<0); }
1672 
1673 inline bool
1674 operator> (const char s1, const GBaseString &s2)
1675 { return (s2.cmp(s1)<0); }
1676 
1677 inline bool
1678 operator<=(const char    *s1, const GBaseString &s2)
1679 { return !(s1>s2); }
1680 
1681 inline bool
1682 operator<=(const char    s1, const GBaseString &s2)
1683 { return !(s1>s2); }
1684 
1685 // ------------------- The end
1686 
1687 
1688 #ifdef HAVE_NAMESPACES
1689 }
1690 # ifndef NOT_USING_DJVU_NAMESPACE
1691 using namespace DJVU;
1692 # endif
1693 #endif
1694 #endif
1695 
1696