1 //C- -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001 AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software. Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C- ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE." Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License. This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55
56 #ifndef _GSTRING_H_
57 #define _GSTRING_H_
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64
65 /** @name GString.h
66
    Files #"GString.h"# and #"GString.cpp"# implement a general
    purpose string class \Ref{GBaseString}, with derived types
    \Ref{GUTF8String} and \Ref{GNativeString} for UTF8 MBS encoding
    and the current Native MBS encoding respectively.  This
    implementation relies on smart pointers (see
    \Ref{GSmartPointer.h}).
73
74 {\bf Historical Comments} --- At some point during the DjVu
75 research era, it became clear that C++ compilers rarely provided
76 portable libraries. We then decided to avoid fancy classes (like
77 #iostream# or #string#) and to rely only on the good old C
78 library. A good string class however is very useful. We had
79 already randomly picked letter 'G' to prefix class names and we
80 logically derived the new class name. Native English speakers
81 kept laughing in hiding. This is ironic because we completely
82 forgot this letter 'G' when creating more challenging things
83 like the ZP Coder or the IW44 wavelets.
84
85 {\bf Later Changes}
    When converting to I18N, we (Lizardtech) decided that two string classes
    were needed, replacing the original GString with \Ref{GUTF8String} and
    \Ref{GNativeString}.
89
90 @memo
91 General purpose string class.
92 @author
93 L\'eon Bottou <leonb@research.att.com> -- initial implementation.\\
94
95 // From: Leon Bottou, 1/31/2002
96 // This file has very little to do with my initial implementation.
97 // It has been practically rewritten by Lizardtech for i18n changes.
98 // My original implementation was very small in comparison
99 // <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
100 // In my opinion, the duplication of the string classes is a failed
101 // attempt to use the type system to enforce coding policies.
102 // This could be fixed. But there are better things to do in djvulibre.
103
104 */
105 //@{
106
107
108 #include "DjVuGlobal.h"
109 #include "GContainer.h"
110
111 #include <stddef.h>
112 #include <stdlib.h>
113 #include <stdarg.h>
114 #ifdef _WIN32
115 # include <windows.h>
116 # ifndef AUTOCONF
117 # define HAS_WCHAR 1
118 # define HAS_WCTYPE 1
119 # define HAS_MBSTATE 1
120 # endif
121 #endif
122
123 #if HAS_WCHAR
124 # include <wchar.h>
125 #elif HAVE_WCHAR_H
126 # include <wchar.h>
127 #endif
128
// Fixed-width unsigned integer types used below for UTF-16 and UCS-4
// code units.  Prefer the system headers; fall back to hand-written
// typedefs only when the build configuration advertises neither header.
#if HAVE_STDINT_H
# include <stdint.h>
#elif HAVE_INTTYPES_H
# include <inttypes.h>
#else
# ifdef _WIN32
typedef unsigned __int32 uint32_t;
typedef unsigned __int16 uint16_t;
# else
// Best-guess widths: the pragma asks whoever builds this configuration
// to confirm that int is 32 bits and short is 16 bits on the target.
#  pragma message("Please verify defs for uint32_t and uint16_t")
typedef unsigned int   uint32_t; // verify
typedef unsigned short uint16_t; // verify
# endif
#endif
143
144 #ifdef HAVE_NAMESPACES
145 namespace DJVU {
146 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
147 }
148 #endif
149 #endif
150
// Stand-in for mbstate_t: when neither HAS_MBSTATE nor HAVE_MBSTATE_T
// is set by the build configuration, a plain int takes its place so the
// declarations below that mention mbstate_t still compile.
#if !HAS_MBSTATE && !defined(HAVE_MBSTATE_T)
typedef int mbstate_t;
#endif

// Forward declarations of the public string classes.
class GNativeString;
class GUTF8String;
class GBaseString;
160
161 // Internal string representation.
162 class DJVUAPI GStringRep : public GPEnabled
163 {
164 public:
165 enum EncodeType { XUCS4, XUCS4BE, XUCS4LE, XUCS4_2143, XUCS4_3412,
166 XUTF16, XUTF16BE, XUTF16LE, XUTF8, XEBCDIC, XOTHER } ;
167
168 enum EscapeMode { UNKNOWN_ESCAPED=0, IS_ESCAPED=1, NOT_ESCAPED=2 };
169
170 class UTF8;
171 friend class UTF8;
172 class Unicode;
173 friend class Unicode;
174
175 class ChangeLocale;
176 #if HAS_WCHAR
177 class Native;
178 friend class Native;
179 #endif // HAS_WCHAR
180 friend class GBaseString;
181 friend class GUTF8String;
182 friend class GNativeString;
183 friend DJVUAPI unsigned int hash(const GBaseString &ref);
184
185 public:
186 // default constructor
187 GStringRep(void);
188 // virtual destructor
189 virtual ~GStringRep();
190
191 // Other virtual methods.
192 // Create an empty string.
193 virtual GP<GStringRep> blank(const unsigned int sz) const = 0;
194 // Create a duplicate at the given size.
195 GP<GStringRep> getbuf(int n) const;
196 // Change the value of one of the bytes.
197 GP<GStringRep> setat(int n, char ch) const;
198 // Append a string.
199 virtual GP<GStringRep> append(const GP<GStringRep> &s2) const = 0;
200 // Test if isUTF8.
isUTF8(void)201 virtual bool isUTF8(void) const { return false; }
202 // Test if Native.
isNative(void)203 virtual bool isNative(void) const { return false; }
204 // Convert to Native.
205 virtual GP<GStringRep> toNative(
206 const EscapeMode escape=UNKNOWN_ESCAPED ) const = 0;
207 // Convert to UTF8.
208 virtual GP<GStringRep> toUTF8(const bool nothrow=false) const = 0;
209 // Convert to same as current class.
210 virtual GP<GStringRep> toThis(
211 const GP<GStringRep> &rep,const GP<GStringRep> &locale=0) const = 0;
212 // Compare with #s2#.
213 virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const = 0;
214
215 // Convert strings to numbers.
216 virtual int toInt(void) const = 0;
217 virtual long int toLong(
218 const int pos, int &endpos, const int base=10) const = 0;
219 virtual unsigned long toULong(
220 const int pos, int &endpos, const int base=10) const = 0;
221 virtual double toDouble(const int pos, int &endpos) const = 0;
222
223 // return the position of the next character
224 int nextChar( const int from=0 ) const;
225
226 // return next non space position
227 int nextNonSpace( const int from=0, const int len=(-1) ) const;
228
229 // return next white space position
230 int nextSpace( const int from=0, const int len=(-1) ) const;
231
232 // return the position after the last non-whitespace character.
233 int firstEndSpace( int from=0, const int len=(-1) ) const;
234
235 // Create an empty string.
236 template <class TYPE> static GP<GStringRep> create(
237 const unsigned int sz,TYPE *);
238 // Creates with a strdup string.
239 GP<GStringRep> strdup(const char *s) const;
240
241 // Creates by appending to the current string
242 GP<GStringRep> append(const char *s2) const;
243
244 // Creates with a concat operation.
245 GP<GStringRep> concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const;
246 GP<GStringRep> concat(const char *s1,const GP<GStringRep> &s2) const;
247 GP<GStringRep> concat(const GP<GStringRep> &s1,const char *s2) const;
248 GP<GStringRep> concat(const char *s1,const char *s2) const;
249
250 /* Creates with a strdup and substr. Negative values have strlen(s)+1
251 added to them.
252 */
253 GP<GStringRep> substr(
254 const char *s,const int start,const int length=(-1)) const;
255
256 GP<GStringRep> substr(
257 const uint16_t *s,const int start,const int length=(-1)) const;
258
259 GP<GStringRep> substr(
260 const uint32_t *s,const int start,const int length=(-1)) const;
261
262 /** Initializes a string with a formatted string (as in #vprintf#). The
263 string is re-initialized with the characters generated according to the
264 specified format #fmt# and using the optional arguments. See the ANSI-C
265 function #vprintf()# for more information. The current implementation
266 will cause a segmentation violation if the resulting string is longer
267 than 32768 characters. */
268 GP<GStringRep> vformat(va_list args) const;
269 // -- SEARCHING
270
271 static GP<GStringRep> UTF8ToNative( const char *s,
272 const EscapeMode escape=UNKNOWN_ESCAPED );
273 static GP<GStringRep> NativeToUTF8( const char *s );
274
275 // Creates an uppercase version of the current string.
276 GP<GStringRep> upcase(void) const;
277 // Creates a lowercase version of the current string.
278 GP<GStringRep> downcase(void) const;
279
280 /** Returns the next UCS4 character, and updates the pointer s. */
281 static uint32_t UTF8toUCS4(
282 unsigned char const *&s, void const * const endptr );
283
284 /** Returns the number of bytes in next UCS4 character,
285 and sets #w# to the next UCS4 chacter. */
UTF8toUCS4(uint32_t & w,unsigned char const s[],void const * const endptr)286 static int UTF8toUCS4(
287 uint32_t &w, unsigned char const s[], void const * const endptr )
288 { unsigned char const *r=s;w=UTF8toUCS4(r,endptr);return (int)((size_t)r-(size_t)s); }
289
290 /** Returns the next UCS4 word from the UTF16 string. */
291 static int UTF16toUCS4(
292 uint32_t &w, uint16_t const * const s,void const * const eptr);
293
294 static int UCS4toUTF16(
295 uint32_t w, uint16_t &w1, uint16_t &w2);
296
297 int cmp(const char *s2, const int len=(-1)) const;
298 static int cmp(
299 const GP<GStringRep> &s1, const GP<GStringRep> &s2, const int len=(-1)) ;
300 static int cmp(
301 const GP<GStringRep> &s1, const char *s2, const int len=(-1));
302 static int cmp(
303 const char *s1, const GP<GStringRep> &s2, const int len=(-1));
304 static int cmp(
305 const char *s1, const char *s2, const int len=(-1));
306
307 // Lookup the next character, and return the position of the next character.
308 int getUCS4(uint32_t &w, const int from) const;
309
310 virtual unsigned char *UCS4toString(
311 const uint32_t w, unsigned char *ptr, mbstate_t *ps=0) const = 0;
312
313 static unsigned char *UCS4toUTF8(
314 const uint32_t w,unsigned char *ptr);
315
316 static unsigned char *UCS4toNative(
317 const uint32_t w,unsigned char *ptr, mbstate_t *ps);
318
319 int search(char c, int from=0) const;
320
321 int search(char const *str, int from=0) const;
322
323 int rsearch(char c, int from=0) const;
324
325 int rsearch(char const *str, int from=0) const;
326
327 int contains(char const accept[], int from=0) const;
328
329 int rcontains(char const accept[], int from=0) const;
330
331 protected:
332 // Return the next character and increment the source pointer.
333 virtual uint32_t getValidUCS4(const char *&source) const = 0;
334
335 GP<GStringRep> tocase(
336 bool (*xiswcase)(const unsigned long wc),
337 unsigned long (*xtowcase)(const unsigned long wc)) const;
338
339 // Tests if the specified character passes the xiswtest. If so, the
340 // return pointer is incremented to the next character, otherwise the
341 // specified #ptr# is returned.
342 const char * isCharType( bool (*xiswtest)(const unsigned long wc), const char *ptr,
343 const bool reverse=false) const;
344
345 // Find the next character position that passes the isCharType test.
346 int nextCharType(
347 bool (*xiswtest)(const unsigned long wc),const int from,const int len,
348 const bool reverse=false) const;
349
350 static bool giswspace(const unsigned long w);
351 static bool giswupper(const unsigned long w);
352 static bool giswlower(const unsigned long w);
353 static unsigned long gtowupper(const unsigned long w);
354 static unsigned long gtowlower(const unsigned long w);
355
356 virtual void set_remainder( void const * const buf, const unsigned int size,
357 const EncodeType encodetype);
358 virtual void set_remainder( void const * const buf, const unsigned int size,
359 const GP<GStringRep> &encoding );
360 virtual void set_remainder ( const GP<Unicode> &remainder );
361
362 virtual GP<Unicode> get_remainder( void ) const;
363
364 public:
365 /* Returns a copy of this string with characters used in XML with
366 '<' to "<", '>' to ">", '&' to "&" '\'' to
367 "'", and '\"' to """. Characters 0x01 through
368 0x1f are also escaped. */
369 GP<GStringRep> toEscaped( const bool tosevenbit ) const;
370
371 // Tests if a string is legally encoded in the current character set.
372 virtual bool is_valid(void) const = 0;
373 #if HAS_WCHAR
374 virtual int ncopy(wchar_t * const buf, const int buflen) const = 0;
375 #endif
376 protected:
377
378 // Actual string data.
379 int size;
380 char *data;
381 };
382
383 class DJVUAPI GStringRep::UTF8 : public GStringRep
384 {
385 public:
386 // default constructor
387 UTF8(void);
388 // virtual destructor
389 virtual ~UTF8();
390
391 // Other virtual methods.
392 virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
393 virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
394 // Test if Native.
395 virtual bool isUTF8(void) const;
396 // Convert to Native.
397 virtual GP<GStringRep> toNative(
398 const EscapeMode escape=UNKNOWN_ESCAPED) const;
399 // Convert to UTF8.
400 virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
401 // Convert to same as current class.
402 virtual GP<GStringRep> toThis(
403 const GP<GStringRep> &rep,const GP<GStringRep> &) const;
404 // Compare with #s2#.
405 virtual int cmp(const GP<GStringRep> &s2,const int len=(-1)) const;
406
407 static GP<GStringRep> create(const unsigned int sz = 0);
408
409 // Convert strings to numbers.
410 virtual int toInt(void) const;
411 virtual long int toLong(
412 const int pos, int &endpos, const int base=10) const;
413 virtual unsigned long toULong(
414 const int pos, int &endpos, const int base=10) const;
415 virtual double toDouble(
416 const int pos, int &endpos) const;
417
418 // Create a strdup string.
419 static GP<GStringRep> create(const char *s);
420
421 // Creates with a concat operation.
422 static GP<GStringRep> create(
423 const GP<GStringRep> &s1,const GP<GStringRep> &s2);
424 static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
425 static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
426 static GP<GStringRep> create( const char *s1,const char *s2);
427
428 // Create with a strdup and substr operation.
429 static GP<GStringRep> create(
430 const char *s,const int start,const int length=(-1));
431
432 static GP<GStringRep> create(
433 const uint16_t *s,const int start,const int length=(-1));
434
435 static GP<GStringRep> create(
436 const uint32_t *s,const int start,const int length=(-1));
437
438 static GP<GStringRep> create_format(const char fmt[],...);
439 static GP<GStringRep> create(const char fmt[],va_list& args);
440
441 virtual unsigned char *UCS4toString(
442 const uint32_t w,unsigned char *ptr, mbstate_t *ps=0) const;
443
444 // Tests if a string is legally encoded in the current character set.
445 virtual bool is_valid(void) const;
446 #if HAS_WCHAR
447 virtual int ncopy(wchar_t * const buf, const int buflen) const;
448 #endif
449 friend class GBaseString;
450
451 protected:
452 // Return the next character and increment the source pointer.
453 virtual uint32_t getValidUCS4(const char *&source) const;
454 };
455
456
/** General purpose character string.
    Each derived instance of class #GBaseString# represents a
    character string.  Overloaded operators provide a value semantic
    to #GBaseString# objects.  Conversion operators and constructors
    transparently convert between #GBaseString# objects and
    #const char*# pointers.  The #GBaseString# class has no public
    constructors, since a derived type should always be used
    to specify the desired multibyte character encoding.
465
    Functions taking strings as arguments should declare their
    arguments as "#const char*#".  Such functions will work equally
    well with derived #GBaseString# objects since there is a fast
    conversion operator from the derived #GBaseString# objects
    to "#const char*#".  Functions returning strings should return
    #GUTF8String# or #GNativeString# objects because the class will
    automatically manage the necessary memory.
473
474 Characters in the string can be identified by their position. The
475 first character of a string is numbered zero. Negative positions
476 represent characters relative to the end of the string (i.e.
477 position #-1# accesses the last character of the string,
478 position #-2# represents the second last character, etc.) */
479
480 class DJVUAPI GBaseString : protected GP<GStringRep>
481 {
482 public:
483 enum EscapeMode {
484 UNKNOWN_ESCAPED=GStringRep::UNKNOWN_ESCAPED,
485 IS_ESCAPED=GStringRep::IS_ESCAPED,
486 NOT_ESCAPED=GStringRep::NOT_ESCAPED };
487
488 friend class GUTF8String;
489 friend class GNativeString;
490 protected:
491 // Sets the gstr pointer;
492 inline void init(void);
493
494 ~GBaseString();
495 inline GBaseString &init(const GP<GStringRep> &rep);
496
497 // -- CONSTRUCTORS
498 /** Null constructor. Constructs an empty string. */
499 GBaseString( void );
500
501 public:
502 // -- ACCESS
503 /** Converts a string into a constant null terminated character
504 array. This conversion operator is very efficient because
505 it simply returns a pointer to the internal string data. The
506 returned pointer remains valid as long as the string is
507 unmodified. */
508 operator const char* ( void ) const ;
509 /// Returns the string length.
510 unsigned int length( void ) const;
511 /** Returns true if and only if the string contains zero characters.
512 This operator is useful for conditional expression in control
513 structures.
514 \begin{verbatim}
515 if (! str) { ... }
516 while (!! str) { ... } -- Note the double operator!
517 \end{verbatim}
518 Class #GBaseString# does not to support syntax
519 "#if# #(str)# #{}#" because the required conversion operator
520 introduces dangerous ambiguities with certain compilers. */
521 bool operator! ( void ) const;
522
523 // -- INDEXING
524 /** Returns the character at position #n#. An exception
525 \Ref{GException} is thrown if number #n# is not in range #-len#
526 to #len-1#, where #len# is the length of the string. The first
527 character of a string is numbered zero. Negative positions
528 represent characters relative to the end of the string. */
529 char operator[] (int n) const;
530 /// Returns #TRUE# if the string contains an integer number.
531 bool is_int(void) const;
532 /// Returns #TRUE# if the string contains a float number.
533 bool is_float(void) const;
534
535 /** Converts strings between native & UTF8 **/
536 GNativeString getUTF82Native( EscapeMode escape=UNKNOWN_ESCAPED ) const;
537 GUTF8String getNative2UTF8( void ) const;
538
539 // -- ALTERING
540 /// Reinitializes a string with the null string.
541 void empty( void );
542 // -- SEARCHING
543 /** Searches character #c# in the string, starting at position
544 #from# and scanning forward until reaching the end of the
545 string. This function returns the position of the matching
546 character. It returns #-1# if character #c# cannot be found. */
547 int search(char c, int from=0) const;
548
549 /** Searches sub-string #str# in the string, starting at position
550 #from# and scanning forward until reaching the end of the
551 string. This function returns the position of the first
552 matching character of the sub-string. It returns #-1# if
553 string #str# cannot be found. */
554 int search(const char *str, int from=0) const;
555
556 /** Searches character #c# in the string, starting at position
557 #from# and scanning backwards until reaching the beginning of
558 the string. This function returns the position of the matching
559 character. It returns #-1# if character #c# cannot be found. */
560 int rsearch(char c, const int from=0) const;
561 /** Searches sub-string #str# in the string, starting at position
562 #from# and scanning backwards until reaching the beginning of
563 the string. This function returns the position of the first
564 matching character of the sub-string. It returns #-1# if
565 string #str# cannot be found. */
566 int rsearch(const char *str, const int from=0) const;
567 /** Searches for any of the specified characters in the accept
568 string. It returns #-1# if the none of the characters and
569 be found, otherwise the position of the first match. */
570 int contains(const char accept[], const int from=0) const;
571 /** Searches for any of the specified characters in the accept
572 string. It returns #-1# if the none of the characters and be
573 found, otherwise the position of the last match. */
574 int rcontains(const char accept[], const int from=0) const;
575
576 /** Concatenates strings. Returns a string composed by concatenating
577 the characters of strings #s1# and #s2#. */
578 GUTF8String operator+(const GUTF8String &s2) const;
579 GNativeString operator+(const GNativeString &s2) const;
580
581 /** Returns an integer. Implements i18n atoi. */
582 int toInt(void) const;
583
584 /** Returns a long intenger. Implments i18n strtol. */
585 long toLong(const int pos, int &endpos, const int base=10) const;
586
587 /** Returns a unsigned long integer. Implements i18n strtoul. */
588 unsigned long toULong(
589 const int pos, int &endpos, const int base=10) const;
590
591 /** Returns a double. Implements the i18n strtod. */
592 double toDouble(
593 const int pos, int &endpos ) const;
594
595 /** Returns a long intenger. Implments i18n strtol. */
596 static long toLong(
597 const GUTF8String& src, const int pos, int &endpos, const int base=10);
598
599 static unsigned long toULong(
600 const GUTF8String& src, const int pos, int &endpos, const int base=10);
601
602 static double toDouble(
603 const GUTF8String& src, const int pos, int &endpos);
604
605 /** Returns a long intenger. Implments i18n strtol. */
606 static long toLong(
607 const GNativeString& src, const int pos, int &endpos, const int base=10);
608
609 static unsigned long toULong(
610 const GNativeString& src, const int pos, int &endpos, const int base=10);
611
612 static double toDouble(
613 const GNativeString& src, const int pos, int &endpos);
614
615 // -- HASHING
616
617 // -- COMPARISONS
618 /** Returns an #int#. Compares string with #s2# and returns
619 sorting order. */
620 int cmp(const GBaseString &s2, const int len=(-1)) const;
621 /** Returns an #int#. Compares string with #s2# and returns
622 sorting order. */
623 int cmp(const char *s2, const int len=(-1)) const;
624 /** Returns an #int#. Compares string with #s2# and returns
625 sorting order. */
626 int cmp(const char s2) const;
627 /** Returns an #int#. Compares #s2# with #s2# and returns
628 sorting order. */
629 static int cmp(const char *s1, const char *s2, const int len=(-1));
630 /** Returns a boolean. The Standard C strncmp takes two string and
631 compares the first N characters. static bool GBaseString::ncmp
632 will compare #s1# with #s2# with the #len# characters starting
633 from the beginning of the string. */
634 /** String comparison. Returns true if and only if character
635 strings #s1# and #s2# are equal (as with #strcmp#.)
636 */
637 bool operator==(const GBaseString &s2) const;
638 bool operator==(const char *s2) const;
639 friend bool operator==(const char *s1, const GBaseString &s2);
640
641 /** String comparison. Returns true if and only if character
642 strings #s1# and #s2# are not equal (as with #strcmp#.)
643 */
644 bool operator!=(const GBaseString &s2) const;
645 bool operator!=(const char *s2) const;
646 friend bool operator!=(const char *s1, const GBaseString &s2);
647
648 /** String comparison. Returns true if and only if character
649 strings #s1# is lexicographically greater than or equal to
650 string #s2# (as with #strcmp#.) */
651 bool operator>=(const GBaseString &s2) const;
652 bool operator>=(const char *s2) const;
653 bool operator>=(const char s2) const;
654 friend bool operator>=(const char *s1, const GBaseString &s2);
655 friend bool operator>=(const char s1, const GBaseString &s2);
656
657 /** String comparison. Returns true if and only if character
658 strings #s1# is lexicographically less than string #s2#
659 (as with #strcmp#.)
660 */
661 bool operator<(const GBaseString &s2) const;
662 bool operator<(const char *s2) const;
663 bool operator<(const char s2) const;
664 friend bool operator<(const char *s1, const GBaseString &s2);
665 friend bool operator<(const char s1, const GBaseString &s2);
666
667 /** String comparison. Returns true if and only if character
668 strings #s1# is lexicographically greater than string #s2#
669 (as with #strcmp#.)
670 */
671 bool operator> (const GBaseString &s2) const;
672 bool operator> (const char *s2) const;
673 bool operator> (const char s2) const;
674 friend bool operator> (const char *s1, const GBaseString &s2);
675 friend bool operator> (const char s1, const GBaseString &s2);
676
677 /** String comparison. Returns true if and only if character
678 strings #s1# is lexicographically less than or equal to string
679 #s2# (as with #strcmp#.)
680 */
681 bool operator<=(const GBaseString &s2) const;
682 bool operator<=(const char *s2) const;
683 bool operator<=(const char s2) const;
684 friend bool operator<=(const char *s1, const GBaseString &s2);
685 friend bool operator<=(const char s1, const GBaseString &s2);
686
687 /** Returns an integer. Implements a functional i18n atoi. Note
688 that if you pass a GBaseString that is not in Native format
689 the results may be disparaging. */
690
691 /** Returns a hash code for the string. This hashing function
692 helps when creating associative maps with string keys (see
693 \Ref{GMap}). This hash code may be reduced to an arbitrary
694 range by computing its remainder modulo the upper bound of
695 the range. */
696 friend DJVUAPI unsigned int hash(const GBaseString &ref);
697 // -- HELPERS
698 friend class GStringRep;
699
700 /// Returns next non space position.
701 int nextNonSpace( const int from=0, const int len=(-1) ) const;
702
703 /// Returns next character position.
704 int nextChar( const int from=0 ) const;
705
706 /// Returns next non space position.
707 int nextSpace( const int from=0, const int len=(-1) ) const;
708
709 /// return the position after the last non-whitespace character.
710 int firstEndSpace( const int from=0,const int len=(-1) ) const;
711
712 /// Tests if the string is legally encoded in the current codepage.
713 bool is_valid(void) const;
714
715 /// copy to a wchar_t buffer
716 #if HAS_WCHAR
717 int ncopy(wchar_t * const buf, const int buflen) const;
718 #endif
719 protected:
720 const char *gstr;
721 static void throw_illegal_subscript() no_return;
722 static const char *nullstr;
723 public:
724 GNativeString UTF8ToNative(
725 const bool currentlocale=false,
726 const EscapeMode escape=UNKNOWN_ESCAPED) const;
727 GUTF8String NativeToUTF8(void) const;
728 protected:
729 inline int CheckSubscript(int n) const;
730 };
731
732 /** General purpose character string.
733 Each instance of class #GUTF8String# represents a character
734 string. Overloaded operators provide a value semantic to
735 #GUTF8String# objects. Conversion operators and constructors
736 transparently convert between #GUTF8String# objects and
737 #const char*# pointers.
738
739 Functions taking strings as arguments should declare their
740 arguments as "#const char*#". Such functions will work equally
741 well with #GUTF8String# objects since there is a fast conversion
742 operator from #GUTF8String# to "#const char*#". Functions
743 returning strings should return #GUTF8String# or #GNativeString#
744 objects because the class will automatically manage the necessary
745 memory.
746
747 Characters in the string can be identified by their position. The
748 first character of a string is numbered zero. Negative positions
749 represent characters relative to the end of the string (i.e.
750 position #-1# accesses the last character of the string,
751 position #-2# represents the second last character, etc.) */
752
753 class DJVUAPI GUTF8String : public GBaseString
754 {
755 public:
756 ~GUTF8String();
757 inline void init(void);
758
759 inline GUTF8String &init(const GP<GStringRep> &rep);
760
761 // -- CONSTRUCTORS
762 /** Null constructor. Constructs an empty string. */
763 GUTF8String(void);
764 /// Constructs a string from a character.
765 GUTF8String(const char dat);
766 /// Constructs a string from a null terminated character array.
767 GUTF8String(const char *str);
768 /// Constructs a string from a null terminated character array.
769 GUTF8String(const unsigned char *str);
770 GUTF8String(const uint16_t *dat);
771 GUTF8String(const uint32_t *dat);
772 /** Constructs a string from a character array. Elements of the
773 character array #dat# are added into the string until the
774 string length reaches #len# or until encountering a null
775 character (whichever comes first). */
776 GUTF8String(const char *dat, unsigned int len);
777 GUTF8String(const uint16_t *dat, unsigned int len);
778 GUTF8String(const uint32_t *dat, unsigned int len);
779
780 /// Construct from base class.
781 GUTF8String(const GP<GStringRep> &str);
782 GUTF8String(const GBaseString &str);
783 GUTF8String(const GUTF8String &str);
784 GUTF8String(const GNativeString &str);
785 /** Constructs a string from a character array. Elements of the
786 character array #dat# are added into the string until the
787 string length reaches #len# or until encountering a null
788 character (whichever comes first). */
789 GUTF8String(const GBaseString &gs, int from, int len);
790
791 /** Copy a null terminated character array. Resets this string
792 with the character string contained in the null terminated
793 character array #str#. */
794 GUTF8String& operator= (const char str);
795 GUTF8String& operator= (const char *str);
796 inline GUTF8String& operator= (const GP<GStringRep> &str);
797 inline GUTF8String& operator= (const GBaseString &str);
798 inline GUTF8String& operator= (const GUTF8String &str);
799 inline GUTF8String& operator= (const GNativeString &str);
800
801 /** Constructs a string with a formatted string (as in #vprintf#).
802 The string is re-initialized with the characters generated
803 according to the specified format #fmt# and using the optional
804 arguments. See the ANSI-C function #vprintf()# for more
805 information. The current implementation will cause a
806 segmentation violation if the resulting string is longer
807 than 32768 characters. */
808 GUTF8String(const GUTF8String &fmt, va_list &args);
809
810 /// Constructs a string from a character.
811 /** Constructs a string with a human-readable representation of
812 integer #number#. The format is similar to format #"%d"# in
813 function #printf#. */
814 GUTF8String(const int number);
815
816 /** Constructs a string with a human-readable representation of
817 floating point number #number#. The format is similar to
818 format #"%f"# in function #printf#. */
819 GUTF8String(const double number);
820
821
822 /** Initializes a string with a formatted string (as in #printf#).
823 The string is re-initialized with the characters generated
824 according to the specified format #fmt# and using the optional
825 arguments. See the ANSI-C function #printf()# for more
826 information. The current implementation will cause a
827 segmentation violation if the resulting string is longer
828 than 32768 characters. */
829 GUTF8String &format(const char *fmt, ... );
830 /** Initializes a string with a formatted string (as in #vprintf#).
831 The string is re-initialized with the characters generated
832 according to the specified format #fmt# and using the optional
833 arguments. See the ANSI-C function #vprintf()# for more
834 information. The current implementation will cause a
835 segmentation violation if the resulting string is longer
836 than 32768 characters. */
837 GUTF8String &vformat(const GUTF8String &fmt, va_list &args);
838
839 /** Returns a copy of this string with characters used in XML with
840 '<' to "<", '>' to ">", '&' to "&" '\'' to
841 "'", and '\"' to """. Characters 0x01 through
842 0x1f are also escaped. */
843 GUTF8String toEscaped( const bool tosevenbit=false ) const;
844
845 /** Converts strings containing HTML/XML escaped characters into
846 their unescaped forms. Numeric representations of characters
847 (e.g., "&" or "&" for "*") are the only forms
848 converted by this function. */
849 GUTF8String fromEscaped( void ) const;
850
851 /** Converts strings containing HTML/XML escaped characters
852 (e.g., "<" for "<") into their unescaped forms. The
853 conversion is partially defined by the ConvMap argument which
854 specifies the conversion strings to be recognized. Numeric
855 representations of characters (e.g., "&" or "&"
856 for "*") are always converted. */
857 GUTF8String fromEscaped(
858 const GMap<GUTF8String,GUTF8String> ConvMap ) const;
859
860
861 // -- CONCATENATION
862 /// Appends character #ch# to the string.
863 GUTF8String& operator+= (char ch);
864
865 /// Appends the null terminated character array #str# to the string.
866 GUTF8String& operator+= (const char *str);
867 /// Appends the specified GBaseString to the string.
868 GUTF8String& operator+= (const GBaseString &str);
869
870 /** Returns a sub-string. The sub-string is composed by copying
871 #len# characters starting at position #from# in this string.
872 The length of the resulting string may be smaller than #len#
873 if the specified range is too large. */
874 GUTF8String substr(int from, int len/*=(-1)*/) const;
875
876 /** Returns an upper case copy of this string. The returned string
877 contains a copy of the current string with all letters turned
878 into upper case letters. */
879 GUTF8String upcase( void ) const;
880 /** Returns an lower case copy of this string. The returned string
881 contains a copy of the current string with all letters turned
882 into lower case letters. */
883 GUTF8String downcase( void ) const;
884
885 /** Concatenates strings. Returns a string composed by concatenating
886 the characters of strings #s1# and #s2#.
887 */
888 GUTF8String operator+(const GBaseString &s2) const;
889 GUTF8String operator+(const GUTF8String &s2) const;
890 GUTF8String operator+(const GNativeString &s2) const;
891 GUTF8String operator+(const char *s2) const;
892 friend DJVUAPI GUTF8String operator+(const char *s1, const GUTF8String &s2);
893
894 /** Provides a direct access to the string buffer. Returns a
895 pointer for directly accessing the string buffer. This pointer
896 valid remains valid as long as the string is not modified by
897 other means. Positive values for argument #n# represent the
898 length of the returned buffer. The returned string buffer will
899 be large enough to hold at least #n# characters plus a null
900 character. If #n# is positive but smaller than the string
901 length, the string will be truncated to #n# characters. */
902 char *getbuf(int n = -1);
903 /** Set the character at position #n# to value #ch#. An exception
904 \Ref{GException} is thrown if number #n# is not in range #-len#
905 to #len#, where #len# is the length of the string. If character
906 #ch# is zero, the string is truncated at position #n#. The
907 first character of a string is numbered zero. Negative
908 positions represent characters relative to the end of the
909 string. If position #n# is equal to the length of the string,
910 this function appends character #ch# to the end of the string. */
911 void setat(const int n, const char ch);
912 public:
913 typedef enum GStringRep::EncodeType EncodeType;
914 static GUTF8String create(void const * const buf,
915 const unsigned int size,
916 const EncodeType encodetype, const GUTF8String &encoding);
917 static GUTF8String create( void const * const buf,
918 unsigned int size, const EncodeType encodetype );
919 static GUTF8String create( void const * const buf,
920 const unsigned int size, const GUTF8String &encoding );
921 static GUTF8String create( void const * const buf,
922 const unsigned int size, const GP<GStringRep::Unicode> &remainder);
923 GP<GStringRep::Unicode> get_remainder(void) const;
924 static GUTF8String create( const char *buf, const unsigned int bufsize );
925 static GUTF8String create( const uint16_t *buf, const unsigned int bufsize );
926 static GUTF8String create( const uint32_t *buf, const unsigned int bufsize );
927 };
928
929
930 #if !HAS_WCHAR
931 #define GBaseString GUTF8String
932 #endif
933
934 /** General purpose character string.
935 Each instance of class #GNativeString# represents a character
936 string. Overloaded operators provide a value semantic to
937 #GNativeString# objects. Conversion operators and constructors
938 transparently convert between #GNativeString# objects and
939 #const char*# pointers.
940
941 Functions taking strings as arguments should declare their
942 arguments as "#const char*#". Such functions will work equally
943 well with #GNativeString# objects since there is a fast conversion
944 operator from #GNativeString# to "#const char*#". Functions
945 returning strings should return #GUTF8String# or #GNativeString#
946 objects because the class will automatically manage the necessary
947 memory.
948
949 Characters in the string can be identified by their position. The
950 first character of a string is numbered zero. Negative positions
951 represent characters relative to the end of the string (i.e.
952 position #-1# accesses the last character of the string,
953 position #-2# represents the second last character, etc.) */
954
955 class DJVUAPI GNativeString : public GBaseString
956 {
957 public:
958 ~GNativeString();
959 // -- CONSTRUCTORS
960 /** Null constructor. Constructs an empty string. */
961 GNativeString(void);
962 /// Constructs a string from a character.
963 GNativeString(const char dat);
964 /// Constructs a string from a null terminated character array.
965 GNativeString(const char *str);
966 /// Constructs a string from a null terminated character array.
967 GNativeString(const unsigned char *str);
968 GNativeString(const uint16_t *str);
969 GNativeString(const uint32_t *str);
970 /** Constructs a string from a character array. Elements of the
971 character array #dat# are added into the string until the
972 string length reaches #len# or until encountering a null
973 character (whichever comes first). */
974 GNativeString(const char *dat, unsigned int len);
975 GNativeString(const uint16_t *dat, unsigned int len);
976 GNativeString(const uint32_t *dat, unsigned int len);
977 /// Construct from base class.
978 GNativeString(const GP<GStringRep> &str);
979 GNativeString(const GBaseString &str);
980 #if HAS_WCHAR
981 GNativeString(const GUTF8String &str);
982 #endif
983 GNativeString(const GNativeString &str);
984 /** Constructs a string from a character array. Elements of the
985 character array #dat# are added into the string until the
986 string length reaches #len# or until encountering a null
987 character (whichever comes first). */
988 GNativeString(const GBaseString &gs, int from, int len);
989
990 /** Constructs a string with a formatted string (as in #vprintf#).
991 The string is re-initialized with the characters generated
992 according to the specified format #fmt# and using the optional
993 arguments. See the ANSI-C function #vprintf()# for more
994 information. The current implementation will cause a
995 segmentation violation if the resulting string is longer than
996 32768 characters. */
997 GNativeString(const GNativeString &fmt, va_list &args);
998
999 /** Constructs a string with a human-readable representation of
1000 integer #number#. The format is similar to format #"%d"# in
1001 function #printf#. */
1002 GNativeString(const int number);
1003
1004 /** Constructs a string with a human-readable representation of
1005 floating point number #number#. The format is similar to
1006 format #"%f"# in function #printf#. */
1007 GNativeString(const double number);
1008
1009 #if !HAS_WCHAR
1010 #undef GBaseString
1011 #else
1012 /// Initialize this string class
1013 void init(void);
1014
1015 /// Initialize this string class
1016 GNativeString &init(const GP<GStringRep> &rep);
1017
1018 /** Copy a null terminated character array. Resets this string with
1019 the character string contained in the null terminated character
1020 array #str#. */
1021 GNativeString& operator= (const char str);
1022 GNativeString& operator= (const char *str);
1023 inline GNativeString& operator= (const GP<GStringRep> &str);
1024 inline GNativeString& operator= (const GBaseString &str);
1025 inline GNativeString& operator= (const GUTF8String &str);
1026 inline GNativeString& operator= (const GNativeString &str);
1027 // -- CONCATENATION
1028 /// Appends character #ch# to the string.
1029 GNativeString& operator+= (char ch);
1030 /// Appends the null terminated character array #str# to the string.
1031 GNativeString& operator+= (const char *str);
1032 /// Appends the specified GBaseString to the string.
1033 GNativeString& operator+= (const GBaseString &str);
1034
1035 /** Returns a sub-string. The sub-string is composed by copying
1036 #len# characters starting at position #from# in this string.
1037 The length of the resulting string may be smaller than #len#
1038 if the specified range is too large. */
1039 GNativeString substr(int from, int len/*=(-1)*/) const;
1040
1041 /** Returns an upper case copy of this string. The returned
1042 string contains a copy of the current string with all letters
1043 turned into upper case letters. */
1044 GNativeString upcase( void ) const;
1045 /** Returns an lower case copy of this string. The returned
1046 string contains a copy of the current string with all letters
1047 turned into lower case letters. */
1048 GNativeString downcase( void ) const;
1049
1050
1051 GNativeString operator+(const GBaseString &s2) const;
1052 GNativeString operator+(const GNativeString &s2) const;
1053 GUTF8String operator+(const GUTF8String &s2) const;
1054 GNativeString operator+(const char *s2) const;
1055 friend DJVUAPI GNativeString operator+(const char *s1, const GNativeString &s2);
1056
1057 /** Initializes a string with a formatted string (as in #printf#).
1058 The string is re-initialized with the characters generated
1059 according to the specified format #fmt# and using the optional
1060 arguments. See the ANSI-C function #printf()# for more
1061 information. The current implementation will cause a
1062 segmentation violation if the resulting string is longer than
1063 32768 characters. */
1064 GNativeString &format(const char *fmt, ... );
1065 /** Initializes a string with a formatted string (as in #vprintf#).
1066 The string is re-initialized with the characters generated
1067 according to the specified format #fmt# and using the optional
1068 arguments. See the ANSI-C function #vprintf()# for more
1069 information. The current implementation will cause a
1070 segmentation violation if the resulting string is longer than
1071 32768 characters. */
1072 GNativeString &vformat(const GNativeString &fmt, va_list &args);
1073
1074 /** Returns a copy of this string with characters used in XML with
1075 '<' to "<", '>' to ">", '&' to "&" '\'' to
1076 "'", and '\"' to """. Characters 0x01 through
1077 0x1f are also escaped. */
1078 GNativeString toEscaped( const bool tosevenbit=false ) const;
1079
1080
1081 /** Provides a direct access to the string buffer. Returns a
1082 pointer for directly accessing the string buffer. This
1083 pointer valid remains valid as long as the string is not
1084 modified by other means. Positive values for argument #n#
1085 represent the length of the returned buffer. The returned
1086 string buffer will be large enough to hold at least #n#
1087 characters plus a null character. If #n# is positive but
1088 smaller than the string length, the string will be truncated
1089 to #n# characters. */
1090 char *getbuf(int n = -1);
1091 /** Set the character at position #n# to value #ch#. An exception
1092 \Ref{GException} is thrown if number #n# is not in range #-len#
1093 to #len#, where #len# is the length of the string. If
1094 character #ch# is zero, the string is truncated at position
1095 #n#. The first character of a string is numbered zero.
1096 Negative positions represent characters relative to the end of
1097 the string. If position #n# is equal to the length of the
1098 string, this function appends character #ch# to the end of the
1099 string. */
1100 void setat(const int n, const char ch);
1101
1102 static GNativeString create( const char *buf, const unsigned int bufsize );
1103 static GNativeString create( const uint16_t *buf, const unsigned int bufsize );
1104 static GNativeString create( const uint32_t *buf, const unsigned int bufsize );
1105 #endif // WinCE
1106 };
1107
1108 //@}
1109
1110 inline
1111 GBaseString::operator const char* ( void ) const
1112 {
1113 return ptr?(*this)->data:nullstr;
1114 }
1115
1116 inline unsigned int
length(void)1117 GBaseString::length( void ) const
1118 {
1119 return ptr ? (*this)->size : 0;
1120 }
1121
1122 inline bool
1123 GBaseString::operator! ( void ) const
1124 {
1125 return !ptr;
1126 }
1127
1128 inline GUTF8String
upcase(void)1129 GUTF8String::upcase( void ) const
1130 {
1131 if (ptr) return (*this)->upcase();
1132 return *this;
1133 }
1134
1135 inline GUTF8String
downcase(void)1136 GUTF8String::downcase( void ) const
1137 {
1138 if (ptr) return (*this)->downcase();
1139 return *this;
1140 }
1141
1142 inline void
init(void)1143 GUTF8String::init(void)
1144 { GBaseString::init(); }
1145
1146 inline GUTF8String &
init(const GP<GStringRep> & rep)1147 GUTF8String::init(const GP<GStringRep> &rep)
1148 { GP<GStringRep>::operator=(rep?rep->toUTF8(true):rep); init(); return *this; }
1149
1150 inline GUTF8String &
vformat(const GUTF8String & fmt,va_list & args)1151 GUTF8String::vformat(const GUTF8String &fmt, va_list &args)
1152 { return (*this = (fmt.ptr?GUTF8String(fmt,args):fmt)); }
1153
1154 inline GUTF8String
toEscaped(const bool tosevenbit)1155 GUTF8String::toEscaped( const bool tosevenbit ) const
1156 { return ptr?GUTF8String((*this)->toEscaped(tosevenbit)):(*this); }
1157
1158 inline GP<GStringRep::Unicode>
get_remainder(void)1159 GUTF8String::get_remainder(void) const
1160 {
1161 GP<GStringRep::Unicode> retval;
1162 if(ptr)
1163 retval=((*this)->get_remainder());
1164 return retval;
1165 }
1166
1167 inline
GUTF8String(const GNativeString & str)1168 GUTF8String::GUTF8String(const GNativeString &str)
1169 { init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1170
1171 inline
GUTF8String(const GP<GStringRep> & str)1172 GUTF8String::GUTF8String(const GP<GStringRep> &str)
1173 { init(str?(str->toUTF8(true)):str); }
1174
1175 inline
GUTF8String(const GBaseString & str)1176 GUTF8String::GUTF8String(const GBaseString &str)
1177 { init(str.length()?(str->toUTF8(true)):(GP<GStringRep>)str); }
1178
1179 inline void
init(void)1180 GBaseString::init(void)
1181 {
1182 gstr=ptr?((*this)->data):nullstr;
1183 }
1184 /** Returns an integer. Implements i18n atoi. */
1185 inline int
toInt(void)1186 GBaseString::toInt(void) const
1187 { return ptr?(*this)->toInt():0; }
1188
1189 /** Returns a long intenger. Implments i18n strtol. */
1190 inline long
toLong(const int pos,int & endpos,const int base)1191 GBaseString::toLong(const int pos, int &endpos, const int base) const
1192 {
1193 long int retval=0;
1194 if(ptr)
1195 {
1196 retval=(*this)->toLong(pos, endpos, base);
1197 }else
1198 {
1199 endpos=(-1);
1200 }
1201 return retval;
1202 }
1203
1204 inline long
toLong(const GUTF8String & src,const int pos,int & endpos,const int base)1205 GBaseString::toLong(
1206 const GUTF8String& src, const int pos, int &endpos, const int base)
1207 {
1208 return src.toLong(pos,endpos,base);
1209 }
1210
1211 inline long
toLong(const GNativeString & src,const int pos,int & endpos,const int base)1212 GBaseString::toLong(
1213 const GNativeString& src, const int pos, int &endpos, const int base)
1214 {
1215 return src.toLong(pos,endpos,base);
1216 }
1217
1218 /** Returns a unsigned long integer. Implements i18n strtoul. */
1219 inline unsigned long
toULong(const int pos,int & endpos,const int base)1220 GBaseString::toULong(const int pos, int &endpos, const int base) const
1221 {
1222 unsigned long retval=0;
1223 if(ptr)
1224 {
1225 retval=(*this)->toULong(pos, endpos, base);
1226 }else
1227 {
1228 endpos=(-1);
1229 }
1230 return retval;
1231 }
1232
1233 inline unsigned long
toULong(const GUTF8String & src,const int pos,int & endpos,const int base)1234 GBaseString::toULong(
1235 const GUTF8String& src, const int pos, int &endpos, const int base)
1236 {
1237 return src.toULong(pos,endpos,base);
1238 }
1239
1240 inline unsigned long
toULong(const GNativeString & src,const int pos,int & endpos,const int base)1241 GBaseString::toULong(
1242 const GNativeString& src, const int pos, int &endpos, const int base)
1243 {
1244 return src.toULong(pos,endpos,base);
1245 }
1246
1247 /** Returns a double. Implements the i18n strtod. */
1248 inline double
toDouble(const int pos,int & endpos)1249 GBaseString::toDouble(
1250 const int pos, int &endpos ) const
1251 {
1252 double retval=(double)0;
1253 if(ptr)
1254 {
1255 retval=(*this)->toDouble(pos, endpos);
1256 }else
1257 {
1258 endpos=(-1);
1259 }
1260 return retval;
1261 }
1262
1263 inline double
toDouble(const GUTF8String & src,const int pos,int & endpos)1264 GBaseString::toDouble(
1265 const GUTF8String& src, const int pos, int &endpos)
1266 {
1267 return src.toDouble(pos,endpos);
1268 }
1269
1270 inline double
toDouble(const GNativeString & src,const int pos,int & endpos)1271 GBaseString::toDouble(
1272 const GNativeString& src, const int pos, int &endpos)
1273 {
1274 return src.toDouble(pos,endpos);
1275 }
1276
1277 inline GBaseString &
init(const GP<GStringRep> & rep)1278 GBaseString::init(const GP<GStringRep> &rep)
1279 { GP<GStringRep>::operator=(rep); init(); return *this;}
1280
1281 inline char
1282 GBaseString::operator[] (int n) const
1283 { return ((n||ptr)?((*this)->data[CheckSubscript(n)]):0); }
1284
1285 inline int
search(char c,int from)1286 GBaseString::search(char c, int from) const
1287 { return ptr?((*this)->search(c,from)):(-1); }
1288
1289 inline int
search(const char * str,int from)1290 GBaseString::search(const char *str, int from) const
1291 { return ptr?((*this)->search(str,from)):(-1); }
1292
1293 inline int
rsearch(char c,const int from)1294 GBaseString::rsearch(char c, const int from) const
1295 { return ptr?((*this)->rsearch(c,from)):(-1); }
1296
1297 inline int
rsearch(const char * str,const int from)1298 GBaseString::rsearch(const char *str, const int from) const
1299 { return ptr?((*this)->rsearch(str,from)):(-1); }
1300
1301 inline int
contains(const char accept[],const int from)1302 GBaseString::contains(const char accept[], const int from) const
1303 { return ptr?((*this)->contains(accept,from)):(-1); }
1304
1305 inline int
rcontains(const char accept[],const int from)1306 GBaseString::rcontains(const char accept[], const int from) const
1307 { return ptr?((*this)->rcontains(accept,from)):(-1); }
1308
1309 inline int
cmp(const GBaseString & s2,const int len)1310 GBaseString::cmp(const GBaseString &s2, const int len) const
1311 { return GStringRep::cmp(*this,s2,len); }
1312
1313 inline int
cmp(const char * s2,const int len)1314 GBaseString::cmp(const char *s2, const int len) const
1315 { return GStringRep::cmp(*this,s2,len); }
1316
1317 inline int
cmp(const char s2)1318 GBaseString::cmp(const char s2) const
1319 { return GStringRep::cmp(*this,&s2,1); }
1320
1321 inline int
cmp(const char * s1,const char * s2,const int len)1322 GBaseString::cmp(const char *s1, const char *s2, const int len)
1323 { return GStringRep::cmp(s1,s2,len); }
1324
1325 inline bool
1326 GBaseString::operator==(const GBaseString &s2) const
1327 { return !cmp(s2); }
1328
1329 inline bool
1330 GBaseString::operator==(const char *s2) const
1331 { return !cmp(s2); }
1332
1333 inline bool
1334 GBaseString::operator!=(const GBaseString &s2) const
1335 { return !!cmp(s2); }
1336
1337 inline bool
1338 GBaseString::operator!=(const char *s2) const
1339 { return !!cmp(s2); }
1340
1341 inline bool
1342 GBaseString::operator>=(const GBaseString &s2) const
1343 { return (cmp(s2)>=0); }
1344
1345 inline bool
1346 GBaseString::operator>=(const char *s2) const
1347 { return (cmp(s2)>=0); }
1348
1349 inline bool
1350 GBaseString::operator>=(const char s2) const
1351 { return (cmp(s2)>=0); }
1352
1353 inline bool
1354 GBaseString::operator<(const GBaseString &s2) const
1355 { return (cmp(s2)<0); }
1356
1357 inline bool
1358 GBaseString::operator<(const char *s2) const
1359 { return (cmp(s2)<0); }
1360
1361 inline bool
1362 GBaseString::operator<(const char s2) const
1363 { return (cmp(s2)<0); }
1364
1365 inline bool
1366 GBaseString::operator> (const GBaseString &s2) const
1367 { return (cmp(s2)>0); }
1368
1369 inline bool
1370 GBaseString::operator> (const char *s2) const
1371 { return (cmp(s2)>0); }
1372
1373 inline bool
1374 GBaseString::operator> (const char s2) const
1375 { return (cmp(s2)>0); }
1376
1377 inline bool
1378 GBaseString::operator<=(const GBaseString &s2) const
1379 { return (cmp(s2)<=0); }
1380
1381 inline bool
1382 GBaseString::operator<=(const char *s2) const
1383 { return (cmp(s2)<=0); }
1384
1385 inline bool
1386 GBaseString::operator<=(const char s2) const
1387 { return (cmp(s2)<=0); }
1388
1389 inline int
nextNonSpace(const int from,const int len)1390 GBaseString::nextNonSpace( const int from, const int len ) const
1391 { return ptr?(*this)->nextNonSpace(from,len):0; }
1392
1393 inline int
nextChar(const int from)1394 GBaseString::nextChar( const int from ) const
1395 { return ptr?(*this)->nextChar(from):0; }
1396
1397 inline int
nextSpace(const int from,const int len)1398 GBaseString::nextSpace( const int from, const int len ) const
1399 { return ptr?(*this)->nextSpace(from,len):0; }
1400
1401 inline int
firstEndSpace(const int from,const int len)1402 GBaseString::firstEndSpace( const int from,const int len ) const
1403 { return ptr?(*this)->firstEndSpace(from,len):0; }
1404
1405 inline bool
is_valid(void)1406 GBaseString::is_valid(void) const
1407 { return ptr?((*this)->is_valid()):true; }
1408
#if HAS_WCHAR
/** Copies into the wide character buffer #buf# of #buflen# elements.
    A usable buffer is first reset to the empty string; the copy itself is
    done by the representation, and zero is returned without one. */
inline int
GBaseString::ncopy(wchar_t * const buf, const int buflen) const
{
  if (buf && buflen)
    buf[0] = 0;
  if (!ptr)
    return 0;
  return (*this)->ncopy(buf, buflen);
}
#endif
1414
1415 inline int
CheckSubscript(int n)1416 GBaseString::CheckSubscript(int n) const
1417 {
1418 if(n)
1419 {
1420 if (n<0 && ptr)
1421 n += (*this)->size;
1422 if (n<0 || !ptr || n > (int)(*this)->size)
1423 throw_illegal_subscript();
1424 }
1425 return n;
1426 }
1427
GBaseString(void)1428 inline GBaseString::GBaseString(void) { init(); }
1429
GUTF8String(void)1430 inline GUTF8String::GUTF8String(void) { }
1431
/// Copy constructor: initializes from the representation of #str#.
inline
GUTF8String::GUTF8String(const GUTF8String &str)
{
  init(str);
}
1434
1435 inline GUTF8String& GUTF8String::operator= (const GP<GStringRep> &str)
1436 { return init(str); }
1437
1438 inline GUTF8String& GUTF8String::operator= (const GBaseString &str)
1439 { return init(str); }
1440
1441 inline GUTF8String& GUTF8String::operator= (const GUTF8String &str)
1442 { return init(str); }
1443
1444 inline GUTF8String& GUTF8String::operator= (const GNativeString &str)
1445 { return init(str); }
1446
1447 inline GUTF8String
create(const char * buf,const unsigned int bufsize)1448 GUTF8String::create( const char *buf, const unsigned int bufsize )
1449 {
1450 #if HAS_WCHAR
1451 return GNativeString(buf,bufsize);
1452 #else
1453 return GUTF8String(buf,bufsize);
1454 #endif
1455 }
1456
1457 inline GUTF8String
create(const uint16_t * buf,const unsigned int bufsize)1458 GUTF8String::create( const uint16_t *buf, const unsigned int bufsize )
1459 {
1460 return GUTF8String(buf,bufsize);
1461 }
1462
1463 inline GUTF8String
create(const uint32_t * buf,const unsigned int bufsize)1464 GUTF8String::create( const uint32_t *buf, const unsigned int bufsize )
1465 {
1466 return GUTF8String(buf,bufsize);
1467 }
1468
GNativeString(void)1469 inline GNativeString::GNativeString(void) {}
1470
1471 #if !HAS_WCHAR
1472 // For Windows CE, GNativeString is essentially GUTF8String
1473
1474 inline
GNativeString(const GUTF8String & str)1475 GNativeString::GNativeString(const GUTF8String &str)
1476 : GUTF8String(str) {}
1477
1478 inline
GNativeString(const GP<GStringRep> & str)1479 GNativeString::GNativeString(const GP<GStringRep> &str)
1480 : GUTF8String(str) {}
1481
1482 inline
GNativeString(const char dat)1483 GNativeString::GNativeString(const char dat)
1484 : GUTF8String(dat) {}
1485
1486 inline
GNativeString(const char * str)1487 GNativeString::GNativeString(const char *str)
1488 : GUTF8String(str) {}
1489
1490 inline
GNativeString(const unsigned char * str)1491 GNativeString::GNativeString(const unsigned char *str)
1492 : GUTF8String(str) {}
1493
1494 inline
GNativeString(const uint16_t * str)1495 GNativeString::GNativeString(const uint16_t *str)
1496 : GUTF8String(str) {}
1497
1498 inline
GNativeString(const uint32_t * str)1499 GNativeString::GNativeString(const uint32_t *str)
1500 : GUTF8String(str) {}
1501
1502 inline
GNativeString(const char * dat,unsigned int len)1503 GNativeString::GNativeString(const char *dat, unsigned int len)
1504 : GUTF8String(dat,len) {}
1505
1506 inline
GNativeString(const uint16_t * dat,unsigned int len)1507 GNativeString::GNativeString(const uint16_t *dat, unsigned int len)
1508 : GUTF8String(dat,len) {}
1509
1510 inline
GNativeString(const uint32_t * dat,unsigned int len)1511 GNativeString::GNativeString(const uint32_t *dat, unsigned int len)
1512 : GUTF8String(dat,len) {}
1513
1514 inline
GNativeString(const GNativeString & str)1515 GNativeString::GNativeString(const GNativeString &str)
1516 : GUTF8String(str) {}
1517
1518 inline
GNativeString(const int number)1519 GNativeString::GNativeString(const int number)
1520 : GUTF8String(number) {}
1521
1522 inline
GNativeString(const double number)1523 GNativeString::GNativeString(const double number)
1524 : GUTF8String(number) {}
1525
1526 inline
GNativeString(const GNativeString & fmt,va_list & args)1527 GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1528 : GUTF8String(fmt,args) {}
1529
1530 #else // HAS_WCHAR
1531
1532 /// Initialize this string class
1533 inline void
init(void)1534 GNativeString::init(void)
1535 { GBaseString::init(); }
1536
1537 /// Initialize this string class
1538 inline GNativeString &
init(const GP<GStringRep> & rep)1539 GNativeString::init(const GP<GStringRep> &rep)
1540 {
1541 GP<GStringRep>::operator=(rep?rep->toNative(GStringRep::NOT_ESCAPED):rep);
1542 init();
1543 return *this;
1544 }
1545
1546 inline GNativeString
substr(int from,int len)1547 GNativeString::substr(int from, int len) const
1548 { return GNativeString(*this, from, len); }
1549
1550 inline GNativeString &
vformat(const GNativeString & fmt,va_list & args)1551 GNativeString::vformat(const GNativeString &fmt, va_list &args)
1552 { return (*this = (fmt.ptr?GNativeString(fmt,args):fmt)); }
1553
1554 inline GNativeString
toEscaped(const bool tosevenbit)1555 GNativeString::toEscaped( const bool tosevenbit ) const
1556 { return ptr?GNativeString((*this)->toEscaped(tosevenbit)):(*this); }
1557
1558 inline
GNativeString(const GUTF8String & str)1559 GNativeString::GNativeString(const GUTF8String &str)
1560 {
1561 if (str.length())
1562 init(str->toNative(GStringRep::NOT_ESCAPED));
1563 else
1564 init((GP<GStringRep>)str);
1565 }
1566
1567 inline
GNativeString(const GP<GStringRep> & str)1568 GNativeString::GNativeString(const GP<GStringRep> &str)
1569 {
1570 if (str)
1571 init(str->toNative(GStringRep::NOT_ESCAPED));
1572 else
1573 init(str);
1574 }
1575
1576 inline
GNativeString(const GBaseString & str)1577 GNativeString::GNativeString(const GBaseString &str)
1578 {
1579 if (str.length())
1580 init(str->toNative(GStringRep::NOT_ESCAPED));
1581 else
1582 init((GP<GStringRep>)str);
1583 }
1584
1585
1586 inline
GNativeString(const GNativeString & fmt,va_list & args)1587 GNativeString::GNativeString(const GNativeString &fmt, va_list &args)
1588 {
1589 if (fmt.ptr)
1590 init(fmt->vformat(args));
1591 else
1592 init(fmt);
1593 }
1594
1595 inline GNativeString
create(const char * buf,const unsigned int bufsize)1596 GNativeString::create( const char *buf, const unsigned int bufsize )
1597 {
1598 return GNativeString(buf,bufsize);
1599 }
1600
1601 inline GNativeString
create(const uint16_t * buf,const unsigned int bufsize)1602 GNativeString::create( const uint16_t *buf, const unsigned int bufsize )
1603 {
1604 return GNativeString(buf,bufsize);
1605 }
1606
1607 inline GNativeString
create(const uint32_t * buf,const unsigned int bufsize)1608 GNativeString::create( const uint32_t *buf, const unsigned int bufsize )
1609 {
1610 return GNativeString(buf,bufsize);
1611 }
1612
1613 inline GNativeString&
1614 GNativeString::operator= (const GP<GStringRep> &str)
1615 { return init(str); }
1616
1617 inline GNativeString&
1618 GNativeString::operator= (const GBaseString &str)
1619 { return init(str); }
1620
1621 inline GNativeString&
1622 GNativeString::operator= (const GUTF8String &str)
1623 { return init(str); }
1624
1625 inline GNativeString&
1626 GNativeString::operator= (const GNativeString &str)
1627 { return init(str); }
1628
1629 inline GNativeString
upcase(void)1630 GNativeString::upcase( void ) const
1631 {
1632 if (ptr) return (*this)->upcase();
1633 return *this;
1634 }
1635
1636 inline GNativeString
downcase(void)1637 GNativeString::downcase( void ) const
1638 {
1639 if (ptr) return (*this)->downcase();
1640 return *this;
1641 }
1642
1643 #endif // HAS_WCHAR
1644
1645 inline bool
1646 operator==(const char *s1, const GBaseString &s2)
1647 { return !s2.cmp(s1); }
1648
1649 inline bool
1650 operator!=(const char *s1, const GBaseString &s2)
1651 { return !!s2.cmp(s1); }
1652
1653 inline bool
1654 operator>=(const char *s1, const GBaseString &s2)
1655 { return (s2.cmp(s1)<=0); }
1656
1657 inline bool
1658 operator>=(const char s1, const GBaseString &s2)
1659 { return (s2.cmp(s1)<=0); }
1660
1661 inline bool
1662 operator<(const char *s1, const GBaseString &s2)
1663 { return (s2.cmp(s1)>0); }
1664
1665 inline bool
1666 operator<(const char s1, const GBaseString &s2)
1667 { return (s2.cmp(s1)>0); }
1668
1669 inline bool
1670 operator> (const char *s1, const GBaseString &s2)
1671 { return (s2.cmp(s1)<0); }
1672
1673 inline bool
1674 operator> (const char s1, const GBaseString &s2)
1675 { return (s2.cmp(s1)<0); }
1676
1677 inline bool
1678 operator<=(const char *s1, const GBaseString &s2)
1679 { return !(s1>s2); }
1680
1681 inline bool
1682 operator<=(const char s1, const GBaseString &s2)
1683 { return !(s1>s2); }
1684
1685 // ------------------- The end
1686
1687
1688 #ifdef HAVE_NAMESPACES
1689 }
1690 # ifndef NOT_USING_DJVU_NAMESPACE
1691 using namespace DJVU;
1692 # endif
1693 #endif
1694 #endif
1695
1696