1 
2 /*
3  *  utf8_string - Implements a string-class which handles utf8 coded strings
4  *  Copyright (c) 2006 by Mattias Hultgren <mattias_hultgren@tele2.se>
5  *
6  *
7  *   This program is free software; you can redistribute it and/or modify
8  *   it under the terms of the GNU General Public License as published by
9  *   the Free Software Foundation; version 2 of the License.
10  *
11  *   This program is distributed in the hope that it will be useful,
12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *   GNU General Public License for more details.
15  *
16  *   You should have received a copy of the GNU General Public
17  *   License along with this program; if not, write to the Free Software
18  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
19  */
20 
21 
22 #ifndef UTF8_STRING_H_
23 #define UTF8_STRING_H_
24 
25 #include "vartypes.h"
26 
27 
28 #define UTF8_STRING_H_VERSION "v1"
29 #define UTF8_STRING_H_DATE    "2006-07 - 2006-10"
30 
31 
32 class utf8_string
33 {
34 private:
35 	char *str;
36 	uint32 size;
37 
38 	// these sizes are not in characters but in bytes
39 
40 	// set_size and enlarge_to uses bytes...
41 	// the string is erased and size is set to new_size
42 	void set_size( uint32 new_size ) throw(error_obj);
43 	// the string is preserved and size is set to the biggest of size and new_size
44 	void enlarge_to( uint32 new_size ) throw(error_obj);
45 
46 public:
47 	utf8_string();
48 	utf8_string( const utf8_string &src ) throw(error_obj);
49 	utf8_string( const char *src ) throw(error_obj);
50 	~utf8_string();
51 
52 	utf8_string & clear(void) throw();
53 
54 	// if pos is greater than the string's length nothing happens
crop(uint32 pos)55 	inline utf8_string & crop( uint32 pos ) { return remove( pos, uint32_max ); }
56 
57 	// returns length in characters (not in bytes)
58 	uint32 get_length(void) const;
59 
60 	utf8_string & insert( const utf8_string &src, uint32 pos ) throw(error_obj);
prepend(const utf8_string & src)61 	inline utf8_string & prepend( const utf8_string &src ) throw(error_obj) { return insert( src, 0 ); }
62 	utf8_string & append( const utf8_string &src ) throw(error_obj);
63 	utf8_string & remove( uint32 pos, uint32 len = 1 );
64 
replace(uint32 pos,uint32 len,const utf8_string & src)65 	inline utf8_string & replace( uint32 pos, uint32 len, const utf8_string &src ) throw(error_obj)
66 		{
67 			remove( pos, len );
68 			return insert( src, pos );
69 		}
70 
71 	void operator=( const utf8_string &src ) throw(error_obj);
72 
73 	void assign( const utf8_string &src, uint32 pos, uint32 length ) throw(error_obj);
74 
75 	bool operator==( const utf8_string &src ) const;
76 	inline bool operator!=( const utf8_string &src ) const { return !(*this == src); }
77 
78 	bool operator==( const char *src ) const;
79 	inline bool operator!=( const char *src ) const { return !(*this == src); }
80 
81 
82 // just think of that the index in a C-string might not be the same as in utf8_string
c_str(void)83 	inline const char * c_str(void) const { return str; }
84 	const char * c_str_from( uint32 index ) const;
85 
86 	// the character at pos will be tested for equality with all characters in test
87 	bool test_character( uint32 pos, const char *test ) const;
88 
89 	// if the character at pos isn't a digit then an error is thrown else the digit is returned
90 	int32 get_digit( uint32 pos ) const throw(error_obj);
91 
92 	utf8_string substr( uint32 start, uint32 length ) const throw(error_obj);
93 
94 	void remove_escape_sequences(void);
95 };
96 
97 
98 #endif // UTF8_STRING_H_
99