1 // -*- c-basic-offset: 2 -*- 2 /* 3 * This file is part of the KDE libraries 4 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02111-1307, USA. 20 */ 21 22 #ifndef _KJS_USTRING_H_ 23 #define _KJS_USTRING_H_ 24 25 #include "wv2_export.h" 26 27 namespace wvWare { 28 29 /** 30 * @return True if d is not a number (platform support required). 31 */ 32 bool isNaN(double d); 33 34 bool isPosInf(double d); 35 bool isNegInf(double d); 36 37 class UCharReference; 38 class UString; 39 class UConstString; 40 41 /** 42 * @short Unicode character. 43 * 44 * UChar represents a 16 bit Unicode character. It's internal data 45 * representation is compatible to XChar2b and QChar. It's therefore 46 * possible to exchange data with X and Qt with shallow copies. 47 */ 48 struct UChar { 49 /** 50 * Construct a character with value 0. 51 */ 52 UChar(); 53 /** 54 * Construct a character with the value denoted by the arguments. 55 * @param h higher byte 56 * @param l lower byte 57 */ 58 UChar(unsigned char h , unsigned char l); 59 /** 60 * Construct a character with the given value. 61 * @param u 16 bit Unicode value 62 */ 63 UChar(unsigned short u); 64 UChar(const UCharReference &c); 65 /** 66 * @return The higher byte of the character. 67 */ highUChar68 unsigned char high() const { return uc >> 8; } 69 /** 70 * @return The lower byte of the character. 71 */ lowUChar72 unsigned char low() const { return uc & 0xFF; } 73 /** 74 * @return the 16 bit Unicode value of the character 75 */ unicodeUChar76 unsigned short unicode() const { return uc; } 77 public: 78 /** 79 * @return The character converted to lower case. 80 */ 81 UChar toLower() const; 82 /** 83 * @return The character converted to upper case. 84 */ 85 UChar toUpper() const; 86 /** 87 * A static instance of UChar(0). 88 */ 89 static UChar null; 90 private: 91 friend class UCharReference; 92 friend class UString; 93 friend bool operator==(const UChar &c1, const UChar &c2); 94 friend bool operator==(const UString& s1, const char *s2); 95 friend bool operator<(const UString& s1, const UString& s2); 96 97 unsigned short uc; 98 }; 99 UChar()100 inline UChar::UChar() : uc(0) { } UChar(unsigned char h,unsigned char l)101 inline UChar::UChar(unsigned char h , unsigned char l) : uc(h << 8 | l) { } UChar(unsigned short u)102 inline UChar::UChar(unsigned short u) : uc(u) { } 103 104 /** 105 * @short Dynamic reference to a string character. 106 * 107 * UCharReference is the dynamic counterpart of @ref UChar. It's used when 108 * characters retrieved via index from a @ref UString are used in an 109 * assignment expression (and therefore can't be treated as being const): 110 * <pre> 111 * UString s("hello world"); 112 * s[0] = 'H'; 113 * </pre> 114 * 115 * If that sounds confusing your best bet is to simply forget about the 116 * existence of this class and treat is as being identical to @ref UChar. 117 */ 118 class WV2_EXPORT UCharReference { 119 friend class UString; UCharReference(UString * s,unsigned int off)120 UCharReference(UString *s, unsigned int off) : str(s), offset(off) { } 121 public: 122 /** 123 * Set the referenced character to c. 124 */ 125 UCharReference& operator=(UChar c); 126 /** 127 * Same operator as above except the argument that it takes. 128 */ 129 UCharReference& operator=(char c) { return operator=(UChar(c)); } 130 /** 131 * @return Unicode value. 132 */ unicode()133 unsigned short unicode() const { return ref().unicode(); } 134 /** 135 * @return Lower byte. 136 */ low()137 unsigned char low() const { return ref().uc & 0xFF; } 138 /** 139 * @return Higher byte. 140 */ high()141 unsigned char high() const { return ref().uc >> 8; } 142 /** 143 * @return Character converted to lower case. 144 */ toLower()145 UChar toLower() const { return ref().toLower(); } 146 /** 147 * @return Character converted to upper case. 148 */ toUpper()149 UChar toUpper() const { return ref().toUpper(); } 150 private: 151 // not implemented, can only be constructed from UString 152 UCharReference(); 153 154 UChar& ref() const; 155 UString *str; 156 int offset; 157 }; 158 159 /** 160 * @short 8 bit char based string class 161 */ 162 class WV2_EXPORT CString { 163 public: CString()164 CString() : data(0L) { } 165 explicit CString(const char *c); 166 CString(const CString &); 167 168 ~CString(); 169 170 CString &append(const CString &); 171 CString &operator=(const char *c); 172 CString &operator=(const CString &); 173 CString &operator+=(const CString &); 174 175 int length() const; c_str()176 const char *c_str() const { return data; } 177 private: 178 char *data; 179 }; 180 181 /** 182 * @short Unicode string class 183 */ 184 class WV2_EXPORT UString { 185 friend bool operator==(const UString&, const UString&); 186 friend class UCharReference; 187 friend class UConstString; 188 /** 189 * @internal 190 */ 191 struct Rep { 192 friend class UString; 193 friend bool operator==(const UString&, const UString&); 194 static Rep *create(UChar *d, int l); dataRep195 inline UChar *data() const { return dat; } lengthRep196 inline int length() const { return len; } 197 refRep198 inline void ref() { rc++; } derefRep199 inline int deref() { return --rc; } 200 201 UChar *dat; 202 int len; 203 int rc; 204 static Rep null; 205 }; 206 207 public: 208 /** 209 * Constructs a null string. 210 */ 211 UString(); 212 /** 213 * Constructs a string from the single character c. 214 */ 215 explicit UString(char c); 216 /** 217 * Constructs a string from the single character c. 218 */ 219 explicit UString(UChar c); 220 /** 221 * Constructs a string from a classical zero determined char string. 222 */ 223 explicit UString(const char *c); 224 /** 225 * Constructs a string from an array of Unicode characters of the specified 226 * length. 227 */ 228 UString(const UChar *c, int length); 229 /** 230 * If copy is false a shallow copy of the string will be created. That 231 * means that the data will NOT be copied and you'll have to guarantee that 232 * it doesn't get deleted during the lifetime of the UString object. 233 */ 234 UString(UChar *c, int length, bool copy); 235 /** 236 * Copy constructor. Makes a shallow copy only. 237 */ 238 UString(const UString &); 239 /** 240 * Destructor. If this handle was the only one holding a reference to the 241 * string the data will be freed. 242 */ 243 ~UString(); 244 245 /** 246 * Constructs a string from an int. 247 */ 248 static UString from(int i); 249 /** 250 * Constructs a string from an unsigned int. 251 */ 252 static UString from(unsigned int u); 253 /** 254 * Constructs a string from a double. 255 */ 256 static UString from(double d); 257 258 /** 259 * Append another string. 260 */ 261 UString &append(const UString &); 262 263 /** 264 * @return The string converted to the 8-bit string type @ref CString(). 265 */ 266 CString cstring() const; 267 /** 268 * Convert the Unicode string to plain ASCII chars chopping of any higher 269 * bytes. This method should only be used for *debugging* purposes as it 270 * is neither Unicode safe nor free from side effects. In order not to 271 * waste any memory the char buffer is static and *shared* by all UString 272 * instances. 273 */ 274 char *ascii() const; 275 276 /** 277 * Assignment operator. 278 */ 279 UString &operator=(const char *c); 280 /** 281 * Assignment operator. 282 */ 283 UString &operator=(const UString &); 284 /** 285 * Appends the specified string. 286 */ 287 UString &operator+=(const UString &s); 288 289 /** 290 * @return A pointer to the internal Unicode data. 291 */ data()292 const UChar* data() const { return rep->data(); } 293 /** 294 * @return True if null. 295 */ isNull()296 bool isNull() const { return (rep == &Rep::null); } 297 /** 298 * @return True if null or zero length. 299 */ isEmpty()300 bool isEmpty() const { return (!rep->len); } 301 /** 302 * Use this if you want to make sure that this string is a plain ASCII 303 * string. For example, if you don't want to lose any information when 304 * using @ref cstring() or @ref ascii(). 305 * 306 * @return True if the string doesn't contain any non-ASCII characters. 307 */ 308 bool is8Bit() const; 309 /** 310 * @return The length of the string. 311 */ length()312 int length() const { return rep->length(); } 313 /** 314 * Const character at specified position. 315 */ 316 UChar operator[](int pos) const; 317 /** 318 * Writable reference to character at specified position. 319 */ 320 UCharReference operator[](int pos); 321 322 /** 323 * Attempts an conversion to a number. Apart from floating point numbers, 324 * the algorithm will recognize hexadecimal representations (as 325 * indicated by a 0x or 0X prefix) and +/- Infinity. 326 * Returns NaN if the conversion failed. 327 * @param tolerant if true, toDouble can tolerate garbage after the number. 328 */ 329 double toDouble(bool tolerant=false) const; 330 /** 331 * Attempts an conversion to an unsigned long integer. ok will be set 332 * according to the success. 333 */ 334 unsigned long toULong(bool *ok = 0L) const; 335 /** 336 * @return Position of first occurrence of f starting at position pos. 337 * -1 if the search was not successful. 338 */ 339 int find(const UString &f, int pos = 0) const; 340 /** 341 * @return Position of first occurrence of f searching backwards from 342 * position pos. 343 * -1 if the search was not successful. 344 */ 345 int rfind(const UString &f, int pos) const; 346 /** 347 * @return The sub string starting at position pos and length len. 348 */ 349 UString substr(int pos = 0, int len = -1) const; 350 /** 351 * Static instance of a null string. 352 */ 353 static UString null; 354 private: 355 void attach(Rep *r); 356 void detach(); 357 void release(); 358 Rep *rep; 359 }; 360 361 inline bool operator==(const UChar &c1, const UChar &c2) { 362 return (c1.uc == c2.uc); 363 } 364 inline bool operator!=(const UChar &c1, const UChar &c2) { 365 return !(c1 == c2); 366 } 367 bool operator==(const UString& s1, const UString& s2); 368 inline bool operator!=(const UString& s1, const UString& s2) { 369 return !wvWare::operator==(s1, s2); 370 } 371 bool operator<(const UString& s1, const UString& s2); 372 bool operator==(const UString& s1, const char *s2); 373 inline bool operator!=(const UString& s1, const char *s2) { 374 return !wvWare::operator==(s1, s2); 375 } 376 inline bool operator==(const char *s1, const UString& s2) { 377 return operator==(s2, s1); 378 } 379 inline bool operator!=(const char *s1, const UString& s2) { 380 return !wvWare::operator==(s1, s2); 381 } 382 bool operator==(const CString& s1, const CString& s2); 383 UString operator+(const UString& s1, const UString& s2); 384 385 386 class UConstString : private UString { 387 public: 388 UConstString( UChar* data, unsigned int length ); 389 ~UConstString(); 390 string()391 const UString& string() const { return *this; } 392 }; 393 394 } // namespace 395 396 #endif 397