1 // Created on: 2013-01-28 2 // Created by: Kirill GAVRILOV 3 // Copyright (c) 2013-2014 OPEN CASCADE SAS 4 // 5 // This file is part of Open CASCADE Technology software library. 6 // 7 // This library is free software; you can redistribute it and/or modify it under 8 // the terms of the GNU Lesser General Public License version 2.1 as published 9 // by the Free Software Foundation, with special exception defined in the file 10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT 11 // distribution for complete text of the license and disclaimer of any warranty. 12 // 13 // Alternatively, this file may be used under the terms of Open CASCADE 14 // commercial license or contractual agreement. 15 16 #ifndef NCollection_UtfString_HeaderFile 17 #define NCollection_UtfString_HeaderFile 18 19 #include <NCollection_UtfIterator.hxx> 20 21 #include <cstring> 22 #include <cstdlib> 23 24 //! This template class represent constant UTF-* string. 25 //! String stored in memory continuously, always NULL-terminated 26 //! and can be used as standard C-string using ToCString() method. 27 //! 28 //! Notice that changing the string is not allowed 29 //! and any modifications should produce new string. 30 //! 31 //! In comments to this class, terms "Unicode symbol" is used as 32 //! synonym of "Unicode code point". 33 template<typename Type> 34 class NCollection_UtfString 35 { 36 37 public: 38 Iterator() const39 NCollection_UtfIterator<Type> Iterator() const 40 { 41 return NCollection_UtfIterator<Type> (myString); 42 } 43 44 //! @return the size of the buffer in bytes, excluding NULL-termination symbol Size() const45 Standard_Integer Size() const 46 { 47 return mySize; 48 } 49 50 //! @return the length of the string in Unicode symbols Length() const51 Standard_Integer Length() const 52 { 53 return myLength; 54 } 55 56 //! Retrieve Unicode symbol at specified position. 57 //! Warning! This is a slow access. Iterator should be used for consecutive parsing. 58 //! @param theCharIndex the index of the symbol, should be lesser than Length() 59 //! @return the Unicode symbol value 60 Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const; 61 62 //! Retrieve string buffer at specified position. 63 //! Warning! This is a slow access. Iterator should be used for consecutive parsing. 64 //! @param theCharIndex the index of the symbol, should be less than Length() 65 //! (first symbol of the string has index 0) 66 //! @return the pointer to the symbol 67 const Type* GetCharBuffer (const Standard_Integer theCharIndex) const; 68 69 //! Retrieve Unicode symbol at specified position. 70 //! Warning! This is a slow access. Iterator should be used for consecutive parsing. operator [](const Standard_Integer theCharIndex) const71 Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const 72 { 73 return GetChar (theCharIndex); 74 } 75 76 //! Initialize empty string. 77 NCollection_UtfString(); 78 79 //! Copy constructor. 80 //! @param theCopy string to copy. 81 NCollection_UtfString (const NCollection_UtfString& theCopy); 82 83 #ifndef OCCT_NO_RVALUE_REFERENCE 84 //! Move constructor 85 NCollection_UtfString (NCollection_UtfString&& theOther); 86 #endif 87 88 //! Copy constructor from UTF-8 string. 89 //! @param theCopyUtf8 UTF-8 string to copy 90 //! @param theLength optional length limit in Unicode symbols (NOT bytes!) 91 //! The string is copied till NULL symbol or, if theLength >0, 92 //! till either NULL or theLength-th symbol (which comes first). 93 NCollection_UtfString (const char* theCopyUtf8, 94 const Standard_Integer theLength = -1); 95 96 //! Copy constructor from UTF-16 string. 97 //! @param theCopyUtf16 UTF-16 string to copy 98 //! @param theLength the length limit in Unicode symbols (NOT bytes!) 99 //! The string is copied till NULL symbol or, if theLength >0, 100 //! till either NULL or theLength-th symbol (which comes first). 101 NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16, 102 const Standard_Integer theLength = -1); 103 104 //! Copy constructor from UTF-32 string. 105 //! @param theCopyUtf32 UTF-32 string to copy 106 //! @param theLength the length limit in Unicode symbols (NOT bytes!) 107 //! The string is copied till NULL symbol or, if theLength >0, 108 //! till either NULL or theLength-th symbol (which comes first). 109 NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32, 110 const Standard_Integer theLength = -1); 111 112 #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) || (defined(_MSC_VER) && _MSC_VER >= 1900) 113 //! Copy constructor from wide UTF string. 114 //! @param theCopyUtfWide wide UTF string to copy 115 //! @param theLength the length limit in Unicode symbols (NOT bytes!) 116 //! The string is copied till NULL symbol or, if theLength >0, 117 //! till either NULL or theLength-th symbol (which comes first). 118 //! 119 //! This constructor is undefined if Standard_WideChar is the same type as Standard_Utf16Char. 120 NCollection_UtfString (const Standard_WideChar* theCopyUtfWide, 121 const Standard_Integer theLength = -1); 122 #endif 123 124 //! Copy from Unicode string in UTF-8, UTF-16, or UTF-32 encoding, 125 //! determined by size of TypeFrom character type. 126 //! @param theStringUtf Unicode string 127 //! @param theLength the length limit in Unicode symbols 128 //! The string is copied till NULL symbol or, if theLength >0, 129 //! till either NULL or theLength-th symbol (which comes first). 130 template <typename TypeFrom> FromUnicode(const TypeFrom * theStringUtf,const Standard_Integer theLength=-1)131 inline void FromUnicode (const TypeFrom* theStringUtf, 132 const Standard_Integer theLength = -1) 133 { 134 NCollection_UtfIterator<TypeFrom> anIterRead (theStringUtf); 135 if (*anIterRead == 0) 136 { 137 // special case 138 Clear(); 139 return; 140 } 141 fromUnicodeImpl (theStringUtf, theLength, anIterRead); 142 } 143 144 //! Copy from multibyte string in current system locale. 145 //! @param theString multibyte string 146 //! @param theLength the length limit in Unicode symbols 147 //! The string is copied till NULL symbol or, if theLength >0, 148 //! till either NULL or theLength-th symbol (which comes first). 149 void FromLocale (const char* theString, 150 const Standard_Integer theLength = -1); 151 152 //! Destructor. 153 ~NCollection_UtfString(); 154 155 //! Compares this string with another one. 156 bool IsEqual (const NCollection_UtfString& theCompare) const; 157 158 //! Returns the substring. 159 //! @param theStart start index (inclusive) of subString 160 //! @param theEnd end index (exclusive) of subString 161 //! @return the substring 162 NCollection_UtfString SubString (const Standard_Integer theStart, 163 const Standard_Integer theEnd) const; 164 165 //! Returns NULL-terminated Unicode string. 166 //! Should not be modified or deleted! 167 //! @return (const Type* ) pointer to string ToCString() const168 const Type* ToCString() const 169 { 170 return myString; 171 } 172 173 //! @return copy in UTF-8 format 174 const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const; 175 176 //! @return copy in UTF-16 format 177 const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const; 178 179 //! @return copy in UTF-32 format 180 const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const; 181 182 //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux) 183 const NCollection_UtfString<Standard_WideChar> ToUtfWide() const; 184 185 //! Converts the string into string in the current system locale. 186 //! @param theBuffer output buffer 187 //! @param theSizeBytes buffer size in bytes 188 //! @return true on success 189 bool ToLocale (char* theBuffer, 190 const Standard_Integer theSizeBytes) const; 191 192 //! @return true if string is empty IsEmpty() const193 bool IsEmpty() const 194 { 195 return myString[0] == Type(0); 196 } 197 198 //! Zero string. 199 void Clear(); 200 201 public: //! @name assign operators 202 203 //! Copy from another string. 204 const NCollection_UtfString& Assign (const NCollection_UtfString& theOther); 205 206 //! Exchange the data of two strings (without reallocating memory). 207 void Swap (NCollection_UtfString& theOther); 208 209 //! Copy from another string. operator =(const NCollection_UtfString & theOther)210 const NCollection_UtfString& operator= (const NCollection_UtfString& theOther) { return Assign (theOther); } 211 212 #ifndef OCCT_NO_RVALUE_REFERENCE 213 //! Move assignment operator. operator =(NCollection_UtfString && theOther)214 NCollection_UtfString& operator= (NCollection_UtfString&& theOther) { Swap (theOther); return *this; } 215 #endif 216 217 //! Copy from UTF-8 NULL-terminated string. 218 const NCollection_UtfString& operator= (const char* theStringUtf8); 219 220 //! Copy from wchar_t UTF NULL-terminated string. 221 const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide); 222 223 //! Join strings. 224 NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend); 225 226 //! Join two strings. operator +(const NCollection_UtfString & theLeft,const NCollection_UtfString & theRight)227 friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft, 228 const NCollection_UtfString& theRight) 229 { 230 NCollection_UtfString aSumm; 231 strFree (aSumm.myString); 232 aSumm.mySize = theLeft.mySize + theRight.mySize; 233 aSumm.myLength = theLeft.myLength + theRight.myLength; 234 aSumm.myString = strAlloc (aSumm.mySize); 235 236 // copy bytes 237 strCopy ((Standard_Byte* )aSumm.myString, (const Standard_Byte* )theLeft.myString, theLeft.mySize); 238 strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize); 239 return aSumm; 240 } 241 242 public: //! @name compare operators 243 operator ==(const NCollection_UtfString & theCompare) const244 bool operator== (const NCollection_UtfString& theCompare) const 245 { 246 return IsEqual (theCompare); 247 } 248 bool operator!= (const NCollection_UtfString& theCompare) const; 249 250 private: //! @name low-level methods 251 252 //! Implementation of copy routine for string of the same type fromUnicodeImpl(const Type * theStringUtf,const Standard_Integer theLength,NCollection_UtfIterator<Type> & theIterator)253 void fromUnicodeImpl (const Type* theStringUtf, const Standard_Integer theLength, NCollection_UtfIterator<Type>& theIterator) 254 { 255 Type* anOldBuffer = myString; // necessary in case of self-copying 256 257 // advance to the end 258 const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast(); 259 for(; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) {} 260 261 mySize = Standard_Integer((Standard_Byte* )theIterator.BufferHere() - (Standard_Byte* )theStringUtf); 262 myLength = theIterator.Index(); 263 myString = strAlloc (mySize); 264 strCopy ((Standard_Byte* )myString, (const Standard_Byte* )theStringUtf, mySize); 265 266 strFree (anOldBuffer); 267 } 268 269 //! Implementation of copy routine for string of other types 270 template<typename TypeFrom> fromUnicodeImpl(typename opencascade::std::enable_if<!opencascade::std::is_same<Type,TypeFrom>::value,const TypeFrom * >::type theStringUtf,const Standard_Integer theLength,NCollection_UtfIterator<TypeFrom> & theIterator)271 void fromUnicodeImpl (typename opencascade::std::enable_if<! opencascade::std::is_same<Type, TypeFrom>::value, const TypeFrom*>::type theStringUtf, 272 const Standard_Integer theLength, NCollection_UtfIterator<TypeFrom>& theIterator) 273 { 274 Type* anOldBuffer = myString; // necessary in case of self-copying 275 276 mySize = 0; 277 const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast(); 278 for (; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) 279 { 280 mySize += theIterator.template AdvanceBytesUtf<Type>(); 281 } 282 myLength = theIterator.Index(); 283 284 myString = strAlloc (mySize); 285 286 // copy string 287 theIterator.Init (theStringUtf); 288 Type* anIterWrite = myString; 289 for (; *theIterator != 0 && theIterator.Index() < myLength; ++theIterator) 290 { 291 anIterWrite = theIterator.GetUtf (anIterWrite); 292 } 293 294 strFree (anOldBuffer); 295 } 296 297 //! Allocate NULL-terminated string buffer. strAlloc(const Standard_Size theSizeBytes)298 static Type* strAlloc (const Standard_Size theSizeBytes) 299 { 300 Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type)); 301 if (aPtr != NULL) 302 { 303 // always NULL-terminate the string 304 aPtr[theSizeBytes / sizeof(Type)] = Type(0); 305 } 306 return aPtr; 307 } 308 309 //! Release string buffer and nullify the pointer. strFree(Type * & thePtr)310 static void strFree (Type*& thePtr) 311 { 312 Standard::Free (thePtr); 313 } 314 315 //! Provides bytes interface to avoid incorrect pointer arithmetics. strCopy(Standard_Byte * theStrDst,const Standard_Byte * theStrSrc,const Standard_Integer theSizeBytes)316 static void strCopy (Standard_Byte* theStrDst, 317 const Standard_Byte* theStrSrc, 318 const Standard_Integer theSizeBytes) 319 { 320 std::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes); 321 } 322 323 //! Compare two Unicode strings per-byte. strAreEqual(const Type * theString1,const Standard_Integer theSizeBytes1,const Type * theString2,const Standard_Integer theSizeBytes2)324 static bool strAreEqual (const Type* theString1, 325 const Standard_Integer theSizeBytes1, 326 const Type* theString2, 327 const Standard_Integer theSizeBytes2) 328 { 329 return (theSizeBytes1 == theSizeBytes2) 330 && (std::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0); 331 } 332 333 private: //! @name private fields 334 335 Type* myString; //!< string buffer 336 Standard_Integer mySize; //!< buffer size in bytes, excluding NULL-termination symbol 337 Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol) 338 339 }; 340 341 typedef NCollection_UtfString<Standard_Utf8Char> NCollection_Utf8String; 342 typedef NCollection_UtfString<Standard_Utf16Char> NCollection_Utf16String; 343 typedef NCollection_UtfString<Standard_Utf32Char> NCollection_Utf32String; 344 typedef NCollection_UtfString<Standard_WideChar> NCollection_UtfWideString; 345 346 // template implementation (inline methods) 347 #include "NCollection_UtfString.lxx" 348 349 #endif // _NCollection_UtfString_H__ 350