1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_INCLUDE_FX_STRING_H_
8 #define CORE_FXCRT_INCLUDE_FX_STRING_H_
9 
10 #include <stdint.h>  // For intptr_t.
11 #include <algorithm>
12 
13 #include "core/fxcrt/cfx_string_c_template.h"
14 #include "core/fxcrt/cfx_string_data_template.h"
15 #include "core/fxcrt/include/cfx_retain_ptr.h"
16 #include "core/fxcrt/include/fx_memory.h"
17 #include "core/fxcrt/include/fx_system.h"
18 
19 class CFX_ByteString;
20 class CFX_WideString;
21 
22 using CFX_ByteStringC = CFX_StringCTemplate<FX_CHAR>;
23 using CFX_WideStringC = CFX_StringCTemplate<FX_WCHAR>;
24 
// Packs four values into one 32-bit identifier: |c1| lands in bits 24-31,
// |c2| in bits 16-23, |c3| in bits 8-15 and |c4| in bits 0-7.
// Every argument is parenthesized before it is converted, so an expression
// argument (e.g. `a | b` or `cond ? x : y`) is converted and shifted as a
// whole rather than having the cast bind to its first operand only.
#define FXBSTR_ID(c1, c2, c3, c4)                                   \
  ((static_cast<uint32_t>(c1) << 24) |                              \
   (static_cast<uint32_t>(c2) << 16) |                              \
   (static_cast<uint32_t>(c3) << 8) | (static_cast<uint32_t>(c4)))
28 
// Expands to a CFX_WideStringC built from |wstr| with a length of
// FX_ArraySize(wstr) - 1.
// NOTE(review): presumably meant for wide string literals/arrays, where the
// "- 1" drops the terminating NUL -- confirm against FX_ArraySize.
#define FX_WSTRC(wstr) \
  CFX_WideStringC((wstr), FX_ArraySize(wstr) - 1)
30 
31 // A mutable string with shared buffers using copy-on-write semantics that
32 // avoids the cost of std::string's iterator stability guarantees.
33 class CFX_ByteString {
34  public:
35   using CharType = FX_CHAR;
36 
37   CFX_ByteString();
38   CFX_ByteString(const CFX_ByteString& other);
39   CFX_ByteString(CFX_ByteString&& other);
40 
41   // Deliberately implicit to avoid calling on every string literal.
42   CFX_ByteString(char ch);
43   CFX_ByteString(const FX_CHAR* ptr);
44 
45   CFX_ByteString(const FX_CHAR* ptr, FX_STRSIZE len);
46   CFX_ByteString(const uint8_t* ptr, FX_STRSIZE len);
47 
48   explicit CFX_ByteString(const CFX_ByteStringC& bstrc);
49   CFX_ByteString(const CFX_ByteStringC& bstrc1, const CFX_ByteStringC& bstrc2);
50 
51   ~CFX_ByteString();
52 
clear()53   void clear() { m_pData.Reset(); }
54 
55   static CFX_ByteString FromUnicode(const FX_WCHAR* ptr, FX_STRSIZE len = -1);
56   static CFX_ByteString FromUnicode(const CFX_WideString& str);
57 
58   // Explicit conversion to C-style string.
59   // Note: Any subsequent modification of |this| will invalidate the result.
c_str()60   const FX_CHAR* c_str() const { return m_pData ? m_pData->m_String : ""; }
61 
62   // Explicit conversion to uint8_t*.
63   // Note: Any subsequent modification of |this| will invalidate the result.
raw_str()64   const uint8_t* raw_str() const {
65     return m_pData ? reinterpret_cast<const uint8_t*>(m_pData->m_String)
66                    : nullptr;
67   }
68 
69   // Explicit conversion to CFX_ByteStringC.
70   // Note: Any subsequent modification of |this| will invalidate the result.
AsStringC()71   CFX_ByteStringC AsStringC() const {
72     return CFX_ByteStringC(raw_str(), GetLength());
73   }
74 
GetLength()75   FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
IsEmpty()76   bool IsEmpty() const { return !GetLength(); }
77 
78   int Compare(const CFX_ByteStringC& str) const;
79   bool EqualNoCase(const CFX_ByteStringC& str) const;
80 
81   bool operator==(const char* ptr) const;
82   bool operator==(const CFX_ByteStringC& str) const;
83   bool operator==(const CFX_ByteString& other) const;
84 
85   bool operator!=(const char* ptr) const { return !(*this == ptr); }
86   bool operator!=(const CFX_ByteStringC& str) const { return !(*this == str); }
87   bool operator!=(const CFX_ByteString& other) const {
88     return !(*this == other);
89   }
90 
91   bool operator<(const CFX_ByteString& str) const {
92     int result = FXSYS_memcmp(c_str(), str.c_str(),
93                               std::min(GetLength(), str.GetLength()));
94     return result < 0 || (result == 0 && GetLength() < str.GetLength());
95   }
96 
97   const CFX_ByteString& operator=(const FX_CHAR* str);
98   const CFX_ByteString& operator=(const CFX_ByteStringC& bstrc);
99   const CFX_ByteString& operator=(const CFX_ByteString& stringSrc);
100 
101   const CFX_ByteString& operator+=(FX_CHAR ch);
102   const CFX_ByteString& operator+=(const FX_CHAR* str);
103   const CFX_ByteString& operator+=(const CFX_ByteString& str);
104   const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc);
105 
GetAt(FX_STRSIZE nIndex)106   uint8_t GetAt(FX_STRSIZE nIndex) const {
107     return m_pData ? m_pData->m_String[nIndex] : 0;
108   }
109 
110   uint8_t operator[](FX_STRSIZE nIndex) const {
111     return m_pData ? m_pData->m_String[nIndex] : 0;
112   }
113 
114   void SetAt(FX_STRSIZE nIndex, FX_CHAR ch);
115   FX_STRSIZE Insert(FX_STRSIZE index, FX_CHAR ch);
116   FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
117 
118   void Format(const FX_CHAR* lpszFormat, ...);
119   void FormatV(const FX_CHAR* lpszFormat, va_list argList);
120 
121   void Reserve(FX_STRSIZE len);
122   FX_CHAR* GetBuffer(FX_STRSIZE len);
123   void ReleaseBuffer(FX_STRSIZE len = -1);
124 
125   CFX_ByteString Mid(FX_STRSIZE first) const;
126   CFX_ByteString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
127   CFX_ByteString Left(FX_STRSIZE count) const;
128   CFX_ByteString Right(FX_STRSIZE count) const;
129 
130   FX_STRSIZE Find(const CFX_ByteStringC& lpszSub, FX_STRSIZE start = 0) const;
131   FX_STRSIZE Find(FX_CHAR ch, FX_STRSIZE start = 0) const;
132   FX_STRSIZE ReverseFind(FX_CHAR ch) const;
133 
134   void MakeLower();
135   void MakeUpper();
136 
137   void TrimRight();
138   void TrimRight(FX_CHAR chTarget);
139   void TrimRight(const CFX_ByteStringC& lpszTargets);
140 
141   void TrimLeft();
142   void TrimLeft(FX_CHAR chTarget);
143   void TrimLeft(const CFX_ByteStringC& lpszTargets);
144 
145   FX_STRSIZE Replace(const CFX_ByteStringC& lpszOld,
146                      const CFX_ByteStringC& lpszNew);
147 
148   FX_STRSIZE Remove(FX_CHAR ch);
149 
150   CFX_WideString UTF8Decode() const;
151 
152   uint32_t GetID(FX_STRSIZE start_pos = 0) const;
153 
154 #define FXFORMAT_SIGNED 1
155 #define FXFORMAT_HEX 2
156 #define FXFORMAT_CAPITAL 4
157 
158   static CFX_ByteString FormatInteger(int i, uint32_t flags = 0);
159   static CFX_ByteString FormatFloat(FX_FLOAT f, int precision = 0);
160 
161  protected:
162   using StringData = CFX_StringDataTemplate<FX_CHAR>;
163 
164   void ReallocBeforeWrite(FX_STRSIZE nNewLen);
165   void AllocBeforeWrite(FX_STRSIZE nNewLen);
166   void AllocCopy(CFX_ByteString& dest,
167                  FX_STRSIZE nCopyLen,
168                  FX_STRSIZE nCopyIndex) const;
169   void AssignCopy(const FX_CHAR* pSrcData, FX_STRSIZE nSrcLen);
170   void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
171 
172   CFX_RetainPtr<StringData> m_pData;
173   friend class fxcrt_ByteStringConcat_Test;
174 };
175 
176 inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
177   return rhs == lhs;
178 }
179 inline bool operator==(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
180   return rhs == lhs;
181 }
182 inline bool operator!=(const char* lhs, const CFX_ByteString& rhs) {
183   return rhs != lhs;
184 }
185 inline bool operator!=(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
186   return rhs != lhs;
187 }
188 
189 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
190                                 const CFX_ByteStringC& str2) {
191   return CFX_ByteString(str1, str2);
192 }
193 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
194                                 const FX_CHAR* str2) {
195   return CFX_ByteString(str1, str2);
196 }
197 inline CFX_ByteString operator+(const FX_CHAR* str1,
198                                 const CFX_ByteStringC& str2) {
199   return CFX_ByteString(str1, str2);
200 }
201 inline CFX_ByteString operator+(const CFX_ByteStringC& str1, FX_CHAR ch) {
202   return CFX_ByteString(str1, CFX_ByteStringC(ch));
203 }
204 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteStringC& str2) {
205   return CFX_ByteString(ch, str2);
206 }
207 inline CFX_ByteString operator+(const CFX_ByteString& str1,
208                                 const CFX_ByteString& str2) {
209   return CFX_ByteString(str1.AsStringC(), str2.AsStringC());
210 }
211 inline CFX_ByteString operator+(const CFX_ByteString& str1, FX_CHAR ch) {
212   return CFX_ByteString(str1.AsStringC(), CFX_ByteStringC(ch));
213 }
214 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteString& str2) {
215   return CFX_ByteString(ch, str2.AsStringC());
216 }
217 inline CFX_ByteString operator+(const CFX_ByteString& str1,
218                                 const FX_CHAR* str2) {
219   return CFX_ByteString(str1.AsStringC(), str2);
220 }
221 inline CFX_ByteString operator+(const FX_CHAR* str1,
222                                 const CFX_ByteString& str2) {
223   return CFX_ByteString(str1, str2.AsStringC());
224 }
225 inline CFX_ByteString operator+(const CFX_ByteString& str1,
226                                 const CFX_ByteStringC& str2) {
227   return CFX_ByteString(str1.AsStringC(), str2);
228 }
229 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
230                                 const CFX_ByteString& str2) {
231   return CFX_ByteString(str1, str2.AsStringC());
232 }
233 
234 // A mutable string with shared buffers using copy-on-write semantics that
235 // avoids the cost of std::string's iterator stability guarantees.
236 class CFX_WideString {
237  public:
238   using CharType = FX_WCHAR;
239 
240   CFX_WideString();
241   CFX_WideString(const CFX_WideString& other);
242   CFX_WideString(CFX_WideString&& other);
243 
244   // Deliberately implicit to avoid calling on every string literal.
245   CFX_WideString(FX_WCHAR ch);
246   CFX_WideString(const FX_WCHAR* ptr);
247 
248   CFX_WideString(const FX_WCHAR* ptr, FX_STRSIZE len);
249 
250   explicit CFX_WideString(const CFX_WideStringC& str);
251   CFX_WideString(const CFX_WideStringC& str1, const CFX_WideStringC& str2);
252 
253   ~CFX_WideString();
254 
255   static CFX_WideString FromLocal(const CFX_ByteStringC& str);
256   static CFX_WideString FromCodePage(const CFX_ByteStringC& str,
257                                      uint16_t codepage);
258 
259   static CFX_WideString FromUTF8(const CFX_ByteStringC& str);
260   static CFX_WideString FromUTF16LE(const unsigned short* str, FX_STRSIZE len);
261 
262   static FX_STRSIZE WStringLength(const unsigned short* str);
263 
264   // Explicit conversion to C-style wide string.
265   // Note: Any subsequent modification of |this| will invalidate the result.
c_str()266   const FX_WCHAR* c_str() const { return m_pData ? m_pData->m_String : L""; }
267 
268   // Explicit conversion to CFX_WideStringC.
269   // Note: Any subsequent modification of |this| will invalidate the result.
AsStringC()270   CFX_WideStringC AsStringC() const {
271     return CFX_WideStringC(c_str(), GetLength());
272   }
273 
clear()274   void clear() { m_pData.Reset(); }
275 
GetLength()276   FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
IsEmpty()277   bool IsEmpty() const { return !GetLength(); }
278 
279   const CFX_WideString& operator=(const FX_WCHAR* str);
280   const CFX_WideString& operator=(const CFX_WideString& stringSrc);
281   const CFX_WideString& operator=(const CFX_WideStringC& stringSrc);
282 
283   const CFX_WideString& operator+=(const FX_WCHAR* str);
284   const CFX_WideString& operator+=(FX_WCHAR ch);
285   const CFX_WideString& operator+=(const CFX_WideString& str);
286   const CFX_WideString& operator+=(const CFX_WideStringC& str);
287 
288   bool operator==(const wchar_t* ptr) const;
289   bool operator==(const CFX_WideStringC& str) const;
290   bool operator==(const CFX_WideString& other) const;
291 
292   bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
293   bool operator!=(const CFX_WideStringC& str) const { return !(*this == str); }
294   bool operator!=(const CFX_WideString& other) const {
295     return !(*this == other);
296   }
297 
298   bool operator<(const CFX_WideString& str) const {
299     int result =
300         wmemcmp(c_str(), str.c_str(), std::min(GetLength(), str.GetLength()));
301     return result < 0 || (result == 0 && GetLength() < str.GetLength());
302   }
303 
GetAt(FX_STRSIZE nIndex)304   FX_WCHAR GetAt(FX_STRSIZE nIndex) const {
305     return m_pData ? m_pData->m_String[nIndex] : 0;
306   }
307 
308   FX_WCHAR operator[](FX_STRSIZE nIndex) const {
309     return m_pData ? m_pData->m_String[nIndex] : 0;
310   }
311 
312   void SetAt(FX_STRSIZE nIndex, FX_WCHAR ch);
313 
314   int Compare(const FX_WCHAR* str) const;
315   int Compare(const CFX_WideString& str) const;
316   int CompareNoCase(const FX_WCHAR* str) const;
317 
318 
319   CFX_WideString Mid(FX_STRSIZE first) const;
320   CFX_WideString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
321   CFX_WideString Left(FX_STRSIZE count) const;
322   CFX_WideString Right(FX_STRSIZE count) const;
323 
324   FX_STRSIZE Insert(FX_STRSIZE index, FX_WCHAR ch);
325   FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
326 
327   void Format(const FX_WCHAR* lpszFormat, ...);
328   void FormatV(const FX_WCHAR* lpszFormat, va_list argList);
329 
330   void MakeLower();
331   void MakeUpper();
332 
333   void TrimRight();
334   void TrimRight(FX_WCHAR chTarget);
335   void TrimRight(const CFX_WideStringC& pTargets);
336 
337   void TrimLeft();
338   void TrimLeft(FX_WCHAR chTarget);
339   void TrimLeft(const CFX_WideStringC& pTargets);
340 
341   void Reserve(FX_STRSIZE len);
342   FX_WCHAR* GetBuffer(FX_STRSIZE len);
343   void ReleaseBuffer(FX_STRSIZE len = -1);
344 
345   int GetInteger() const;
346   FX_FLOAT GetFloat() const;
347 
348   FX_STRSIZE Find(const CFX_WideStringC& pSub, FX_STRSIZE start = 0) const;
349   FX_STRSIZE Find(FX_WCHAR ch, FX_STRSIZE start = 0) const;
350   FX_STRSIZE Replace(const CFX_WideStringC& pOld, const CFX_WideStringC& pNew);
351   FX_STRSIZE Remove(FX_WCHAR ch);
352 
353   CFX_ByteString UTF8Encode() const;
354   CFX_ByteString UTF16LE_Encode() const;
355 
356  protected:
357   using StringData = CFX_StringDataTemplate<FX_WCHAR>;
358 
359   void ReallocBeforeWrite(FX_STRSIZE nLen);
360   void AllocBeforeWrite(FX_STRSIZE nLen);
361   void AllocCopy(CFX_WideString& dest,
362                  FX_STRSIZE nCopyLen,
363                  FX_STRSIZE nCopyIndex) const;
364   void AssignCopy(const FX_WCHAR* pSrcData, FX_STRSIZE nSrcLen);
365   void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
366 
367   CFX_RetainPtr<StringData> m_pData;
368   friend class fxcrt_WideStringConcatInPlace_Test;
369 };
370 
371 inline CFX_WideString operator+(const CFX_WideStringC& str1,
372                                 const CFX_WideStringC& str2) {
373   return CFX_WideString(str1, str2);
374 }
375 inline CFX_WideString operator+(const CFX_WideStringC& str1,
376                                 const FX_WCHAR* str2) {
377   return CFX_WideString(str1, str2);
378 }
379 inline CFX_WideString operator+(const FX_WCHAR* str1,
380                                 const CFX_WideStringC& str2) {
381   return CFX_WideString(str1, str2);
382 }
383 inline CFX_WideString operator+(const CFX_WideStringC& str1, FX_WCHAR ch) {
384   return CFX_WideString(str1, CFX_WideStringC(ch));
385 }
386 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideStringC& str2) {
387   return CFX_WideString(ch, str2);
388 }
389 inline CFX_WideString operator+(const CFX_WideString& str1,
390                                 const CFX_WideString& str2) {
391   return CFX_WideString(str1.AsStringC(), str2.AsStringC());
392 }
393 inline CFX_WideString operator+(const CFX_WideString& str1, FX_WCHAR ch) {
394   return CFX_WideString(str1.AsStringC(), CFX_WideStringC(ch));
395 }
396 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideString& str2) {
397   return CFX_WideString(ch, str2.AsStringC());
398 }
399 inline CFX_WideString operator+(const CFX_WideString& str1,
400                                 const FX_WCHAR* str2) {
401   return CFX_WideString(str1.AsStringC(), str2);
402 }
403 inline CFX_WideString operator+(const FX_WCHAR* str1,
404                                 const CFX_WideString& str2) {
405   return CFX_WideString(str1, str2.AsStringC());
406 }
407 inline CFX_WideString operator+(const CFX_WideString& str1,
408                                 const CFX_WideStringC& str2) {
409   return CFX_WideString(str1.AsStringC(), str2);
410 }
411 inline CFX_WideString operator+(const CFX_WideStringC& str1,
412                                 const CFX_WideString& str2) {
413   return CFX_WideString(str1, str2.AsStringC());
414 }
415 inline bool operator==(const wchar_t* lhs, const CFX_WideString& rhs) {
416   return rhs == lhs;
417 }
418 inline bool operator==(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
419   return rhs == lhs;
420 }
421 inline bool operator!=(const wchar_t* lhs, const CFX_WideString& rhs) {
422   return rhs != lhs;
423 }
424 inline bool operator!=(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
425   return rhs != lhs;
426 }
427 
428 CFX_ByteString FX_UTF8Encode(const FX_WCHAR* pwsStr, FX_STRSIZE len);
FX_UTF8Encode(const CFX_WideStringC & wsStr)429 inline CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr) {
430   return FX_UTF8Encode(wsStr.c_str(), wsStr.GetLength());
431 }
FX_UTF8Encode(const CFX_WideString & wsStr)432 inline CFX_ByteString FX_UTF8Encode(const CFX_WideString& wsStr) {
433   return FX_UTF8Encode(wsStr.c_str(), wsStr.GetLength());
434 }
435 
436 FX_FLOAT FX_atof(const CFX_ByteStringC& str);
FX_atof(const CFX_WideStringC & wsStr)437 inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) {
438   return FX_atof(FX_UTF8Encode(wsStr.c_str(), wsStr.GetLength()).c_str());
439 }
440 bool FX_atonum(const CFX_ByteStringC& str, void* pData);
441 FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
442 
443 #endif  // CORE_FXCRT_INCLUDE_FX_STRING_H_
444