1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #ifndef CORE_FXCRT_INCLUDE_FX_STRING_H_
8 #define CORE_FXCRT_INCLUDE_FX_STRING_H_
9
10 #include <stdint.h> // For intptr_t.
11 #include <algorithm>
12
13 #include "core/fxcrt/cfx_string_c_template.h"
14 #include "core/fxcrt/cfx_string_data_template.h"
15 #include "core/fxcrt/include/cfx_retain_ptr.h"
16 #include "core/fxcrt/include/fx_memory.h"
17 #include "core/fxcrt/include/fx_system.h"
18
19 class CFX_ByteString;
20 class CFX_WideString;
21
22 using CFX_ByteStringC = CFX_StringCTemplate<FX_CHAR>;
23 using CFX_WideStringC = CFX_StringCTemplate<FX_WCHAR>;
24
// Packs four values into a single 32-bit identifier: |c1| lands in bits
// 31..24, |c2| in bits 23..16, |c3| in bits 15..8 and |c4| in bits 7..0.
// Every argument is parenthesized before the cast so that compound
// expressions (e.g. `a | b` or `x ? y : z`) are converted as a whole rather
// than having the cast bind to their first operand only.
// NOTE(review): arguments are converted straight to uint32_t, so a negative
// char value sign-extends into the upper bits; callers presumably pass values
// in the range 0..255 - confirm.
#define FXBSTR_ID(c1, c2, c3, c4)                      \
  (((uint32_t)(c1) << 24) | ((uint32_t)(c2) << 16) |   \
   ((uint32_t)(c3) << 8) | ((uint32_t)(c4)))

// Builds a CFX_WideStringC from the array |wstr|, passing
// FX_ArraySize(wstr) - 1 as the length. Presumably meant for wide string
// literals, where the -1 drops the terminating NUL - confirm against
// FX_ArraySize's definition.
#define FX_WSTRC(wstr) CFX_WideStringC(wstr, FX_ArraySize(wstr) - 1)
30
31 // A mutable string with shared buffers using copy-on-write semantics that
32 // avoids the cost of std::string's iterator stability guarantees.
33 class CFX_ByteString {
34 public:
35 using CharType = FX_CHAR;
36
37 CFX_ByteString();
38 CFX_ByteString(const CFX_ByteString& other);
39 CFX_ByteString(CFX_ByteString&& other);
40
41 // Deliberately implicit to avoid calling on every string literal.
42 CFX_ByteString(char ch);
43 CFX_ByteString(const FX_CHAR* ptr);
44
45 CFX_ByteString(const FX_CHAR* ptr, FX_STRSIZE len);
46 CFX_ByteString(const uint8_t* ptr, FX_STRSIZE len);
47
48 explicit CFX_ByteString(const CFX_ByteStringC& bstrc);
49 CFX_ByteString(const CFX_ByteStringC& bstrc1, const CFX_ByteStringC& bstrc2);
50
51 ~CFX_ByteString();
52
clear()53 void clear() { m_pData.Reset(); }
54
55 static CFX_ByteString FromUnicode(const FX_WCHAR* ptr, FX_STRSIZE len = -1);
56 static CFX_ByteString FromUnicode(const CFX_WideString& str);
57
58 // Explicit conversion to C-style string.
59 // Note: Any subsequent modification of |this| will invalidate the result.
c_str()60 const FX_CHAR* c_str() const { return m_pData ? m_pData->m_String : ""; }
61
62 // Explicit conversion to uint8_t*.
63 // Note: Any subsequent modification of |this| will invalidate the result.
raw_str()64 const uint8_t* raw_str() const {
65 return m_pData ? reinterpret_cast<const uint8_t*>(m_pData->m_String)
66 : nullptr;
67 }
68
69 // Explicit conversion to CFX_ByteStringC.
70 // Note: Any subsequent modification of |this| will invalidate the result.
AsStringC()71 CFX_ByteStringC AsStringC() const {
72 return CFX_ByteStringC(raw_str(), GetLength());
73 }
74
GetLength()75 FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
IsEmpty()76 bool IsEmpty() const { return !GetLength(); }
77
78 int Compare(const CFX_ByteStringC& str) const;
79 bool EqualNoCase(const CFX_ByteStringC& str) const;
80
81 bool operator==(const char* ptr) const;
82 bool operator==(const CFX_ByteStringC& str) const;
83 bool operator==(const CFX_ByteString& other) const;
84
85 bool operator!=(const char* ptr) const { return !(*this == ptr); }
86 bool operator!=(const CFX_ByteStringC& str) const { return !(*this == str); }
87 bool operator!=(const CFX_ByteString& other) const {
88 return !(*this == other);
89 }
90
91 bool operator<(const CFX_ByteString& str) const {
92 int result = FXSYS_memcmp(c_str(), str.c_str(),
93 std::min(GetLength(), str.GetLength()));
94 return result < 0 || (result == 0 && GetLength() < str.GetLength());
95 }
96
97 const CFX_ByteString& operator=(const FX_CHAR* str);
98 const CFX_ByteString& operator=(const CFX_ByteStringC& bstrc);
99 const CFX_ByteString& operator=(const CFX_ByteString& stringSrc);
100
101 const CFX_ByteString& operator+=(FX_CHAR ch);
102 const CFX_ByteString& operator+=(const FX_CHAR* str);
103 const CFX_ByteString& operator+=(const CFX_ByteString& str);
104 const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc);
105
GetAt(FX_STRSIZE nIndex)106 uint8_t GetAt(FX_STRSIZE nIndex) const {
107 return m_pData ? m_pData->m_String[nIndex] : 0;
108 }
109
110 uint8_t operator[](FX_STRSIZE nIndex) const {
111 return m_pData ? m_pData->m_String[nIndex] : 0;
112 }
113
114 void SetAt(FX_STRSIZE nIndex, FX_CHAR ch);
115 FX_STRSIZE Insert(FX_STRSIZE index, FX_CHAR ch);
116 FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
117
118 void Format(const FX_CHAR* lpszFormat, ...);
119 void FormatV(const FX_CHAR* lpszFormat, va_list argList);
120
121 void Reserve(FX_STRSIZE len);
122 FX_CHAR* GetBuffer(FX_STRSIZE len);
123 void ReleaseBuffer(FX_STRSIZE len = -1);
124
125 CFX_ByteString Mid(FX_STRSIZE first) const;
126 CFX_ByteString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
127 CFX_ByteString Left(FX_STRSIZE count) const;
128 CFX_ByteString Right(FX_STRSIZE count) const;
129
130 FX_STRSIZE Find(const CFX_ByteStringC& lpszSub, FX_STRSIZE start = 0) const;
131 FX_STRSIZE Find(FX_CHAR ch, FX_STRSIZE start = 0) const;
132 FX_STRSIZE ReverseFind(FX_CHAR ch) const;
133
134 void MakeLower();
135 void MakeUpper();
136
137 void TrimRight();
138 void TrimRight(FX_CHAR chTarget);
139 void TrimRight(const CFX_ByteStringC& lpszTargets);
140
141 void TrimLeft();
142 void TrimLeft(FX_CHAR chTarget);
143 void TrimLeft(const CFX_ByteStringC& lpszTargets);
144
145 FX_STRSIZE Replace(const CFX_ByteStringC& lpszOld,
146 const CFX_ByteStringC& lpszNew);
147
148 FX_STRSIZE Remove(FX_CHAR ch);
149
150 CFX_WideString UTF8Decode() const;
151
152 uint32_t GetID(FX_STRSIZE start_pos = 0) const;
153
154 #define FXFORMAT_SIGNED 1
155 #define FXFORMAT_HEX 2
156 #define FXFORMAT_CAPITAL 4
157
158 static CFX_ByteString FormatInteger(int i, uint32_t flags = 0);
159 static CFX_ByteString FormatFloat(FX_FLOAT f, int precision = 0);
160
161 protected:
162 using StringData = CFX_StringDataTemplate<FX_CHAR>;
163
164 void ReallocBeforeWrite(FX_STRSIZE nNewLen);
165 void AllocBeforeWrite(FX_STRSIZE nNewLen);
166 void AllocCopy(CFX_ByteString& dest,
167 FX_STRSIZE nCopyLen,
168 FX_STRSIZE nCopyIndex) const;
169 void AssignCopy(const FX_CHAR* pSrcData, FX_STRSIZE nSrcLen);
170 void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
171
172 CFX_RetainPtr<StringData> m_pData;
173 friend class fxcrt_ByteStringConcat_Test;
174 };
175
176 inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
177 return rhs == lhs;
178 }
179 inline bool operator==(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
180 return rhs == lhs;
181 }
182 inline bool operator!=(const char* lhs, const CFX_ByteString& rhs) {
183 return rhs != lhs;
184 }
185 inline bool operator!=(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
186 return rhs != lhs;
187 }
188
189 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
190 const CFX_ByteStringC& str2) {
191 return CFX_ByteString(str1, str2);
192 }
193 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
194 const FX_CHAR* str2) {
195 return CFX_ByteString(str1, str2);
196 }
197 inline CFX_ByteString operator+(const FX_CHAR* str1,
198 const CFX_ByteStringC& str2) {
199 return CFX_ByteString(str1, str2);
200 }
201 inline CFX_ByteString operator+(const CFX_ByteStringC& str1, FX_CHAR ch) {
202 return CFX_ByteString(str1, CFX_ByteStringC(ch));
203 }
204 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteStringC& str2) {
205 return CFX_ByteString(ch, str2);
206 }
207 inline CFX_ByteString operator+(const CFX_ByteString& str1,
208 const CFX_ByteString& str2) {
209 return CFX_ByteString(str1.AsStringC(), str2.AsStringC());
210 }
211 inline CFX_ByteString operator+(const CFX_ByteString& str1, FX_CHAR ch) {
212 return CFX_ByteString(str1.AsStringC(), CFX_ByteStringC(ch));
213 }
214 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteString& str2) {
215 return CFX_ByteString(ch, str2.AsStringC());
216 }
217 inline CFX_ByteString operator+(const CFX_ByteString& str1,
218 const FX_CHAR* str2) {
219 return CFX_ByteString(str1.AsStringC(), str2);
220 }
221 inline CFX_ByteString operator+(const FX_CHAR* str1,
222 const CFX_ByteString& str2) {
223 return CFX_ByteString(str1, str2.AsStringC());
224 }
225 inline CFX_ByteString operator+(const CFX_ByteString& str1,
226 const CFX_ByteStringC& str2) {
227 return CFX_ByteString(str1.AsStringC(), str2);
228 }
229 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
230 const CFX_ByteString& str2) {
231 return CFX_ByteString(str1, str2.AsStringC());
232 }
233
234 // A mutable string with shared buffers using copy-on-write semantics that
235 // avoids the cost of std::string's iterator stability guarantees.
236 class CFX_WideString {
237 public:
238 using CharType = FX_WCHAR;
239
240 CFX_WideString();
241 CFX_WideString(const CFX_WideString& other);
242 CFX_WideString(CFX_WideString&& other);
243
244 // Deliberately implicit to avoid calling on every string literal.
245 CFX_WideString(FX_WCHAR ch);
246 CFX_WideString(const FX_WCHAR* ptr);
247
248 CFX_WideString(const FX_WCHAR* ptr, FX_STRSIZE len);
249
250 explicit CFX_WideString(const CFX_WideStringC& str);
251 CFX_WideString(const CFX_WideStringC& str1, const CFX_WideStringC& str2);
252
253 ~CFX_WideString();
254
255 static CFX_WideString FromLocal(const CFX_ByteStringC& str);
256 static CFX_WideString FromCodePage(const CFX_ByteStringC& str,
257 uint16_t codepage);
258
259 static CFX_WideString FromUTF8(const CFX_ByteStringC& str);
260 static CFX_WideString FromUTF16LE(const unsigned short* str, FX_STRSIZE len);
261
262 static FX_STRSIZE WStringLength(const unsigned short* str);
263
264 // Explicit conversion to C-style wide string.
265 // Note: Any subsequent modification of |this| will invalidate the result.
c_str()266 const FX_WCHAR* c_str() const { return m_pData ? m_pData->m_String : L""; }
267
268 // Explicit conversion to CFX_WideStringC.
269 // Note: Any subsequent modification of |this| will invalidate the result.
AsStringC()270 CFX_WideStringC AsStringC() const {
271 return CFX_WideStringC(c_str(), GetLength());
272 }
273
clear()274 void clear() { m_pData.Reset(); }
275
GetLength()276 FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
IsEmpty()277 bool IsEmpty() const { return !GetLength(); }
278
279 const CFX_WideString& operator=(const FX_WCHAR* str);
280 const CFX_WideString& operator=(const CFX_WideString& stringSrc);
281 const CFX_WideString& operator=(const CFX_WideStringC& stringSrc);
282
283 const CFX_WideString& operator+=(const FX_WCHAR* str);
284 const CFX_WideString& operator+=(FX_WCHAR ch);
285 const CFX_WideString& operator+=(const CFX_WideString& str);
286 const CFX_WideString& operator+=(const CFX_WideStringC& str);
287
288 bool operator==(const wchar_t* ptr) const;
289 bool operator==(const CFX_WideStringC& str) const;
290 bool operator==(const CFX_WideString& other) const;
291
292 bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
293 bool operator!=(const CFX_WideStringC& str) const { return !(*this == str); }
294 bool operator!=(const CFX_WideString& other) const {
295 return !(*this == other);
296 }
297
298 bool operator<(const CFX_WideString& str) const {
299 int result =
300 wmemcmp(c_str(), str.c_str(), std::min(GetLength(), str.GetLength()));
301 return result < 0 || (result == 0 && GetLength() < str.GetLength());
302 }
303
GetAt(FX_STRSIZE nIndex)304 FX_WCHAR GetAt(FX_STRSIZE nIndex) const {
305 return m_pData ? m_pData->m_String[nIndex] : 0;
306 }
307
308 FX_WCHAR operator[](FX_STRSIZE nIndex) const {
309 return m_pData ? m_pData->m_String[nIndex] : 0;
310 }
311
312 void SetAt(FX_STRSIZE nIndex, FX_WCHAR ch);
313
314 int Compare(const FX_WCHAR* str) const;
315 int Compare(const CFX_WideString& str) const;
316 int CompareNoCase(const FX_WCHAR* str) const;
317
318
319 CFX_WideString Mid(FX_STRSIZE first) const;
320 CFX_WideString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
321 CFX_WideString Left(FX_STRSIZE count) const;
322 CFX_WideString Right(FX_STRSIZE count) const;
323
324 FX_STRSIZE Insert(FX_STRSIZE index, FX_WCHAR ch);
325 FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
326
327 void Format(const FX_WCHAR* lpszFormat, ...);
328 void FormatV(const FX_WCHAR* lpszFormat, va_list argList);
329
330 void MakeLower();
331 void MakeUpper();
332
333 void TrimRight();
334 void TrimRight(FX_WCHAR chTarget);
335 void TrimRight(const CFX_WideStringC& pTargets);
336
337 void TrimLeft();
338 void TrimLeft(FX_WCHAR chTarget);
339 void TrimLeft(const CFX_WideStringC& pTargets);
340
341 void Reserve(FX_STRSIZE len);
342 FX_WCHAR* GetBuffer(FX_STRSIZE len);
343 void ReleaseBuffer(FX_STRSIZE len = -1);
344
345 int GetInteger() const;
346 FX_FLOAT GetFloat() const;
347
348 FX_STRSIZE Find(const CFX_WideStringC& pSub, FX_STRSIZE start = 0) const;
349 FX_STRSIZE Find(FX_WCHAR ch, FX_STRSIZE start = 0) const;
350 FX_STRSIZE Replace(const CFX_WideStringC& pOld, const CFX_WideStringC& pNew);
351 FX_STRSIZE Remove(FX_WCHAR ch);
352
353 CFX_ByteString UTF8Encode() const;
354 CFX_ByteString UTF16LE_Encode() const;
355
356 protected:
357 using StringData = CFX_StringDataTemplate<FX_WCHAR>;
358
359 void ReallocBeforeWrite(FX_STRSIZE nLen);
360 void AllocBeforeWrite(FX_STRSIZE nLen);
361 void AllocCopy(CFX_WideString& dest,
362 FX_STRSIZE nCopyLen,
363 FX_STRSIZE nCopyIndex) const;
364 void AssignCopy(const FX_WCHAR* pSrcData, FX_STRSIZE nSrcLen);
365 void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
366
367 CFX_RetainPtr<StringData> m_pData;
368 friend class fxcrt_WideStringConcatInPlace_Test;
369 };
370
371 inline CFX_WideString operator+(const CFX_WideStringC& str1,
372 const CFX_WideStringC& str2) {
373 return CFX_WideString(str1, str2);
374 }
375 inline CFX_WideString operator+(const CFX_WideStringC& str1,
376 const FX_WCHAR* str2) {
377 return CFX_WideString(str1, str2);
378 }
379 inline CFX_WideString operator+(const FX_WCHAR* str1,
380 const CFX_WideStringC& str2) {
381 return CFX_WideString(str1, str2);
382 }
383 inline CFX_WideString operator+(const CFX_WideStringC& str1, FX_WCHAR ch) {
384 return CFX_WideString(str1, CFX_WideStringC(ch));
385 }
386 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideStringC& str2) {
387 return CFX_WideString(ch, str2);
388 }
389 inline CFX_WideString operator+(const CFX_WideString& str1,
390 const CFX_WideString& str2) {
391 return CFX_WideString(str1.AsStringC(), str2.AsStringC());
392 }
393 inline CFX_WideString operator+(const CFX_WideString& str1, FX_WCHAR ch) {
394 return CFX_WideString(str1.AsStringC(), CFX_WideStringC(ch));
395 }
396 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideString& str2) {
397 return CFX_WideString(ch, str2.AsStringC());
398 }
399 inline CFX_WideString operator+(const CFX_WideString& str1,
400 const FX_WCHAR* str2) {
401 return CFX_WideString(str1.AsStringC(), str2);
402 }
403 inline CFX_WideString operator+(const FX_WCHAR* str1,
404 const CFX_WideString& str2) {
405 return CFX_WideString(str1, str2.AsStringC());
406 }
407 inline CFX_WideString operator+(const CFX_WideString& str1,
408 const CFX_WideStringC& str2) {
409 return CFX_WideString(str1.AsStringC(), str2);
410 }
411 inline CFX_WideString operator+(const CFX_WideStringC& str1,
412 const CFX_WideString& str2) {
413 return CFX_WideString(str1, str2.AsStringC());
414 }
415 inline bool operator==(const wchar_t* lhs, const CFX_WideString& rhs) {
416 return rhs == lhs;
417 }
418 inline bool operator==(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
419 return rhs == lhs;
420 }
421 inline bool operator!=(const wchar_t* lhs, const CFX_WideString& rhs) {
422 return rhs != lhs;
423 }
424 inline bool operator!=(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
425 return rhs != lhs;
426 }
427
428 CFX_ByteString FX_UTF8Encode(const FX_WCHAR* pwsStr, FX_STRSIZE len);
FX_UTF8Encode(const CFX_WideStringC & wsStr)429 inline CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr) {
430 return FX_UTF8Encode(wsStr.c_str(), wsStr.GetLength());
431 }
FX_UTF8Encode(const CFX_WideString & wsStr)432 inline CFX_ByteString FX_UTF8Encode(const CFX_WideString& wsStr) {
433 return FX_UTF8Encode(wsStr.c_str(), wsStr.GetLength());
434 }
435
436 FX_FLOAT FX_atof(const CFX_ByteStringC& str);
FX_atof(const CFX_WideStringC & wsStr)437 inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) {
438 return FX_atof(FX_UTF8Encode(wsStr.c_str(), wsStr.GetLength()).c_str());
439 }
440 bool FX_atonum(const CFX_ByteStringC& str, void* pData);
441 FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
442
443 #endif // CORE_FXCRT_INCLUDE_FX_STRING_H_
444