1 // madronalib: a C++ framework for DSP applications.
2 // Copyright (c) 2020 Madrona Labs LLC. http://www.madronalabs.com
3 // Distributed under the MIT license: http://madrona-labs.mit-license.org/
4
5 // assignable but otherwise immutable UTF-8 text object class
6
7 #pragma once
8
#include <cstring>
#include <memory>
#include <string>  // to remove
#include <vector>
12
13 namespace ml
14 {
15 // ----------------------------------------------------------------
// TextFragment - a minimal, immutable string class. Guaranteed not to allocate
// heap memory if the length in bytes is below kShortFragmentSizeInChars.
18
// Sizing for short fragments: a count of code points, and the matching count
// of chars obtained by allowing 4 chars for every code point.
static constexpr int kShortFragmentSizeInCodePoints{16};
static constexpr int kShortFragmentSizeInChars{4 * kShortFragmentSizeInCodePoints};

// The type used to hold one code point.
using CodePoint = char32_t;
23
24 class TextFragment
25 {
26 public:
27 class Iterator
28 {
29 class Impl;
30 std::unique_ptr<Impl> pImpl;
31
32 public:
33 Iterator(const char* pos);
34
35 ~Iterator(); // defined in the implementation file, where impl is a
36 // complete type
37 // Iterator(Iterator&&) = default;
38 Iterator(const Iterator&);
39 // Iterator& operator=(Iterator&&); // defined in the implementation file
40 Iterator& operator=(const Iterator&) = delete;
41
42 CodePoint operator*();
43 // CodePoint operator->() { return _utf8Iter.operator->(); }
44 Iterator& operator++();
45 CodePoint operator++(int i);
46
47 friend bool operator!=(Iterator lhs, Iterator rhs);
48 friend bool operator==(Iterator lhs, Iterator rhs);
49 };
50
51 TextFragment() noexcept;
52
53 /*
54 this could be a good idea but the (const char*) ctor is taking precedence,
55 revisit template<size_t N> TextFragment(const char(&p)[N]) noexcept : mSize(N)
56 {
57 std::cout << "?";
58 create();
59 if(mpText)
60 {
61 std::copy(p, p + mSize, mpText);
62 nullTerminate();
63 }
64 }
65 */
66
67 //
68 TextFragment(const char* pChars) noexcept;
69
70 // this ctor can be used to save the work of counting the length of the input
71 // if we know it already, as with static HashedCharArrays.
72 TextFragment(const char* pChars, size_t len) noexcept;
73
74 // single code point ctor
75 TextFragment(CodePoint c) noexcept;
76
77 // copy ctor
78 TextFragment(const TextFragment& a) noexcept;
79
80 // copy assignment operator: TextFragment is assignable but otherwise
81 // immutable.
82 TextFragment& operator=(const TextFragment& b) noexcept;
83
84 // move ctor
85 TextFragment(TextFragment&& b) noexcept;
86
87 // move assignment operator
88 TextFragment& operator=(TextFragment&& b) noexcept;
89
90 // use these ctors instead of operator+.
91 TextFragment(const TextFragment& a, const TextFragment& b) noexcept;
92 TextFragment(const TextFragment& a, const TextFragment& b, const TextFragment& c) noexcept;
93 TextFragment(const TextFragment& a, const TextFragment& b, const TextFragment& c,
94 const TextFragment& d) noexcept;
95
96 ~TextFragment() noexcept;
97
98 explicit operator bool() const { return mSize > 0; }
99
100 size_t lengthInBytes() const;
101
102 size_t lengthInCodePoints() const;
103
104 Iterator begin() const;
105 Iterator end() const;
106
getText()107 inline const char* getText() const { return mpText; }
108
beginsWith(const TextFragment & fb)109 inline bool beginsWith(const TextFragment& fb) const
110 {
111 size_t lenA = lengthInBytes();
112 size_t lenB = fb.lengthInBytes();
113 if (lenB > lenA) return false;
114 for (int i = 0; i < lenB; ++i)
115 {
116 if (mpText[i] != fb.mpText[i])
117 {
118 return false;
119 }
120 }
121 return true;
122 }
123
endsWith(const TextFragment & fb)124 inline bool endsWith(const TextFragment& fb) const
125 {
126 size_t lenA = lengthInBytes();
127 size_t lenB = fb.lengthInBytes();
128 if (lenB > lenA) return false;
129 for (int i = 0; i < lenB; ++i)
130 {
131 if (mpText[lenA - lenB + i] != fb.mpText[i])
132 {
133 return false;
134 }
135 }
136 return true;
137 }
138
139 // deprecated! MLTEST
toString()140 inline std::string toString() const { return std::string(mpText); }
141
142 private:
143 void construct(const char* s1, size_t len1, const char* s2 = nullptr, size_t len2 = 0,
144 const char* s3 = nullptr, size_t len3 = 0, const char* s4 = nullptr,
145 size_t len4 = 0) noexcept;
146
147 void create(size_t size) noexcept;
148 void nullTerminate() noexcept;
149 void dispose() noexcept;
150 void moveDataFromOther(TextFragment& b);
151
152 // TODO these things could share space, as in SmallStackBuffer
153 char* mpText;
154 char mLocalText[kShortFragmentSizeInChars];
155
156 // size of data in bytes, without null terminator
157 size_t mSize;
158 };
159
// Returns true when the byte ranges [pA, pA + lenA) and [pB, pB + lenB) have
// the same length and identical contents. Two empty ranges compare equal
// without either pointer being read, so null pointers are acceptable when both
// lengths are zero.
inline bool compareSizedCharArrays(const char* pA, size_t lenA, const char* pB, size_t lenB)
{
  if (lenA != lenB) return false;

  // std::memcmp must not be handed null pointers, even for a zero count, so
  // the empty case is answered before calling it.
  if (lenA == 0) return true;

  return std::memcmp(pA, pB, lenA) == 0;
}
175
// TODO: operator== was made a free function; do likewise for other classes.
177
178 inline bool operator==(const TextFragment a, const TextFragment b)
179 {
180 return compareSizedCharArrays(a.getText(), a.lengthInBytes(), b.getText(), b.lengthInBytes());
181 }
182
183 inline bool operator!=(TextFragment a, TextFragment b) { return !(a == b); }
184
185 inline std::ostream& operator<<(std::ostream& out, const TextFragment& r)
186 {
187 const char* c = r.getText();
188 out << c;
189 return out;
190 }
191
192 // ----------------------------------------------------------------
193 // Text - a placeholder for more features later like localization
194
195 typedef TextFragment Text;
196
197 // ----------------------------------------------------------------
198 // functions
199
200 bool validateCodePoint(CodePoint c);
201
202 std::vector<uint8_t> textToByteVector(TextFragment frag);
203 TextFragment byteVectorToText(const std::vector<uint8_t>& v);
204
205 std::vector<CodePoint> textToCodePoints(TextFragment frag);
206 TextFragment codePointsToText(std::vector<CodePoint> cv);
207
208 } // namespace ml
209