1 #include <cstring>
2 #include "Common/Render/DrawBuffer.h"
3 #include "Common/Data/Encoding/Utf8.h"
4 #include "Common/Data/Text/WrapText.h"
5
IsCJK(uint32_t c)6 bool WordWrapper::IsCJK(uint32_t c) {
7 if (c < 0x1000) {
8 return false;
9 }
10
11 // CJK characters can be wrapped more freely.
12 bool result = (c >= 0x1100 && c <= 0x11FF); // Hangul Jamo.
13 result = result || (c >= 0x2E80 && c <= 0x2FFF); // Kangxi Radicals etc.
14 #if 0
15 result = result || (c >= 0x3040 && c <= 0x31FF); // Hiragana, Katakana, Hangul Compatibility Jamo etc.
16 result = result || (c >= 0x3200 && c <= 0x32FF); // CJK Enclosed
17 result = result || (c >= 0x3300 && c <= 0x33FF); // CJK Compatibility
18 result = result || (c >= 0x3400 && c <= 0x4DB5); // CJK Unified Ideographs Extension A
19 #else
20 result = result || (c >= 0x3040 && c <= 0x4DB5); // Above collapsed
21 #endif
22 result = result || (c >= 0x4E00 && c <= 0x9FBB); // CJK Unified Ideographs
23 result = result || (c >= 0xAC00 && c <= 0xD7AF); // Hangul Syllables
24 result = result || (c >= 0xF900 && c <= 0xFAD9); // CJK Compatibility Ideographs
25 result = result || (c >= 0x20000 && c <= 0x2A6D6); // CJK Unified Ideographs Extension B
26 result = result || (c >= 0x2F800 && c <= 0x2FA1D); // CJK Compatibility Supplement
27 return result;
28 }
29
IsPunctuation(uint32_t c)30 bool WordWrapper::IsPunctuation(uint32_t c) {
31 switch (c) {
32 // TODO: This list of punctuation is very incomplete.
33 case ',':
34 case '.':
35 case ':':
36 case '!':
37 case ')':
38 case '?':
39 case 0x00AD: // SOFT HYPHEN
40 case 0x3001: // IDEOGRAPHIC COMMA
41 case 0x3002: // IDEOGRAPHIC FULL STOP
42 case 0x06D4: // ARABIC FULL STOP
43 case 0xFF01: // FULLWIDTH EXCLAMATION MARK
44 case 0xFF09: // FULLWIDTH RIGHT PARENTHESIS
45 case 0xFF1F: // FULLWIDTH QUESTION MARK
46 return true;
47
48 default:
49 return false;
50 }
51 }
52
IsSpace(uint32_t c)53 bool WordWrapper::IsSpace(uint32_t c) {
54 switch (c) {
55 case '\t':
56 case ' ':
57 case 0x2002: // EN SPACE
58 case 0x2003: // EM SPACE
59 case 0x3000: // IDEOGRAPHIC SPACE
60 return true;
61
62 default:
63 return false;
64 }
65 }
66
IsShy(uint32_t c)67 bool WordWrapper::IsShy(uint32_t c) {
68 return c == 0x00AD; // SOFT HYPHEN
69 }
70
Wrapped()71 std::string WordWrapper::Wrapped() {
72 if (out_.empty()) {
73 Wrap();
74 }
75 return out_;
76 }
77
WrapBeforeWord()78 bool WordWrapper::WrapBeforeWord() {
79 if (flags_ & FLAG_WRAP_TEXT) {
80 if (x_ + wordWidth_ > maxW_ && !out_.empty()) {
81 if (IsShy(lastChar_)) {
82 // Soft hyphen, replace it with a real hyphen since we wrapped at it.
83 // TODO: There's an edge case here where the hyphen might not fit.
84 out_[out_.size() - 2] = '-';
85 out_[out_.size() - 1] = '\n';
86 } else {
87 out_ += "\n";
88 }
89 lastChar_ = '\n';
90 lastLineStart_ = out_.size();
91 x_ = 0.0f;
92 forceEarlyWrap_ = false;
93 return true;
94 }
95 }
96 if (flags_ & FLAG_ELLIPSIZE_TEXT) {
97 const bool hasEllipsis = out_.size() > 3 && out_.substr(out_.size() - 3) == "...";
98 if (x_ + wordWidth_ > maxW_ && !hasEllipsis) {
99 AddEllipsis();
100 skipNextWord_ = true;
101 if ((flags_ & FLAG_WRAP_TEXT) == 0) {
102 scanForNewline_ = true;
103 }
104 }
105 }
106 return false;
107 }
108
AddEllipsis()109 void WordWrapper::AddEllipsis() {
110 if (!out_.empty() && IsSpaceOrShy(lastChar_)) {
111 UTF8 utf(out_.c_str(), (int)out_.size());
112 utf.bwd();
113 out_.resize(utf.byteIndex());
114 out_ += "...";
115 } else {
116 out_ += "...";
117 }
118 lastChar_ = '.';
119 x_ += ellipsisWidth_;
120 }
121
AppendWord(int endIndex,int lastChar,bool addNewline)122 void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {
123 int lastWordStartIndex = lastIndex_;
124 if (WrapBeforeWord()) {
125 // Advance to the first non-whitespace UTF-8 character in the following word (if any) to prevent starting the new line with a whitespace
126 UTF8 utf8Word(str_, lastWordStartIndex);
127 while (lastWordStartIndex < endIndex) {
128 const uint32_t c = utf8Word.next();
129 if (!IsSpace(c)) {
130 break;
131 }
132 lastWordStartIndex = utf8Word.byteIndex();
133 }
134 }
135
136 lastEllipsisIndex_ = -1;
137 if (skipNextWord_) {
138 lastIndex_ = endIndex;
139 return;
140 }
141
142 // This will include the newline.
143 if (x_ <= maxW_) {
144 out_.append(str_ + lastWordStartIndex, str_ + endIndex);
145 } else {
146 scanForNewline_ = true;
147 }
148 if (addNewline && (flags_ & FLAG_WRAP_TEXT)) {
149 out_ += "\n";
150 lastChar_ = '\n';
151 lastLineStart_ = out_.size();
152 scanForNewline_ = false;
153 x_ = 0.0f;
154 } else {
155 // We may have appended a newline - check.
156 size_t pos = out_.find_last_of("\n");
157 if (pos != out_.npos) {
158 lastLineStart_ = pos + 1;
159 }
160
161 if (lastChar == -1 && !out_.empty()) {
162 UTF8 utf(out_.c_str(), (int)out_.size());
163 utf.bwd();
164 lastChar = utf.next();
165 }
166 lastChar_ = lastChar;
167
168 if (lastLineStart_ != out_.size()) {
169 // To account for kerning around spaces, we recalculate the entire line width.
170 x_ = MeasureWidth(out_.c_str() + lastLineStart_, out_.size() - lastLineStart_);
171 } else {
172 x_ = 0.0f;
173 }
174 }
175 lastIndex_ = endIndex;
176 wordWidth_ = 0.0f;
177 }
178
Wrap()179 void WordWrapper::Wrap() {
180 out_.clear();
181
182 // First, let's check if it fits as-is.
183 size_t len = strlen(str_);
184
185 // We know it'll be approximately this size. It's fine if the guess is a little off.
186 out_.reserve(len + len / 16);
187
188 if (MeasureWidth(str_, len) <= maxW_) {
189 // If it fits, we don't need to go through each character.
190 out_ = str_;
191 return;
192 }
193
194 if (flags_ & FLAG_ELLIPSIZE_TEXT) {
195 ellipsisWidth_ = MeasureWidth("...", 3);
196 }
197
198 for (UTF8 utf(str_); !utf.end(); ) {
199 int beforeIndex = utf.byteIndex();
200 uint32_t c = utf.next();
201 int afterIndex = utf.byteIndex();
202
203 // Is this a newline character, hard wrapping?
204 if (c == '\n') {
205 if (skipNextWord_) {
206 lastIndex_ = beforeIndex;
207 skipNextWord_ = false;
208 }
209 // This will include the newline character.
210 AppendWord(afterIndex, c, false);
211 // We wrapped once, so stop forcing.
212 forceEarlyWrap_ = false;
213 scanForNewline_ = false;
214 continue;
215 }
216
217 if (scanForNewline_) {
218 // We're discarding the rest of the characters until a newline (no wrapping.)
219 lastIndex_ = afterIndex;
220 continue;
221 }
222
223 // Measure the entire word for kerning purposes. May not be 100% perfect.
224 float newWordWidth = MeasureWidth(str_ + lastIndex_, afterIndex - lastIndex_);
225
226 // Is this the end of a word (space)? We'll also output up to a soft hyphen.
227 if (wordWidth_ > 0.0f && IsSpaceOrShy(c)) {
228 AppendWord(afterIndex, c, false);
229 skipNextWord_ = false;
230 continue;
231 }
232
233 // We're scanning for the next word.
234 if (skipNextWord_)
235 continue;
236
237 if ((flags_ & FLAG_ELLIPSIZE_TEXT) != 0 && wordWidth_ > 0.0f && lastEllipsisIndex_ == -1) {
238 float checkX = x_;
239 // If we allow wrapping, assume we'll wrap as needed.
240 if ((flags_ & FLAG_WRAP_TEXT) != 0 && x_ >= maxW_) {
241 checkX = 0;
242 }
243
244 // If we can only fit an ellipsis, time to output and skip ahead.
245 // Ignore x for newWordWidth, because we might wrap.
246 if (checkX + wordWidth_ + ellipsisWidth_ <= maxW_ && newWordWidth + ellipsisWidth_ > maxW_) {
247 lastEllipsisIndex_ = beforeIndex;
248 continue;
249 }
250 }
251
252 // Can the word fit on a line even all by itself so far?
253 if (wordWidth_ > 0.0f && newWordWidth > maxW_) {
254 // If we had a good place for an ellipsis, let's do that.
255 if (lastEllipsisIndex_ != -1) {
256 AppendWord(lastEllipsisIndex_, -1, false);
257 AddEllipsis();
258 skipNextWord_ = true;
259 if ((flags_ & FLAG_WRAP_TEXT) == 0) {
260 scanForNewline_ = true;
261 }
262 continue;
263 }
264
265 // Doesn't fit. Let's drop what's there so far onto its own line.
266 if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_ && (flags_ & FLAG_WRAP_TEXT) != 0) {
267 // Let's put as many characters as will fit on the previous line.
268 // This word can't fit on one line even, so it's going to be cut into pieces anyway.
269 // Better to avoid huge gaps, in that case.
270 forceEarlyWrap_ = true;
271
272 // Now rewind back to where the word started so we can wrap at the opportune moment.
273 wordWidth_ = 0.0f;
274 while (utf.byteIndex() > lastIndex_) {
275 utf.bwd();
276 }
277 continue;
278 }
279 // Now, add the word so far (without this latest character) and break.
280 AppendWord(beforeIndex, -1, true);
281 forceEarlyWrap_ = false;
282 // The current character will be handled as part of the next word.
283 continue;
284 }
285
286 if ((flags_ & FLAG_ELLIPSIZE_TEXT) && wordWidth_ > 0.0f && x_ + newWordWidth + ellipsisWidth_ > maxW_) {
287 if ((flags_ & FLAG_WRAP_TEXT) == 0 && x_ + wordWidth_ + ellipsisWidth_ <= maxW_) {
288 // Now, add the word so far (without this latest character) and show the ellipsis.
289 AppendWord(lastEllipsisIndex_ != -1 ? lastEllipsisIndex_ : beforeIndex, -1, false);
290 AddEllipsis();
291 forceEarlyWrap_ = false;
292 skipNextWord_ = true;
293 if ((flags_ & FLAG_WRAP_TEXT) == 0) {
294 scanForNewline_ = true;
295 }
296 continue;
297 }
298 }
299
300 wordWidth_ = newWordWidth;
301
302 // Is this the end of a word via punctuation / CJK?
303 if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {
304 // CJK doesn't require spaces, so we treat each letter as its own word.
305 AppendWord(afterIndex, c, false);
306 }
307 }
308
309 // Now insert the rest of the string - the last word.
310 AppendWord((int)len, 0, false);
311 }
312