1 #include <cstring>
2 #include "Common/Render/DrawBuffer.h"
3 #include "Common/Data/Encoding/Utf8.h"
4 #include "Common/Data/Text/WrapText.h"
5 
IsCJK(uint32_t c)6 bool WordWrapper::IsCJK(uint32_t c) {
7 	if (c < 0x1000) {
8 		return false;
9 	}
10 
11 	// CJK characters can be wrapped more freely.
12 	bool result = (c >= 0x1100 && c <= 0x11FF); // Hangul Jamo.
13 	result = result || (c >= 0x2E80 && c <= 0x2FFF); // Kangxi Radicals etc.
14 #if 0
15 	result = result || (c >= 0x3040 && c <= 0x31FF); // Hiragana, Katakana, Hangul Compatibility Jamo etc.
16 	result = result || (c >= 0x3200 && c <= 0x32FF); // CJK Enclosed
17 	result = result || (c >= 0x3300 && c <= 0x33FF); // CJK Compatibility
18 	result = result || (c >= 0x3400 && c <= 0x4DB5); // CJK Unified Ideographs Extension A
19 #else
20 	result = result || (c >= 0x3040 && c <= 0x4DB5); // Above collapsed
21 #endif
22 	result = result || (c >= 0x4E00 && c <= 0x9FBB); // CJK Unified Ideographs
23 	result = result || (c >= 0xAC00 && c <= 0xD7AF); // Hangul Syllables
24 	result = result || (c >= 0xF900 && c <= 0xFAD9); // CJK Compatibility Ideographs
25 	result = result || (c >= 0x20000 && c <= 0x2A6D6); // CJK Unified Ideographs Extension B
26 	result = result || (c >= 0x2F800 && c <= 0x2FA1D); // CJK Compatibility Supplement
27 	return result;
28 }
29 
IsPunctuation(uint32_t c)30 bool WordWrapper::IsPunctuation(uint32_t c) {
31 	switch (c) {
32 	// TODO: This list of punctuation is very incomplete.
33 	case ',':
34 	case '.':
35 	case ':':
36 	case '!':
37 	case ')':
38 	case '?':
39 	case 0x00AD: // SOFT HYPHEN
40 	case 0x3001: // IDEOGRAPHIC COMMA
41 	case 0x3002: // IDEOGRAPHIC FULL STOP
42 	case 0x06D4: // ARABIC FULL STOP
43 	case 0xFF01: // FULLWIDTH EXCLAMATION MARK
44 	case 0xFF09: // FULLWIDTH RIGHT PARENTHESIS
45 	case 0xFF1F: // FULLWIDTH QUESTION MARK
46 		return true;
47 
48 	default:
49 		return false;
50 	}
51 }
52 
IsSpace(uint32_t c)53 bool WordWrapper::IsSpace(uint32_t c) {
54 	switch (c) {
55 	case '\t':
56 	case ' ':
57 	case 0x2002: // EN SPACE
58 	case 0x2003: // EM SPACE
59 	case 0x3000: // IDEOGRAPHIC SPACE
60 		return true;
61 
62 	default:
63 		return false;
64 	}
65 }
66 
IsShy(uint32_t c)67 bool WordWrapper::IsShy(uint32_t c) {
68 	return c == 0x00AD; // SOFT HYPHEN
69 }
70 
Wrapped()71 std::string WordWrapper::Wrapped() {
72 	if (out_.empty()) {
73 		Wrap();
74 	}
75 	return out_;
76 }
77 
WrapBeforeWord()78 bool WordWrapper::WrapBeforeWord() {
79 	if (flags_ & FLAG_WRAP_TEXT) {
80 		if (x_ + wordWidth_ > maxW_ && !out_.empty()) {
81 			if (IsShy(lastChar_)) {
82 				// Soft hyphen, replace it with a real hyphen since we wrapped at it.
83 				// TODO: There's an edge case here where the hyphen might not fit.
84 				out_[out_.size() - 2] = '-';
85 				out_[out_.size() - 1] = '\n';
86 			} else {
87 				out_ += "\n";
88 			}
89 			lastChar_ = '\n';
90 			lastLineStart_ = out_.size();
91 			x_ = 0.0f;
92 			forceEarlyWrap_ = false;
93 			return true;
94 		}
95 	}
96 	if (flags_ & FLAG_ELLIPSIZE_TEXT) {
97 		const bool hasEllipsis = out_.size() > 3 && out_.substr(out_.size() - 3) == "...";
98 		if (x_ + wordWidth_ > maxW_ && !hasEllipsis) {
99 			AddEllipsis();
100 			skipNextWord_ = true;
101 			if ((flags_ & FLAG_WRAP_TEXT) == 0) {
102 				scanForNewline_ = true;
103 			}
104 		}
105 	}
106 	return false;
107 }
108 
AddEllipsis()109 void WordWrapper::AddEllipsis() {
110 	if (!out_.empty() && IsSpaceOrShy(lastChar_)) {
111 		UTF8 utf(out_.c_str(), (int)out_.size());
112 		utf.bwd();
113 		out_.resize(utf.byteIndex());
114 		out_ += "...";
115 	} else {
116 		out_ += "...";
117 	}
118 	lastChar_ = '.';
119 	x_ += ellipsisWidth_;
120 }
121 
AppendWord(int endIndex,int lastChar,bool addNewline)122 void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {
123 	int lastWordStartIndex = lastIndex_;
124 	if (WrapBeforeWord()) {
125 		// Advance to the first non-whitespace UTF-8 character in the following word (if any) to prevent starting the new line with a whitespace
126 		UTF8 utf8Word(str_, lastWordStartIndex);
127 		while (lastWordStartIndex < endIndex) {
128 			const uint32_t c = utf8Word.next();
129 			if (!IsSpace(c)) {
130 				break;
131 			}
132 			lastWordStartIndex = utf8Word.byteIndex();
133 		}
134 	}
135 
136 	lastEllipsisIndex_ = -1;
137 	if (skipNextWord_) {
138 		lastIndex_ = endIndex;
139 		return;
140 	}
141 
142 	// This will include the newline.
143 	if (x_ <= maxW_) {
144 		out_.append(str_ + lastWordStartIndex, str_ + endIndex);
145 	} else {
146 		scanForNewline_ = true;
147 	}
148 	if (addNewline && (flags_ & FLAG_WRAP_TEXT)) {
149 		out_ += "\n";
150 		lastChar_ = '\n';
151 		lastLineStart_ = out_.size();
152 		scanForNewline_ = false;
153 		x_ = 0.0f;
154 	} else {
155 		// We may have appended a newline - check.
156 		size_t pos = out_.find_last_of("\n");
157 		if (pos != out_.npos) {
158 			lastLineStart_ = pos + 1;
159 		}
160 
161 		if (lastChar == -1 && !out_.empty()) {
162 			UTF8 utf(out_.c_str(), (int)out_.size());
163 			utf.bwd();
164 			lastChar = utf.next();
165 		}
166 		lastChar_ = lastChar;
167 
168 		if (lastLineStart_ != out_.size()) {
169 			// To account for kerning around spaces, we recalculate the entire line width.
170 			x_ = MeasureWidth(out_.c_str() + lastLineStart_, out_.size() - lastLineStart_);
171 		} else {
172 			x_ = 0.0f;
173 		}
174 	}
175 	lastIndex_ = endIndex;
176 	wordWidth_ = 0.0f;
177 }
178 
Wrap()179 void WordWrapper::Wrap() {
180 	out_.clear();
181 
182 	// First, let's check if it fits as-is.
183 	size_t len = strlen(str_);
184 
185 	// We know it'll be approximately this size. It's fine if the guess is a little off.
186 	out_.reserve(len + len / 16);
187 
188 	if (MeasureWidth(str_, len) <= maxW_) {
189 		// If it fits, we don't need to go through each character.
190 		out_ = str_;
191 		return;
192 	}
193 
194 	if (flags_ & FLAG_ELLIPSIZE_TEXT) {
195 		ellipsisWidth_ = MeasureWidth("...", 3);
196 	}
197 
198 	for (UTF8 utf(str_); !utf.end(); ) {
199 		int beforeIndex = utf.byteIndex();
200 		uint32_t c = utf.next();
201 		int afterIndex = utf.byteIndex();
202 
203 		// Is this a newline character, hard wrapping?
204 		if (c == '\n') {
205 			if (skipNextWord_) {
206 				lastIndex_ = beforeIndex;
207 				skipNextWord_ = false;
208 			}
209 			// This will include the newline character.
210 			AppendWord(afterIndex, c, false);
211 			// We wrapped once, so stop forcing.
212 			forceEarlyWrap_ = false;
213 			scanForNewline_ = false;
214 			continue;
215 		}
216 
217 		if (scanForNewline_) {
218 			// We're discarding the rest of the characters until a newline (no wrapping.)
219 			lastIndex_ = afterIndex;
220 			continue;
221 		}
222 
223 		// Measure the entire word for kerning purposes.  May not be 100% perfect.
224 		float newWordWidth = MeasureWidth(str_ + lastIndex_, afterIndex - lastIndex_);
225 
226 		// Is this the end of a word (space)?  We'll also output up to a soft hyphen.
227 		if (wordWidth_ > 0.0f && IsSpaceOrShy(c)) {
228 			AppendWord(afterIndex, c, false);
229 			skipNextWord_ = false;
230 			continue;
231 		}
232 
233 		// We're scanning for the next word.
234 		if (skipNextWord_)
235 			continue;
236 
237 		if ((flags_ & FLAG_ELLIPSIZE_TEXT) != 0 && wordWidth_ > 0.0f && lastEllipsisIndex_ == -1) {
238 			float checkX = x_;
239 			// If we allow wrapping, assume we'll wrap as needed.
240 			if ((flags_ & FLAG_WRAP_TEXT) != 0 && x_ >= maxW_) {
241 				checkX = 0;
242 			}
243 
244 			// If we can only fit an ellipsis, time to output and skip ahead.
245 			// Ignore x for newWordWidth, because we might wrap.
246 			if (checkX + wordWidth_ + ellipsisWidth_ <= maxW_ && newWordWidth + ellipsisWidth_ > maxW_) {
247 				lastEllipsisIndex_ = beforeIndex;
248 				continue;
249 			}
250 		}
251 
252 		// Can the word fit on a line even all by itself so far?
253 		if (wordWidth_ > 0.0f && newWordWidth > maxW_) {
254 			// If we had a good place for an ellipsis, let's do that.
255 			if (lastEllipsisIndex_ != -1) {
256 				AppendWord(lastEllipsisIndex_, -1, false);
257 				AddEllipsis();
258 				skipNextWord_ = true;
259 				if ((flags_ & FLAG_WRAP_TEXT) == 0) {
260 					scanForNewline_ = true;
261 				}
262 				continue;
263 			}
264 
265 			// Doesn't fit.  Let's drop what's there so far onto its own line.
266 			if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_ && (flags_ & FLAG_WRAP_TEXT) != 0) {
267 				// Let's put as many characters as will fit on the previous line.
268 				// This word can't fit on one line even, so it's going to be cut into pieces anyway.
269 				// Better to avoid huge gaps, in that case.
270 				forceEarlyWrap_ = true;
271 
272 				// Now rewind back to where the word started so we can wrap at the opportune moment.
273 				wordWidth_ = 0.0f;
274 				while (utf.byteIndex() > lastIndex_) {
275 					utf.bwd();
276 				}
277 				continue;
278 			}
279 			// Now, add the word so far (without this latest character) and break.
280 			AppendWord(beforeIndex, -1, true);
281 			forceEarlyWrap_ = false;
282 			// The current character will be handled as part of the next word.
283 			continue;
284 		}
285 
286 		if ((flags_ & FLAG_ELLIPSIZE_TEXT) && wordWidth_ > 0.0f && x_ + newWordWidth + ellipsisWidth_ > maxW_) {
287 			if ((flags_ & FLAG_WRAP_TEXT) == 0 && x_ + wordWidth_ + ellipsisWidth_ <= maxW_) {
288 				// Now, add the word so far (without this latest character) and show the ellipsis.
289 				AppendWord(lastEllipsisIndex_ != -1 ? lastEllipsisIndex_ : beforeIndex, -1, false);
290 				AddEllipsis();
291 				forceEarlyWrap_ = false;
292 				skipNextWord_ = true;
293 				if ((flags_ & FLAG_WRAP_TEXT) == 0) {
294 					scanForNewline_ = true;
295 				}
296 				continue;
297 			}
298 		}
299 
300 		wordWidth_ = newWordWidth;
301 
302 		// Is this the end of a word via punctuation / CJK?
303 		if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {
304 			// CJK doesn't require spaces, so we treat each letter as its own word.
305 			AppendWord(afterIndex, c, false);
306 		}
307 	}
308 
309 	// Now insert the rest of the string - the last word.
310 	AppendWord((int)len, 0, false);
311 }
312