Data/Text/WrapText.cpp

#include <cstring>
#include "Common/Render/DrawBuffer.h"
#include "Common/Data/Encoding/Utf8.h"
#include "Common/Data/Text/WrapText.h"

bool WordWrapper::IsCJK(uint32_t c) {
	if (c < 0x1000) {
		return false;
	}

	// CJK characters can be wrapped more freely.
	bool result = (c >= 0x1100 && c <= 0x11FF); // Hangul Jamo.
	result = result || (c >= 0x2E80 && c <= 0x2FFF); // Kangxi Radicals etc.
#if 0
	result = result || (c >= 0x3040 && c <= 0x31FF); // Hiragana, Katakana, Hangul Compatibility Jamo etc.
	result = result || (c >= 0x3200 && c <= 0x32FF); // CJK Enclosed
	result = result || (c >= 0x3300 && c <= 0x33FF); // CJK Compatibility
	result = result || (c >= 0x3400 && c <= 0x4DB5); // CJK Unified Ideographs Extension A
#else
	result = result || (c >= 0x3040 && c <= 0x4DB5); // Above collapsed
#endif
	result = result || (c >= 0x4E00 && c <= 0x9FBB); // CJK Unified Ideographs
	result = result || (c >= 0xAC00 && c <= 0xD7AF); // Hangul Syllables
	result = result || (c >= 0xF900 && c <= 0xFAD9); // CJK Compatibility Ideographs
	result = result || (c >= 0x20000 && c <= 0x2A6D6); // CJK Unified Ideographs Extension B
	result = result || (c >= 0x2F800 && c <= 0x2FA1D); // CJK Compatibility Supplement
	return result;
}

bool WordWrapper::IsPunctuation(uint32_t c) {
	switch (c) {
	// TODO: This list of punctuation is very incomplete.
	case ',':
	case '.':
	case ':':
	case '!':
	case ')':
	case '?':
	case 0x00AD: // SOFT HYPHEN
	case 0x3001: // IDEOGRAPHIC COMMA
	case 0x3002: // IDEOGRAPHIC FULL STOP
	case 0x06D4: // ARABIC FULL STOP
	case 0xFF01: // FULLWIDTH EXCLAMATION MARK
	case 0xFF09: // FULLWIDTH RIGHT PARENTHESIS
	case 0xFF1F: // FULLWIDTH QUESTION MARK
		return true;

	default:
		return false;
	}
}

bool WordWrapper::IsSpace(uint32_t c) {
	switch (c) {
	case '\t':
	case ' ':
	case 0x2002: // EN SPACE
	case 0x2003: // EM SPACE
	case 0x3000: // IDEOGRAPHIC SPACE
		return true;

	default:
		return false;
	}
}

bool WordWrapper::IsShy(uint32_t c) {
	return c == 0x00AD; // SOFT HYPHEN
}

std::string WordWrapper::Wrapped() {
	if (out_.empty()) {
		Wrap();
	}
	return out_;
}

bool WordWrapper::WrapBeforeWord() {
	if (flags_ & FLAG_WRAP_TEXT) {
		if (x_ + wordWidth_ > maxW_ && !out_.empty()) {
			if (IsShy(lastChar_)) {
				// Soft hyphen, replace it with a real hyphen since we wrapped at it.
				// TODO: There's an edge case here where the hyphen might not fit.
				out_[out_.size() - 2] = '-';
				out_[out_.size() - 1] = '\n';
			} else {
				out_ += "\n";
			}
			lastChar_ = '\n';
			lastLineStart_ = out_.size();
			x_ = 0.0f;
			forceEarlyWrap_ = false;
			return true;
		}
	}
	if (flags_ & FLAG_ELLIPSIZE_TEXT) {
		const bool hasEllipsis = out_.size() > 3 && out_.substr(out_.size() - 3) == "...";
		if (x_ + wordWidth_ > maxW_ && !hasEllipsis) {
			AddEllipsis();
			skipNextWord_ = true;
			if ((flags_ & FLAG_WRAP_TEXT) == 0) {
				scanForNewline_ = true;
			}
		}
	}
	return false;
}

void WordWrapper::AddEllipsis() {
	if (!out_.empty() && IsSpaceOrShy(lastChar_)) {
		UTF8 utf(out_.c_str(), (int)out_.size());
		utf.bwd();
		out_.resize(utf.byteIndex());
		out_ += "...";
	} else {
		out_ += "...";
	}
	lastChar_ = '.';
	x_ += ellipsisWidth_;
}

void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {
	int lastWordStartIndex = lastIndex_;
	if (WrapBeforeWord()) {
		// Advance to the first non-whitespace UTF-8 character in the following word (if any) to prevent starting the new line with a whitespace
		UTF8 utf8Word(str_, lastWordStartIndex);
		while (lastWordStartIndex < endIndex) {
			const uint32_t c = utf8Word.next();
			if (!IsSpace(c)) {
				break;
			}
			lastWordStartIndex = utf8Word.byteIndex();
		}
	}

	lastEllipsisIndex_ = -1;
	if (skipNextWord_) {
		lastIndex_ = endIndex;
		return;
	}

	// This will include the newline.
	if (x_ <= maxW_) {
		out_.append(str_ + lastWordStartIndex, str_ + endIndex);
	} else {
		scanForNewline_ = true;
	}
	if (addNewline && (flags_ & FLAG_WRAP_TEXT)) {
		out_ += "\n";
		lastChar_ = '\n';
		lastLineStart_ = out_.size();
		scanForNewline_ = false;
		x_ = 0.0f;
	} else {
		// We may have appended a newline - check.
		size_t pos = out_.find_last_of("\n");
		if (pos != out_.npos) {
			lastLineStart_ = pos + 1;
		}

		if (lastChar == -1 && !out_.empty()) {
			UTF8 utf(out_.c_str(), (int)out_.size());
			utf.bwd();
			lastChar = utf.next();
		}
		lastChar_ = lastChar;

		if (lastLineStart_ != out_.size()) {
			// To account for kerning around spaces, we recalculate the entire line width.
			x_ = MeasureWidth(out_.c_str() + lastLineStart_, out_.size() - lastLineStart_);
		} else {
			x_ = 0.0f;
		}
	}
	lastIndex_ = endIndex;
	wordWidth_ = 0.0f;
}

void WordWrapper::Wrap() {
	out_.clear();

	// First, let's check if it fits as-is.
	size_t len = strlen(str_);

	// We know it'll be approximately this size. It's fine if the guess is a little off.
	out_.reserve(len + len / 16);

	if (MeasureWidth(str_, len) <= maxW_) {
		// If it fits, we don't need to go through each character.
		out_ = str_;
		return;
	}

	if (flags_ & FLAG_ELLIPSIZE_TEXT) {
		ellipsisWidth_ = MeasureWidth("...", 3);
	}

	for (UTF8 utf(str_); !utf.end(); ) {
		int beforeIndex = utf.byteIndex();
		uint32_t c = utf.next();
		int afterIndex = utf.byteIndex();

		// Is this a newline character, hard wrapping?
		if (c == '\n') {
			if (skipNextWord_) {
				lastIndex_ = beforeIndex;
				skipNextWord_ = false;
			}
			// This will include the newline character.
			AppendWord(afterIndex, c, false);
			// We wrapped once, so stop forcing.
			forceEarlyWrap_ = false;
			scanForNewline_ = false;
			continue;
		}

		if (scanForNewline_) {
			// We're discarding the rest of the characters until a newline (no wrapping.)
			lastIndex_ = afterIndex;
			continue;
		}

		// Measure the entire word for kerning purposes.  May not be 100% perfect.
		float newWordWidth = MeasureWidth(str_ + lastIndex_, afterIndex - lastIndex_);

		// Is this the end of a word (space)?  We'll also output up to a soft hyphen.
		if (wordWidth_ > 0.0f && IsSpaceOrShy(c)) {
			AppendWord(afterIndex, c, false);
			skipNextWord_ = false;
			continue;
		}

		// We're scanning for the next word.
		if (skipNextWord_)
			continue;

		if ((flags_ & FLAG_ELLIPSIZE_TEXT) != 0 && wordWidth_ > 0.0f && lastEllipsisIndex_ == -1) {
			float checkX = x_;
			// If we allow wrapping, assume we'll wrap as needed.
			if ((flags_ & FLAG_WRAP_TEXT) != 0 && x_ >= maxW_) {
				checkX = 0;
			}

			// If we can only fit an ellipsis, time to output and skip ahead.
			// Ignore x for newWordWidth, because we might wrap.
			if (checkX + wordWidth_ + ellipsisWidth_ <= maxW_ && newWordWidth + ellipsisWidth_ > maxW_) {
				lastEllipsisIndex_ = beforeIndex;
				continue;
			}
		}

		// Can the word fit on a line even all by itself so far?
		if (wordWidth_ > 0.0f && newWordWidth > maxW_) {
			// If we had a good place for an ellipsis, let's do that.
			if (lastEllipsisIndex_ != -1) {
				AppendWord(lastEllipsisIndex_, -1, false);
				AddEllipsis();
				skipNextWord_ = true;
				if ((flags_ & FLAG_WRAP_TEXT) == 0) {
					scanForNewline_ = true;
				}
				continue;
			}

			// Doesn't fit.  Let's drop what's there so far onto its own line.
			if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_ && (flags_ & FLAG_WRAP_TEXT) != 0) {
				// Let's put as many characters as will fit on the previous line.
				// This word can't fit on one line even, so it's going to be cut into pieces anyway.
				// Better to avoid huge gaps, in that case.
				forceEarlyWrap_ = true;

				// Now rewind back to where the word started so we can wrap at the opportune moment.
				wordWidth_ = 0.0f;
				while (utf.byteIndex() > lastIndex_) {
					utf.bwd();
				}
				continue;
			}
			// Now, add the word so far (without this latest character) and break.
			AppendWord(beforeIndex, -1, true);
			forceEarlyWrap_ = false;
			// The current character will be handled as part of the next word.
			continue;
		}

		if ((flags_ & FLAG_ELLIPSIZE_TEXT) && wordWidth_ > 0.0f && x_ + newWordWidth + ellipsisWidth_ > maxW_) {
			if ((flags_ & FLAG_WRAP_TEXT) == 0 && x_ + wordWidth_ + ellipsisWidth_ <= maxW_) {
				// Now, add the word so far (without this latest character) and show the ellipsis.
				AppendWord(lastEllipsisIndex_ != -1 ? lastEllipsisIndex_ : beforeIndex, -1, false);
				AddEllipsis();
				forceEarlyWrap_ = false;
				skipNextWord_ = true;
				if ((flags_ & FLAG_WRAP_TEXT) == 0) {
					scanForNewline_ = true;
				}
				continue;
			}
		}

		wordWidth_ = newWordWidth;

		// Is this the end of a word via punctuation / CJK?
		if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {
			// CJK doesn't require spaces, so we treat each letter as its own word.
			AppendWord(afterIndex, c, false);
		}
	}

	// Now insert the rest of the string - the last word.
	AppendWord((int)len, 0, false);
}