1 /*
2    Copyright (C) 2003 - 2018 by David White <dave@whitevine.net>
3    Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY.
11 
12    See the COPYING file for more details.
13 */
14 
15 /**
16  * @file
17  * Support for simple markup in text (fonts, colors, images).
18  * E.g. "@Victory" will be shown in green.
19  */
20 
21 #define GETTEXT_DOMAIN "wesnoth-lib"
22 
23 #include "font/sdl_ttf.hpp"
24 #include "gettext.hpp"
25 #include "font/marked-up_text.hpp"
26 #include "font/standard_colors.hpp"
27 #include "sdl/surface.hpp"
28 #include "serialization/string_utils.hpp"
29 #include "serialization/unicode.hpp"
30 #include "video.hpp"
31 #include "wml_exception.hpp"
32 #include "preferences/general.hpp"
33 
34 namespace font {
35 
36 // NOTE: if you add more markup characters below, you'll need to update
37 // the list in campaign_server.cpp (illegal_markup_chars) to blacklist
38 // them for add-on names and titles.
39 
// Single-character markup prefixes recognised at the start of a line.
// GOOD_TEXT/GREEN_TEXT and BAD_TEXT/RED_TEXT are deliberate aliases of the
// same character.
const char LARGE_TEXT = '*';
const char SMALL_TEXT = '`';
const char BOLD_TEXT = '~';
const char NORMAL_TEXT = '{';
const char NULL_MARKUP = '^';
const char BLACK_TEXT = '}';
const char GRAY_TEXT = '|';
const char GOOD_TEXT = '@';
const char BAD_TEXT = '#';
const char GREEN_TEXT = '@';
const char RED_TEXT = '#';
const char COLOR_TEXT = '<';
const char IMAGE = '&';
47 
parse_markup(std::string::const_iterator i1,std::string::const_iterator i2,int * font_size,color_t * color,int * style)48 std::string::const_iterator parse_markup(std::string::const_iterator i1,
49 												std::string::const_iterator i2,
50 												int* font_size,
51 												color_t* color, int* style)
52 {
53 	while(i1 != i2) {
54 		switch(*i1) {
55 		case '\\':
56 			// This must either be a quoted special character or a
57 			// quoted backslash - either way, remove leading backslash
58 			break;
59 		case BAD_TEXT:
60 			if (color) *color = BAD_COLOR;
61 			break;
62 		case GOOD_TEXT:
63 			if (color) *color = GOOD_COLOR;
64 			break;
65 		case NORMAL_TEXT:
66 			if (color) *color = NORMAL_COLOR;
67 			break;
68 		case BLACK_TEXT:
69 			if (color) *color = BLACK_COLOR;
70 			break;
71 		case GRAY_TEXT:
72 			if (color) *color = GRAY_COLOR;
73 			break;
74 		case LARGE_TEXT:
75 			if (font_size) *font_size += 2;
76 			break;
77 		case SMALL_TEXT:
78 			if (font_size) *font_size -= 2;
79 			break;
80 		case BOLD_TEXT:
81 			if (style) *style |= TTF_STYLE_BOLD;
82 			break;
83 		case NULL_MARKUP:
84 			return i1+1;
85 		case COLOR_TEXT:
86 			{
87 				std::string::const_iterator start = i1;
88 				// Very primitive parsing for rgb value
89 				// should look like <213,14,151>
90 				++i1;
91 				uint8_t red=0, green=0, blue=0, temp=0;
92 				while (i1 != i2 && *i1 >= '0' && *i1<='9') {
93 					temp*=10;
94 					temp += lexical_cast<int, char>(*i1);
95 					++i1;
96 				}
97 				red=temp;
98 				temp=0;
99 				if (i1 != i2 && ',' == (*i1)) {
100 					++i1;
101 					while(i1 != i2 && *i1 >= '0' && *i1<='9'){
102 						temp*=10;
103 						temp += lexical_cast<int, char>(*i1);
104 						++i1;
105 					}
106 					green=temp;
107 					temp=0;
108 				}
109 				if (i1 != i2 && ',' == (*i1)) {
110 					++i1;
111 					while(i1 != i2 && *i1 >= '0' && *i1<='9'){
112 						temp*=10;
113 						temp += lexical_cast<int, char>(*i1);
114 						++i1;
115 					}
116 				}
117 				blue=temp;
118 				if (i1 != i2 && '>' == (*i1)) {
119 					color_t temp_color {red, green, blue, 0};
120 					if (color) *color = temp_color;
121 				} else {
122 					// stop parsing and do not consume any chars
123 					return start;
124 				}
125 				if (i1 == i2) return i1;
126 				break;
127 			}
128 		default:
129 			return i1;
130 		}
131 		++i1;
132 	}
133 	return i1;
134 }
135 
del_tags(const std::string & text)136 std::string del_tags(const std::string& text){
137 	std::vector<std::string> lines = utils::split(text, '\n', 0);
138 	std::vector<std::string>::iterator line;
139 	for(line = lines.begin(); line != lines.end(); ++line) {
140 		std::string::const_iterator i1 = line->begin(),
141 			i2 = line->end();
142 		*line = std::string(parse_markup(i1,i2,nullptr,nullptr,nullptr),i2);
143 	}
144 	return utils::join(lines, "\n");
145 }
146 
text_area(const std::string & text,int size,int style)147 SDL_Rect text_area(const std::string& text, int size, int style)
148 {
149 	const SDL_Rect area {0,0,10000,10000};
150 	return draw_text(nullptr, area, size, font::NORMAL_COLOR, text, 0, 0, false, style);
151 }
152 
draw_text(surface & dst,const SDL_Rect & area,int size,const color_t & color,const std::string & txt,int x,int y,bool use_tooltips,int style)153 SDL_Rect draw_text(surface& dst, const SDL_Rect& area, int size,
154                    const color_t& color, const std::string& txt,
155                    int x, int y, bool use_tooltips, int style)
156 {
157 	// Make sure there's always at least a space,
158 	// so we can ensure that we can return a rectangle for height
159 	static const std::string blank_text(" ");
160 	const std::string& text = txt.empty() ? blank_text : txt;
161 
162 	SDL_Rect res;
163 	res.x = x;
164 	res.y = y;
165 	res.w = 0;
166 	res.h = 0;
167 
168 	std::string::const_iterator i1 = text.begin();
169 	std::string::const_iterator i2 = std::find(i1,text.end(),'\n');
170 	for(;;) {
171 		color_t col = color;
172 		int sz = size;
173 		int text_style = style;
174 
175 		i1 = parse_markup(i1,i2,&sz,&col,&text_style);
176 
177 		if(i1 != i2) {
178 			std::string new_string = utils::unescape(std::string(i1, i2));
179 
180 			const SDL_Rect rect = draw_text_line(dst, area, sz, col, new_string, x, y, use_tooltips, text_style);
181 			if(rect.w > res.w) {
182 				res.w = rect.w;
183 			}
184 
185 			res.h += rect.h;
186 			y += rect.h;
187 		}
188 
189 		if(i2 == text.end()) {
190 			break;
191 		}
192 
193 		i1 = i2+1;
194 		i2 = std::find(i1,text.end(),'\n');
195 	}
196 
197 	return res;
198 }
199 
draw_text(CVideo * gui,const SDL_Rect & area,int size,const color_t & color,const std::string & txt,int x,int y,bool use_tooltips,int style)200 SDL_Rect draw_text(CVideo* gui, const SDL_Rect& area, int size,
201                    const color_t& color, const std::string& txt,
202                    int x, int y, bool use_tooltips, int style)
203 {
204 	surface null_surf = surface(nullptr);
205 
206 	return draw_text(gui != nullptr ? gui->getSurface() : null_surf, area, size, color, txt, x, y, use_tooltips, style);
207 }
208 
is_format_char(char c)209 bool is_format_char(char c)
210 {
211 	switch(c) {
212 	case LARGE_TEXT:
213 	case SMALL_TEXT:
214 	case GOOD_TEXT:
215 	case BAD_TEXT:
216 	case NORMAL_TEXT:
217 	case BLACK_TEXT:
218 	case GRAY_TEXT:
219 	case BOLD_TEXT:
220 	case NULL_MARKUP:
221 		return true;
222 	default:
223 		return false;
224 	}
225 }
226 
is_cjk_char(const ucs4::char_t ch)227 bool is_cjk_char(const ucs4::char_t ch)
228 {
229 	/**
230 	 * You can check these range at http://unicode.org/charts/
231 	 * see the "East Asian Scripts" part.
232 	 * Notice that not all characters in that part is still in use today, so don't list them all here.
233 	 * Below are characters that I guess may be used in wesnoth translations.
234 	 */
235 
236 	//FIXME add range from Japanese-specific and Korean-specific section if you know the characters are used today.
237 
238 	if (ch < 0x2e80) return false; // shortcut for common non-CJK
239 
240 	return
241 		//Han Ideographs: all except Supplement
242 		(ch >= 0x4e00 && ch < 0x9fcf) ||
243 		(ch >= 0x3400 && ch < 0x4dbf) ||
244 		(ch >= 0x20000 && ch < 0x2a6df) ||
245 		(ch >= 0xf900 && ch < 0xfaff) ||
246 		(ch >= 0x3190 && ch < 0x319f) ||
247 
248 		//Radicals: all except Ideographic Description
249 		(ch >= 0x2e80 && ch < 0x2eff) ||
250 		(ch >= 0x2f00 && ch < 0x2fdf) ||
251 		(ch >= 0x31c0 && ch < 0x31ef) ||
252 
253 		//Chinese-specific: Bopomofo and Bopomofo Extended
254 		(ch >= 0x3104 && ch < 0x312e) ||
255 		(ch >= 0x31a0 && ch < 0x31bb) ||
256 
257 		//Yi-specific: Yi Radicals, Yi Syllables
258 		(ch >= 0xa490 && ch < 0xa4c7) ||
259 		(ch >= 0xa000 && ch < 0xa48d) ||
260 
261 		//Japanese-specific: Hiragana, Katakana, Kana Supplement
262 		(ch >= 0x3040 && ch <= 0x309f) ||
263 		(ch >= 0x30a0 && ch <= 0x30ff) ||
264 		(ch >= 0x1b000 && ch <= 0x1b001) ||
265 
266 		//Ainu-specific: Katakana Phonetic Extensions
267 		(ch >= 0x31f0 && ch <= 0x31ff) ||
268 
269 		//Korean-specific: Hangul Syllables, Hangul Jamo, Hangul Jamo Extended-A, Hangul Jamo Extended-B
270 		(ch >= 0xac00 && ch < 0xd7af) ||
271 		(ch >= 0x1100 && ch <= 0x11ff) ||
272 		(ch >= 0xa960 && ch <= 0xa97c) ||
273 		(ch >= 0xd7b0 && ch <= 0xd7fb) ||
274 
275 		//CJK Symbols and Punctuation
276 		(ch >= 0x3000 && ch < 0x303f) ||
277 
278 		//Halfwidth and Fullwidth Forms
279 		(ch >= 0xff00 && ch < 0xffef);
280 }
cut_word(std::string & line,std::string & word,int font_size,int style,int max_width)281 static void cut_word(std::string& line, std::string& word, int font_size, int style, int max_width)
282 {
283 	std::string tmp = line;
284 	utf8::iterator tc(word);
285 	bool first = true;
286 	font_size = preferences::font_scaled(font_size);
287 
288 	for(;tc != utf8::iterator::end(word); ++tc) {
289 		tmp.append(tc.substr().first, tc.substr().second);
290 		SDL_Rect tsize = line_size(tmp, font_size, style);
291 		if(tsize.w > max_width) {
292 			const std::string& w = word;
293 			if(line.empty() && first) {
294 				line += std::string(w.begin(), tc.substr().second);
295 				word = std::string(tc.substr().second, w.end());
296 			} else {
297 				line += std::string(w.begin(), tc.substr().first);
298 				word = std::string(tc.substr().first, w.end());
299 			}
300 			break;
301 		}
302 		first = false;
303 	}
304 }
305 
306 namespace {
307 
308 /*
309  * According to Kinsoku-Shori, Japanese rules about line-breaking:
310  *
311  * * the following characters cannot begin a line (so we will never break before them):
312  * 、。,.)〕]}〉》」』】’”ゝゞヽヾ々?!:;ぁぃぅぇぉゃゅょゎァィゥェォャュョヮっヵッヶ・…ー
313  *
314  * * the following characters cannot end a line (so we will never break after them):
315  * (〔[{〈《「『【‘“
316  *
317  * Unicode range that concerns word wrap for Chinese:
318  *   全角ASCII、全角中英文标点 (Fullwidth Character for ASCII, English punctuations and part of Chinese punctuations)
319  *   http://www.unicode.org/charts/PDF/UFF00.pdf
320  *   CJK 标点符号 (CJK punctuations)
321  *   http://www.unicode.org/charts/PDF/U3000.pdf
322  */
no_break_after(const ucs4::char_t ch)323 inline bool no_break_after(const ucs4::char_t ch)
324 {
325 	return
326 		/**
327 		 * don't break after these Japanese characters
328 		 */
329 		ch == 0x2018 || ch == 0x201c || ch == 0x3008 || ch == 0x300a || ch == 0x300c ||
330 		ch == 0x300e || ch == 0x3010 || ch == 0x3014 || ch == 0xff08 || ch == 0xff3b ||
331 		ch == 0xff5b ||
332 
333 		/**
334 		 * FIXME don't break after these Korean characters
335 		 */
336 
337 		/**
338 		 * don't break after these Chinese characters
339 		 * contains left side of different kinds of brackets and quotes
340 		 */
341 		ch == 0x3016 || ch == 0x301a || ch == 0x301d;
342 }
343 
no_break_before(const ucs4::char_t ch)344 inline bool no_break_before(const ucs4::char_t ch)
345 {
346 	return
347 		/**
348 		 * don't break before these Japanese characters
349 		 */
350 		ch == 0x2019 || ch == 0x201d || ch == 0x2026 || ch == 0x3001 || ch == 0x3002 ||
351 		ch == 0x3005 || ch == 0x3009 || ch == 0x300b || ch == 0x300d || ch == 0x300f ||
352 		ch == 0x3011 || ch == 0x3015 || ch == 0x3041 || ch == 0x3043 || ch == 0x3045 ||
353 		ch == 0x3047 || ch == 0x3049 || ch == 0x3063 || ch == 0x3083 || ch == 0x3085 ||
354 		ch == 0x3087 || ch == 0x308e || ch == 0x309d || ch == 0x309e || ch == 0x30a1 ||
355 		ch == 0x30a3 || ch == 0x30a5 || ch == 0x30a7 || ch == 0x30a9 || ch == 0x30c3 ||
356 		ch == 0x30e3 || ch == 0x30e5 || ch == 0x30e7 || ch == 0x30ee || ch == 0x30f5 ||
357 		ch == 0x30f6 || ch == 0x30fb || ch == 0x30fc || ch == 0x30fd || ch == 0x30fe ||
358 		ch == 0xff01 || ch == 0xff09 || ch == 0xff0c || ch == 0xff0e || ch == 0xff1a ||
359 		ch == 0xff1b || ch == 0xff1f || ch == 0xff3d || ch == 0xff5d ||
360 
361 		// Small katakana used in Ainu:
362 		ch == 0x31f0 || ch == 0x31f1 || ch == 0x31f2 || ch == 0x31f3 || ch == 0x31f4 ||
363 		ch == 0x31f5 || ch == 0x31f6 || ch == 0x31f7 || ch == 0x31f8 || ch == 0x31f9 ||
364 		ch == 0x31fa || ch == 0x31fb || ch == 0x31fc || ch == 0x31fd || ch == 0x31fe ||
365 		ch == 0x31ff ||
366 
367 		/**
368 		 * FIXME don't break before these Korean characters
369 		 */
370 
371 		/**
372 		 * don't break before these Chinese characters
373 		 * contains
374 		 *   many Chinese punctuations that should not start a line
375 		 *   and right side of different kinds of brackets, quotes
376 		 */
377 		ch == 0x301c || ch == 0xff0d || ch == 0xff64 || ch == 0xff65 || ch == 0x3017 ||
378 		ch == 0x301b || ch == 0x301e;
379 }
380 
break_before(const ucs4::char_t ch)381 inline bool break_before(const ucs4::char_t ch)
382 {
383 	if(no_break_before(ch))
384 		return false;
385 
386 	return is_cjk_char(ch);
387 }
388 
break_after(const ucs4::char_t ch)389 inline bool break_after(const ucs4::char_t ch)
390 {
391 	if(no_break_after(ch))
392 		return false;
393 
394 	return is_cjk_char(ch);
395 }
396 
397 } // end of anon namespace
398 
/**
 * Wraps @a unwrapped_text so that each output line fits in @a max_width.
 *
 * The text is consumed word by word. A word is a single space, a single
 * newline, or a run of other characters that ends at a space, a newline or
 * a position where the CJK rules (break_before() / break_after()) allow a
 * break. Words are appended to the current line while the accumulated word
 * widths stay within @a max_width; otherwise the line is emitted and the
 * word is carried over to the next one.
 *
 * Format characters found at the start of an input line are collected and
 * prepended to every output line produced until the next input newline.
 *
 * @param unwrapped_text  The text to wrap.
 * @param font_size       Base font size; adjusted per line by the markup and
 *                        scaled with preferences::font_scaled() for measuring.
 * @param max_width       Maximum line width; validated to be greater than 0.
 * @param max_height      If greater than 0, wrapping stops before the line
 *                        whose height would make the total reach this value.
 * @param max_lines       Wrapping stops once this many lines were emitted.
 *                        The counter is decremented and compared against 0,
 *                        so a non-positive value does not match that check.
 * @param partial_line    When false, a word wider than @a max_width is split
 *                        with cut_word(); when true it is left intact.
 *
 * @returns               The wrapped text, lines separated by '\n'.
 */
std::string word_wrap_text(const std::string& unwrapped_text, int font_size,
	int max_width, int max_height, int max_lines, bool partial_line)
{
	// NOTE(review): VALIDATE presumably raises a WML exception on failure;
	// its definition is not visible here.
	VALIDATE(max_width > 0, _("The maximum text width is less than 1."));

	utf8::iterator ch(unwrapped_text);
	std::string current_word;
	std::string current_line;
	size_t line_width = 0;
	size_t current_height = 0;
	bool line_break = false;
	bool first = true;
	bool start_of_line = true;
	std::string wrapped_text;
	std::string format_string;
	// Only needed as an output argument for parse_markup(); never read here
	color_t color;
	int font_sz = font_size;
	int style = TTF_STYLE_NORMAL;
	utf8::iterator end = utf8::iterator::end(unwrapped_text);

	while(1) {
		if(start_of_line) {
			line_width = 0;
			format_string.clear();
			// Collect the leading format characters, but never the last
			// character of the text
			while(ch != end && *ch < static_cast<ucs4::char_t>(0x100)
					&& is_format_char(*ch) && !ch.next_is_end()) {

				format_string.append(ch.substr().first, ch.substr().second);
				++ch;
			}
			// We need to parse the special format characters
			// to give the proper font_size and style to line_size()
			font_sz = font_size;
			style = TTF_STYLE_NORMAL;
			parse_markup(format_string.begin(),format_string.end(),&font_sz,&color,&style);
			current_line.clear();
			start_of_line = false;
		}

		// If there is no current word, get one
		if(current_word.empty() && ch == end) {
			break;
		} else if(current_word.empty()) {
			if(*ch == ' ' || *ch == '\n') {
				// A space or a newline is a word of its own
				current_word = *ch;
				++ch;
			} else {
				ucs4::char_t previous = 0;
				for(;ch != utf8::iterator::end(unwrapped_text) &&
						*ch != ' ' && *ch != '\n'; ++ch) {

					// End the word where a break is allowed before the
					// current character...
					if(!current_word.empty() &&
							break_before(*ch) &&
							!no_break_after(previous))
						break;

					// ...or after the previous one
					if(!current_word.empty() &&
							break_after(previous) &&
							!no_break_before(*ch))
						break;

					current_word.append(ch.substr().first, ch.substr().second);

					previous = *ch;
				}
			}
		}

		if(current_word == "\n") {
			// Explicit newline: emit the current line and re-read the
			// format characters for the next input line
			line_break = true;
			current_word.clear();
			start_of_line = true;
		} else {

			const size_t word_width = line_size(current_word, preferences::font_scaled(font_sz), style).w;

			line_width += word_width;

			if(static_cast<long>(line_width) > max_width) {
				// The word does not fit on the current line
				if (!partial_line && static_cast<long>(word_width) > max_width) {
					// The word alone is wider than a line: move its fitting
					// part onto the current line
					cut_word(current_line,
						current_word, font_sz, style, max_width);
				}
				// Do not carry a lone space over to the next line
				if(current_word == " ")
					current_word = "";
				line_break = true;
			} else {
				current_line += current_word;
				current_word = "";
			}
		}

		if(line_break || (current_word.empty() && ch == end)) {
			SDL_Rect size = line_size(current_line, preferences::font_scaled(font_sz), style);
			// Stop before emitting a line that would reach the height limit
			if(max_height > 0 && current_height + size.h >= size_t(max_height)) {
				return wrapped_text;
			}

			// Separate lines with a newline, except before the first one
			if(!first) {
				wrapped_text += '\n';
			}

			// Repeat the format characters on every emitted line
			wrapped_text += format_string + current_line;
			current_line.clear();
			line_width = 0;
			current_height += size.h;
			line_break = false;
			first = false;

			if(--max_lines == 0) {
				return wrapped_text;
			}
		}
	}
	return wrapped_text;
}
515 
516 } // end namespace font
517