1 /************************************************************************************
2 
3 	AstroMenace
4 	Hardcore 3D space scroll-shooter with spaceship upgrade possibilities.
5 	Copyright (c) 2006-2019 Mikhail Kurinnoi, Viewizard
6 
7 
8 	AstroMenace is free software: you can redistribute it and/or modify
9 	it under the terms of the GNU General Public License as published by
10 	the Free Software Foundation, either version 3 of the License, or
11 	(at your option) any later version.
12 
13 	AstroMenace is distributed in the hope that it will be useful,
14 	but WITHOUT ANY WARRANTY; without even the implied warranty of
15 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 	GNU General Public License for more details.
17 
18 	You should have received a copy of the GNU General Public License
19 	along with AstroMenace. If not, see <https://www.gnu.org/licenses/>.
20 
21 
22 	Website: https://viewizard.com/
23 	Project: https://github.com/viewizard/astromenace
24 	E-mail: viewizard@viewizard.com
25 
26 *************************************************************************************/
27 
28 // TODO since we use std::unordered_map with hashes now, work with text should be revised -
29 //      TextID should be removed, and English text should be used as ID and "default" text
30 
31 #include "../math/math.h"
32 #include "../vfs/vfs.h"
33 #include "../font/font.h"
34 #include "text.h"
35 
36 namespace viewizard {
37 
namespace {

// UTF-8 texts: language (column) index -> (text ID -> text).
std::unordered_map<unsigned int, std::unordered_map<std::string, std::string>> TextTable{};
// UTF-32 copy of the same texts: language (column) index -> (text ID -> text).
std::unordered_map<unsigned int, std::unordered_map<std::string, std::u32string>> TextTableUTF32{};
// Index of the language used by default; 0 stands for English.
unsigned int CurrentLanguage = 0;
// Every character found in the texts of the current language.
std::unordered_set<char32_t> CharsSetForLanguage{};

} // unnamed namespace
50 
/*
 * Check, whether Table holds Column and this column holds Row
 * (for std::unordered_map<std::unordered_map<>>).
 * Lookup only, Table is never changed.
 */
template <typename T, typename C, typename R>
static bool isElementPresentInTable(T &Table, C Column, R &Row)
{
	const auto tmpColumn = Table.find(Column);
	// no such column - no need to look for the row
	if (tmpColumn == Table.end())
		return false;

	return tmpColumn->second.find(Row) != tmpColumn->second.end();
}
60 
/*
 * Force rehash(0) for the outer map and for each inner map
 * of std::unordered_map<std::unordered_map<>>. Stored data is not changed.
 */
template <typename T>
static void UnconditionalRehash(T &Table)
{
	// outer map first, inner maps after
	Table.rehash(0);
	for (auto tmpColumn = Table.begin(); tmpColumn != Table.end(); ++tmpColumn) {
		tmpColumn->second.rehash(0);
	}
}
72 
73 /*
74  * Set default language.
75  */
vw_SetTextLanguage(unsigned int Language)76 void vw_SetTextLanguage(unsigned int Language)
77 {
78 	CurrentLanguage = Language;
79 }
80 
81 /*
82  * Get available languages count.
83  */
vw_GetLanguageListCount()84 unsigned int vw_GetLanguageListCount() {
85 	if (TextTable.empty())
86 		return 1; // English only
87 
88 	return static_cast<unsigned>(TextTable.size());
89 }
90 
91 /*
92  * Release data.
93  */
vw_ReleaseText()94 void vw_ReleaseText()
95 {
96 	TextTable.clear();
97 	TextTableUTF32.clear();
98 	CurrentLanguage = 0; // English
99 }
100 
101 /*
102  * Create text table with utf32 data (convert TextTable data for utf8 to utf32).
103  */
CreateTextTableUTF32()104 static void CreateTextTableUTF32()
105 {
106 	for (unsigned int i = 0; i < TextTable.size(); i++) {
107 		for (const auto tmpData : TextTable[i]) {
108 			TextTableUTF32[i][tmpData.first] = ConvertUTF8.from_bytes(tmpData.second);
109 		}
110 	}
111 	// unconditional rehash, at this line we have not rehashed map
112 	UnconditionalRehash(TextTableUTF32);
113 }
114 
115 /*
116  * Parse each row's block, separated by 1.SymbolSeparator, 2.SymbolEndOfLine, 3.EOF
117  */
GetRowTextBlock(std::string & CurrentTextBlock,uint8_t * Data,long DataSize,long & i,const char SymbolSeparator,const char SymbolEndOfLine)118 static int GetRowTextBlock(std::string &CurrentTextBlock, uint8_t *Data, long DataSize, long &i,
119 			   const char SymbolSeparator, const char SymbolEndOfLine)
120 {
121 	constexpr char SymbolQuotes{'\"'};
122 
123 	// check quotes, since with quotes we have another sequence of reading
124 	bool InsideFieldTrigger{Data[i] == SymbolQuotes};
125 	if (InsideFieldTrigger)
126 		i++; // skip quotes
127 
128 		// without quotes we read text till SymbolSeparator or SymbolEndOfLine
129 	for (; ((!InsideFieldTrigger && (Data[i] != SymbolSeparator) && (Data[i] != SymbolEndOfLine)) ||
130 		// if text braced by quotes, we read till end (second) of quotes
131 		(InsideFieldTrigger && (Data[i] != SymbolQuotes))) &&
132 	       (i < DataSize); i++) {
133 
134 		// special case, store '\n' instead of '\'+'n'
135 		if (!CurrentTextBlock.empty() &&
136 		    (CurrentTextBlock.back() == '\\') &&
137 		    (Data[i] == 'n'))
138 			CurrentTextBlock.back() = '\n';
139 		else
140 			CurrentTextBlock += Data[i];
141 	}
142 
143 	if (InsideFieldTrigger) {
144 		if (i >= DataSize) {
145 			std::cerr << __func__ << "(): " << "file end before end (second) of quotes.\n";
146 			return ERR_FILE_IO;
147 		}
148 		i++; // skip quotes
149 	}
150 
151 	return 0;
152 }
153 
154 /*
155  * Initialization. Load file with translation in .csv format (supported by LibreOffice Calc).
156  */
vw_InitText(const char * FileName,const char SymbolSeparator,const char SymbolEndOfLine)157 int vw_InitText(const char *FileName, const char SymbolSeparator, const char SymbolEndOfLine)
158 {
159 	vw_ReleaseText();
160 
161 	// open and don't call vw_fclose(), use tmpFile->Data directly
162 	std::unique_ptr<cFILE> tmpFile = vw_fopen(FileName);
163 	if (!tmpFile)
164 		return ERR_FILE_NOT_FOUND;
165 	std::cout << "Load and parse .csv file " << FileName << "\n";
166 
167 	// plain .csv file format parser
168 	// parse data by each byte, in order to use string as RowCode - build
169 	// RowCode first, only after that, initialize new TextTable row
170 	bool NeedBuildCurrentRowCode = true;
171 	std::string CurrentRowCode;
172 	unsigned int CurrentColumnNumber{0};
173 	unsigned int LineNumber{1}; // line number for error message
174 	for (long i = 0; i < tmpFile->GetSize(); i++) {
175 		// parse each row
176 		for (; (tmpFile->GetData()[i] != SymbolEndOfLine) && (i < tmpFile->GetSize()); i++) {
177 			// read text block in line, .csv line looks like:
178 			// text_block;text_block;...;text_blockSymbolEndOfLine
179 			// if text braced by quotes:
180 			// "text_block";"text_block";...;"text_block"SymbolEndOfLine
181 			std::string CurrentRowTextBlock{};
182 			if (GetRowTextBlock(CurrentRowTextBlock, tmpFile->GetData(), tmpFile->GetSize(), i,
183 					    SymbolSeparator, SymbolEndOfLine)) {
184 				std::cerr << __func__ << "(): " << "file corrupted.";
185 				vw_ReleaseText();
186 				return ERR_FILE_IO;
187 			}
188 			if (NeedBuildCurrentRowCode) {
189 				CurrentRowCode = CurrentRowTextBlock;
190 				// RowCode built, next blocks in this row contain data
191 				NeedBuildCurrentRowCode = false;
192 			}
193 			// we use column 0 with same row code, in order to make code simple and clear
194 			// plus, for UTF32 we need a column to store English UTF8 -> UTF32 conversion results
195 			TextTable[CurrentColumnNumber][CurrentRowCode] = CurrentRowTextBlock;
196 			CurrentColumnNumber++;
197 			// detect and skip duplicate line (if we already have this element created => this is duplicate)
198 			if (isElementPresentInTable(TextTable, CurrentColumnNumber, CurrentRowCode)) {
199 				std::cerr << __func__ << "(): " << "* Duplicate line detected, line number "
200 					  << LineNumber << "\n";
201 				for (; (tmpFile->GetData()[i] != SymbolEndOfLine) && (i < tmpFile->GetSize()); i++) {}
202 			}
203 			// we found SymbolEndOfLine in previous cycle, in order to prevent "i" changes, break cycle
204 			if (tmpFile->GetData()[i] == SymbolEndOfLine)
205 				break;
206 		}
207 		// move to next row
208 		CurrentColumnNumber = 0;
209 		NeedBuildCurrentRowCode = true;
210 		CurrentRowCode.clear();
211 		LineNumber++;
212 	}
213 	// unconditional rehash, at this line we have not rehashed map
214 	UnconditionalRehash(TextTable);
215 
216 	CreateTextTableUTF32();
217 
218 	std::cout << "Detected " << vw_GetLanguageListCount() << " languages:";
219 	for (unsigned int i = 0; i < TextTable.size(); i++) {
220 		std::cout << " " << vw_GetText("en", i);
221 	}
222 	std::cout << "\n\n";
223 	return 0;
224 }
225 
226 /*
227  * Get UTF8 text for particular language.
228  */
vw_GetText(const std::string & ItemID,unsigned int Language)229 const std::string &vw_GetText(const std::string &ItemID, unsigned int Language)
230 {
231 	if (Language > vw_GetLanguageListCount())
232 		Language = CurrentLanguage;
233 
234 	auto tmpText = TextTable[Language].find(ItemID);
235 	if (tmpText != TextTable[Language].end())
236 		return tmpText->second;
237 
238 	// ItemID should be added to TextTable for key and all languages, and
239 	// we should return pointer to this new entry, but not ItemID
240 	// make sure, vw_GetLanguageListCount() is called instead of size(),
241 	// since we could have an empty table (languages was not loaded)
242 	for (unsigned int i = 0; i < vw_GetLanguageListCount(); i++) {
243 		TextTable[i][ItemID] = ItemID;
244 	}
245 
246 	std::cout << "Added to text table for all languages: \"" << ItemID << "\"\n";
247 
248 	return TextTable[Language][ItemID];
249 }
250 
251 /*
252  * Get UTF32 text for particular language.
253  */
vw_GetTextUTF32(const std::string & ItemID,unsigned int Language)254 const std::u32string &vw_GetTextUTF32(const std::string &ItemID, unsigned int Language)
255 {
256 	if (Language > vw_GetLanguageListCount())
257 		Language = CurrentLanguage;
258 
259 	auto tmpText = TextTableUTF32[Language].find(ItemID);
260 	if (tmpText != TextTableUTF32[Language].end())
261 		return tmpText->second;
262 
263 	// ItemID should be converted to UTF32 and added to TextTableUTF32 for all languages,
264 	// and we should return pointer to this new entry, but not TextTableUTF32Error
265 	std::u32string tmpTextUTF32 = ConvertUTF8.from_bytes(ItemID);
266 	// make sure, vw_GetLanguageListCount() is called instead of size(),
267 	// since we could have an empty table (languages was not loaded)
268 	for (unsigned int i = 0; i < vw_GetLanguageListCount(); i++) {
269 		TextTableUTF32[i][ItemID] = tmpTextUTF32;
270 	}
271 
272 	std::cout << "Added to text table for all languages: \"" << ItemID << "\"\n";
273 
274 	return TextTableUTF32[Language][ItemID];
275 }
276 
277 /*
278  * Find all used in text characters for current language.
279  */
vw_FindCharsSetForLanguage()280 std::unordered_set<char32_t> &vw_FindCharsSetForLanguage()
281 {
282 	CharsSetForLanguage.clear();
283 
284 // TODO provide additional sort by character's use frequency in text
285 //      We need this, since vw_GenerateFontChars() generate texture limited by size. Some
286 //      characters could be skipped and will use personal texture. In this way we will
287 //      forced to switch textures and draw small vertex buffers all the time (if character
288 //      frequently used in text and use personal texture for rendering).
289 
290 	if (TextTableUTF32.empty()) {
291 		// default symbols for English, since we don't have text loaded
292 		std::string tmpSymbols{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
293 				       " .,!?-+\():;%&`'*#$=[]@^{}_~><–—«»“”|/©"};
294 		std::u32string tmpSymbolsUTF32 = ConvertUTF8.from_bytes(tmpSymbols);
295 		for (const auto &UTF32 : tmpSymbolsUTF32) {
296 			CharsSetForLanguage.insert(UTF32);
297 		}
298 	} else {
299 		for (const auto &tmpWords : TextTableUTF32[CurrentLanguage]) {
300 			for (const auto &UTF32 : tmpWords.second) {
301 				CharsSetForLanguage.insert(UTF32);
302 			}
303 		}
304 	}
305 	// unconditional rehash, at this line we have not rehashed set
306 	CharsSetForLanguage.rehash(0);
307 
308 #ifndef NDEBUG
309 	for (const auto UTF32 : CharsSetForLanguage) {
310 		std::cout << "Detected character:  "
311 			  << ConvertUTF8.to_bytes(UTF32) << "  "
312 			  << "0x" << std::uppercase << std::hex << UTF32 << std::dec << "\n";
313 	}
314 #endif // NDEBUG
315 
316 	return CharsSetForLanguage;
317 }
318 
319 } // viewizard namespace
320