1 /*
2 * PROJECT: ReactOS NLS to TXT Converter
3 * LICENSE: GNU General Public License Version 2.0 or any later version
4 * FILE: devutils/nls2txt/bestfit.c
5 * COPYRIGHT: Copyright 2016 Dmitry Chapyshev <dmitry@reactos.org>
6 */
7
8 #include "precomp.h"
9
10 static HANDLE
BestFit_CreateFile(const WCHAR * pszFile)11 BestFit_CreateFile(const WCHAR *pszFile)
12 {
13 DWORD dwBytesWritten;
14 HANDLE hFile;
15
16 hFile = CreateFileW(pszFile,
17 GENERIC_WRITE,
18 FILE_SHARE_READ | FILE_SHARE_WRITE,
19 NULL,
20 CREATE_ALWAYS,
21 FILE_ATTRIBUTE_NORMAL,
22 NULL);
23 if (hFile != INVALID_HANDLE_VALUE)
24 {
25 /* Write UTF-8 BOM */
26 WriteFile(hFile, "\xEF\xBB\xBF", 3, &dwBytesWritten, NULL);
27 }
28
29 return hFile;
30 }
31
32 static VOID
BestFit_CloseFile(HANDLE hFile)33 BestFit_CloseFile(HANDLE hFile)
34 {
35 CloseHandle(hFile);
36 }
37
38 static CHAR*
UTF8fromUNICODE(const WCHAR * pszInput,PSIZE_T Size)39 UTF8fromUNICODE(const WCHAR *pszInput, PSIZE_T Size)
40 {
41 ULONG Length;
42 CHAR *pszOutput;
43
44 if (!pszInput || !Size) return NULL;
45
46 Length = WideCharToMultiByte(CP_UTF8, 0, pszInput, -1, NULL, 0, NULL, NULL);
47
48 *Size = Length * sizeof(CHAR);
49
50 pszOutput = (CHAR *) malloc(*Size);
51 if (pszOutput)
52 {
53 WideCharToMultiByte(CP_UTF8, 0, pszInput, -1, pszOutput, Length, NULL, NULL);
54 }
55
56 return pszOutput;
57 }
58
59 static VOID
BestFit_Write(HANDLE hFile,const WCHAR * pszFormat,...)60 BestFit_Write(HANDLE hFile, const WCHAR *pszFormat, ...)
61 {
62 LARGE_INTEGER FileSize;
63 LARGE_INTEGER MoveTo;
64 LARGE_INTEGER NewPos;
65 DWORD dwBytesWritten;
66
67 if (hFile == INVALID_HANDLE_VALUE)
68 return;
69
70 MoveTo.QuadPart = 0;
71 if (!SetFilePointerEx(hFile, MoveTo, &NewPos, FILE_END))
72 return;
73
74 if (!GetFileSizeEx(hFile, &FileSize))
75 return;
76
77 if (LockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0))
78 {
79 WCHAR *pszString;
80 CHAR *pszUtf8;
81 va_list Args;
82 SIZE_T Size;
83
84 va_start(Args, pszFormat);
85
86 Size = (_vscwprintf(pszFormat, Args) + 1) * sizeof(WCHAR);
87 pszString = (WCHAR*) malloc(Size);
88
89 if (!pszString)
90 {
91 UnlockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0);
92 va_end(Args);
93 return;
94 }
95
96 StringCbVPrintfW(pszString, Size, pszFormat, Args);
97 va_end(Args);
98
99 pszUtf8 = UTF8fromUNICODE(pszString, &Size);
100 if (pszUtf8)
101 {
102 WriteFile(hFile, pszUtf8, Size - sizeof(CHAR), &dwBytesWritten, NULL);
103 free(pszUtf8);
104 }
105
106 free(pszString);
107
108 UnlockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0);
109 }
110 }
111
112 BOOL
BestFit_FromNLS(const WCHAR * pszNLSFile,const WCHAR * pszBestFitFile)113 BestFit_FromNLS(const WCHAR *pszNLSFile, const WCHAR *pszBestFitFile)
114 {
115 CPTABLEINFO CodePageTable;
116 PUSHORT CodePage;
117 HANDLE hFile;
118 USHORT CodePageChar;
119 ULONG UnicodeChar;
120
121 CodePage = NLS_ReadFile(pszNLSFile, &CodePageTable);
122 if (CodePage == NULL)
123 return FALSE;
124
125 hFile = BestFit_CreateFile(pszBestFitFile);
126 if (hFile == INVALID_HANDLE_VALUE)
127 {
128 free(CodePage);
129 return FALSE;
130 }
131
132 /* The only field is the decimal windows code page number for this code page. */
133 BestFit_Write(hFile, L"CODEPAGE %u\r\n\r\n", CodePageTable.CodePage);
134
135 BestFit_Write(hFile,
136 L"CPINFO %u 0x%02X 0x%04X\r\n\r\n",
137 /* "1" for a single byte code page, "2" for a double byte code page */
138 CodePageTable.MaximumCharacterSize,
139 /* Replacement characters for unassigned Unicode code points when
140 written to this code page */
141 CodePageTable.DefaultChar,
142 /* Replacement characters for illegal or unassigned code page values
143 when converting to Unicode. */
144 CodePageTable.UniDefaultChar);
145
146 /* This field contains the number of following records of code page to Unicode mappings. */
147 BestFit_Write(hFile, L"MBTABLE %u\r\n\r\n", NLS_RecordsCountForMBTable(&CodePageTable));
148
149 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
150 {
151 if (!NLS_IsDefaultCharForMB(&CodePageTable, CodePageChar))
152 {
153 WCHAR szCharName[MAX_STR_LEN] = { 0 };
154
155 GetUName(CodePageTable.MultiByteTable[CodePageChar], szCharName);
156
157 BestFit_Write(hFile,
158 L"0x%02X 0x%04X ;%s\r\n",
159 CodePageChar,
160 CodePageTable.MultiByteTable[CodePageChar],
161 szCharName);
162 }
163 }
164
165 BestFit_Write(hFile, L"\r\n");
166
167 if (NLS_IsGlyphTablePresent(&CodePageTable))
168 {
169 PUSHORT GlyphTable = CodePageTable.MultiByteTable + 256 + 1;
170
171 BestFit_Write(hFile, L"GLYPHTABLE %u\r\n\r\n", NLS_RecordsCountForGlyphTable(&CodePageTable));
172
173 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
174 {
175 if (CodePageChar != CodePageTable.UniDefaultChar)
176 {
177 WCHAR szCharName[MAX_STR_LEN] = { 0 };
178
179 GetUName(GlyphTable[CodePageChar], szCharName);
180
181 BestFit_Write(hFile,
182 L"0x%02X 0x%04X ;%s\r\n",
183 CodePageChar,
184 GlyphTable[CodePageChar],
185 szCharName);
186 }
187 }
188
189 BestFit_Write(hFile, L"\r\n");
190 }
191
192 if (NLS_IsDBCSCodePage(&CodePageTable))
193 {
194 PUSHORT LeadByteRanges = (PUSHORT)&CodePageTable.LeadByte[0];
195 USHORT Index;
196 USHORT LeadByte;
197
198 BestFit_Write(hFile,
199 L"DBCSRANGE %u ;%u DBCS Lead Byte Ranges\r\n\r\n",
200 CodePageTable.DBCSRanges[0],
201 CodePageTable.DBCSRanges[0]);
202
203 for (Index = 0; Index < MAXIMUM_LEADBYTES / 2; Index++)
204 {
205 if (!LeadByteRanges[Index])
206 continue;
207
208 BestFit_Write(hFile,
209 L"0x%X 0x%X ;Lead Byte Range %u\r\n\r\n",
210 LOBYTE(LeadByteRanges[Index]),
211 HIBYTE(LeadByteRanges[Index]),
212 Index + 1);
213
214 for (LeadByte = LOBYTE(LeadByteRanges[Index]);
215 LeadByte <= HIBYTE(LeadByteRanges[Index]);
216 LeadByte++)
217 {
218 PUSHORT LeadByteInfo = CodePageTable.DBCSOffsets;
219
220 BestFit_Write(hFile,
221 L"DBCSTABLE %u ;Range = %u, LeadByte = 0x%02X\r\n\r\n",
222 NLS_RecordsCountForDBCSTable(&CodePageTable, LeadByte),
223 Index + 1,
224 LeadByte);
225
226 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
227 {
228 USHORT Info = LeadByteInfo[LeadByte];
229
230 if (Info && LeadByteInfo[Info + CodePageChar] != CodePageTable.UniDefaultChar)
231 {
232 BestFit_Write(hFile,
233 L"0x%02X 0x%04X\r\n",
234 CodePageChar,
235 LeadByteInfo[Info + CodePageChar]);
236 }
237 }
238
239 BestFit_Write(hFile, L"\r\n");
240 }
241 }
242 }
243
244 /* This field contains the number of records of Unicode to byte mappings. */
245 BestFit_Write(hFile, L"WCTABLE %u\r\n\r\n", NLS_RecordsCountForUnicodeTable(&CodePageTable));
246
247 for (UnicodeChar = 0; UnicodeChar <= 0xFFFF; UnicodeChar++)
248 {
249 if (!NLS_IsDefaultCharForUnicode(&CodePageTable, UnicodeChar))
250 {
251 WCHAR szCharName[MAX_STR_LEN] = { 0 };
252
253 GetUName(UnicodeChar, szCharName);
254
255 if (NLS_IsDBCSCodePage(&CodePageTable))
256 {
257 PUSHORT MultiByteTable = (PUSHORT)CodePageTable.WideCharTable;
258
259 BestFit_Write(hFile,
260 L"0x%04X 0x%04X ;%s\r\n",
261 UnicodeChar,
262 MultiByteTable[UnicodeChar],
263 szCharName);
264 }
265 else
266 {
267 PUCHAR SingleByteTable = (PUCHAR)CodePageTable.WideCharTable;
268
269 BestFit_Write(hFile,
270 L"0x%04X 0x%02X ;%s\r\n",
271 UnicodeChar,
272 SingleByteTable[UnicodeChar],
273 szCharName);
274 }
275 }
276 }
277
278 /* This tag marks the end of the code page data. Anything after this marker is ignored. */
279 BestFit_Write(hFile, L"\r\nENDCODEPAGE\r\n");
280
281 BestFit_CloseFile(hFile);
282 free(CodePage);
283
284 return TRUE;
285 }
286