1 /*
2  * PROJECT:     ReactOS NLS to TXT Converter
3  * LICENSE:     GNU General Public License Version 2.0 or any later version
4  * FILE:        devutils/nls2txt/bestfit.c
5  * COPYRIGHT:   Copyright 2016 Dmitry Chapyshev <dmitry@reactos.org>
6  */
7 
8 #include "precomp.h"
9 
10 static HANDLE
11 BestFit_CreateFile(const WCHAR *pszFile)
12 {
13     DWORD dwBytesWritten;
14     HANDLE hFile;
15 
16     hFile = CreateFileW(pszFile,
17                         GENERIC_WRITE,
18                         FILE_SHARE_READ | FILE_SHARE_WRITE,
19                         NULL,
20                         CREATE_ALWAYS,
21                         FILE_ATTRIBUTE_NORMAL,
22                         NULL);
23     if (hFile != INVALID_HANDLE_VALUE)
24     {
25         /* Write UTF-8 BOM */
26         WriteFile(hFile, "\xEF\xBB\xBF", 3, &dwBytesWritten, NULL);
27     }
28 
29     return hFile;
30 }
31 
32 static VOID
33 BestFit_CloseFile(HANDLE hFile)
34 {
35     CloseHandle(hFile);
36 }
37 
38 static CHAR*
39 UTF8fromUNICODE(const WCHAR *pszInput, PSIZE_T Size)
40 {
41     ULONG Length;
42     CHAR *pszOutput;
43 
44     if (!pszInput || !Size) return NULL;
45 
46     Length = WideCharToMultiByte(CP_UTF8, 0, pszInput, -1, NULL, 0, NULL, NULL);
47 
48     *Size = Length * sizeof(CHAR);
49 
50     pszOutput = (CHAR *) malloc(*Size);
51     if (pszOutput)
52     {
53         WideCharToMultiByte(CP_UTF8, 0, pszInput, -1, pszOutput, Length, NULL, NULL);
54     }
55 
56     return pszOutput;
57 }
58 
59 static VOID
60 BestFit_Write(HANDLE hFile, const WCHAR *pszFormat, ...)
61 {
62     LARGE_INTEGER FileSize;
63     LARGE_INTEGER MoveTo;
64     LARGE_INTEGER NewPos;
65     DWORD dwBytesWritten;
66 
67     if (hFile == INVALID_HANDLE_VALUE)
68         return;
69 
70     MoveTo.QuadPart = 0;
71     if (!SetFilePointerEx(hFile, MoveTo, &NewPos, FILE_END))
72         return;
73 
74     if (!GetFileSizeEx(hFile, &FileSize))
75         return;
76 
77     if (LockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0))
78     {
79         WCHAR *pszString;
80         CHAR *pszUtf8;
81         va_list Args;
82         SIZE_T Size;
83 
84         va_start(Args, pszFormat);
85 
86         Size = (_vscwprintf(pszFormat, Args) + 1) * sizeof(WCHAR);
87         pszString = (WCHAR*) malloc(Size);
88 
89         if (!pszString)
90         {
91             UnlockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0);
92             va_end(Args);
93             return;
94         }
95 
96         StringCbVPrintfW(pszString, Size, pszFormat, Args);
97         va_end(Args);
98 
99         pszUtf8 = UTF8fromUNICODE(pszString, &Size);
100         if (pszUtf8)
101         {
102             WriteFile(hFile, pszUtf8, Size - sizeof(CHAR), &dwBytesWritten, NULL);
103             free(pszUtf8);
104         }
105 
106         free(pszString);
107 
108         UnlockFile(hFile, (DWORD_PTR)NewPos.QuadPart, 0, (DWORD_PTR)FileSize.QuadPart, 0);
109     }
110 }
111 
112 BOOL
113 BestFit_FromNLS(const WCHAR *pszNLSFile, const WCHAR *pszBestFitFile)
114 {
115     CPTABLEINFO CodePageTable;
116     PUSHORT CodePage;
117     HANDLE hFile;
118     USHORT CodePageChar;
119     ULONG UnicodeChar;
120 
121     CodePage = NLS_ReadFile(pszNLSFile, &CodePageTable);
122     if (CodePage == NULL)
123         return FALSE;
124 
125     hFile = BestFit_CreateFile(pszBestFitFile);
126     if (hFile == INVALID_HANDLE_VALUE)
127     {
128         free(CodePage);
129         return FALSE;
130     }
131 
132     /* The only field is the decimal windows code page number for this code page. */
133     BestFit_Write(hFile, L"CODEPAGE %u\r\n\r\n", CodePageTable.CodePage);
134 
135     BestFit_Write(hFile,
136                   L"CPINFO %u 0x%02X 0x%04X\r\n\r\n",
137                   /* "1" for a single byte code page, "2" for a double byte code page */
138                   CodePageTable.MaximumCharacterSize,
139                   /* Replacement characters for unassigned Unicode code points when
140                      written to this code page */
141                   CodePageTable.DefaultChar,
142                   /* Replacement characters for illegal or unassigned code page values
143                      when converting to Unicode. */
144                   CodePageTable.UniDefaultChar);
145 
146     /* This field contains the number of following records of code page to Unicode mappings. */
147     BestFit_Write(hFile, L"MBTABLE %u\r\n\r\n", NLS_RecordsCountForMBTable(&CodePageTable));
148 
149     for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
150     {
151         if (!NLS_IsDefaultCharForMB(&CodePageTable, CodePageChar))
152         {
153             WCHAR szCharName[MAX_STR_LEN] = { 0 };
154 
155             GetUName(CodePageTable.MultiByteTable[CodePageChar], szCharName);
156 
157             BestFit_Write(hFile,
158                           L"0x%02X 0x%04X ;%s\r\n",
159                           CodePageChar,
160                           CodePageTable.MultiByteTable[CodePageChar],
161                           szCharName);
162         }
163     }
164 
165     BestFit_Write(hFile, L"\r\n");
166 
167     if (NLS_IsGlyphTablePresent(&CodePageTable))
168     {
169         PUSHORT GlyphTable = CodePageTable.MultiByteTable + 256 + 1;
170 
171         BestFit_Write(hFile, L"GLYPHTABLE %u\r\n\r\n", NLS_RecordsCountForGlyphTable(&CodePageTable));
172 
173         for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
174         {
175             if (CodePageChar != CodePageTable.UniDefaultChar)
176             {
177                 WCHAR szCharName[MAX_STR_LEN] = { 0 };
178 
179                 GetUName(GlyphTable[CodePageChar], szCharName);
180 
181                 BestFit_Write(hFile,
182                               L"0x%02X 0x%04X ;%s\r\n",
183                               CodePageChar,
184                               GlyphTable[CodePageChar],
185                               szCharName);
186             }
187         }
188 
189         BestFit_Write(hFile, L"\r\n");
190     }
191 
192     if (NLS_IsDBCSCodePage(&CodePageTable))
193     {
194         PUSHORT LeadByteRanges = (PUSHORT)&CodePageTable.LeadByte[0];
195         USHORT Index;
196         USHORT LeadByte;
197 
198         BestFit_Write(hFile,
199                       L"DBCSRANGE %u ;%u DBCS Lead Byte Ranges\r\n\r\n",
200                       CodePageTable.DBCSRanges[0],
201                       CodePageTable.DBCSRanges[0]);
202 
203         for (Index = 0; Index < MAXIMUM_LEADBYTES / 2; Index++)
204         {
205             if (!LeadByteRanges[Index])
206                 continue;
207 
208             BestFit_Write(hFile,
209                           L"0x%X 0x%X ;Lead Byte Range %u\r\n\r\n",
210                           LOBYTE(LeadByteRanges[Index]),
211                           HIBYTE(LeadByteRanges[Index]),
212                           Index + 1);
213 
214             for (LeadByte = LOBYTE(LeadByteRanges[Index]);
215                  LeadByte <= HIBYTE(LeadByteRanges[Index]);
216                  LeadByte++)
217             {
218                 PUSHORT LeadByteInfo = CodePageTable.DBCSOffsets;
219 
220                 BestFit_Write(hFile,
221                               L"DBCSTABLE %u ;Range = %u, LeadByte = 0x%02X\r\n\r\n",
222                               NLS_RecordsCountForDBCSTable(&CodePageTable, LeadByte),
223                               Index + 1,
224                               LeadByte);
225 
226                 for (CodePageChar = 0; CodePageChar <= 0xFF; CodePageChar++)
227                 {
228                     USHORT Info = LeadByteInfo[LeadByte];
229 
230                     if (Info && LeadByteInfo[Info + CodePageChar] != CodePageTable.UniDefaultChar)
231                     {
232                         BestFit_Write(hFile,
233                                       L"0x%02X 0x%04X\r\n",
234                                       CodePageChar,
235                                       LeadByteInfo[Info + CodePageChar]);
236                     }
237                 }
238 
239                 BestFit_Write(hFile, L"\r\n");
240             }
241         }
242     }
243 
244     /* This field contains the number of records of Unicode to byte mappings. */
245     BestFit_Write(hFile, L"WCTABLE %u\r\n\r\n", NLS_RecordsCountForUnicodeTable(&CodePageTable));
246 
247     for (UnicodeChar = 0; UnicodeChar <= 0xFFFF; UnicodeChar++)
248     {
249         if (!NLS_IsDefaultCharForUnicode(&CodePageTable, UnicodeChar))
250         {
251             WCHAR szCharName[MAX_STR_LEN] = { 0 };
252 
253             GetUName(UnicodeChar, szCharName);
254 
255             if (NLS_IsDBCSCodePage(&CodePageTable))
256             {
257                 PUSHORT MultiByteTable = (PUSHORT)CodePageTable.WideCharTable;
258 
259                 BestFit_Write(hFile,
260                               L"0x%04X 0x%04X ;%s\r\n",
261                               UnicodeChar,
262                               MultiByteTable[UnicodeChar],
263                               szCharName);
264             }
265             else
266             {
267                 PUCHAR SingleByteTable = (PUCHAR)CodePageTable.WideCharTable;
268 
269                 BestFit_Write(hFile,
270                               L"0x%04X 0x%02X ;%s\r\n",
271                               UnicodeChar,
272                               SingleByteTable[UnicodeChar],
273                               szCharName);
274             }
275         }
276     }
277 
278     /* This tag marks the end of the code page data. Anything after this marker is ignored. */
279     BestFit_Write(hFile, L"\r\nENDCODEPAGE\r\n");
280 
281     BestFit_CloseFile(hFile);
282     free(CodePage);
283 
284     return TRUE;
285 }
286