1 /*
2 * PROJECT: ReactOS Notepad
3 * LICENSE: LGPL-2.1-or-later (https://spdx.org/licenses/LGPL-2.1-or-later)
4 * PURPOSE: Providing a Windows-compatible simple text editor for ReactOS
5 * COPYRIGHT: Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
6 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
7 * Copyright 2002 Andriy Palamarchuk
8 * Copyright 2019-2023 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
9 */
10
11 #include "notepad.h"
12 #include <assert.h>
13
IsTextNonZeroASCII(LPCVOID pText,DWORD dwSize)14 static BOOL IsTextNonZeroASCII(LPCVOID pText, DWORD dwSize)
15 {
16 const signed char *pch = pText;
17 while (dwSize-- > 0)
18 {
19 if (*pch <= 0)
20 return FALSE;
21
22 ++pch;
23 }
24 return TRUE;
25 }
26
AnalyzeEncoding(const BYTE * pBytes,DWORD dwSize)27 static ENCODING AnalyzeEncoding(const BYTE *pBytes, DWORD dwSize)
28 {
29 INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;
30
31 if (IsTextNonZeroASCII(pBytes, dwSize))
32 return ENCODING_DEFAULT;
33
34 if (IsTextUnicode(pBytes, dwSize, &flags))
35 return ENCODING_UTF16LE;
36
37 if (((flags & IS_TEXT_UNICODE_REVERSE_MASK) == IS_TEXT_UNICODE_REVERSE_STATISTICS))
38 return ENCODING_UTF16BE;
39
40 /* is it UTF-8? */
41 if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, (LPCSTR)pBytes, dwSize, NULL, 0))
42 return ENCODING_UTF8;
43
44 return ENCODING_ANSI;
45 }
46
47 static VOID
ReplaceNewLines(LPWSTR pszNew,SIZE_T cchNew,LPCWSTR pszOld,SIZE_T cchOld)48 ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld)
49 {
50 BOOL bPrevCR = FALSE;
51 SIZE_T ichNew, ichOld;
52
53 for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld)
54 {
55 WCHAR ch = pszOld[ichOld];
56
57 if (ch == L'\n')
58 {
59 if (!bPrevCR)
60 {
61 pszNew[ichNew++] = L'\r';
62 pszNew[ichNew++] = L'\n';
63 }
64 }
65 else if (ch == '\r')
66 {
67 pszNew[ichNew++] = L'\r';
68 pszNew[ichNew++] = L'\n';
69 }
70 else
71 {
72 pszNew[ichNew++] = ch;
73 }
74
75 bPrevCR = (ch == L'\r');
76 }
77
78 pszNew[ichNew] = UNICODE_NULL;
79 assert(ichNew == cchNew);
80 }
81
82 static BOOL
ProcessNewLinesAndNulls(HLOCAL * phLocal,LPWSTR * ppszText,SIZE_T * pcchText,EOLN * piEoln)83 ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN *piEoln)
84 {
85 SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs;
86 LPWSTR pszText = *ppszText;
87 EOLN iEoln;
88 BOOL bPrevCR = FALSE;
89
90 /* Replace '\0' with SPACE. Count newlines. */
91 for (ich = 0; ich < cchText; ++ich)
92 {
93 WCHAR ch = pszText[ich];
94 if (ch == UNICODE_NULL)
95 pszText[ich] = L' ';
96
97 if (ch == L'\n')
98 {
99 if (bPrevCR)
100 {
101 adwEolnCount[EOLN_CR]--;
102 adwEolnCount[EOLN_CRLF]++;
103 }
104 else
105 {
106 adwEolnCount[EOLN_LF]++;
107 }
108 }
109 else if (ch == '\r')
110 {
111 adwEolnCount[EOLN_CR]++;
112 }
113
114 bPrevCR = (ch == L'\r');
115 }
116
117 /* Choose the newline code */
118 if (adwEolnCount[EOLN_CR] > adwEolnCount[EOLN_CRLF])
119 iEoln = EOLN_CR;
120 else if (adwEolnCount[EOLN_LF] > adwEolnCount[EOLN_CRLF])
121 iEoln = EOLN_LF;
122 else
123 iEoln = EOLN_CRLF;
124
125 cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF];
126 if (cNonCRLFs != 0)
127 {
128 /* Allocate a buffer for EM_SETHANDLE */
129 SIZE_T cchNew = cchText + cNonCRLFs;
130 HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * sizeof(WCHAR));
131 LPWSTR pszNew = LocalLock(hLocal);
132 if (!pszNew)
133 {
134 LocalFree(hLocal);
135 return FALSE; /* Failure */
136 }
137
138 ReplaceNewLines(pszNew, cchNew, pszText, cchText);
139
140 /* Replace with new data */
141 LocalUnlock(*phLocal);
142 LocalFree(*phLocal);
143 *phLocal = hLocal;
144 *ppszText = pszNew;
145 *pcchText = cchNew;
146 }
147
148 *piEoln = iEoln;
149 return TRUE;
150 }
151
152 BOOL
ReadText(HANDLE hFile,HLOCAL * phLocal,ENCODING * pencFile,EOLN * piEoln)153 ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln)
154 {
155 LPBYTE pBytes = NULL;
156 LPWSTR pszText, pszNewText = NULL;
157 DWORD dwSize, dwPos;
158 SIZE_T i, cchText, cbContent;
159 BOOL bSuccess = FALSE;
160 ENCODING encFile;
161 UINT iCodePage;
162 HANDLE hMapping = INVALID_HANDLE_VALUE;
163 HLOCAL hNewLocal;
164
165 dwSize = GetFileSize(hFile, NULL);
166 if (dwSize == INVALID_FILE_SIZE)
167 goto done;
168
169 if (dwSize == 0) // If file is empty
170 {
171 hNewLocal = LocalReAlloc(*phLocal, sizeof(UNICODE_NULL), LMEM_MOVEABLE);
172 pszNewText = LocalLock(hNewLocal);
173 if (hNewLocal == NULL || pszNewText == NULL)
174 goto done;
175
176 *pszNewText = UNICODE_NULL;
177 LocalUnlock(hNewLocal);
178
179 *phLocal = hNewLocal;
180 *piEoln = EOLN_CRLF;
181 *pencFile = ENCODING_DEFAULT;
182 return TRUE;
183 }
184
185 hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
186 if (hMapping == NULL)
187 goto done;
188
189 pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize);
190 if (!pBytes)
191 goto done;
192
193 /* Look for Byte Order Marks */
194 dwPos = 0;
195 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
196 {
197 encFile = ENCODING_UTF16LE;
198 dwPos += 2;
199 }
200 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
201 {
202 encFile = ENCODING_UTF16BE;
203 dwPos += 2;
204 }
205 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
206 {
207 encFile = ENCODING_UTF8BOM;
208 dwPos += 3;
209 }
210 else
211 {
212 encFile = AnalyzeEncoding(pBytes, dwSize);
213 }
214
215 switch(encFile)
216 {
217 case ENCODING_UTF16BE:
218 case ENCODING_UTF16LE:
219 {
220 /* Re-allocate the buffer for EM_SETHANDLE */
221 pszText = (LPWSTR) &pBytes[dwPos];
222 cchText = (dwSize - dwPos) / sizeof(WCHAR);
223 hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE);
224 pszNewText = LocalLock(hNewLocal);
225 if (pszNewText == NULL)
226 goto done;
227
228 *phLocal = hNewLocal;
229 CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR));
230
231 if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */
232 {
233 BYTE tmp, *pb = (LPBYTE)pszNewText;
234 for (i = 0; i < cchText * 2; i += 2)
235 {
236 tmp = pb[i];
237 pb[i] = pb[i + 1];
238 pb[i + 1] = tmp;
239 }
240 }
241 break;
242 }
243
244 case ENCODING_ANSI:
245 case ENCODING_UTF8:
246 case ENCODING_UTF8BOM:
247 {
248 iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) ? CP_UTF8 : CP_ACP);
249
250 /* Get ready for ANSI-to-Wide conversion */
251 cbContent = dwSize - dwPos;
252 cchText = 0;
253 if (cbContent > 0)
254 {
255 cchText = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], (INT)cbContent, NULL, 0);
256 if (cchText == 0)
257 goto done;
258 }
259
260 /* Re-allocate the buffer for EM_SETHANDLE */
261 hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE);
262 pszNewText = LocalLock(hNewLocal);
263 if (!pszNewText)
264 goto done;
265 *phLocal = hNewLocal;
266
267 /* Do ANSI-to-Wide conversion */
268 if (cbContent > 0)
269 {
270 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], (INT)cbContent,
271 pszNewText, (INT)cchText))
272 {
273 goto done;
274 }
275 }
276 break;
277 }
278
279 DEFAULT_UNREACHABLE;
280 }
281
282 pszNewText[cchText] = UNICODE_NULL;
283
284 if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln))
285 goto done;
286
287 *pencFile = encFile;
288 bSuccess = TRUE;
289
290 done:
291 if (pBytes)
292 UnmapViewOfFile(pBytes);
293 if (hMapping != INVALID_HANDLE_VALUE)
294 CloseHandle(hMapping);
295 if (pszNewText)
296 LocalUnlock(*phLocal);
297 return bSuccess;
298 }
299
WriteEncodedText(HANDLE hFile,LPCWSTR pszText,DWORD dwTextLen,ENCODING encFile)300 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
301 {
302 LPBYTE pBytes = NULL;
303 LPBYTE pAllocBuffer = NULL;
304 DWORD dwPos = 0;
305 DWORD dwByteCount;
306 BYTE buffer[1024];
307 UINT iCodePage = 0;
308 DWORD dwDummy, i;
309 BOOL bSuccess = FALSE;
310 int iBufferSize, iRequiredBytes;
311 BYTE b;
312
313 while(dwPos < dwTextLen)
314 {
315 switch(encFile)
316 {
317 case ENCODING_UTF16LE:
318 pBytes = (LPBYTE) &pszText[dwPos];
319 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
320 dwPos = dwTextLen;
321 break;
322
323 case ENCODING_UTF16BE:
324 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
325 if (dwByteCount > sizeof(buffer))
326 dwByteCount = sizeof(buffer);
327
328 memcpy(buffer, &pszText[dwPos], dwByteCount);
329 for (i = 0; i < dwByteCount; i += 2)
330 {
331 b = buffer[i+0];
332 buffer[i+0] = buffer[i+1];
333 buffer[i+1] = b;
334 }
335 pBytes = (LPBYTE) &buffer[dwPos];
336 dwPos += dwByteCount / sizeof(WCHAR);
337 break;
338
339 case ENCODING_ANSI:
340 case ENCODING_UTF8:
341 case ENCODING_UTF8BOM:
342 if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
343 iCodePage = CP_UTF8;
344 else
345 iCodePage = CP_ACP;
346
347 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
348 if (iRequiredBytes <= 0)
349 {
350 goto done;
351 }
352 else if (iRequiredBytes < sizeof(buffer))
353 {
354 pBytes = buffer;
355 iBufferSize = sizeof(buffer);
356 }
357 else
358 {
359 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
360 if (!pAllocBuffer)
361 return FALSE;
362 pBytes = pAllocBuffer;
363 iBufferSize = iRequiredBytes;
364 }
365
366 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
367 if (!dwByteCount)
368 goto done;
369
370 dwPos = dwTextLen;
371 break;
372
373 default:
374 goto done;
375 }
376
377 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
378 goto done;
379
380 /* free the buffer, if we have allocated one */
381 if (pAllocBuffer)
382 {
383 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
384 pAllocBuffer = NULL;
385 }
386 }
387 bSuccess = TRUE;
388
389 done:
390 if (pAllocBuffer)
391 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
392 return bSuccess;
393 }
394
WriteText(HANDLE hFile,LPCWSTR pszText,DWORD dwTextLen,ENCODING encFile,EOLN iEoln)395 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln)
396 {
397 WCHAR wcBom;
398 LPCWSTR pszLF = L"\n";
399 DWORD dwPos, dwNext;
400
401 /* Write the proper byte order marks if not ANSI or UTF-8 without BOM */
402 if (encFile != ENCODING_ANSI && encFile != ENCODING_UTF8)
403 {
404 wcBom = 0xFEFF;
405 if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
406 return FALSE;
407 }
408
409 dwPos = 0;
410
411 /* pszText eoln are always \r\n */
412
413 do
414 {
415 /* Find the next eoln */
416 dwNext = dwPos;
417 while(dwNext < dwTextLen)
418 {
419 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
420 break;
421 dwNext++;
422 }
423
424 if (dwNext != dwTextLen)
425 {
426 switch (iEoln)
427 {
428 case EOLN_LF:
429 /* Write text (without eoln) */
430 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
431 return FALSE;
432 /* Write eoln */
433 if (!WriteEncodedText(hFile, pszLF, 1, encFile))
434 return FALSE;
435 break;
436 case EOLN_CR:
437 /* Write text (including \r as eoln) */
438 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
439 return FALSE;
440 break;
441 case EOLN_CRLF:
442 /* Write text (including \r\n as eoln) */
443 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
444 return FALSE;
445 break;
446 default:
447 return FALSE;
448 }
449 }
450 else
451 {
452 /* Write text (without eoln, since this is the end of the file) */
453 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
454 return FALSE;
455 }
456
457 /* Skip \r\n */
458 dwPos = dwNext + 2;
459 }
460 while (dwPos < dwTextLen);
461
462 return TRUE;
463 }
464