1 /* 2 * PROJECT: ReactOS Notepad 3 * LICENSE: LGPL-2.1-or-later (https://spdx.org/licenses/LGPL-2.1-or-later) 4 * PURPOSE: Providing a Windows-compatible simple text editor for ReactOS 5 * COPYRIGHT: Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch> 6 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr> 7 * Copyright 2002 Andriy Palamarchuk 8 * Copyright 2019-2023 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com> 9 */ 10 11 #include "notepad.h" 12 #include <assert.h> 13 14 static BOOL IsTextNonZeroASCII(LPCVOID pText, DWORD dwSize) 15 { 16 const signed char *pch = pText; 17 while (dwSize-- > 0) 18 { 19 if (*pch <= 0) 20 return FALSE; 21 22 ++pch; 23 } 24 return TRUE; 25 } 26 27 static ENCODING AnalyzeEncoding(const BYTE *pBytes, DWORD dwSize) 28 { 29 INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS; 30 31 if (IsTextNonZeroASCII(pBytes, dwSize)) 32 return ENCODING_DEFAULT; 33 34 if (IsTextUnicode(pBytes, dwSize, &flags)) 35 return ENCODING_UTF16LE; 36 37 if (((flags & IS_TEXT_UNICODE_REVERSE_MASK) == IS_TEXT_UNICODE_REVERSE_STATISTICS)) 38 return ENCODING_UTF16BE; 39 40 /* is it UTF-8? */ 41 if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, (LPCSTR)pBytes, dwSize, NULL, 0)) 42 return ENCODING_UTF8; 43 44 return ENCODING_ANSI; 45 } 46 47 static VOID 48 ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld) 49 { 50 BOOL bPrevCR = FALSE; 51 SIZE_T ichNew, ichOld; 52 53 for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld) 54 { 55 WCHAR ch = pszOld[ichOld]; 56 57 if (ch == L'\n') 58 { 59 if (!bPrevCR) 60 { 61 pszNew[ichNew++] = L'\r'; 62 pszNew[ichNew++] = L'\n'; 63 } 64 } 65 else if (ch == '\r') 66 { 67 pszNew[ichNew++] = L'\r'; 68 pszNew[ichNew++] = L'\n'; 69 } 70 else 71 { 72 pszNew[ichNew++] = ch; 73 } 74 75 bPrevCR = (ch == L'\r'); 76 } 77 78 pszNew[ichNew] = UNICODE_NULL; 79 assert(ichNew == cchNew); 80 } 81 82 static BOOL 83 ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN *piEoln) 84 { 85 SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs; 86 LPWSTR pszText = *ppszText; 87 EOLN iEoln; 88 BOOL bPrevCR = FALSE; 89 90 /* Replace '\0' with SPACE. Count newlines. */ 91 for (ich = 0; ich < cchText; ++ich) 92 { 93 WCHAR ch = pszText[ich]; 94 if (ch == UNICODE_NULL) 95 pszText[ich] = L' '; 96 97 if (ch == L'\n') 98 { 99 if (bPrevCR) 100 { 101 adwEolnCount[EOLN_CR]--; 102 adwEolnCount[EOLN_CRLF]++; 103 } 104 else 105 { 106 adwEolnCount[EOLN_LF]++; 107 } 108 } 109 else if (ch == '\r') 110 { 111 adwEolnCount[EOLN_CR]++; 112 } 113 114 bPrevCR = (ch == L'\r'); 115 } 116 117 /* Choose the newline code */ 118 if (adwEolnCount[EOLN_CR] > adwEolnCount[EOLN_CRLF]) 119 iEoln = EOLN_CR; 120 else if (adwEolnCount[EOLN_LF] > adwEolnCount[EOLN_CRLF]) 121 iEoln = EOLN_LF; 122 else 123 iEoln = EOLN_CRLF; 124 125 cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF]; 126 if (cNonCRLFs != 0) 127 { 128 /* Allocate a buffer for EM_SETHANDLE */ 129 SIZE_T cchNew = cchText + cNonCRLFs; 130 HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * sizeof(WCHAR)); 131 LPWSTR pszNew = LocalLock(hLocal); 132 if (!pszNew) 133 { 134 LocalFree(hLocal); 135 return FALSE; /* Failure */ 136 } 137 138 ReplaceNewLines(pszNew, cchNew, pszText, cchText); 139 140 /* Replace with new data */ 141 LocalUnlock(*phLocal); 142 LocalFree(*phLocal); 143 *phLocal = hLocal; 144 *ppszText = pszNew; 145 *pcchText = cchNew; 146 } 147 148 *piEoln = iEoln; 149 return TRUE; 150 } 151 152 BOOL 153 ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln) 154 { 155 LPBYTE pBytes = NULL; 156 LPWSTR pszText, pszNewText = NULL; 157 DWORD dwSize, dwPos; 158 SIZE_T i, cchText, cbContent; 159 BOOL bSuccess = FALSE; 160 ENCODING encFile; 161 UINT iCodePage; 162 HANDLE hMapping = INVALID_HANDLE_VALUE; 163 HLOCAL hNewLocal; 164 165 dwSize = GetFileSize(hFile, NULL); 166 if (dwSize == INVALID_FILE_SIZE) 167 goto done; 168 169 if (dwSize == 0) // If file is empty 170 { 171 hNewLocal = LocalReAlloc(*phLocal, sizeof(UNICODE_NULL), LMEM_MOVEABLE); 172 pszNewText = LocalLock(hNewLocal); 173 if (hNewLocal == NULL || pszNewText == NULL) 174 goto done; 175 176 *pszNewText = UNICODE_NULL; 177 LocalUnlock(hNewLocal); 178 179 *phLocal = hNewLocal; 180 *piEoln = EOLN_CRLF; 181 *pencFile = ENCODING_DEFAULT; 182 return TRUE; 183 } 184 185 hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL); 186 if (hMapping == NULL) 187 goto done; 188 189 pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize); 190 if (!pBytes) 191 goto done; 192 193 /* Look for Byte Order Marks */ 194 dwPos = 0; 195 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE)) 196 { 197 encFile = ENCODING_UTF16LE; 198 dwPos += 2; 199 } 200 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF)) 201 { 202 encFile = ENCODING_UTF16BE; 203 dwPos += 2; 204 } 205 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF)) 206 { 207 encFile = ENCODING_UTF8BOM; 208 dwPos += 3; 209 } 210 else 211 { 212 encFile = AnalyzeEncoding(pBytes, dwSize); 213 } 214 215 switch(encFile) 216 { 217 case ENCODING_UTF16BE: 218 case ENCODING_UTF16LE: 219 { 220 /* Re-allocate the buffer for EM_SETHANDLE */ 221 pszText = (LPWSTR) &pBytes[dwPos]; 222 cchText = (dwSize - dwPos) / sizeof(WCHAR); 223 hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE); 224 pszNewText = LocalLock(hNewLocal); 225 if (pszNewText == NULL) 226 goto done; 227 228 *phLocal = hNewLocal; 229 CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR)); 230 231 if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */ 232 { 233 BYTE tmp, *pb = (LPBYTE)pszNewText; 234 for (i = 0; i < cchText * 2; i += 2) 235 { 236 tmp = pb[i]; 237 pb[i] = pb[i + 1]; 238 pb[i + 1] = tmp; 239 } 240 } 241 break; 242 } 243 244 case ENCODING_ANSI: 245 case ENCODING_UTF8: 246 case ENCODING_UTF8BOM: 247 { 248 iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) ? CP_UTF8 : CP_ACP); 249 250 /* Get ready for ANSI-to-Wide conversion */ 251 cbContent = dwSize - dwPos; 252 cchText = 0; 253 if (cbContent > 0) 254 { 255 cchText = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], (INT)cbContent, NULL, 0); 256 if (cchText == 0) 257 goto done; 258 } 259 260 /* Re-allocate the buffer for EM_SETHANDLE */ 261 hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE); 262 pszNewText = LocalLock(hNewLocal); 263 if (!pszNewText) 264 goto done; 265 *phLocal = hNewLocal; 266 267 /* Do ANSI-to-Wide conversion */ 268 if (cbContent > 0) 269 { 270 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], (INT)cbContent, 271 pszNewText, (INT)cchText)) 272 { 273 goto done; 274 } 275 } 276 break; 277 } 278 279 DEFAULT_UNREACHABLE; 280 } 281 282 pszNewText[cchText] = UNICODE_NULL; 283 284 if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln)) 285 goto done; 286 287 *pencFile = encFile; 288 bSuccess = TRUE; 289 290 done: 291 if (pBytes) 292 UnmapViewOfFile(pBytes); 293 if (hMapping != INVALID_HANDLE_VALUE) 294 CloseHandle(hMapping); 295 if (pszNewText) 296 LocalUnlock(*phLocal); 297 return bSuccess; 298 } 299 300 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile) 301 { 302 LPBYTE pBytes = NULL; 303 LPBYTE pAllocBuffer = NULL; 304 DWORD dwPos = 0; 305 DWORD dwByteCount; 306 BYTE buffer[1024]; 307 UINT iCodePage = 0; 308 DWORD dwDummy, i; 309 BOOL bSuccess = FALSE; 310 int iBufferSize, iRequiredBytes; 311 BYTE b; 312 313 while(dwPos < dwTextLen) 314 { 315 switch(encFile) 316 { 317 case ENCODING_UTF16LE: 318 pBytes = (LPBYTE) &pszText[dwPos]; 319 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 320 dwPos = dwTextLen; 321 break; 322 323 case ENCODING_UTF16BE: 324 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 325 if (dwByteCount > sizeof(buffer)) 326 dwByteCount = sizeof(buffer); 327 328 memcpy(buffer, &pszText[dwPos], dwByteCount); 329 for (i = 0; i < dwByteCount; i += 2) 330 { 331 b = buffer[i+0]; 332 buffer[i+0] = buffer[i+1]; 333 buffer[i+1] = b; 334 } 335 pBytes = (LPBYTE) &buffer[dwPos]; 336 dwPos += dwByteCount / sizeof(WCHAR); 337 break; 338 339 case ENCODING_ANSI: 340 case ENCODING_UTF8: 341 case ENCODING_UTF8BOM: 342 if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) 343 iCodePage = CP_UTF8; 344 else 345 iCodePage = CP_ACP; 346 347 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL); 348 if (iRequiredBytes <= 0) 349 { 350 goto done; 351 } 352 else if (iRequiredBytes < sizeof(buffer)) 353 { 354 pBytes = buffer; 355 iBufferSize = sizeof(buffer); 356 } 357 else 358 { 359 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes); 360 if (!pAllocBuffer) 361 return FALSE; 362 pBytes = pAllocBuffer; 363 iBufferSize = iRequiredBytes; 364 } 365 366 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL); 367 if (!dwByteCount) 368 goto done; 369 370 dwPos = dwTextLen; 371 break; 372 373 default: 374 goto done; 375 } 376 377 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL)) 378 goto done; 379 380 /* free the buffer, if we have allocated one */ 381 if (pAllocBuffer) 382 { 383 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 384 pAllocBuffer = NULL; 385 } 386 } 387 bSuccess = TRUE; 388 389 done: 390 if (pAllocBuffer) 391 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 392 return bSuccess; 393 } 394 395 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln) 396 { 397 WCHAR wcBom; 398 LPCWSTR pszLF = L"\n"; 399 DWORD dwPos, dwNext; 400 401 /* Write the proper byte order marks if not ANSI or UTF-8 without BOM */ 402 if (encFile != ENCODING_ANSI && encFile != ENCODING_UTF8) 403 { 404 wcBom = 0xFEFF; 405 if (!WriteEncodedText(hFile, &wcBom, 1, encFile)) 406 return FALSE; 407 } 408 409 dwPos = 0; 410 411 /* pszText eoln are always \r\n */ 412 413 do 414 { 415 /* Find the next eoln */ 416 dwNext = dwPos; 417 while(dwNext < dwTextLen) 418 { 419 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n') 420 break; 421 dwNext++; 422 } 423 424 if (dwNext != dwTextLen) 425 { 426 switch (iEoln) 427 { 428 case EOLN_LF: 429 /* Write text (without eoln) */ 430 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 431 return FALSE; 432 /* Write eoln */ 433 if (!WriteEncodedText(hFile, pszLF, 1, encFile)) 434 return FALSE; 435 break; 436 case EOLN_CR: 437 /* Write text (including \r as eoln) */ 438 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile)) 439 return FALSE; 440 break; 441 case EOLN_CRLF: 442 /* Write text (including \r\n as eoln) */ 443 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile)) 444 return FALSE; 445 break; 446 default: 447 return FALSE; 448 } 449 } 450 else 451 { 452 /* Write text (without eoln, since this is the end of the file) */ 453 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 454 return FALSE; 455 } 456 457 /* Skip \r\n */ 458 dwPos = dwNext + 2; 459 } 460 while (dwPos < dwTextLen); 461 462 return TRUE; 463 } 464