1 /* 2 * Notepad (text.c) 3 * 4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch> 5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr> 6 * Copyright 2002 Andriy Palamarchuk 7 * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com> 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with this library; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24 #include "notepad.h" 25 26 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen) 27 { 28 LPWSTR pszNewText; 29 30 if (dwAppendLen > 0) 31 { 32 if (*ppszText) 33 { 34 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR)); 35 } 36 else 37 { 38 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR)); 39 } 40 41 if (!pszNewText) 42 return FALSE; 43 44 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR)); 45 *ppszText = pszNewText; 46 *pdwTextLen += dwAppendLen; 47 } 48 return TRUE; 49 } 50 51 BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize) 52 { 53 const signed char *pBytes = pText; 54 while (dwSize-- > 0) 55 { 56 if (*pBytes <= 0) 57 return FALSE; 58 59 ++pBytes; 60 } 61 return TRUE; 62 } 63 64 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize) 65 { 66 INT flags = IS_TEXT_UNICODE_STATISTICS; 67 68 if (dwSize <= 1) 69 return ENCODING_ANSI; 70 71 if (IsTextNonZeroASCII(pBytes, dwSize)) 72 { 73 return ENCODING_ANSI; 74 } 75 76 if (IsTextUnicode(pBytes, dwSize, &flags)) 77 { 78 return ENCODING_UTF16LE; 79 } 80 81 if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS)) 82 { 83 return ENCODING_UTF16BE; 84 } 85 86 /* is it UTF-8? */ 87 if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0)) 88 { 89 return ENCODING_UTF8; 90 } 91 92 return ENCODING_ANSI; 93 } 94 95 BOOL 96 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln) 97 { 98 DWORD dwSize; 99 LPBYTE pBytes = NULL; 100 LPWSTR pszText; 101 LPWSTR pszAllocText = NULL; 102 DWORD dwPos, i; 103 DWORD dwCharCount; 104 BOOL bSuccess = FALSE; 105 BYTE b = 0; 106 ENCODING encFile = ENCODING_ANSI; 107 int iCodePage = 0; 108 WCHAR szCrlf[2] = {'\r', '\n'}; 109 DWORD adwEolnCount[3] = {0, 0, 0}; 110 111 *ppszText = NULL; 112 *pdwTextLen = 0; 113 114 dwSize = GetFileSize(hFile, NULL); 115 if (dwSize == INVALID_FILE_SIZE) 116 goto done; 117 118 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2); 119 if (!pBytes) 120 goto done; 121 122 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL)) 123 goto done; 124 dwPos = 0; 125 126 /* Make sure that there is a NUL character at the end, in any encoding */ 127 pBytes[dwSize + 0] = '\0'; 128 pBytes[dwSize + 1] = '\0'; 129 130 /* Look for Byte Order Marks */ 131 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE)) 132 { 133 encFile = ENCODING_UTF16LE; 134 dwPos += 2; 135 } 136 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF)) 137 { 138 encFile = ENCODING_UTF16BE; 139 dwPos += 2; 140 } 141 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF)) 142 { 143 encFile = ENCODING_UTF8; 144 dwPos += 3; 145 } 146 else 147 { 148 encFile = AnalyzeEncoding((const char *)pBytes, dwSize); 149 } 150 151 switch(encFile) 152 { 153 case ENCODING_UTF16BE: 154 for (i = dwPos; i < dwSize-1; i += 2) 155 { 156 b = pBytes[i+0]; 157 pBytes[i+0] = pBytes[i+1]; 158 pBytes[i+1] = b; 159 } 160 /* fall through */ 161 162 case ENCODING_UTF16LE: 163 pszText = (LPWSTR) &pBytes[dwPos]; 164 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR); 165 break; 166 167 case ENCODING_ANSI: 168 case ENCODING_UTF8: 169 if (encFile == ENCODING_ANSI) 170 iCodePage = CP_ACP; 171 else if (encFile == ENCODING_UTF8) 172 iCodePage = CP_UTF8; 173 174 if ((dwSize - dwPos) > 0) 175 { 176 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0); 177 if (dwCharCount == 0) 178 goto done; 179 } 180 else 181 { 182 /* special case for files with no characters (other than BOMs) */ 183 dwCharCount = 0; 184 } 185 186 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR)); 187 if (!pszAllocText) 188 goto done; 189 190 if ((dwSize - dwPos) > 0) 191 { 192 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount)) 193 goto done; 194 } 195 196 pszAllocText[dwCharCount] = '\0'; 197 pszText = pszAllocText; 198 break; 199 DEFAULT_UNREACHABLE; 200 } 201 202 dwPos = 0; 203 for (i = 0; i < dwCharCount; i++) 204 { 205 switch(pszText[i]) 206 { 207 case '\r': 208 if ((i < dwCharCount-1) && (pszText[i+1] == '\n')) 209 { 210 i++; 211 adwEolnCount[EOLN_CRLF]++; 212 break; 213 } 214 /* fall through */ 215 216 case '\n': 217 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos)) 218 return FALSE; 219 if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf))) 220 return FALSE; 221 dwPos = i + 1; 222 223 if (pszText[i] == '\r') 224 adwEolnCount[EOLN_CR]++; 225 else 226 adwEolnCount[EOLN_LF]++; 227 break; 228 229 case '\0': 230 pszText[i] = ' '; 231 break; 232 } 233 } 234 235 if (!*ppszText && (pszText == pszAllocText)) 236 { 237 /* special case; don't need to reallocate */ 238 *ppszText = pszAllocText; 239 *pdwTextLen = dwCharCount; 240 pszAllocText = NULL; 241 } 242 else 243 { 244 /* append last remaining text */ 245 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1)) 246 return FALSE; 247 } 248 249 /* chose which eoln to use */ 250 *piEoln = EOLN_CRLF; 251 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln]) 252 *piEoln = EOLN_LF; 253 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln]) 254 *piEoln = EOLN_CR; 255 *pencFile = encFile; 256 257 bSuccess = TRUE; 258 259 done: 260 if (pBytes) 261 HeapFree(GetProcessHeap(), 0, pBytes); 262 if (pszAllocText) 263 HeapFree(GetProcessHeap(), 0, pszAllocText); 264 265 if (!bSuccess && *ppszText) 266 { 267 HeapFree(GetProcessHeap(), 0, *ppszText); 268 *ppszText = NULL; 269 *pdwTextLen = 0; 270 } 271 return bSuccess; 272 } 273 274 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile) 275 { 276 LPBYTE pBytes = NULL; 277 LPBYTE pAllocBuffer = NULL; 278 DWORD dwPos = 0; 279 DWORD dwByteCount; 280 BYTE buffer[1024]; 281 UINT iCodePage = 0; 282 DWORD dwDummy, i; 283 BOOL bSuccess = FALSE; 284 int iBufferSize, iRequiredBytes; 285 BYTE b; 286 287 while(dwPos < dwTextLen) 288 { 289 switch(encFile) 290 { 291 case ENCODING_UTF16LE: 292 pBytes = (LPBYTE) &pszText[dwPos]; 293 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 294 dwPos = dwTextLen; 295 break; 296 297 case ENCODING_UTF16BE: 298 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 299 if (dwByteCount > sizeof(buffer)) 300 dwByteCount = sizeof(buffer); 301 302 memcpy(buffer, &pszText[dwPos], dwByteCount); 303 for (i = 0; i < dwByteCount; i += 2) 304 { 305 b = buffer[i+0]; 306 buffer[i+0] = buffer[i+1]; 307 buffer[i+1] = b; 308 } 309 pBytes = (LPBYTE) &buffer[dwPos]; 310 dwPos += dwByteCount / sizeof(WCHAR); 311 break; 312 313 case ENCODING_ANSI: 314 case ENCODING_UTF8: 315 if (encFile == ENCODING_ANSI) 316 iCodePage = CP_ACP; 317 else if (encFile == ENCODING_UTF8) 318 iCodePage = CP_UTF8; 319 320 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL); 321 if (iRequiredBytes <= 0) 322 { 323 goto done; 324 } 325 else if (iRequiredBytes < sizeof(buffer)) 326 { 327 pBytes = buffer; 328 iBufferSize = sizeof(buffer); 329 } 330 else 331 { 332 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes); 333 if (!pAllocBuffer) 334 return FALSE; 335 pBytes = pAllocBuffer; 336 iBufferSize = iRequiredBytes; 337 } 338 339 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL); 340 if (!dwByteCount) 341 goto done; 342 343 dwPos = dwTextLen; 344 break; 345 346 default: 347 goto done; 348 } 349 350 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL)) 351 goto done; 352 353 /* free the buffer, if we have allocated one */ 354 if (pAllocBuffer) 355 { 356 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 357 pAllocBuffer = NULL; 358 } 359 } 360 bSuccess = TRUE; 361 362 done: 363 if (pAllocBuffer) 364 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 365 return bSuccess; 366 } 367 368 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln) 369 { 370 WCHAR wcBom; 371 LPCWSTR pszLF = L"\n"; 372 DWORD dwPos, dwNext; 373 374 /* Write the proper byte order marks if not ANSI */ 375 if (encFile != ENCODING_ANSI) 376 { 377 wcBom = 0xFEFF; 378 if (!WriteEncodedText(hFile, &wcBom, 1, encFile)) 379 return FALSE; 380 } 381 382 dwPos = 0; 383 384 /* pszText eoln are always \r\n */ 385 386 do 387 { 388 /* Find the next eoln */ 389 dwNext = dwPos; 390 while(dwNext < dwTextLen) 391 { 392 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n') 393 break; 394 dwNext++; 395 } 396 397 if (dwNext != dwTextLen) 398 { 399 switch (iEoln) 400 { 401 case EOLN_LF: 402 /* Write text (without eoln) */ 403 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 404 return FALSE; 405 /* Write eoln */ 406 if (!WriteEncodedText(hFile, pszLF, 1, encFile)) 407 return FALSE; 408 break; 409 case EOLN_CR: 410 /* Write text (including \r as eoln) */ 411 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile)) 412 return FALSE; 413 break; 414 case EOLN_CRLF: 415 /* Write text (including \r\n as eoln) */ 416 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile)) 417 return FALSE; 418 break; 419 default: 420 return FALSE; 421 } 422 } 423 else 424 { 425 /* Write text (without eoln, since this is the end of the file) */ 426 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 427 return FALSE; 428 } 429 430 /* Skip \r\n */ 431 dwPos = dwNext + 2; 432 } 433 while (dwPos < dwTextLen); 434 435 return TRUE; 436 } 437