1 /* 2 * Notepad (text.c) 3 * 4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch> 5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr> 6 * Copyright 2002 Andriy Palamarchuk 7 * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com> 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with this library; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24 #include "notepad.h" 25 26 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen) 27 { 28 LPWSTR pszNewText; 29 30 if (dwAppendLen > 0) 31 { 32 if (*ppszText) 33 { 34 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR)); 35 } 36 else 37 { 38 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR)); 39 } 40 41 if (!pszNewText) 42 return FALSE; 43 44 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR)); 45 *ppszText = pszNewText; 46 *pdwTextLen += dwAppendLen; 47 } 48 return TRUE; 49 } 50 51 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize) 52 { 53 INT flags = IS_TEXT_UNICODE_STATISTICS; 54 55 if (dwSize <= 1) 56 return ENCODING_ANSI; 57 58 if (IsTextUnicode(pBytes, dwSize, &flags)) 59 { 60 return ENCODING_UTF16LE; 61 } 62 63 if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS)) 64 { 65 return ENCODING_UTF16BE; 66 } 67 68 /* is it UTF-8? */ 69 if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0)) 70 { 71 return ENCODING_UTF8; 72 } 73 74 return ENCODING_ANSI; 75 } 76 77 BOOL 78 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln) 79 { 80 DWORD dwSize; 81 LPBYTE pBytes = NULL; 82 LPWSTR pszText; 83 LPWSTR pszAllocText = NULL; 84 DWORD dwPos, i; 85 DWORD dwCharCount; 86 BOOL bSuccess = FALSE; 87 BYTE b = 0; 88 ENCODING encFile = ENCODING_ANSI; 89 int iCodePage = 0; 90 WCHAR szCrlf[2] = {'\r', '\n'}; 91 DWORD adwEolnCount[3] = {0, 0, 0}; 92 93 *ppszText = NULL; 94 *pdwTextLen = 0; 95 96 dwSize = GetFileSize(hFile, NULL); 97 if (dwSize == INVALID_FILE_SIZE) 98 goto done; 99 100 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2); 101 if (!pBytes) 102 goto done; 103 104 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL)) 105 goto done; 106 dwPos = 0; 107 108 /* Make sure that there is a NUL character at the end, in any encoding */ 109 pBytes[dwSize + 0] = '\0'; 110 pBytes[dwSize + 1] = '\0'; 111 112 /* Look for Byte Order Marks */ 113 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE)) 114 { 115 encFile = ENCODING_UTF16LE; 116 dwPos += 2; 117 } 118 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF)) 119 { 120 encFile = ENCODING_UTF16BE; 121 dwPos += 2; 122 } 123 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF)) 124 { 125 encFile = ENCODING_UTF8; 126 dwPos += 3; 127 } 128 else 129 { 130 encFile = AnalyzeEncoding((const char *)pBytes, dwSize); 131 } 132 133 switch(encFile) 134 { 135 case ENCODING_UTF16BE: 136 for (i = dwPos; i < dwSize-1; i += 2) 137 { 138 b = pBytes[i+0]; 139 pBytes[i+0] = pBytes[i+1]; 140 pBytes[i+1] = b; 141 } 142 /* fall through */ 143 144 case ENCODING_UTF16LE: 145 pszText = (LPWSTR) &pBytes[dwPos]; 146 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR); 147 break; 148 149 case ENCODING_ANSI: 150 case ENCODING_UTF8: 151 if (encFile == ENCODING_ANSI) 152 iCodePage = CP_ACP; 153 else if (encFile == ENCODING_UTF8) 154 iCodePage = CP_UTF8; 155 156 if ((dwSize - dwPos) > 0) 157 { 158 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0); 159 if (dwCharCount == 0) 160 goto done; 161 } 162 else 163 { 164 /* special case for files with no characters (other than BOMs) */ 165 dwCharCount = 0; 166 } 167 168 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR)); 169 if (!pszAllocText) 170 goto done; 171 172 if ((dwSize - dwPos) > 0) 173 { 174 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount)) 175 goto done; 176 } 177 178 pszAllocText[dwCharCount] = '\0'; 179 pszText = pszAllocText; 180 break; 181 DEFAULT_UNREACHABLE; 182 } 183 184 dwPos = 0; 185 for (i = 0; i < dwCharCount; i++) 186 { 187 switch(pszText[i]) 188 { 189 case '\r': 190 if ((i < dwCharCount-1) && (pszText[i+1] == '\n')) 191 { 192 i++; 193 adwEolnCount[EOLN_CRLF]++; 194 break; 195 } 196 /* fall through */ 197 198 case '\n': 199 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos)) 200 return FALSE; 201 if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf))) 202 return FALSE; 203 dwPos = i + 1; 204 205 if (pszText[i] == '\r') 206 adwEolnCount[EOLN_CR]++; 207 else 208 adwEolnCount[EOLN_LF]++; 209 break; 210 211 case '\0': 212 pszText[i] = ' '; 213 break; 214 } 215 } 216 217 if (!*ppszText && (pszText == pszAllocText)) 218 { 219 /* special case; don't need to reallocate */ 220 *ppszText = pszAllocText; 221 *pdwTextLen = dwCharCount; 222 pszAllocText = NULL; 223 } 224 else 225 { 226 /* append last remaining text */ 227 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1)) 228 return FALSE; 229 } 230 231 /* chose which eoln to use */ 232 *piEoln = EOLN_CRLF; 233 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln]) 234 *piEoln = EOLN_LF; 235 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln]) 236 *piEoln = EOLN_CR; 237 *pencFile = encFile; 238 239 bSuccess = TRUE; 240 241 done: 242 if (pBytes) 243 HeapFree(GetProcessHeap(), 0, pBytes); 244 if (pszAllocText) 245 HeapFree(GetProcessHeap(), 0, pszAllocText); 246 247 if (!bSuccess && *ppszText) 248 { 249 HeapFree(GetProcessHeap(), 0, *ppszText); 250 *ppszText = NULL; 251 *pdwTextLen = 0; 252 } 253 return bSuccess; 254 } 255 256 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile) 257 { 258 LPBYTE pBytes = NULL; 259 LPBYTE pAllocBuffer = NULL; 260 DWORD dwPos = 0; 261 DWORD dwByteCount; 262 BYTE buffer[1024]; 263 UINT iCodePage = 0; 264 DWORD dwDummy, i; 265 BOOL bSuccess = FALSE; 266 int iBufferSize, iRequiredBytes; 267 BYTE b; 268 269 while(dwPos < dwTextLen) 270 { 271 switch(encFile) 272 { 273 case ENCODING_UTF16LE: 274 pBytes = (LPBYTE) &pszText[dwPos]; 275 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 276 dwPos = dwTextLen; 277 break; 278 279 case ENCODING_UTF16BE: 280 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 281 if (dwByteCount > sizeof(buffer)) 282 dwByteCount = sizeof(buffer); 283 284 memcpy(buffer, &pszText[dwPos], dwByteCount); 285 for (i = 0; i < dwByteCount; i += 2) 286 { 287 b = buffer[i+0]; 288 buffer[i+0] = buffer[i+1]; 289 buffer[i+1] = b; 290 } 291 pBytes = (LPBYTE) &buffer[dwPos]; 292 dwPos += dwByteCount / sizeof(WCHAR); 293 break; 294 295 case ENCODING_ANSI: 296 case ENCODING_UTF8: 297 if (encFile == ENCODING_ANSI) 298 iCodePage = CP_ACP; 299 else if (encFile == ENCODING_UTF8) 300 iCodePage = CP_UTF8; 301 302 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL); 303 if (iRequiredBytes <= 0) 304 { 305 goto done; 306 } 307 else if (iRequiredBytes < sizeof(buffer)) 308 { 309 pBytes = buffer; 310 iBufferSize = sizeof(buffer); 311 } 312 else 313 { 314 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes); 315 if (!pAllocBuffer) 316 return FALSE; 317 pBytes = pAllocBuffer; 318 iBufferSize = iRequiredBytes; 319 } 320 321 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL); 322 if (!dwByteCount) 323 goto done; 324 325 dwPos = dwTextLen; 326 break; 327 328 default: 329 goto done; 330 } 331 332 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL)) 333 goto done; 334 335 /* free the buffer, if we have allocated one */ 336 if (pAllocBuffer) 337 { 338 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 339 pAllocBuffer = NULL; 340 } 341 } 342 bSuccess = TRUE; 343 344 done: 345 if (pAllocBuffer) 346 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 347 return bSuccess; 348 } 349 350 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln) 351 { 352 WCHAR wcBom; 353 LPCWSTR pszLF = L"\n"; 354 DWORD dwPos, dwNext; 355 356 /* Write the proper byte order marks if not ANSI */ 357 if (encFile != ENCODING_ANSI) 358 { 359 wcBom = 0xFEFF; 360 if (!WriteEncodedText(hFile, &wcBom, 1, encFile)) 361 return FALSE; 362 } 363 364 dwPos = 0; 365 366 /* pszText eoln are always \r\n */ 367 368 do 369 { 370 /* Find the next eoln */ 371 dwNext = dwPos; 372 while(dwNext < dwTextLen) 373 { 374 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n') 375 break; 376 dwNext++; 377 } 378 379 if (dwNext != dwTextLen) 380 { 381 switch (iEoln) 382 { 383 case EOLN_LF: 384 /* Write text (without eoln) */ 385 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 386 return FALSE; 387 /* Write eoln */ 388 if (!WriteEncodedText(hFile, pszLF, 1, encFile)) 389 return FALSE; 390 break; 391 case EOLN_CR: 392 /* Write text (including \r as eoln) */ 393 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile)) 394 return FALSE; 395 break; 396 case EOLN_CRLF: 397 /* Write text (including \r\n as eoln) */ 398 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile)) 399 return FALSE; 400 break; 401 default: 402 return FALSE; 403 } 404 } 405 else 406 { 407 /* Write text (without eoln, since this is the end of the file) */ 408 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 409 return FALSE; 410 } 411 412 /* Skip \r\n */ 413 dwPos = dwNext + 2; 414 } 415 while (dwPos < dwTextLen); 416 417 return TRUE; 418 } 419