1 /* 2 * Notepad (text.c) 3 * 4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch> 5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr> 6 * Copyright 2002 Andriy Palamarchuk 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23 #include "notepad.h" 24 25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen) 26 { 27 LPWSTR pszNewText; 28 29 if (dwAppendLen > 0) 30 { 31 if (*ppszText) 32 { 33 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR)); 34 } 35 else 36 { 37 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR)); 38 } 39 40 if (!pszNewText) 41 return FALSE; 42 43 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR)); 44 *ppszText = pszNewText; 45 *pdwTextLen += dwAppendLen; 46 } 47 return TRUE; 48 } 49 50 BOOL 51 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln) 52 { 53 DWORD dwSize; 54 LPBYTE pBytes = NULL; 55 LPWSTR pszText; 56 LPWSTR pszAllocText = NULL; 57 DWORD dwPos, i; 58 DWORD dwCharCount; 59 BOOL bSuccess = FALSE; 60 BYTE b = 0; 61 ENCODING encFile = ENCODING_ANSI; 62 int iCodePage = 0; 63 WCHAR szCrlf[2] = {'\r', '\n'}; 64 DWORD adwEolnCount[3] = {0, 0, 0}; 65 66 *ppszText = NULL; 67 *pdwTextLen = 0; 68 69 dwSize = GetFileSize(hFile, NULL); 70 if (dwSize == INVALID_FILE_SIZE) 71 goto done; 72 73 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2); 74 if (!pBytes) 75 goto done; 76 77 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL)) 78 goto done; 79 dwPos = 0; 80 81 /* Make sure that there is a NUL character at the end, in any encoding */ 82 pBytes[dwSize + 0] = '\0'; 83 pBytes[dwSize + 1] = '\0'; 84 85 /* Look for Byte Order Marks */ 86 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE)) 87 { 88 encFile = ENCODING_UTF16LE; 89 dwPos += 2; 90 } 91 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF)) 92 { 93 encFile = ENCODING_UTF16BE; 94 dwPos += 2; 95 } 96 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF)) 97 { 98 encFile = ENCODING_UTF8; 99 dwPos += 3; 100 } 101 102 switch(encFile) 103 { 104 case ENCODING_UTF16BE: 105 for (i = dwPos; i < dwSize-1; i += 2) 106 { 107 b = pBytes[i+0]; 108 pBytes[i+0] = pBytes[i+1]; 109 pBytes[i+1] = b; 110 } 111 /* fall through */ 112 113 case ENCODING_UTF16LE: 114 pszText = (LPWSTR) &pBytes[dwPos]; 115 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR); 116 break; 117 118 case ENCODING_ANSI: 119 case ENCODING_UTF8: 120 if (encFile == ENCODING_ANSI) 121 iCodePage = CP_ACP; 122 else if (encFile == ENCODING_UTF8) 123 iCodePage = CP_UTF8; 124 125 if ((dwSize - dwPos) > 0) 126 { 127 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0); 128 if (dwCharCount == 0) 129 goto done; 130 } 131 else 132 { 133 /* special case for files with no characters (other than BOMs) */ 134 dwCharCount = 0; 135 } 136 137 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR)); 138 if (!pszAllocText) 139 goto done; 140 141 if ((dwSize - dwPos) > 0) 142 { 143 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount)) 144 goto done; 145 } 146 147 pszAllocText[dwCharCount] = '\0'; 148 pszText = pszAllocText; 149 break; 150 DEFAULT_UNREACHABLE; 151 } 152 153 dwPos = 0; 154 for (i = 0; i < dwCharCount; i++) 155 { 156 switch(pszText[i]) 157 { 158 case '\r': 159 if ((i < dwCharCount-1) && (pszText[i+1] == '\n')) 160 { 161 i++; 162 adwEolnCount[EOLN_CRLF]++; 163 break; 164 } 165 /* fall through */ 166 167 case '\n': 168 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos)) 169 return FALSE; 170 if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf))) 171 return FALSE; 172 dwPos = i + 1; 173 174 if (pszText[i] == '\r') 175 adwEolnCount[EOLN_CR]++; 176 else 177 adwEolnCount[EOLN_LF]++; 178 break; 179 180 case '\0': 181 pszText[i] = ' '; 182 break; 183 } 184 } 185 186 if (!*ppszText && (pszText == pszAllocText)) 187 { 188 /* special case; don't need to reallocate */ 189 *ppszText = pszAllocText; 190 *pdwTextLen = dwCharCount; 191 pszAllocText = NULL; 192 } 193 else 194 { 195 /* append last remaining text */ 196 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1)) 197 return FALSE; 198 } 199 200 /* chose which eoln to use */ 201 *piEoln = EOLN_CRLF; 202 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln]) 203 *piEoln = EOLN_LF; 204 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln]) 205 *piEoln = EOLN_CR; 206 *pencFile = encFile; 207 208 bSuccess = TRUE; 209 210 done: 211 if (pBytes) 212 HeapFree(GetProcessHeap(), 0, pBytes); 213 if (pszAllocText) 214 HeapFree(GetProcessHeap(), 0, pszAllocText); 215 216 if (!bSuccess && *ppszText) 217 { 218 HeapFree(GetProcessHeap(), 0, *ppszText); 219 *ppszText = NULL; 220 *pdwTextLen = 0; 221 } 222 return bSuccess; 223 } 224 225 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile) 226 { 227 LPBYTE pBytes = NULL; 228 LPBYTE pAllocBuffer = NULL; 229 DWORD dwPos = 0; 230 DWORD dwByteCount; 231 BYTE buffer[1024]; 232 UINT iCodePage = 0; 233 DWORD dwDummy, i; 234 BOOL bSuccess = FALSE; 235 int iBufferSize, iRequiredBytes; 236 BYTE b; 237 238 while(dwPos < dwTextLen) 239 { 240 switch(encFile) 241 { 242 case ENCODING_UTF16LE: 243 pBytes = (LPBYTE) &pszText[dwPos]; 244 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 245 dwPos = dwTextLen; 246 break; 247 248 case ENCODING_UTF16BE: 249 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 250 if (dwByteCount > sizeof(buffer)) 251 dwByteCount = sizeof(buffer); 252 253 memcpy(buffer, &pszText[dwPos], dwByteCount); 254 for (i = 0; i < dwByteCount; i += 2) 255 { 256 b = buffer[i+0]; 257 buffer[i+0] = buffer[i+1]; 258 buffer[i+1] = b; 259 } 260 pBytes = (LPBYTE) &buffer[dwPos]; 261 dwPos += dwByteCount / sizeof(WCHAR); 262 break; 263 264 case ENCODING_ANSI: 265 case ENCODING_UTF8: 266 if (encFile == ENCODING_ANSI) 267 iCodePage = CP_ACP; 268 else if (encFile == ENCODING_UTF8) 269 iCodePage = CP_UTF8; 270 271 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL); 272 if (iRequiredBytes <= 0) 273 { 274 goto done; 275 } 276 else if (iRequiredBytes < sizeof(buffer)) 277 { 278 pBytes = buffer; 279 iBufferSize = sizeof(buffer); 280 } 281 else 282 { 283 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes); 284 if (!pAllocBuffer) 285 return FALSE; 286 pBytes = pAllocBuffer; 287 iBufferSize = iRequiredBytes; 288 } 289 290 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL); 291 if (!dwByteCount) 292 goto done; 293 294 dwPos = dwTextLen; 295 break; 296 297 default: 298 goto done; 299 } 300 301 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL)) 302 goto done; 303 304 /* free the buffer, if we have allocated one */ 305 if (pAllocBuffer) 306 { 307 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 308 pAllocBuffer = NULL; 309 } 310 } 311 bSuccess = TRUE; 312 313 done: 314 if (pAllocBuffer) 315 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 316 return bSuccess; 317 } 318 319 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln) 320 { 321 WCHAR wcBom; 322 LPCWSTR pszLF = L"\n"; 323 DWORD dwPos, dwNext; 324 325 /* Write the proper byte order marks if not ANSI */ 326 if (encFile != ENCODING_ANSI) 327 { 328 wcBom = 0xFEFF; 329 if (!WriteEncodedText(hFile, &wcBom, 1, encFile)) 330 return FALSE; 331 } 332 333 dwPos = 0; 334 335 /* pszText eoln are always \r\n */ 336 337 do 338 { 339 /* Find the next eoln */ 340 dwNext = dwPos; 341 while(dwNext < dwTextLen) 342 { 343 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n') 344 break; 345 dwNext++; 346 } 347 348 if (dwNext != dwTextLen) 349 { 350 switch (iEoln) 351 { 352 case EOLN_LF: 353 /* Write text (without eoln) */ 354 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 355 return FALSE; 356 /* Write eoln */ 357 if (!WriteEncodedText(hFile, pszLF, 1, encFile)) 358 return FALSE; 359 break; 360 case EOLN_CR: 361 /* Write text (including \r as eoln) */ 362 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile)) 363 return FALSE; 364 break; 365 case EOLN_CRLF: 366 /* Write text (including \r\n as eoln) */ 367 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile)) 368 return FALSE; 369 break; 370 default: 371 return FALSE; 372 } 373 } 374 else 375 { 376 /* Write text (without eoln, since this is the end of the file) */ 377 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 378 return FALSE; 379 } 380 381 /* Skip \r\n */ 382 dwPos = dwNext + 2; 383 } 384 while (dwPos < dwTextLen); 385 386 return TRUE; 387 } 388