1 /* 2 * Notepad (text.c) 3 * 4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch> 5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr> 6 * Copyright 2002 Andriy Palamarchuk 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23 #include "notepad.h" 24 25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen) 26 { 27 LPWSTR pszNewText; 28 29 if (dwAppendLen > 0) 30 { 31 if (*ppszText) 32 { 33 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR)); 34 } 35 else 36 { 37 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR)); 38 } 39 40 if (!pszNewText) 41 return FALSE; 42 43 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR)); 44 *ppszText = pszNewText; 45 *pdwTextLen += dwAppendLen; 46 } 47 return TRUE; 48 } 49 50 BOOL 51 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, int *pencFile, int *piEoln) 52 { 53 DWORD dwSize; 54 LPBYTE pBytes = NULL; 55 LPWSTR pszText; 56 LPWSTR pszAllocText = NULL; 57 DWORD dwPos, i; 58 DWORD dwCharCount; 59 BOOL bSuccess = FALSE; 60 BYTE b = 0; 61 int encFile = ENCODING_ANSI; 62 int iCodePage = 0; 63 WCHAR szCrlf[2] = {'\r', '\n'}; 64 DWORD adwEolnCount[3] = {0, 0, 0}; 65 66 *ppszText = NULL; 67 *pdwTextLen = 0; 68 69 dwSize = GetFileSize(hFile, NULL); 70 if (dwSize == INVALID_FILE_SIZE) 71 goto done; 72 73 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2); 74 if (!pBytes) 75 goto done; 76 77 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL)) 78 goto done; 79 dwPos = 0; 80 81 /* Make sure that there is a NUL character at the end, in any encoding */ 82 pBytes[dwSize + 0] = '\0'; 83 pBytes[dwSize + 1] = '\0'; 84 85 /* Look for Byte Order Marks */ 86 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE)) 87 { 88 encFile = ENCODING_UNICODE; 89 dwPos += 2; 90 } 91 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF)) 92 { 93 encFile = ENCODING_UNICODE_BE; 94 dwPos += 2; 95 } 96 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF)) 97 { 98 encFile = ENCODING_UTF8; 99 dwPos += 3; 100 } 101 102 switch(encFile) 103 { 104 case ENCODING_UNICODE_BE: 105 for (i = dwPos; i < dwSize-1; i += 2) 106 { 107 b = pBytes[i+0]; 108 pBytes[i+0] = pBytes[i+1]; 109 pBytes[i+1] = b; 110 } 111 /* fall through */ 112 113 case ENCODING_UNICODE: 114 pszText = (LPWSTR) &pBytes[dwPos]; 115 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR); 116 break; 117 118 case ENCODING_ANSI: 119 case ENCODING_UTF8: 120 if (encFile == ENCODING_ANSI) 121 iCodePage = CP_ACP; 122 else if (encFile == ENCODING_UTF8) 123 iCodePage = CP_UTF8; 124 125 if ((dwSize - dwPos) > 0) 126 { 127 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0); 128 if (dwCharCount == 0) 129 goto done; 130 } 131 else 132 { 133 /* special case for files with no characters (other than BOMs) */ 134 dwCharCount = 0; 135 } 136 137 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR)); 138 if (!pszAllocText) 139 goto done; 140 141 if ((dwSize - dwPos) > 0) 142 { 143 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount)) 144 goto done; 145 } 146 147 pszAllocText[dwCharCount] = '\0'; 148 pszText = pszAllocText; 149 break; 150 } 151 152 dwPos = 0; 153 for (i = 0; i < dwCharCount; i++) 154 { 155 switch(pszText[i]) 156 { 157 case '\r': 158 if ((i < dwCharCount-1) && (pszText[i+1] == '\n')) 159 { 160 i++; 161 adwEolnCount[EOLN_CRLF]++; 162 break; 163 } 164 /* fall through */ 165 166 case '\n': 167 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos)) 168 return FALSE; 169 if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf))) 170 return FALSE; 171 dwPos = i + 1; 172 173 if (pszText[i] == '\r') 174 adwEolnCount[EOLN_CR]++; 175 else 176 adwEolnCount[EOLN_LF]++; 177 break; 178 179 case '\0': 180 pszText[i] = ' '; 181 break; 182 } 183 } 184 185 if (!*ppszText && (pszText == pszAllocText)) 186 { 187 /* special case; don't need to reallocate */ 188 *ppszText = pszAllocText; 189 *pdwTextLen = dwCharCount; 190 pszAllocText = NULL; 191 } 192 else 193 { 194 /* append last remaining text */ 195 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1)) 196 return FALSE; 197 } 198 199 /* chose which eoln to use */ 200 *piEoln = EOLN_CRLF; 201 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln]) 202 *piEoln = EOLN_LF; 203 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln]) 204 *piEoln = EOLN_CR; 205 *pencFile = encFile; 206 207 bSuccess = TRUE; 208 209 done: 210 if (pBytes) 211 HeapFree(GetProcessHeap(), 0, pBytes); 212 if (pszAllocText) 213 HeapFree(GetProcessHeap(), 0, pszAllocText); 214 215 if (!bSuccess && *ppszText) 216 { 217 HeapFree(GetProcessHeap(), 0, *ppszText); 218 *ppszText = NULL; 219 *pdwTextLen = 0; 220 } 221 return bSuccess; 222 } 223 224 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int encFile) 225 { 226 LPBYTE pBytes = NULL; 227 LPBYTE pAllocBuffer = NULL; 228 DWORD dwPos = 0; 229 DWORD dwByteCount; 230 BYTE buffer[1024]; 231 UINT iCodePage = 0; 232 DWORD dwDummy, i; 233 BOOL bSuccess = FALSE; 234 int iBufferSize, iRequiredBytes; 235 BYTE b; 236 237 while(dwPos < dwTextLen) 238 { 239 switch(encFile) 240 { 241 case ENCODING_UNICODE: 242 pBytes = (LPBYTE) &pszText[dwPos]; 243 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 244 dwPos = dwTextLen; 245 break; 246 247 case ENCODING_UNICODE_BE: 248 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR); 249 if (dwByteCount > sizeof(buffer)) 250 dwByteCount = sizeof(buffer); 251 252 memcpy(buffer, &pszText[dwPos], dwByteCount); 253 for (i = 0; i < dwByteCount; i += 2) 254 { 255 b = buffer[i+0]; 256 buffer[i+0] = buffer[i+1]; 257 buffer[i+1] = b; 258 } 259 pBytes = (LPBYTE) &buffer[dwPos]; 260 dwPos += dwByteCount / sizeof(WCHAR); 261 break; 262 263 case ENCODING_ANSI: 264 case ENCODING_UTF8: 265 if (encFile == ENCODING_ANSI) 266 iCodePage = CP_ACP; 267 else if (encFile == ENCODING_UTF8) 268 iCodePage = CP_UTF8; 269 270 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL); 271 if (iRequiredBytes <= 0) 272 { 273 goto done; 274 } 275 else if (iRequiredBytes < sizeof(buffer)) 276 { 277 pBytes = buffer; 278 iBufferSize = sizeof(buffer); 279 } 280 else 281 { 282 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes); 283 if (!pAllocBuffer) 284 return FALSE; 285 pBytes = pAllocBuffer; 286 iBufferSize = iRequiredBytes; 287 } 288 289 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL); 290 if (!dwByteCount) 291 goto done; 292 293 dwPos = dwTextLen; 294 break; 295 296 default: 297 goto done; 298 } 299 300 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL)) 301 goto done; 302 303 /* free the buffer, if we have allocated one */ 304 if (pAllocBuffer) 305 { 306 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 307 pAllocBuffer = NULL; 308 } 309 } 310 bSuccess = TRUE; 311 312 done: 313 if (pAllocBuffer) 314 HeapFree(GetProcessHeap(), 0, pAllocBuffer); 315 return bSuccess; 316 } 317 318 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int encFile, int iEoln) 319 { 320 WCHAR wcBom; 321 LPCWSTR pszLF = L"\n"; 322 DWORD dwPos, dwNext; 323 324 /* Write the proper byte order marks if not ANSI */ 325 if (encFile != ENCODING_ANSI) 326 { 327 wcBom = 0xFEFF; 328 if (!WriteEncodedText(hFile, &wcBom, 1, encFile)) 329 return FALSE; 330 } 331 332 dwPos = 0; 333 334 /* pszText eoln are always \r\n */ 335 336 do 337 { 338 /* Find the next eoln */ 339 dwNext = dwPos; 340 while(dwNext < dwTextLen) 341 { 342 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n') 343 break; 344 dwNext++; 345 } 346 347 if (dwNext != dwTextLen) 348 { 349 switch (iEoln) 350 { 351 case EOLN_LF: 352 /* Write text (without eoln) */ 353 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 354 return FALSE; 355 /* Write eoln */ 356 if (!WriteEncodedText(hFile, pszLF, 1, encFile)) 357 return FALSE; 358 break; 359 case EOLN_CR: 360 /* Write text (including \r as eoln) */ 361 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile)) 362 return FALSE; 363 break; 364 case EOLN_CRLF: 365 /* Write text (including \r\n as eoln) */ 366 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile)) 367 return FALSE; 368 break; 369 default: 370 return FALSE; 371 } 372 } 373 else 374 { 375 /* Write text (without eoln, since this is the end of the file) */ 376 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile)) 377 return FALSE; 378 } 379 380 /* Skip \r\n */ 381 dwPos = dwNext + 2; 382 } 383 while (dwPos < dwTextLen); 384 385 return TRUE; 386 } 387