xref: /reactos/base/applications/notepad/text.c (revision 845faec4)
1 /*
2  *  Notepad (text.c)
3  *
4  *  Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5  *  Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6  *  Copyright 2002 Andriy Palamarchuk
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
21  */
22 
23 #include "notepad.h"
24 
25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
26 {
27     LPWSTR pszNewText;
28 
29     if (dwAppendLen > 0)
30     {
31         if (*ppszText)
32         {
33             pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
34         }
35         else
36         {
37             pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
38         }
39 
40         if (!pszNewText)
41             return FALSE;
42 
43         memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
44         *ppszText = pszNewText;
45         *pdwTextLen += dwAppendLen;
46     }
47     return TRUE;
48 }
49 
50 BOOL
51 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, int *pencFile, int *piEoln)
52 {
53     DWORD dwSize;
54     LPBYTE pBytes = NULL;
55     LPWSTR pszText;
56     LPWSTR pszAllocText = NULL;
57     DWORD dwPos, i;
58     DWORD dwCharCount;
59     BOOL bSuccess = FALSE;
60     BYTE b = 0;
61     int encFile = ENCODING_ANSI;
62     int iCodePage = 0;
63     WCHAR szCrlf[2] = {'\r', '\n'};
64     DWORD adwEolnCount[3] = {0, 0, 0};
65 
66     *ppszText = NULL;
67     *pdwTextLen = 0;
68 
69     dwSize = GetFileSize(hFile, NULL);
70     if (dwSize == INVALID_FILE_SIZE)
71         goto done;
72 
73     pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
74     if (!pBytes)
75         goto done;
76 
77     if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
78         goto done;
79     dwPos = 0;
80 
81     /* Make sure that there is a NUL character at the end, in any encoding */
82     pBytes[dwSize + 0] = '\0';
83     pBytes[dwSize + 1] = '\0';
84 
85     /* Look for Byte Order Marks */
86     if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
87     {
88         encFile = ENCODING_UNICODE;
89         dwPos += 2;
90     }
91     else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
92     {
93         encFile = ENCODING_UNICODE_BE;
94         dwPos += 2;
95     }
96     else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
97     {
98         encFile = ENCODING_UTF8;
99         dwPos += 3;
100     }
101 
102     switch(encFile)
103     {
104     case ENCODING_UNICODE_BE:
105         for (i = dwPos; i < dwSize-1; i += 2)
106         {
107             b = pBytes[i+0];
108             pBytes[i+0] = pBytes[i+1];
109             pBytes[i+1] = b;
110         }
111         /* fall through */
112 
113     case ENCODING_UNICODE:
114         pszText = (LPWSTR) &pBytes[dwPos];
115         dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
116         break;
117 
118     case ENCODING_ANSI:
119     case ENCODING_UTF8:
120         if (encFile == ENCODING_ANSI)
121             iCodePage = CP_ACP;
122         else if (encFile == ENCODING_UTF8)
123             iCodePage = CP_UTF8;
124 
125         if ((dwSize - dwPos) > 0)
126         {
127             dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
128             if (dwCharCount == 0)
129                 goto done;
130         }
131         else
132         {
133             /* special case for files with no characters (other than BOMs) */
134             dwCharCount = 0;
135         }
136 
137         pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
138         if (!pszAllocText)
139             goto done;
140 
141         if ((dwSize - dwPos) > 0)
142         {
143             if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
144                 goto done;
145         }
146 
147         pszAllocText[dwCharCount] = '\0';
148         pszText = pszAllocText;
149         break;
150     }
151 
152     dwPos = 0;
153     for (i = 0; i < dwCharCount; i++)
154     {
155         switch(pszText[i])
156         {
157         case '\r':
158             if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
159             {
160                 i++;
161                 adwEolnCount[EOLN_CRLF]++;
162                 break;
163             }
164             /* fall through */
165 
166         case '\n':
167             if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
168                 return FALSE;
169             if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
170                 return FALSE;
171             dwPos = i + 1;
172 
173             if (pszText[i] == '\r')
174                 adwEolnCount[EOLN_CR]++;
175             else
176                 adwEolnCount[EOLN_LF]++;
177             break;
178 
179         case '\0':
180             pszText[i] = ' ';
181             break;
182         }
183     }
184 
185     if (!*ppszText && (pszText == pszAllocText))
186     {
187         /* special case; don't need to reallocate */
188         *ppszText = pszAllocText;
189         *pdwTextLen = dwCharCount;
190         pszAllocText = NULL;
191     }
192     else
193     {
194         /* append last remaining text */
195         if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
196             return FALSE;
197     }
198 
199     /* chose which eoln to use */
200     *piEoln = EOLN_CRLF;
201     if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
202         *piEoln = EOLN_LF;
203     if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
204         *piEoln = EOLN_CR;
205     *pencFile = encFile;
206 
207     bSuccess = TRUE;
208 
209 done:
210     if (pBytes)
211         HeapFree(GetProcessHeap(), 0, pBytes);
212     if (pszAllocText)
213         HeapFree(GetProcessHeap(), 0, pszAllocText);
214 
215     if (!bSuccess && *ppszText)
216     {
217         HeapFree(GetProcessHeap(), 0, *ppszText);
218         *ppszText = NULL;
219         *pdwTextLen = 0;
220     }
221     return bSuccess;
222 }
223 
224 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int encFile)
225 {
226     LPBYTE pBytes = NULL;
227     LPBYTE pAllocBuffer = NULL;
228     DWORD dwPos = 0;
229     DWORD dwByteCount;
230     BYTE buffer[1024];
231     UINT iCodePage = 0;
232     DWORD dwDummy, i;
233     BOOL bSuccess = FALSE;
234     int iBufferSize, iRequiredBytes;
235     BYTE b;
236 
237     while(dwPos < dwTextLen)
238     {
239         switch(encFile)
240         {
241             case ENCODING_UNICODE:
242                 pBytes = (LPBYTE) &pszText[dwPos];
243                 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
244                 dwPos = dwTextLen;
245                 break;
246 
247             case ENCODING_UNICODE_BE:
248                 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
249                 if (dwByteCount > sizeof(buffer))
250                     dwByteCount = sizeof(buffer);
251 
252                 memcpy(buffer, &pszText[dwPos], dwByteCount);
253                 for (i = 0; i < dwByteCount; i += 2)
254                 {
255                     b = buffer[i+0];
256                     buffer[i+0] = buffer[i+1];
257                     buffer[i+1] = b;
258                 }
259                 pBytes = (LPBYTE) &buffer[dwPos];
260                 dwPos += dwByteCount / sizeof(WCHAR);
261                 break;
262 
263             case ENCODING_ANSI:
264             case ENCODING_UTF8:
265                 if (encFile == ENCODING_ANSI)
266                     iCodePage = CP_ACP;
267                 else if (encFile == ENCODING_UTF8)
268                     iCodePage = CP_UTF8;
269 
270                 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
271                 if (iRequiredBytes <= 0)
272                 {
273                     goto done;
274                 }
275                 else if (iRequiredBytes < sizeof(buffer))
276                 {
277                     pBytes = buffer;
278                     iBufferSize = sizeof(buffer);
279                 }
280                 else
281                 {
282                     pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
283                     if (!pAllocBuffer)
284                         return FALSE;
285                     pBytes = pAllocBuffer;
286                     iBufferSize = iRequiredBytes;
287                 }
288 
289                 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
290                 if (!dwByteCount)
291                     goto done;
292 
293                 dwPos = dwTextLen;
294                 break;
295 
296             default:
297                 goto done;
298         }
299 
300         if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
301             goto done;
302 
303         /* free the buffer, if we have allocated one */
304         if (pAllocBuffer)
305         {
306             HeapFree(GetProcessHeap(), 0, pAllocBuffer);
307             pAllocBuffer = NULL;
308         }
309     }
310     bSuccess = TRUE;
311 
312 done:
313     if (pAllocBuffer)
314         HeapFree(GetProcessHeap(), 0, pAllocBuffer);
315     return bSuccess;
316 }
317 
318 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int encFile, int iEoln)
319 {
320     WCHAR wcBom;
321     LPCWSTR pszLF = L"\n";
322     DWORD dwPos, dwNext;
323 
324     /* Write the proper byte order marks if not ANSI */
325     if (encFile != ENCODING_ANSI)
326     {
327         wcBom = 0xFEFF;
328         if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
329             return FALSE;
330     }
331 
332     dwPos = 0;
333 
334     /* pszText eoln are always \r\n */
335 
336     do
337     {
338         /* Find the next eoln */
339         dwNext = dwPos;
340         while(dwNext < dwTextLen)
341         {
342             if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
343                 break;
344             dwNext++;
345         }
346 
347         if (dwNext != dwTextLen)
348         {
349             switch (iEoln)
350             {
351             case EOLN_LF:
352                 /* Write text (without eoln) */
353                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
354                     return FALSE;
355                 /* Write eoln */
356                 if (!WriteEncodedText(hFile, pszLF, 1, encFile))
357                     return FALSE;
358                 break;
359             case EOLN_CR:
360                 /* Write text (including \r as eoln) */
361                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
362                     return FALSE;
363                 break;
364             case EOLN_CRLF:
365                 /* Write text (including \r\n as eoln) */
366                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
367                     return FALSE;
368                 break;
369             default:
370                 return FALSE;
371             }
372         }
373         else
374         {
375             /* Write text (without eoln, since this is the end of the file) */
376             if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
377                 return FALSE;
378         }
379 
380         /* Skip \r\n */
381         dwPos = dwNext + 2;
382     }
383     while (dwPos < dwTextLen);
384 
385     return TRUE;
386 }
387