xref: /reactos/base/applications/notepad/text.c (revision 8786e12d)
1 /*
2  *  Notepad (text.c)
3  *
4  *  Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5  *  Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6  *  Copyright 2002 Andriy Palamarchuk
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
21  */
22 
23 #include "notepad.h"
24 
25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
26 {
27     LPWSTR pszNewText;
28 
29     if (dwAppendLen > 0)
30     {
31         if (*ppszText)
32         {
33             pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
34         }
35         else
36         {
37             pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
38         }
39 
40         if (!pszNewText)
41             return FALSE;
42 
43         memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
44         *ppszText = pszNewText;
45         *pdwTextLen += dwAppendLen;
46     }
47     return TRUE;
48 }
49 
50 BOOL
51 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
52 {
53     DWORD dwSize;
54     LPBYTE pBytes = NULL;
55     LPWSTR pszText;
56     LPWSTR pszAllocText = NULL;
57     DWORD dwPos, i;
58     DWORD dwCharCount;
59     BOOL bSuccess = FALSE;
60     BYTE b = 0;
61     ENCODING encFile = ENCODING_ANSI;
62     int iCodePage = 0;
63     WCHAR szCrlf[2] = {'\r', '\n'};
64     DWORD adwEolnCount[3] = {0, 0, 0};
65 
66     *ppszText = NULL;
67     *pdwTextLen = 0;
68 
69     dwSize = GetFileSize(hFile, NULL);
70     if (dwSize == INVALID_FILE_SIZE)
71         goto done;
72 
73     pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
74     if (!pBytes)
75         goto done;
76 
77     if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
78         goto done;
79     dwPos = 0;
80 
81     /* Make sure that there is a NUL character at the end, in any encoding */
82     pBytes[dwSize + 0] = '\0';
83     pBytes[dwSize + 1] = '\0';
84 
85     /* Look for Byte Order Marks */
86     if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
87     {
88         encFile = ENCODING_UTF16LE;
89         dwPos += 2;
90     }
91     else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
92     {
93         encFile = ENCODING_UTF16BE;
94         dwPos += 2;
95     }
96     else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
97     {
98         encFile = ENCODING_UTF8;
99         dwPos += 3;
100     }
101 
102     switch(encFile)
103     {
104     case ENCODING_UTF16BE:
105         for (i = dwPos; i < dwSize-1; i += 2)
106         {
107             b = pBytes[i+0];
108             pBytes[i+0] = pBytes[i+1];
109             pBytes[i+1] = b;
110         }
111         /* fall through */
112 
113     case ENCODING_UTF16LE:
114         pszText = (LPWSTR) &pBytes[dwPos];
115         dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
116         break;
117 
118     case ENCODING_ANSI:
119     case ENCODING_UTF8:
120         if (encFile == ENCODING_ANSI)
121             iCodePage = CP_ACP;
122         else if (encFile == ENCODING_UTF8)
123             iCodePage = CP_UTF8;
124 
125         if ((dwSize - dwPos) > 0)
126         {
127             dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
128             if (dwCharCount == 0)
129                 goto done;
130         }
131         else
132         {
133             /* special case for files with no characters (other than BOMs) */
134             dwCharCount = 0;
135         }
136 
137         pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
138         if (!pszAllocText)
139             goto done;
140 
141         if ((dwSize - dwPos) > 0)
142         {
143             if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
144                 goto done;
145         }
146 
147         pszAllocText[dwCharCount] = '\0';
148         pszText = pszAllocText;
149         break;
150     DEFAULT_UNREACHABLE;
151     }
152 
153     dwPos = 0;
154     for (i = 0; i < dwCharCount; i++)
155     {
156         switch(pszText[i])
157         {
158         case '\r':
159             if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
160             {
161                 i++;
162                 adwEolnCount[EOLN_CRLF]++;
163                 break;
164             }
165             /* fall through */
166 
167         case '\n':
168             if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
169                 return FALSE;
170             if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
171                 return FALSE;
172             dwPos = i + 1;
173 
174             if (pszText[i] == '\r')
175                 adwEolnCount[EOLN_CR]++;
176             else
177                 adwEolnCount[EOLN_LF]++;
178             break;
179 
180         case '\0':
181             pszText[i] = ' ';
182             break;
183         }
184     }
185 
186     if (!*ppszText && (pszText == pszAllocText))
187     {
188         /* special case; don't need to reallocate */
189         *ppszText = pszAllocText;
190         *pdwTextLen = dwCharCount;
191         pszAllocText = NULL;
192     }
193     else
194     {
195         /* append last remaining text */
196         if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
197             return FALSE;
198     }
199 
200     /* chose which eoln to use */
201     *piEoln = EOLN_CRLF;
202     if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
203         *piEoln = EOLN_LF;
204     if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
205         *piEoln = EOLN_CR;
206     *pencFile = encFile;
207 
208     bSuccess = TRUE;
209 
210 done:
211     if (pBytes)
212         HeapFree(GetProcessHeap(), 0, pBytes);
213     if (pszAllocText)
214         HeapFree(GetProcessHeap(), 0, pszAllocText);
215 
216     if (!bSuccess && *ppszText)
217     {
218         HeapFree(GetProcessHeap(), 0, *ppszText);
219         *ppszText = NULL;
220         *pdwTextLen = 0;
221     }
222     return bSuccess;
223 }
224 
225 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
226 {
227     LPBYTE pBytes = NULL;
228     LPBYTE pAllocBuffer = NULL;
229     DWORD dwPos = 0;
230     DWORD dwByteCount;
231     BYTE buffer[1024];
232     UINT iCodePage = 0;
233     DWORD dwDummy, i;
234     BOOL bSuccess = FALSE;
235     int iBufferSize, iRequiredBytes;
236     BYTE b;
237 
238     while(dwPos < dwTextLen)
239     {
240         switch(encFile)
241         {
242             case ENCODING_UTF16LE:
243                 pBytes = (LPBYTE) &pszText[dwPos];
244                 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
245                 dwPos = dwTextLen;
246                 break;
247 
248             case ENCODING_UTF16BE:
249                 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
250                 if (dwByteCount > sizeof(buffer))
251                     dwByteCount = sizeof(buffer);
252 
253                 memcpy(buffer, &pszText[dwPos], dwByteCount);
254                 for (i = 0; i < dwByteCount; i += 2)
255                 {
256                     b = buffer[i+0];
257                     buffer[i+0] = buffer[i+1];
258                     buffer[i+1] = b;
259                 }
260                 pBytes = (LPBYTE) &buffer[dwPos];
261                 dwPos += dwByteCount / sizeof(WCHAR);
262                 break;
263 
264             case ENCODING_ANSI:
265             case ENCODING_UTF8:
266                 if (encFile == ENCODING_ANSI)
267                     iCodePage = CP_ACP;
268                 else if (encFile == ENCODING_UTF8)
269                     iCodePage = CP_UTF8;
270 
271                 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
272                 if (iRequiredBytes <= 0)
273                 {
274                     goto done;
275                 }
276                 else if (iRequiredBytes < sizeof(buffer))
277                 {
278                     pBytes = buffer;
279                     iBufferSize = sizeof(buffer);
280                 }
281                 else
282                 {
283                     pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
284                     if (!pAllocBuffer)
285                         return FALSE;
286                     pBytes = pAllocBuffer;
287                     iBufferSize = iRequiredBytes;
288                 }
289 
290                 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
291                 if (!dwByteCount)
292                     goto done;
293 
294                 dwPos = dwTextLen;
295                 break;
296 
297             default:
298                 goto done;
299         }
300 
301         if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
302             goto done;
303 
304         /* free the buffer, if we have allocated one */
305         if (pAllocBuffer)
306         {
307             HeapFree(GetProcessHeap(), 0, pAllocBuffer);
308             pAllocBuffer = NULL;
309         }
310     }
311     bSuccess = TRUE;
312 
313 done:
314     if (pAllocBuffer)
315         HeapFree(GetProcessHeap(), 0, pAllocBuffer);
316     return bSuccess;
317 }
318 
319 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
320 {
321     WCHAR wcBom;
322     LPCWSTR pszLF = L"\n";
323     DWORD dwPos, dwNext;
324 
325     /* Write the proper byte order marks if not ANSI */
326     if (encFile != ENCODING_ANSI)
327     {
328         wcBom = 0xFEFF;
329         if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
330             return FALSE;
331     }
332 
333     dwPos = 0;
334 
335     /* pszText eoln are always \r\n */
336 
337     do
338     {
339         /* Find the next eoln */
340         dwNext = dwPos;
341         while(dwNext < dwTextLen)
342         {
343             if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
344                 break;
345             dwNext++;
346         }
347 
348         if (dwNext != dwTextLen)
349         {
350             switch (iEoln)
351             {
352             case EOLN_LF:
353                 /* Write text (without eoln) */
354                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
355                     return FALSE;
356                 /* Write eoln */
357                 if (!WriteEncodedText(hFile, pszLF, 1, encFile))
358                     return FALSE;
359                 break;
360             case EOLN_CR:
361                 /* Write text (including \r as eoln) */
362                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
363                     return FALSE;
364                 break;
365             case EOLN_CRLF:
366                 /* Write text (including \r\n as eoln) */
367                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
368                     return FALSE;
369                 break;
370             default:
371                 return FALSE;
372             }
373         }
374         else
375         {
376             /* Write text (without eoln, since this is the end of the file) */
377             if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
378                 return FALSE;
379         }
380 
381         /* Skip \r\n */
382         dwPos = dwNext + 2;
383     }
384     while (dwPos < dwTextLen);
385 
386     return TRUE;
387 }
388