xref: /reactos/base/applications/notepad/text.c (revision 62919904)
1 /*
2  *  Notepad (text.c)
3  *
4  *  Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5  *  Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6  *  Copyright 2002 Andriy Palamarchuk
7  *  Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
22  */
23 
24 #include "notepad.h"
25 
26 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
27 {
28     LPWSTR pszNewText;
29 
30     if (dwAppendLen > 0)
31     {
32         if (*ppszText)
33         {
34             pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
35         }
36         else
37         {
38             pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
39         }
40 
41         if (!pszNewText)
42             return FALSE;
43 
44         memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
45         *ppszText = pszNewText;
46         *pdwTextLen += dwAppendLen;
47     }
48     return TRUE;
49 }
50 
51 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
52 {
53     INT flags = IS_TEXT_UNICODE_STATISTICS;
54 
55     if (dwSize <= 1)
56         return ENCODING_ANSI;
57 
58     if (IsTextUnicode(pBytes, dwSize, &flags))
59     {
60         return ENCODING_UTF16LE;
61     }
62 
63     if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
64     {
65         return ENCODING_UTF16BE;
66     }
67 
68     /* is it UTF-8? */
69     if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
70     {
71         return ENCODING_UTF8;
72     }
73 
74     return ENCODING_ANSI;
75 }
76 
77 BOOL
78 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
79 {
80     DWORD dwSize;
81     LPBYTE pBytes = NULL;
82     LPWSTR pszText;
83     LPWSTR pszAllocText = NULL;
84     DWORD dwPos, i;
85     DWORD dwCharCount;
86     BOOL bSuccess = FALSE;
87     BYTE b = 0;
88     ENCODING encFile = ENCODING_ANSI;
89     int iCodePage = 0;
90     WCHAR szCrlf[2] = {'\r', '\n'};
91     DWORD adwEolnCount[3] = {0, 0, 0};
92 
93     *ppszText = NULL;
94     *pdwTextLen = 0;
95 
96     dwSize = GetFileSize(hFile, NULL);
97     if (dwSize == INVALID_FILE_SIZE)
98         goto done;
99 
100     pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
101     if (!pBytes)
102         goto done;
103 
104     if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
105         goto done;
106     dwPos = 0;
107 
108     /* Make sure that there is a NUL character at the end, in any encoding */
109     pBytes[dwSize + 0] = '\0';
110     pBytes[dwSize + 1] = '\0';
111 
112     /* Look for Byte Order Marks */
113     if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
114     {
115         encFile = ENCODING_UTF16LE;
116         dwPos += 2;
117     }
118     else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
119     {
120         encFile = ENCODING_UTF16BE;
121         dwPos += 2;
122     }
123     else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
124     {
125         encFile = ENCODING_UTF8;
126         dwPos += 3;
127     }
128     else
129     {
130         encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
131     }
132 
133     switch(encFile)
134     {
135     case ENCODING_UTF16BE:
136         for (i = dwPos; i < dwSize-1; i += 2)
137         {
138             b = pBytes[i+0];
139             pBytes[i+0] = pBytes[i+1];
140             pBytes[i+1] = b;
141         }
142         /* fall through */
143 
144     case ENCODING_UTF16LE:
145         pszText = (LPWSTR) &pBytes[dwPos];
146         dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
147         break;
148 
149     case ENCODING_ANSI:
150     case ENCODING_UTF8:
151         if (encFile == ENCODING_ANSI)
152             iCodePage = CP_ACP;
153         else if (encFile == ENCODING_UTF8)
154             iCodePage = CP_UTF8;
155 
156         if ((dwSize - dwPos) > 0)
157         {
158             dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
159             if (dwCharCount == 0)
160                 goto done;
161         }
162         else
163         {
164             /* special case for files with no characters (other than BOMs) */
165             dwCharCount = 0;
166         }
167 
168         pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
169         if (!pszAllocText)
170             goto done;
171 
172         if ((dwSize - dwPos) > 0)
173         {
174             if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
175                 goto done;
176         }
177 
178         pszAllocText[dwCharCount] = '\0';
179         pszText = pszAllocText;
180         break;
181     DEFAULT_UNREACHABLE;
182     }
183 
184     dwPos = 0;
185     for (i = 0; i < dwCharCount; i++)
186     {
187         switch(pszText[i])
188         {
189         case '\r':
190             if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
191             {
192                 i++;
193                 adwEolnCount[EOLN_CRLF]++;
194                 break;
195             }
196             /* fall through */
197 
198         case '\n':
199             if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
200                 return FALSE;
201             if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
202                 return FALSE;
203             dwPos = i + 1;
204 
205             if (pszText[i] == '\r')
206                 adwEolnCount[EOLN_CR]++;
207             else
208                 adwEolnCount[EOLN_LF]++;
209             break;
210 
211         case '\0':
212             pszText[i] = ' ';
213             break;
214         }
215     }
216 
217     if (!*ppszText && (pszText == pszAllocText))
218     {
219         /* special case; don't need to reallocate */
220         *ppszText = pszAllocText;
221         *pdwTextLen = dwCharCount;
222         pszAllocText = NULL;
223     }
224     else
225     {
226         /* append last remaining text */
227         if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
228             return FALSE;
229     }
230 
231     /* chose which eoln to use */
232     *piEoln = EOLN_CRLF;
233     if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
234         *piEoln = EOLN_LF;
235     if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
236         *piEoln = EOLN_CR;
237     *pencFile = encFile;
238 
239     bSuccess = TRUE;
240 
241 done:
242     if (pBytes)
243         HeapFree(GetProcessHeap(), 0, pBytes);
244     if (pszAllocText)
245         HeapFree(GetProcessHeap(), 0, pszAllocText);
246 
247     if (!bSuccess && *ppszText)
248     {
249         HeapFree(GetProcessHeap(), 0, *ppszText);
250         *ppszText = NULL;
251         *pdwTextLen = 0;
252     }
253     return bSuccess;
254 }
255 
256 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
257 {
258     LPBYTE pBytes = NULL;
259     LPBYTE pAllocBuffer = NULL;
260     DWORD dwPos = 0;
261     DWORD dwByteCount;
262     BYTE buffer[1024];
263     UINT iCodePage = 0;
264     DWORD dwDummy, i;
265     BOOL bSuccess = FALSE;
266     int iBufferSize, iRequiredBytes;
267     BYTE b;
268 
269     while(dwPos < dwTextLen)
270     {
271         switch(encFile)
272         {
273             case ENCODING_UTF16LE:
274                 pBytes = (LPBYTE) &pszText[dwPos];
275                 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
276                 dwPos = dwTextLen;
277                 break;
278 
279             case ENCODING_UTF16BE:
280                 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
281                 if (dwByteCount > sizeof(buffer))
282                     dwByteCount = sizeof(buffer);
283 
284                 memcpy(buffer, &pszText[dwPos], dwByteCount);
285                 for (i = 0; i < dwByteCount; i += 2)
286                 {
287                     b = buffer[i+0];
288                     buffer[i+0] = buffer[i+1];
289                     buffer[i+1] = b;
290                 }
291                 pBytes = (LPBYTE) &buffer[dwPos];
292                 dwPos += dwByteCount / sizeof(WCHAR);
293                 break;
294 
295             case ENCODING_ANSI:
296             case ENCODING_UTF8:
297                 if (encFile == ENCODING_ANSI)
298                     iCodePage = CP_ACP;
299                 else if (encFile == ENCODING_UTF8)
300                     iCodePage = CP_UTF8;
301 
302                 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
303                 if (iRequiredBytes <= 0)
304                 {
305                     goto done;
306                 }
307                 else if (iRequiredBytes < sizeof(buffer))
308                 {
309                     pBytes = buffer;
310                     iBufferSize = sizeof(buffer);
311                 }
312                 else
313                 {
314                     pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
315                     if (!pAllocBuffer)
316                         return FALSE;
317                     pBytes = pAllocBuffer;
318                     iBufferSize = iRequiredBytes;
319                 }
320 
321                 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
322                 if (!dwByteCount)
323                     goto done;
324 
325                 dwPos = dwTextLen;
326                 break;
327 
328             default:
329                 goto done;
330         }
331 
332         if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
333             goto done;
334 
335         /* free the buffer, if we have allocated one */
336         if (pAllocBuffer)
337         {
338             HeapFree(GetProcessHeap(), 0, pAllocBuffer);
339             pAllocBuffer = NULL;
340         }
341     }
342     bSuccess = TRUE;
343 
344 done:
345     if (pAllocBuffer)
346         HeapFree(GetProcessHeap(), 0, pAllocBuffer);
347     return bSuccess;
348 }
349 
350 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
351 {
352     WCHAR wcBom;
353     LPCWSTR pszLF = L"\n";
354     DWORD dwPos, dwNext;
355 
356     /* Write the proper byte order marks if not ANSI */
357     if (encFile != ENCODING_ANSI)
358     {
359         wcBom = 0xFEFF;
360         if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
361             return FALSE;
362     }
363 
364     dwPos = 0;
365 
366     /* pszText eoln are always \r\n */
367 
368     do
369     {
370         /* Find the next eoln */
371         dwNext = dwPos;
372         while(dwNext < dwTextLen)
373         {
374             if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
375                 break;
376             dwNext++;
377         }
378 
379         if (dwNext != dwTextLen)
380         {
381             switch (iEoln)
382             {
383             case EOLN_LF:
384                 /* Write text (without eoln) */
385                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
386                     return FALSE;
387                 /* Write eoln */
388                 if (!WriteEncodedText(hFile, pszLF, 1, encFile))
389                     return FALSE;
390                 break;
391             case EOLN_CR:
392                 /* Write text (including \r as eoln) */
393                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
394                     return FALSE;
395                 break;
396             case EOLN_CRLF:
397                 /* Write text (including \r\n as eoln) */
398                 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
399                     return FALSE;
400                 break;
401             default:
402                 return FALSE;
403             }
404         }
405         else
406         {
407             /* Write text (without eoln, since this is the end of the file) */
408             if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
409                 return FALSE;
410         }
411 
412         /* Skip \r\n */
413         dwPos = dwNext + 2;
414     }
415     while (dwPos < dwTextLen);
416 
417     return TRUE;
418 }
419