1 /* $Id: dos2unix.c 3114 2017-10-29 18:02:04Z bird $ */
2 /** @file
3  * dos2unix - Line ending conversion routines.
4  */
5 
6 /*
7  * Copyright (c) 2017 knut st. osmundsen <bird-kBuild-spamx@anduin.net>
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  *
27  * Alternatively, the content of this file may be used under the terms of the
28  * GPL version 2 or later, or LGPL version 2.1 or later.
29  */
30 
31 
32 /*********************************************************************************************************************************
33 *   Header Files                                                                                                                 *
34 *********************************************************************************************************************************/
35 #include "dos2unix.h"
36 #include <k/kDefs.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #if K_OS == K_OS_WINDOWS
40 # include <io.h>
41 #else
42 # include <unistd.h>
43 #endif
44 #include <assert.h>
45 
46 #ifndef O_BINARY
47 # ifdef _O_BINARY
48 #  define O_BINARY   _O_BINARY
49 # else
50 #  define O_BINARY   0
51 # endif
52 #endif
53 
54 
55 /*********************************************************************************************************************************
56 *   Defined Constants And Macros                                                                                                 *
57 *********************************************************************************************************************************/
58 #define STACK_BUF_SIZE  0x20000
59 
60 #define DOS2UNIX_LF     0x0a
61 #define DOS2UNIX_CR     0x0d
62 
63 
64 
65 /**
66  * Does a line ending analysis of the given file.
67  *
68  * @returns 0 on success, errno value on open or read error.
69  * @param   pszFilename         The path to the file
70  * @param   pfStyle             Where to return the DOS2UNIX_STYLE_XXX and
71  *                              DOS2UNIX_F_XXX flags.
72  * @param   pcDosEols           Where to return the number of DOS end-of-line
73  *                              sequences found.  Optional.
74  * @param   pcUnixEols          Where to return the number of UNIX end-of-line
75  *                              sequences found.
76  */
dos2unix_analyze_file(const char * pszFilename,KU32 * pfStyle,KSIZE * pcDosEols,KSIZE * pcUnixEols)77 int dos2unix_analyze_file(const char *pszFilename, KU32 *pfStyle, KSIZE *pcDosEols, KSIZE *pcUnixEols)
78 {
79     int iRet = 0;
80     int fd = open(pszFilename, O_RDONLY | O_BINARY);
81     if (fd >= 0)
82     {
83         iRet = dos2unix_analyze_fd(fd, pfStyle, pcDosEols, pcUnixEols);
84         close(fd);
85     }
86     else
87     {
88         iRet = errno;
89         *pfStyle = DOS2UNIX_STYLE_NONE;
90         if (pcUnixEols)
91             *pcUnixEols = 0;
92         if (pcDosEols)
93             *pcDosEols = 0;
94     }
95     return iRet;
96 }
97 
98 /**
99  * Does a line ending analysis of the given file descriptor.
100  *
101  * @returns 0 on success, errno value on open or read error.
102  * @param   fd                  The file descriptor to analyze.  Caller must
103  *                              place this as the desired position.
104  * @param   pfStyle             Where to return the DOS2UNIX_STYLE_XXX and
105  *                              DOS2UNIX_F_XXX flags.
106  * @param   pcDosEols           Where to return the number of DOS end-of-line
107  *                              sequences found.  Optional.
108  * @param   pcUnixEols          Where to return the number of UNIX end-of-line
109  *                              sequences found.
110  */
dos2unix_analyze_fd(int fd,KU32 * pfStyle,KSIZE * pcDosEols,KSIZE * pcUnixEols)111 int dos2unix_analyze_fd(int fd, KU32 *pfStyle, KSIZE *pcDosEols, KSIZE *pcUnixEols)
112 {
113     KSIZE   cUnixEols  = 0;
114     KSIZE   cDosEols   = 0;
115     KSIZE   cLoneCrs   = 0;
116     KBOOL   fPendingCr = K_FALSE;
117     int     iRet       = 0;
118 
119     /*
120      * Do the analysis.
121      */
122     *pfStyle = DOS2UNIX_STYLE_NONE;
123     for (;;)
124     {
125         char achBuf[STACK_BUF_SIZE];
126         int  cchRead = read(fd, achBuf, sizeof(achBuf));
127         if (cchRead > 0)
128         {
129             int off = 0;
130             if (fPendingCr)
131             {
132                 if (achBuf[0] == DOS2UNIX_LF)
133                 {
134                     off++;
135                     cDosEols++;
136                 }
137                 else
138                     cLoneCrs++;
139                 fPendingCr = K_FALSE;
140             }
141 
142             while (off < cchRead)
143             {
144                 char ch = achBuf[off++];
145                 if ((unsigned char)ch > (unsigned char)DOS2UNIX_CR)
146                 { /* likely */ }
147                 else if (ch == DOS2UNIX_CR)
148                 {
149                     if (off < cchRead && achBuf[off] == DOS2UNIX_CR)
150                         cDosEols++;
151                     else
152                     {
153                         fPendingCr = K_TRUE;
154                         while (off < cchRead)
155                         {
156                             ch = achBuf[off++];
157                             if (ch != DOS2UNIX_CR)
158                             {
159                                 if (ch == DOS2UNIX_LF)
160                                     cDosEols++;
161                                 else
162                                     cLoneCrs++;
163                                 fPendingCr = K_FALSE;
164                                 break;
165                             }
166                             cLoneCrs++;
167                         }
168                     }
169                 }
170                 else if (ch == DOS2UNIX_LF)
171                     cUnixEols++;
172                 else if (ch == '\0')
173                     *pfStyle |= DOS2UNIX_F_BINARY;
174             }
175         }
176         else
177         {
178             if (cchRead < 0)
179                 iRet = errno;
180             if (fPendingCr)
181                 cLoneCrs++;
182             break;
183         }
184     }
185 
186     /*
187      * Set return values.
188      */
189     if (cUnixEols > 0 && cDosEols == 0)
190         *pfStyle |= DOS2UNIX_STYLE_UNIX;
191     else if (cDosEols > 0 && cUnixEols == 0)
192         *pfStyle |= DOS2UNIX_STYLE_DOS;
193     else if (cDosEols != 0 && cUnixEols != 0)
194         *pfStyle |= DOS2UNIX_STYLE_MIXED;
195     if (pcUnixEols)
196         *pcUnixEols = cUnixEols;
197     if (pcDosEols)
198         *pcDosEols = cDosEols;
199 
200     return iRet;
201 }
202 
203 
204 /**
205  * Converts a buffer to unix line (LF) endings.
206  *
207  * @retval  K_TRUE if pending CR.  The caller must handle this case.
208  * @retval  K_FALSE if no pending CR.
209  *
210  * @param   pchSrc          The input buffer.
211  * @param   cchSrc          Number of characters to convert from the input
212  *                          buffer.
213  * @param   pchDst          The output buffer.  This must be at least as big as
214  *                          the input.  It is okay if this overlaps with the
215  *                          source buffer, as long as this is at the same or a
216  *                          lower address.
217  * @param   pcchDst         Where to return the number of characters in the
218  *                          output buffer.
219  */
dos2unix_convert_to_unix(const char * pchSrc,KSIZE cchSrc,char * pchDst,KSIZE * pcchDst)220 KBOOL dos2unix_convert_to_unix(const char *pchSrc, KSIZE cchSrc, char *pchDst, KSIZE *pcchDst)
221 {
222     KSIZE offDst = 0;
223     while (cchSrc-- > 0)
224     {
225         char ch = *pchSrc++;
226         if ((unsigned char)ch != (unsigned char)DOS2UNIX_CR)
227             pchDst[offDst++] = ch;
228         else if (cchSrc > 0 && *pchSrc == DOS2UNIX_LF)
229         {
230             pchDst[offDst++] = DOS2UNIX_LF;
231             cchSrc--;
232             pchSrc++;
233         }
234         else if (cchSrc == 0)
235         {
236             *pcchDst = offDst;
237             return K_TRUE;
238         }
239         else
240             pchDst[offDst++] = ch;
241     }
242 
243     *pcchDst = offDst;
244     return K_FALSE;
245 }
246 
247 
248 /**
249  * Converts a buffer to DOS (CRLF) endings.
250  *
251  * @retval  K_TRUE if pending CR.  The caller must handle this case.
252  * @retval  K_FALSE if no pending CR.
253  *
254  * @param   pchSrc          The input buffer.
255  * @param   cchSrc          Number of characters to convert from the input
256  *                          buffer.
257  * @param   pchDst          The output buffer.  This must be at least _twice_ as
258  *                          big as the input.  It is okay if the top half of the
259  *                          buffer overlaps with the source buffer.
260  * @param   pcchDst         Where to return the number of characters in the
261  *                          output buffer.
262  */
dos2unix_convert_to_dos(const char * pchSrc,KSIZE cchSrc,char * pchDst,KSIZE * pcchDst)263 KBOOL dos2unix_convert_to_dos(const char *pchSrc, KSIZE cchSrc, char *pchDst, KSIZE *pcchDst)
264 {
265     KSIZE offDst = 0;
266     while (cchSrc-- > 0)
267     {
268         char ch = *pchSrc++;
269         if ((unsigned char)ch > (unsigned char)DOS2UNIX_CR)
270             pchDst[offDst++] = ch;
271         else if (ch == DOS2UNIX_CR)
272         {
273             /* We treat CR kind of like an escape character. */
274             do
275             {
276                 if (cchSrc > 0)
277                 {
278                     pchDst[offDst++] = ch;
279                     cchSrc--;
280                     ch = *pchSrc++;
281                 }
282                 else
283                 {
284                     *pcchDst = offDst;
285                     return K_TRUE;
286                 }
287             } while (ch == DOS2UNIX_CR);
288             pchDst[offDst++] = ch;
289         }
290         else if (ch == DOS2UNIX_LF)
291         {
292             pchDst[offDst++] = DOS2UNIX_CR;
293             pchDst[offDst++] = DOS2UNIX_LF;
294         }
295         else
296             pchDst[offDst++] = ch;
297     }
298 
299     *pcchDst = offDst;
300     return K_FALSE;
301 }
302 
303