1 // c2cpp.cpp                            Copyright (C) A C Norman. 2015-2020
2 
3 // This changes comments from C to C++ style.... with all sorts of odd
4 // stylistic choices motivated by the CSL source code. The task that
5 // this file was written for is now done, so it is at most of historical
6 // interest.
7 
8 /**************************************************************************
9  * Copyright (C) 2020, Codemist.                         A C Norman       *
10  *                                                                        *
11  * Redistribution and use in source and binary forms, with or without     *
12  * modification, are permitted provided that the following conditions are *
13  * met:                                                                   *
14  *                                                                        *
15  *     * Redistributions of source code must retain the relevant          *
16  *       copyright notice, this list of conditions and the following      *
17  *       disclaimer.                                                      *
18  *     * Redistributions in binary form must reproduce the above          *
19  *       copyright notice, this list of conditions and the following      *
20  *       disclaimer in the documentation and/or other materials provided  *
21  *       with the distribution.                                           *
22  *                                                                        *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    *
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      *
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS      *
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE         *
27  * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,   *
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
30  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
31  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR  *
32  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF     *
33  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH   *
34  * DAMAGE.                                                                *
35  *************************************************************************/
36 
37 // $Id: c2cpp.cpp 5433 2020-10-15 21:09:02Z arthurcnorman $
38 
39 
40 #include <cstdio>
41 #include <cstdlib>
42 #include <cstring>
43 #include <cctype>
44 
// The file being read (src) and the file being written (dest).
std::FILE *src;
std::FILE *dest;

// I will be lazy and assume all lines are short.
// Note that "wc -L files | sort | tail" will show the length of the longest
// lines in files...

constexpr int MAXLINELENGTH = 2000;

// A two-line window onto the input. line2 holds the line most recently
// read, line1 the one before it (kept back until the next read, when it is
// written out). type1 and type2 hold one tag per character of the matching
// line - see "Types" below.
char line1[MAXLINELENGTH], line2[MAXLINELENGTH];
char type1[MAXLINELENGTH], type2[MAXLINELENGTH];

int n1;       // Number of characters read into line1 (-1 before any read).
int n2;       // Number of characters read into line2 (-1 before any read).
int context;  // Lexical state, one of the CONTEXT_ values, kept across lines.
int ch;       // Most recent result of getc(src); EOF once input is exhausted.

// Contexts...
constexpr int CONTEXT_NONE       = 0x0;  // Generic
constexpr int CONTEXT_STRING     = 0x1;  // After (")
constexpr int CONTEXT_STRING_ESC = 0x2;  // After a (\) within a string
constexpr int CONTEXT_CHAR       = 0x3;  // After (')
constexpr int CONTEXT_CHAR_ESC   = 0x4;  // After a (\) within a character literal
constexpr int CONTEXT_BLOCK_C    = 0x5;  // Entering (/*) so within a block comment
constexpr int CONTEXT_BLOCK_C1   = 0x6;  // After (/*) so within a block comment
constexpr int CONTEXT_LINE_C     = 0x7;  // After (//) so within a 1-line comment
constexpr int CONTEXT_NONE1      = 0x8;  // Exiting block comments with (*/)
constexpr int CONTEXT_NONE2      = 0x9;  // After (*/) but only whitespace so far

// Types...
constexpr char typeGENERAL = '.';
constexpr char typeSTRING  = 's';
constexpr char typeCOMMENT = 'c';

// Column of the (/) in the most recent (/*), and column of the (*) in the
// (*/) that closed it (-1 while no close has been seen).
int opening_block_comment, closing_block_comment;

// star_count counts the run of (*) characters directly after a (/*); it
// drops to -1 if the run ends before reaching 4, and a value of 4 or more
// marks a "box" style comment that is left alone.
// in_block_comment is 0 outside a multi-line block comment, -1 inside one
// that is being rewritten line by line with (//), and +1 inside a box
// style one that is being left untouched.
int star_count, in_block_comment;
79 
put_comment(int i,int ch2)80 int put_comment(int i, int ch2)
81 {   int j, p=0;
82     for (j=0; j<opening_block_comment+2; j++)
83         if (line2[j] != ' ') break;
84     j = j-2;
85     if (j < 0)
86     {   int p;
87         for (p=std::strlen(line2); p>=0; p--)
88             line2[p-j] = line2[p];
89         p = -j;
90         j = 0;
91     }
92     line2[j] = '/';
93     line2[j+1] = ch2;
94     return i + p;
95 }
96 
process_simple_block_comment()97 int process_simple_block_comment()
98 {   int i, j;
99     for (i=0; i<n2; i++)
100         if (line2[i] == '*' && line2[i+1] == '/') break;
101     for (j=i+2; j<n2; j++)
102         if (!std::isspace(line2[j])) break;
103     if (i < n2 && j < n2)
104     {
105 // This is the messy case
106 //     /* xxx
107 //        yyy */ zzz
108 // with non-whitspace for zzz. I need to insert (/*) at the start of
109 // the final line.
110         i = put_comment(i, '*');
111         in_block_comment = 0;
112         context = CONTEXT_NONE;
113         return i+2;
114     }
115     else if (i < n2)
116     {
117 // Here there is nothing after the (*/) so I can afford to delete the
118 // (*/) but then reinsert a (//)
119         line2[i] = line2[i+1] = ' ';
120         i = put_comment(10000, '/');
121         in_block_comment = 0;
122         context = CONTEXT_NONE;
123         return std::strlen(line2);
124     }
125     else
126     {
127 // This is an intermediate line in a block comment. One style there puts a
128 // column of stars just below the initial start in the (/*) that opened the
129 // comment - I will remove that if it is present!
130         if (line2[opening_block_comment+1] == '*')
131             line2[opening_block_comment+1] = ' ';
132         put_comment(10000, '/');
133         return std::strlen(line2);
134     }
135 }
136 
process_box_block_comment()137 int process_box_block_comment()
138 {   int i;
139     for (i=0; i<n2; i++)
140         if (line2[i] == '*' && line2[i+1] == '/') break;
141     if (i < n2)
142     {   in_block_comment = 0;
143         context = CONTEXT_NONE;
144         return i+2;
145     }
146     else return n2;
147 }
148 
dumpline(char * s,const char * t)149 void dumpline(char *s, const char *t)
150 {   int i = std::strlen(s)-1;
151 // Discard trailing whitespace
152     while (i>=0 && std::isspace(s[i])) s[i--] = 0;
153     std::fprintf(dest, "%s\n", s);
154 // During initial tetsing I will dump the type information too...
155 //  fprintf(dest, "%s\n", t);
156 }
157 
readline()158 void readline()
159 {   int i, j, scanfrom=0;
160     if (n1 >= 0) dumpline(line1, type1);
161     n1 = n2;
162     std::memcpy(line1, line2, sizeof(line1));
163     std::memcpy(type1, type2, sizeof(type1));
164     n2 = 0;
165     line2[0] = type2[0] = 0;
166     while ((ch = std::getc(src)) != '\n' && ch != EOF)
167     {   type2[n2] = 0;
168         line2[n2++] = ch;
169     }
170     line2[n2] = 0;
171     if (in_block_comment < 0)
172         scanfrom = process_simple_block_comment();
173     else if (in_block_comment > 0)
174         scanfrom = process_box_block_comment();
175     for (i=scanfrom; line2[i]!=0; i++)
176     {   int c = line2[i],
177                 c1 = line2[i+1];
178         switch (context)
179         {   case CONTEXT_NONE:
180 // This is the generic case at the start of a file and well away from
181 // strings, comments etc. If you see ("), ('), (/*) or (//) you switch
182 // into a different context, and if it was (/*) you record the column in
183 // which the (/) fell.
184                 if (c == '"') context = CONTEXT_STRING;
185                 else if (c == '\'') context = CONTEXT_CHAR;
186                 else if (c == '/' && c1 == '*')
187                 {   opening_block_comment = i;
188                     closing_block_comment = -1;
189                     star_count = 0;
190                     context = CONTEXT_BLOCK_C;
191                 }
192                 else if (c == '/' && c1 == '/') context = CONTEXT_LINE_C;
193                 break;
194 // Following (") you are in a string until you find a second ("),
195 // except that (\) causes the character after it to be treated
196 // specially.
197             case CONTEXT_STRING:
198                 if (c == '"') context = CONTEXT_NONE;
199                 else if (c == '\\') context = CONTEXT_STRING_ESC;
200                 break;
201 // Here I am in a string and I had just seen a (\), so I do not test
202 // the character at all, but drop back to regular string status.
203             case CONTEXT_STRING_ESC:
204                 context = CONTEXT_STRING;
205                 break;
206 // Character literals are treated just liek string ones. This lets me
207 // have several characters within single quotes.
208             case CONTEXT_CHAR:
209                 if (c == '\'') context = CONTEXT_NONE;
210                 else if (c == '\\') context = CONTEXT_CHAR_ESC;
211                 break;
212             case CONTEXT_CHAR_ESC:
213                 context = CONTEXT_CHAR;
214                 break;
215 // When the (/) of a (/*) sequence has been processed I get here, and the
216 // character has to be a (*). I just pass by it and continue into the body
217 // of the block comment. I have to have this intermediate state so that
218 // in a sequence (/*/) the (*) does not get interpreted as the start of
219 // a (*/) to terminate the comment.
220             case CONTEXT_BLOCK_C:
221                 context = CONTEXT_BLOCK_C1;
222                 if (c1 == '!') star_count = 4; // (/*!) to be treated magically.
223                 break;
224 // Here is the main place where I skip the contents of a block comment.
225 // Almost just for fun I will detect and warn about nested (/*) sequences.
226             case CONTEXT_BLOCK_C1:
227                 if (c == '*' && c1 == '/')
228                 {   closing_block_comment = i;
229                     context = CONTEXT_NONE1;
230                     break;
231                 }
232                 if (c == '/' && c1 == '*')
233                 {   std::printf("\"/*\" sequence found within comment\n");
234                     std::printf("%s\n", line2);
235                 }
236                 if (c == '*' && star_count >= 0) star_count++;
237                 else if (star_count < 4) star_count = -1;
238                 break;
239 // As the end of a block comment, as at the start, I need a special state
240 // that is passed through as the second of the two character sequence is
241 // passed by. So this is for the (/) of the (*/) and is so that a
242 // sequence (*/*) does not simultaneously end one comment and start
243 // another.
244             case CONTEXT_NONE1:
245                 context = CONTEXT_NONE2;
246                 break;
247 // After a comment block (/* ... */) there could be further genuine
248 // material or there might be only whitespace up until the end of the
249 // line. In the former case I must leave the comment untouched, while in
250 // the latter I may change it into one starting with (//). So the state
251 // here is just like CONTEXT_NONE except that I remain here until I find
252 // something that is not whitespace.
253             case CONTEXT_NONE2:
254                 if (c == '"') context = CONTEXT_STRING;
255                 else if (c == '\'') context = CONTEXT_CHAR;
256                 else if (c == '/' && c1 == '*')
257                 {   opening_block_comment = i;
258                     closing_block_comment = -1;
259                     star_count = 0;
260                     context = CONTEXT_BLOCK_C;
261                 }
262                 else if (c == '/' && c1 == '/') context = CONTEXT_LINE_C;
263                 else if (c != ' ' && c != '\t') context = CONTEXT_NONE;
264                 break;
265 // Comments introduced by (//) just skip everything until they reach
266 // the end of the line.
267             case CONTEXT_LINE_C:
268                 break;
269         }
270 // The following tags each character in terms of whether it is in a
271 // string (or character literal) or comment. It leaves the final (") that
272 // terminates a string markes as "general", but the true contents of a string
273 // are marked. The purpose of this is that transformations made on the code
274 // ought not to apply within comments or strings. Well sometimes they should
275 // within comments, but that is a tougher issue and I will not mechanise it.
276         switch (context)
277         {   case CONTEXT_NONE:
278                 type2[i] = typeGENERAL;
279                 break;
280             case CONTEXT_STRING:
281             case CONTEXT_STRING_ESC:
282             case CONTEXT_CHAR:
283             case CONTEXT_CHAR_ESC:
284                 type2[i] = typeSTRING;
285                 break;
286             default:
287                 type2[i] = typeCOMMENT;
288         }
289         type2[i+1] = 0;
290         continue;
291     }
292     switch (context)
293     {
294 // At end of line if you had been in a (//) comment you drop back to
295 // the base state.
296         case CONTEXT_LINE_C:
297             context = CONTEXT_NONE;
298             break;
299 // If you had has (/* ... */) followed by at most whitespace then the
300 // comment can be reworked. However if there were a lof of stars as
301 // in (/****...*/) I will leave things alone.
302         case CONTEXT_NONE2:
303             if (star_count >= 4) break;
304             line2[opening_block_comment+1] = '/'; // Turn (/*) into (//)
305             line2[closing_block_comment] = 0;     // discard (*/)
306             while (std::isspace(line2[--closing_block_comment]))
307                 line2[closing_block_comment] = 0; // lose trailing white space
308             context = CONTEXT_NONE;
309             break;
310         case CONTEXT_BLOCK_C1:
311 // If my line ended up with (/**** ...) then I will set in_block_comment
312 // to 1 and as a consquence the whole comment block will be left as one
313 // introduced by (/*). If the line ended with (/* xxx) where xxx does not
314 // begin with a run of stars then I will map the comment marker to (//) and
315 // set in_block_comment to -1. The next lines will then need to be
316 // re-worked to keep them as comments!
317             if (in_block_comment == 0)
318             {
319 // I only want to set in_block_comment if it is not already set!
320                 if (star_count < 4)
321                 {   in_block_comment = -1;
322                     line2[opening_block_comment+1] = '/';
323                 }
324                 else in_block_comment = 1;
325             }
326             break;
327     }
328 // Here the line has been messed with a bit, but each character has been
329 // marked in type2 to characterise whether it is general, part of a string
330 // (or character literal) or part of a comment.
331 // So now I can perform transformations on it, with the first one that
332 // I wish to do being a change of Lisp_Object to LispObject.
333     for (i=0; line2[i] != 0; i++)
334     {   const char *target = "Lisp_Object";
335         int j = i;
336         while (*target != 0 &&
337                type2[j] == typeGENERAL &&
338                line2[j] == *target)
339         {   j++;
340             target++;
341         }
342         if (*target != 0) continue;
343 // Here there is an instance of my target word starting at position i.
344         for (j = i+4; line2[j+1] != 0 && type2[j+1] != typeCOMMENT; j++)
345         {   line2[j] = line2[j+1];
346             type2[j] = type2[j+1];
347         }
348         if (line2[j+1] == 0) line2[j] = 0;
349         else line2[j] = ' ';
350     }
351 }
352 
convert()353 void convert()
354 {   n1 = n2 = -1;
355     context = CONTEXT_NONE;
356     opening_block_comment = -1;
357     star_count = -1;
358     in_block_comment = 0;
359     readline();
360     while (ch != EOF)
361     {   readline();
362     }
363     if (line1[0] != 0 || line2[0] != 0) dumpline(line1, type1);
364     if (line2[0] != 0) dumpline(line2, type2);
365 }
366 
main(int argc,char * argv[])367 int main(int argc, char *argv[])
368 {   char *srcfile = argv[1];  // eg file.c
369     char destfile[100];
370     if (argc < 1)
371     {   std::printf("c2cpp file.c [file.cpp]\n");
372         return 0;
373     }
374     if (argc < 2) std::sprintf(destfile, "%spp", srcfile);
375     else std::strcpy(destfile, argv[2]);
376     std::printf("Convert from %s to %s\n", srcfile, destfile);
377     src = std::fopen(srcfile, "r");
378     dest = std::fopen(destfile, "w");
379     if (src != nullptr && dest != nullptr) convert();
380     if (src != nullptr) std::fclose(src);
381     if (dest != nullptr) std::fclose(dest);
382     return 0;
383 }
384 
385 // end of c2cpp.cpp
386