1 // c2cpp.cpp Copyright (C) A C Norman. 2015-2020
2
3 // This changes comments from C to C++ style.... with all sorts of odd
4 // stylistic choices motivated by the CSL source code. The task that
5 // this file was written for is now done, so it is at most of historical
6 // interest.
7
8 /**************************************************************************
9 * Copyright (C) 2020, Codemist. A C Norman *
10 * *
11 * Redistribution and use in source and binary forms, with or without *
12 * modification, are permitted provided that the following conditions are *
13 * met: *
14 * *
15 * * Redistributions of source code must retain the relevant *
16 * copyright notice, this list of conditions and the following *
17 * disclaimer. *
18 * * Redistributions in binary form must reproduce the above *
19 * copyright notice, this list of conditions and the following *
20 * disclaimer in the documentation and/or other materials provided *
21 * with the distribution. *
22 * *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS *
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE *
27 * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, *
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, *
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
30 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
31 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR *
32 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *
33 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH *
34 * DAMAGE. *
35 *************************************************************************/
36
37 // $Id: c2cpp.cpp 5433 2020-10-15 21:09:02Z arthurcnorman $
38
39
40 #include <cstdio>
41 #include <cstdlib>
42 #include <cstring>
43 #include <cctype>
44
// The stream being converted and the stream receiving the converted text.
std::FILE *src;
std::FILE *dest;

// I will be lazy and assume all lines are short.
// Note that "wc -L files | sort | tail" will show the length of the longest
// lines in files...

constexpr int MAXLINELENGTH = 2000;

// A two-line window on the input. line2 is the line most recently read and
// line1 the one before it; type1/type2 hold a per-character classification
// of the corresponding text.
char line1[MAXLINELENGTH];
char line2[MAXLINELENGTH];
char type1[MAXLINELENGTH];
char type2[MAXLINELENGTH];

// Number of characters read into line1 and line2 (negative until the
// window has been filled).
int n1;
int n2;
// Lexical state of the scanner: one of the CONTEXT_xxx codes.
int context;
// The character most recently fetched from src, so either '\n' or EOF
// once a line has been read.
int ch;
59
// Contexts...
constexpr int CONTEXT_NONE       = 0x0;  // Generic
constexpr int CONTEXT_STRING     = 0x1;  // After (")
constexpr int CONTEXT_STRING_ESC = 0x2;  // After a (\) within a string
constexpr int CONTEXT_CHAR       = 0x3;  // After (')
constexpr int CONTEXT_CHAR_ESC   = 0x4;  // After a (\) within a character literal
constexpr int CONTEXT_BLOCK_C    = 0x5;  // Entering (/*) so within a block comment
constexpr int CONTEXT_BLOCK_C1   = 0x6;  // After (/*) so within a block comment
constexpr int CONTEXT_LINE_C     = 0x7;  // After (//) so within a 1-line comment
constexpr int CONTEXT_NONE1      = 0x8;  // Exiting block comments with (*/)
constexpr int CONTEXT_NONE2      = 0x9;  // After (*/) but only whitespace so far

// Types...
constexpr char typeGENERAL = '.';
constexpr char typeSTRING  = 's';
constexpr char typeCOMMENT = 'c';

// Column of the (/) in the most recent (/*), and column of the (*) in the
// (*/) that closed it (-1 while no close has been seen).
int opening_block_comment;
int closing_block_comment;
// Length of the run of stars that follows (/*); it drops to -1 once a
// short run is broken, and a value of 4 or more marks a "box" comment
// that is left alone.
int star_count;
// 0 when not inside a multi-line block comment, -1 inside one that is
// being rewritten using (//), +1 inside a box comment that is kept as is.
int in_block_comment;
79
put_comment(int i,int ch2)80 int put_comment(int i, int ch2)
81 { int j, p=0;
82 for (j=0; j<opening_block_comment+2; j++)
83 if (line2[j] != ' ') break;
84 j = j-2;
85 if (j < 0)
86 { int p;
87 for (p=std::strlen(line2); p>=0; p--)
88 line2[p-j] = line2[p];
89 p = -j;
90 j = 0;
91 }
92 line2[j] = '/';
93 line2[j+1] = ch2;
94 return i + p;
95 }
96
process_simple_block_comment()97 int process_simple_block_comment()
98 { int i, j;
99 for (i=0; i<n2; i++)
100 if (line2[i] == '*' && line2[i+1] == '/') break;
101 for (j=i+2; j<n2; j++)
102 if (!std::isspace(line2[j])) break;
103 if (i < n2 && j < n2)
104 {
105 // This is the messy case
106 // /* xxx
107 // yyy */ zzz
108 // with non-whitspace for zzz. I need to insert (/*) at the start of
109 // the final line.
110 i = put_comment(i, '*');
111 in_block_comment = 0;
112 context = CONTEXT_NONE;
113 return i+2;
114 }
115 else if (i < n2)
116 {
117 // Here there is nothing after the (*/) so I can afford to delete the
118 // (*/) but then reinsert a (//)
119 line2[i] = line2[i+1] = ' ';
120 i = put_comment(10000, '/');
121 in_block_comment = 0;
122 context = CONTEXT_NONE;
123 return std::strlen(line2);
124 }
125 else
126 {
127 // This is an intermediate line in a block comment. One style there puts a
128 // column of stars just below the initial start in the (/*) that opened the
129 // comment - I will remove that if it is present!
130 if (line2[opening_block_comment+1] == '*')
131 line2[opening_block_comment+1] = ' ';
132 put_comment(10000, '/');
133 return std::strlen(line2);
134 }
135 }
136
process_box_block_comment()137 int process_box_block_comment()
138 { int i;
139 for (i=0; i<n2; i++)
140 if (line2[i] == '*' && line2[i+1] == '/') break;
141 if (i < n2)
142 { in_block_comment = 0;
143 context = CONTEXT_NONE;
144 return i+2;
145 }
146 else return n2;
147 }
148
dumpline(char * s,const char * t)149 void dumpline(char *s, const char *t)
150 { int i = std::strlen(s)-1;
151 // Discard trailing whitespace
152 while (i>=0 && std::isspace(s[i])) s[i--] = 0;
153 std::fprintf(dest, "%s\n", s);
154 // During initial tetsing I will dump the type information too...
155 // fprintf(dest, "%s\n", t);
156 }
157
readline()158 void readline()
159 { int i, j, scanfrom=0;
160 if (n1 >= 0) dumpline(line1, type1);
161 n1 = n2;
162 std::memcpy(line1, line2, sizeof(line1));
163 std::memcpy(type1, type2, sizeof(type1));
164 n2 = 0;
165 line2[0] = type2[0] = 0;
166 while ((ch = std::getc(src)) != '\n' && ch != EOF)
167 { type2[n2] = 0;
168 line2[n2++] = ch;
169 }
170 line2[n2] = 0;
171 if (in_block_comment < 0)
172 scanfrom = process_simple_block_comment();
173 else if (in_block_comment > 0)
174 scanfrom = process_box_block_comment();
175 for (i=scanfrom; line2[i]!=0; i++)
176 { int c = line2[i],
177 c1 = line2[i+1];
178 switch (context)
179 { case CONTEXT_NONE:
180 // This is the generic case at the start of a file and well away from
181 // strings, comments etc. If you see ("), ('), (/*) or (//) you switch
182 // into a different context, and if it was (/*) you record the column in
183 // which the (/) fell.
184 if (c == '"') context = CONTEXT_STRING;
185 else if (c == '\'') context = CONTEXT_CHAR;
186 else if (c == '/' && c1 == '*')
187 { opening_block_comment = i;
188 closing_block_comment = -1;
189 star_count = 0;
190 context = CONTEXT_BLOCK_C;
191 }
192 else if (c == '/' && c1 == '/') context = CONTEXT_LINE_C;
193 break;
194 // Following (") you are in a string until you find a second ("),
195 // except that (\) causes the character after it to be treated
196 // specially.
197 case CONTEXT_STRING:
198 if (c == '"') context = CONTEXT_NONE;
199 else if (c == '\\') context = CONTEXT_STRING_ESC;
200 break;
201 // Here I am in a string and I had just seen a (\), so I do not test
202 // the character at all, but drop back to regular string status.
203 case CONTEXT_STRING_ESC:
204 context = CONTEXT_STRING;
205 break;
206 // Character literals are treated just liek string ones. This lets me
207 // have several characters within single quotes.
208 case CONTEXT_CHAR:
209 if (c == '\'') context = CONTEXT_NONE;
210 else if (c == '\\') context = CONTEXT_CHAR_ESC;
211 break;
212 case CONTEXT_CHAR_ESC:
213 context = CONTEXT_CHAR;
214 break;
215 // When the (/) of a (/*) sequence has been processed I get here, and the
216 // character has to be a (*). I just pass by it and continue into the body
217 // of the block comment. I have to have this intermediate state so that
218 // in a sequence (/*/) the (*) does not get interpreted as the start of
219 // a (*/) to terminate the comment.
220 case CONTEXT_BLOCK_C:
221 context = CONTEXT_BLOCK_C1;
222 if (c1 == '!') star_count = 4; // (/*!) to be treated magically.
223 break;
224 // Here is the main place where I skip the contents of a block comment.
225 // Almost just for fun I will detect and warn about nested (/*) sequences.
226 case CONTEXT_BLOCK_C1:
227 if (c == '*' && c1 == '/')
228 { closing_block_comment = i;
229 context = CONTEXT_NONE1;
230 break;
231 }
232 if (c == '/' && c1 == '*')
233 { std::printf("\"/*\" sequence found within comment\n");
234 std::printf("%s\n", line2);
235 }
236 if (c == '*' && star_count >= 0) star_count++;
237 else if (star_count < 4) star_count = -1;
238 break;
239 // As the end of a block comment, as at the start, I need a special state
240 // that is passed through as the second of the two character sequence is
241 // passed by. So this is for the (/) of the (*/) and is so that a
242 // sequence (*/*) does not simultaneously end one comment and start
243 // another.
244 case CONTEXT_NONE1:
245 context = CONTEXT_NONE2;
246 break;
247 // After a comment block (/* ... */) there could be further genuine
248 // material or there might be only whitespace up until the end of the
249 // line. In the former case I must leave the comment untouched, while in
250 // the latter I may change it into one starting with (//). So the state
251 // here is just like CONTEXT_NONE except that I remain here until I find
252 // something that is not whitespace.
253 case CONTEXT_NONE2:
254 if (c == '"') context = CONTEXT_STRING;
255 else if (c == '\'') context = CONTEXT_CHAR;
256 else if (c == '/' && c1 == '*')
257 { opening_block_comment = i;
258 closing_block_comment = -1;
259 star_count = 0;
260 context = CONTEXT_BLOCK_C;
261 }
262 else if (c == '/' && c1 == '/') context = CONTEXT_LINE_C;
263 else if (c != ' ' && c != '\t') context = CONTEXT_NONE;
264 break;
265 // Comments introduced by (//) just skip everything until they reach
266 // the end of the line.
267 case CONTEXT_LINE_C:
268 break;
269 }
270 // The following tags each character in terms of whether it is in a
271 // string (or character literal) or comment. It leaves the final (") that
272 // terminates a string markes as "general", but the true contents of a string
273 // are marked. The purpose of this is that transformations made on the code
274 // ought not to apply within comments or strings. Well sometimes they should
275 // within comments, but that is a tougher issue and I will not mechanise it.
276 switch (context)
277 { case CONTEXT_NONE:
278 type2[i] = typeGENERAL;
279 break;
280 case CONTEXT_STRING:
281 case CONTEXT_STRING_ESC:
282 case CONTEXT_CHAR:
283 case CONTEXT_CHAR_ESC:
284 type2[i] = typeSTRING;
285 break;
286 default:
287 type2[i] = typeCOMMENT;
288 }
289 type2[i+1] = 0;
290 continue;
291 }
292 switch (context)
293 {
294 // At end of line if you had been in a (//) comment you drop back to
295 // the base state.
296 case CONTEXT_LINE_C:
297 context = CONTEXT_NONE;
298 break;
299 // If you had has (/* ... */) followed by at most whitespace then the
300 // comment can be reworked. However if there were a lof of stars as
301 // in (/****...*/) I will leave things alone.
302 case CONTEXT_NONE2:
303 if (star_count >= 4) break;
304 line2[opening_block_comment+1] = '/'; // Turn (/*) into (//)
305 line2[closing_block_comment] = 0; // discard (*/)
306 while (std::isspace(line2[--closing_block_comment]))
307 line2[closing_block_comment] = 0; // lose trailing white space
308 context = CONTEXT_NONE;
309 break;
310 case CONTEXT_BLOCK_C1:
311 // If my line ended up with (/**** ...) then I will set in_block_comment
312 // to 1 and as a consquence the whole comment block will be left as one
313 // introduced by (/*). If the line ended with (/* xxx) where xxx does not
314 // begin with a run of stars then I will map the comment marker to (//) and
315 // set in_block_comment to -1. The next lines will then need to be
316 // re-worked to keep them as comments!
317 if (in_block_comment == 0)
318 {
319 // I only want to set in_block_comment if it is not already set!
320 if (star_count < 4)
321 { in_block_comment = -1;
322 line2[opening_block_comment+1] = '/';
323 }
324 else in_block_comment = 1;
325 }
326 break;
327 }
328 // Here the line has been messed with a bit, but each character has been
329 // marked in type2 to characterise whether it is general, part of a string
330 // (or character literal) or part of a comment.
331 // So now I can perform transformations on it, with the first one that
332 // I wish to do being a change of Lisp_Object to LispObject.
333 for (i=0; line2[i] != 0; i++)
334 { const char *target = "Lisp_Object";
335 int j = i;
336 while (*target != 0 &&
337 type2[j] == typeGENERAL &&
338 line2[j] == *target)
339 { j++;
340 target++;
341 }
342 if (*target != 0) continue;
343 // Here there is an instance of my target word starting at position i.
344 for (j = i+4; line2[j+1] != 0 && type2[j+1] != typeCOMMENT; j++)
345 { line2[j] = line2[j+1];
346 type2[j] = type2[j+1];
347 }
348 if (line2[j+1] == 0) line2[j] = 0;
349 else line2[j] = ' ';
350 }
351 }
352
convert()353 void convert()
354 { n1 = n2 = -1;
355 context = CONTEXT_NONE;
356 opening_block_comment = -1;
357 star_count = -1;
358 in_block_comment = 0;
359 readline();
360 while (ch != EOF)
361 { readline();
362 }
363 if (line1[0] != 0 || line2[0] != 0) dumpline(line1, type1);
364 if (line2[0] != 0) dumpline(line2, type2);
365 }
366
main(int argc,char * argv[])367 int main(int argc, char *argv[])
368 { char *srcfile = argv[1]; // eg file.c
369 char destfile[100];
370 if (argc < 1)
371 { std::printf("c2cpp file.c [file.cpp]\n");
372 return 0;
373 }
374 if (argc < 2) std::sprintf(destfile, "%spp", srcfile);
375 else std::strcpy(destfile, argv[2]);
376 std::printf("Convert from %s to %s\n", srcfile, destfile);
377 src = std::fopen(srcfile, "r");
378 dest = std::fopen(destfile, "w");
379 if (src != nullptr && dest != nullptr) convert();
380 if (src != nullptr) std::fclose(src);
381 if (dest != nullptr) std::fclose(dest);
382 return 0;
383 }
384
385 // end of c2cpp.cpp
386