1#!/bin/csh -f
2
3#-----------------------------------------------------------------------
4# cmunge: a tool for encrypting and compacting C source code.
5#
6# Usage:  cmunge [options] files...
7#
8# Options:
9#   -I include-dir	-- Search directory `include-dir' for #include files.
10#   -d output-dir	-- Write the output file(s) to directory
11#			     `output-dir'.  (Default: current directory.)
12#   -f output-file-rootname
13#			-- Output files have names starting with the
14#			     string `output-file-rootname' followed by ".c"
15#			     if there is one output file, or "1.c", "2.c",
16#			     etc, if there is more than one.  E.g. `-f f'
17# 			     causes the output files to be named "f.c"
18# 			     or "f1.c", "f2.c", etc.  (Default: f).
19#   -l min-output-linelen
20#			-- Make lines of the output file(s) at least
21#			     `min-output-linelen' characters long.
22#			     (Default: 90.)
23#   -p prefix-letter	-- Give the translated C identifiers names
24#			     consisting of the letter `prefix-letter'
25#			     followed by a number.  E.g. `-p l' causes
26#			     the translated identifiers to be called
27#			     l1, l2, etc.  (Default: l).
28#   -v version-string	-- Insert string `version-string' as a C comment
29#			     in the first line of each output file.
30#			     (Default: no comment is written.)
31#   -<anything else>	-- Other arguments are passed directly to the
32#			     C pre-processor, `cpp'.  E.g. argument
33#                            `-Dname=def' is passed directly to `cpp'.
34#   files...		-- The input C source file(s) to be `munged'.
35#
36# Spaces are optional between option letters and their accompanying arguments.
37#
38# 'cmunge' calls 2 programs, 'proc-incl' and 'ctran', whose arguments are:
39#
40# proc-incl  [-I include-dir]... [-D include-dict-file] [file]
41# ctran  [-D id-dict-file] [-f output-file-rootname]
42#        [-l min-output-linelen] [-p prefix-letter] [-v version-string]
43#        file...
44#
45# with an intervening call to 'cpp', the C-preprocessor.
46#
47#
48# Author:  John Merlin, Dept of Electronics & Comp Sci, Univ of Southampton.
49# Developed:  Dec 1993.
50# Released on WWW:  Aug 1998.
51# This software can be downloaded from URL:
52#	http://www.vcpc.univie.ac.at/~jhm/cmunge/.
53# Present contact details:
54#	John Merlin, VCPC, University of Vienna.
55#	Email:  jhm@vcpc.univie.ac.at
56#
57#-----------------------------------------------------------------------
58# ** STILL TO DO:
59# -- get script to check exit status of 'proc-incl' & 'ctran', and
60#	exit & tidy-up if != 0.
61# -- in o/p info (at end) only o/p stuff about user & system incl files
62# if there are any; otherwise say, e.g. "No user-defined include files.".
63# -- try to limit number of vars -- unset as soon as poss.
64# -- abbreviate the info!
# -- get it to work with dir names, rootnames, etc, containing blanks.
66#-----------------------------------------------------------------------
67
#-----------------------------------------------------------------------
# Site configuration.
#
# cmunger_bin_dir -- absolute pathname of the directory that holds the
#                    helper programs `proc-incl' and `ctran'.  If nothing
#                    has been moved around this is ${cmunger}/bin, where
#                    ${cmunger} is the absolute pathname of the `cmunger'
#                    directory.
# cpp             -- pathname of the C pre-processor.
#-----------------------------------------------------------------------
set  cmunger_bin_dir = "/usr/local/bin"
set  cpp             = "/usr/bin/cpp"

# Full pathnames of the two helper programs.
set  proc_incl = "${cmunger_bin_dir}/proc-incl"
set  ctran     = "${cmunger_bin_dir}/ctran"

# With `nonomatch' set, a pattern that matches no file expands to itself
# instead of raising an error; the script relies on this when it tests
# for leftover "tmp_*" files.
set  nonomatch

#-----------------------------------------------------------------------
# Defaults for the options that the script itself interprets.
#   outdir  -- output directory prefix (empty: current directory)
#   outroot -- rootname of the output files
#-----------------------------------------------------------------------
set  outdir  = ""
set  outroot = "f"
94
#-----------------------------------------------------------------------
# Process the command options (the leading arguments that start '-').
#
# Three argument lists are accumulated:
#   proc_incl_args -- options handed to 'proc-incl'  (-I ...)
#   cpp_args       -- options handed to 'cpp'        (anything unrecognised)
#   ctran_args     -- options handed to 'ctran'      (-l, -p, attached -v)
# 'outdir', 'outroot' and 'version_str' are set from -d, -f and -v.
#
# The loop stops at the first argument that does not start with '-';
# whatever is then left in 'argv' is the list of input files.  Running
# out of arguments at any point jumps to 'no_input_files' or 'usage'.
#
# Every use of an argument is quoted, and the lists are rebuilt with the
# ':q' modifier, so that an argument is never word-split or globbed
# while it is being stored.
#-----------------------------------------------------------------------
set proc_incl_args = ()
set cpp_args = ()
set ctran_args = ()

if (! $#argv)  goto  no_input_files
while ("$argv[1]" =~ -*)
	switch ("$argv[1]")
	case  -I*:
		#------------------------------------------------#
		# -I include-dir	-- passed to 'proc-incl' #
		# A bare '-I' takes the next argument as the	 #
		# directory; both words are passed on.		 #
		#------------------------------------------------#
		set proc_incl_args = ($proc_incl_args:q  "$argv[1]")
		if ("$argv[1]" == "-I") then
			shift
			if (! $#argv) then
				echo "cmunge: expected directory name after '-I'"
				goto usage
			endif
			set proc_incl_args = ($proc_incl_args:q  "$argv[1]")
		endif
		breaksw

	case  -d*:
		#-------------------------------------------------------#
		# -d output-dir	-- used here  ($outdir), and appended	#
		# to the front of '$outroot' (after all the options	#
		# have been read).					#
		# For the attached form ('-ddir') the '-d' prefix is	#
		# stripped by feeding the argument to 'sed' through a	#
		# here-document.					#
		#-------------------------------------------------------#
		if ("$argv[1]" == "-d") then
			shift
			if (! $#argv) then
				echo "cmunge: expected directory name after '-d'"
				goto usage
			endif
			set outdir = "$argv[1]"
		else
			set outdir = `sed 's/^-d//' <<ARG\
$argv[1]\
ARG`
		endif
		# Must be an existing *directory*; a plain file of that
		# name would only make every later redirection fail.
		if ( ! (-d "$outdir")) then
			echo "cmunge: output directory '$outdir' does not exist (or is not a directory)"
			exit (1)
		endif
		set  outdir = "${outdir}/"
		breaksw

	case  -f*:
		#-------------------------------------------------------#
		# -f output-file-rootname  -- used here ($outroot) and	#
		# passed to 'ctran'.					#
		# N.B. It's added to 'ctran_args' only after all the	#
		# options have been read, in case '$outdir', which must	#
		# be appended to '$outroot', is also specified.		#
		#-------------------------------------------------------#
		if ("$argv[1]" == "-f") then
			shift
			if (! $#argv) then
				echo "cmunge: expected output file rootname after '-f'"
				goto usage
			endif
			set outroot = "$argv[1]"
		else
			set outroot = `sed 's/^-f//' <<ARG\
$argv[1]\
ARG`
		endif
		breaksw

	case  -l*:
	case  -p*:
		#-----------------------------------------------#
		# -l min-output-linelen	-- passed to 'ctran'	#
		# -p prefix-letter	--    "    "    "	#
		# A bare two-character option ('-l' or '-p')	#
		# takes the next argument as its value.		#
		#-----------------------------------------------#
		set arg = "$argv[1]"
		set ctran_args = ($ctran_args:q  "$arg")
		if ("$arg" =~ -?) then
			shift
			if (! $#argv) then
				switch ("$arg")
				case  "-l" :
					echo "cmunge: expected a number after '-l'"
					breaksw
				default :
					echo "cmunge: expected a letter after '-p'"
					breaksw
				endsw
				goto usage
			endif
			set ctran_args = ($ctran_args:q  "$argv[1]")
		endif
		breaksw

	case  -v*:
		#-----------------------------------------------#
		# -v version-string	-- passed to 'ctran'	#
		# The detached form is kept in 'version_str'	#
		# (and added to the 'ctran' command later); the	#
		# attached form is passed on as it stands.	#
		#-----------------------------------------------#
		set arg = "$argv[1]"
		if ("$arg" == "-v") then
			shift
			if (! $#argv) then
				echo "cmunge: expected a string after '-v'"
				goto usage
			endif
			set version_str = "$argv[1]"
		else
			set ctran_args = ($ctran_args:q  "$arg")
		endif
		breaksw

	default:
		#------------------------------------------------#
		# Pass option directly as an argument to 'cpp'.  #
		# (E.g. this could be something like '-DADAPT'). #
		#------------------------------------------------#
		set cpp_args = ($cpp_args:q  "$argv[1]")
		breaksw
	endsw
	shift
	if (! $#argv)  goto  no_input_files
end
220
#-----------------------------------------------------------------------
# Work out how the output files will be named.
#
# 'munged_files' is the human-readable description that is written to
# the readme: a single name for one input file, or a "first - last"
# range when there are several.  It is built from the bare rootname,
# i.e. before the output directory is prefixed to 'outroot'.
#-----------------------------------------------------------------------
set  munged_files = "${outroot}.c"
if ($#argv > 1)  set  munged_files = "${outroot}1.c - ${outroot}${#argv}.c"

# From here on 'outroot' includes the output directory.  '-f' is always
# handed to 'ctran', since the directory prefix may have been added even
# when the user gave no '-f' option.
set  outroot    = "${outdir}${outroot}"
set  ctran_args = ($ctran_args -f "$outroot")
229
#-----------------------------------------------------------------#
# Check that there are no 'tmp_' files that might be overwritten! #
#								  #
# 'nonomatch' is set, so the pattern expands to itself when no	  #
# file matches.  Anything else -- more than one word, or a single #
# word that differs from the pattern -- means such files exist.	  #
#-----------------------------------------------------------------#
set  tmp_files = "${outdir}"tmp_*
if ( $#tmp_files > 1 || "$tmp_files" != "${outdir}tmp_*" ) then
	# The message is assembled from up to three fragments; each
	# fragment carries its own trailing blank so the words do not
	# run together.
	echo -n  "cmunge:  please delete or rename the files "
	if ("$outdir" != "") echo -n  "in '${outdir}' "
	echo  "whose names start 'tmp_'"
	exit (1)
endif
unset  tmp_files
241
#-----------------------------------------------------------------------
# Names of the dictionary files that this run produces in the output
# directory.  Copies left over from a previous run of 'cmunge' are
# deleted, and each deletion is echoed.
#-----------------------------------------------------------------------
set  file_dict = "${outdir}File.dict"
set  id2val = "${outdir}Id2val"
set  val2id = "${outdir}Val2id"

foreach stale ("$file_dict" "$id2val" "$val2id")
	if (-e "$stale") then
		echo  rm  $stale;  rm  "$stale"
	endif
end
unset  stale

#-----------------------------------------------------------------------
# Temporary files:
#   tmp_file  -- general purpose scratch file
#   incl_dict -- dictionary of 'include' files kept by 'proc-incl'
# 'proc-incl' is told where the dictionary is (-D) only when an output
# directory was given.
#-----------------------------------------------------------------------
set  tmp_file	= "${outdir}tmp_file"
set  incl_dict	= "${outdir}tmp_incl_dict"
if ("$outdir" != "") then
	set  proc_incl_args = ($proc_incl_args -D "$incl_dict")
endif
264
#-------------------------------------------------------------------
#	Stage 1: run every input file through the filter 'proc-incl'.
#
#	According to the notes kept with this script, 'proc-incl':
# -- replaces each comment by ' ' and splices '\'-terminated lines;
# -- in:	#include  <...>
#		#include  token-sequence
#	 	#include  "filename"	-- 'filename' in "/usr/include"
# replaces "#include" by "@"  (so that 'cpp' leaves the directive alone
# later on);
# -- in:	#include "filename"	-- 'filename' *not* in "/usr/include"
# replaces 'filename' by a new temporary filename ("tmp_h..").  These
# files (assumed to be 'user-defined') are 'expanded' by 'cpp' later.
# -- does some simple textual reduction (e.g. replacing each sequence
# of whitespace by a single ' ').
#
#	The filtered text of input file number N goes to "tmp_cN", and a
# record "final-name <TAB> original-name" is appended to 'File.dict'.
# 'proc-incl' records the replacement 'include' filenames in
# 'tmp_incl_dict'.
#
# N.B. 'argv' is indexed with a counter rather than walked with
# 'foreach', in case a filename contains embedded whitespace.
#-------------------------------------------------------------------
echo  ""
echo  "Processing '#include' directives in source files..."

set  tmp_c_files = ()
set  final_files = ()

set  num = 0
while ($num < $#argv)
	@ num++
	set  c_file   = "$argv[$num]"
	set  out_file = "${outdir}tmp_c$num"
	set  tmp_c_files = ($tmp_c_files  "$out_file")

	# The final name carries a numeric suffix only when there is
	# more than one input file.
	if ($#argv > 1) then
		set  final_name = "${outroot}${num}.c"
	else
		set  final_name = "${outroot}.c"
	endif
	cat  >> "$file_dict" <<FILE_ENTRY
${final_name}	$c_file
FILE_ENTRY
	set  final_files = ($final_files  "$final_name")

	echo "	'$c_file'	-- output to '$out_file'"
	$proc_incl  $proc_incl_args   "$c_file" > "$out_file"
end

# Pause between stages: a reply of 'n' stops the run here.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw
322
#-------------------------------------------------------------------
#	Stage 2: filter the user-defined 'include' files.
#
#	Records in 'tmp_incl_dict' that start with 'u' name 'include'
# files that have not been processed yet.  Each pass of the outer loop
# marks the current records as processed ('u' -> 'p'), filters the
# files they name into "tmp_hN", and then re-reads the dictionary;
# filtering may have added further 'u' records, so the loop repeats
# until none are left.
#
# N.B. The command substitution is enclosed in double quotes so that
# each output *line* becomes one word of 'incl_files'; filenames that
# contain whitespace therefore stay intact.
#-------------------------------------------------------------------
echo  ""
echo  "Processing '#include' directives in user-defined 'include' files..."

set  incl_files = "`grep '^u' $incl_dict | sed 's/^u	//'`"
set  num = 0
while ($#incl_files)
	# Mark every pending record as 'processed'.
	sed 's/^u/p/' "$incl_dict" > "$tmp_file"
	mv "$tmp_file" "$incl_dict"

	# Indexed loop (not 'foreach') for the sake of filenames with
	# embedded whitespace.
	set  k = 0
	while ($k < $#incl_files)
		@ k++
		@ num++
		set  incl_file = "$incl_files[$k]"
		set  out_file  = "${outdir}tmp_h$num"

		echo "	'$incl_file'	-- output to '$out_file'"
		$proc_incl  $proc_incl_args   "$incl_file" > "$out_file"
	end

	# Pick up any 'include' files discovered during this pass.
	set  incl_files = "`grep '^u' $incl_dict | sed 's/^u	//'`"

	# Pause after each pass: a reply of 'n' stops the run here.
	echo -n "Continue (y/n) ?: "
	switch ($<)
	case  n:
		exit (0);
	endsw

end
369
#---------------------------------------------------------------#
# Pass the "tmp_c.." files through the C preprocessor.		#
#								#
# Each file is preprocessed into the scratch file, which then	#
# replaces the original.					#
#								#
# N.B. As before, we use a 'while' rather than a 'foreach' loop	#
# in case any of the filenames in '$tmp_c_files' contain	#
# embedded whitespace!						#
#								#
# Whether extra 'cpp' options were given is decided from the	#
# word count of 'cpp_args': comparing the list itself with ""	#
# is not a valid expression when the list is empty or holds	#
# several words.  The options are passed with ':q', so each	#
# one reaches 'cpp' as a separate argument (enclosing the list	#
# in double quotes would fuse them into a single argument).	#
#---------------------------------------------------------------#
echo  ""
echo  "Passing 'tmp_c..' files through C preprocessor (which 'expands' above 'include' files):"

set  i = 1
while ($i <= $#tmp_c_files)
	set c_file = "$tmp_c_files[$i]";	@ i++
	if ($#cpp_args == 0) then
		echo "	$cpp -P  $c_file > $tmp_file;   mv  $tmp_file  $c_file"
		$cpp -P  "$c_file" > "$tmp_file";   mv  "$tmp_file"  "$c_file"
	else
		echo "	$cpp -P  $cpp_args  $c_file > $tmp_file;   mv  $tmp_file  $c_file"
		$cpp -P  $cpp_args:q  "$c_file" > "$tmp_file";   mv  "$tmp_file"  "$c_file"
	endif
end

# Pause between stages: a reply of 'n' stops the run here.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw
397
#-----------------------------------------------------------------------
# Stage 4: 'munge' the preprocessed files with 'ctran'.
#
# 'ctran' is told where the identifier dictionary is (-D) only when an
# output directory was given.  The command is echoed before it is run;
# a version string, if one was supplied with '-v', is added to both the
# echoed and the real command.
#-----------------------------------------------------------------------
set  id_dict = "${outdir}Id.dict"
if ("$outdir" != "") then
	set  ctran_args = ($ctran_args -D "$id_dict")
endif

echo ""
echo "'Munging' 'tmp_c..' files..."

if (! $?version_str) then
	echo "	ctran"  $ctran_args  $tmp_c_files
	$ctran  $ctran_args  $tmp_c_files
else
	echo "	ctran"  $ctran_args -v \""$version_str"\" $tmp_c_files
	$ctran  $ctran_args -v "$version_str" $tmp_c_files
endif

# Pause between stages: a reply of 'n' stops the run here.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw
419
#-------------------------------------------------------#
# Make sorted dictionary files, 'Id2val' and 'Val2id'	#
#							#
# Both are made from 'Id.dict' with its first line and	#
# any empty lines removed:				#
#   Id2val -- sorted on the second field onwards, in	#
#	      dictionary order and ignoring case;	#
#   Val2id -- sorted numerically from the start of the	#
#	      line.					#
# The sort key is given as '-k 2'; the obsolete '+1'	#
# spelling is taken as a file name by current versions	#
# of 'sort'.						#
#-------------------------------------------------------#
echo  ""
echo  "Munging complete."

echo  ""
echo  "Making sorted ID dictionaries '$id2val' and '$val2id'."
sed 1d  "$id_dict" | sed '/^$/d' | sort -k 2 -df -o "$id2val"
sed 1d  "$id_dict" | sed '/^$/d' | sort -n -o "$val2id"
430
#-----------------------------------------------------------------------
# Write general information about this run to file 'Cmunge.readme' in
# the output directory.
#
# The here-document is unquoted, so '$munged_files' and the two
# command substitutions inside it are expanded when it is written:
#   -- the first lists the records of 'tmp_incl_dict' (the 'include'
#      files that were expanded);
#   -- the second lists the '#include' directives that survive in the
#      final output files (the 'include' files that were not expanded).
#-----------------------------------------------------------------------
cat > "${outdir}Cmunge.readme" <<INFO
'cmunge' information
====================
The following files are generated:
$munged_files
	-- The 'munged' C output.
File.dict
	-- A dictionary of new filenames versus original ones.
Id.dict	-- A list (in no particular order) of every identifier with
	   its translation value (a number) or '-' if not renamed.
		If this file exists when 'cmunge' is run it controls
	   the renaming of identifiers, so the translation can be
	   modified by editing this file.  E.g. to prevent an identifier
	   from being renamed, replace its translation value by '-',
	   and to force the renaming of an identifier that by default
	   isn't (e.g. 'printf') replace its '-' by a (unique!) number,
	   and then re-run 'cmunge'.
		The first line of the file contains a number, N say.
	   If the 'Id.dict' file is used in a subsequent 'cmunge' run,
	   then the starting value for translation of new identifiers
	   (i.e. ones not listed in 'Id.dict') is (N+1).  Therefore
	   N must be greater than or equal to the maximum listed
	   translation value.  On output from a 'cmunge' run, this
	   number is set to the maximum listed translation value.
Id2val	-- As above, listed in alphabetical order of the old names.
Val2id	-- As above, listed in order of translation value.


'Include file' information
==========================
The following 'include' files:

	Expanded include files:
	-----------------------
`sed 's/^p	\(.*\)/	"\1"/' $incl_dict | sort`

are expanded in the 'munged' C source code.

Check that all of these files are *user-defined*.  'Standard library'
or 'system' include files shouldn't be expanded, as this may cause
inconsistent renaming and/or make the resulting C code non-portable.
If any of the above *are* 'standard library' or 'system' files,
take the following actions:

-- To prevent the relevant files being expanded, either:
	-- change the filename delimiters of the relevant files
	     in '#include' statements from  "..."  to  <...>; or
	-- put an '@' character at the start of the relevant
	     '#include' lines, which will make 'cmunge' copy them
	     out unchanged  (as described in the final section).
-- Delete the file 'Id.dict' (generated by 'cmunge') and the
	'munged' output files.
-- Re-run 'cmunge'.

The following 'include' files:

	Unexpanded include files:
	-------------------------
`grep -h '^#include' $final_files | sed 's/^#include[ 	]*/	/' | sort -u`

*aren't* expanded in the 'munged' C source code, i.e. their '#include'
directives are preserved.

'cmunge' contains a dictionary of the 'standard library' identifiers
defined in Appendix B of Kernighan & Ritchie's 'ANSI C' -- basically,
those in the following files:

	Standard library include files:
	-------------------------------
	<assert.h>	<limits.h>	<stdarg.h>	<time.h>
	<ctype.h>	<math.h>	<stdio.h>
	<errno.h>	<setjmp.h>	<stdlib.h>
	<float.h>	<signal.h>	<string.h>

These identifiers aren't renamed in the 'munged' output and hence
remain consistent with the 'standard library'.  If all of the
above 'unexpanded include files' are in the list of 'standard library
include files', and no use is made of external functions from 'system'
libraries that are not part of the 'standard library', then all should
be OK, so skip the rest of this message.

By default, all other identifiers in the C source code are renamed
in the 'munged' output.  Therefore, if any of the 'unexpanded include
files' are *not* in the 'standard library' list, or any external
functions are used from a 'system' library that is not part of the
'standard library', 'cmunge' will rename the corresponding identifiers
in the source code, thus making them inconsistent with the names in
the 'system' files or libraries.  Two possible remedies are:

(i) If any of the 'unexpanded include files' are user-defined rather
than 'system' files, then:

-- Change their filename delimiters in the '#include' statements
	from  <...>  to  "...",  or remove the initial '@' from
	their '#include' lines  (if '@' was added to protect the
	lines from being 'munged' as indicated earlier).
-- Delete the 'munged' output files.
-- Re-run 'cmunge'.

This will cause those 'include' files to be expanded in the 'munged'
source code.

-- OR --

(ii) Prevent renaming of the identifiers concerned as follows:

-- Find the improperly renamed identifiers.  (Perhaps the easiest
	way to do this is to compile the 'munged' output to find
	which identifiers aren't declared or defined, using file
	'Val2id' to translate from new to old names).
-- In file 'Id.dict', find the entry for each identifier concerned,
	and replace its first field (a number) by '-'.
-- Delete the 'munged' output files.
-- Re-run 'cmunge'.

N.B. When 'cmunge' is re-run ignore this warning  (as names already
marked with '-' in 'Id.dict' aren't renamed).


How to protect lines of the original source code from being 'munged'
===================================================================
It is possible to protect any line of the source code  (including
blank lines, comments and pre-processor directives) from being
'munged' by prefixing an '@' character as the first character on
the line.  This will cause 'cmunge' to copy out the line unchanged,
apart from removing its initial '@' character.

One possible use of this is to preserve conditional compilation
directives in the 'munged' output, e.g. lines like:

#ifdef __STDC__
...
#else
...
#endif

If 'protected' lines contain identifiers that also appear in
'unprotected' lines (i.e. lines that will be 'munged'), then it is
necessary to prevent those identifiers from being renamed in the
'munged' lines.  This can be done by modifying their entries in the
file 'Id.dict' as described in (ii) above, and re-running 'cmunge'.
INFO
576
#-----------------------------------------------------------------------
# Tidy up and finish.
#
# The temporary files are removed with the pattern 'tmp_*'.  By this
# point some such files do exist (e.g. 'tmp_incl_dict' and the 'tmp_c'
# files), so the pattern cannot expand to itself and make 'rm'
# complain.
#-----------------------------------------------------------------------
echo  ""
echo  "rm  ${outdir}tmp_*"
rm  ${outdir}tmp_*

# Closing banner that points the user at the readme file.
set  banner = "*****************************************************************************"
echo  ""
echo "$banner"
echo "*  SEE FILE '${outdir}Cmunge.readme' FOR IMPORTANT INFORMATION ABOUT THIS RUN"
echo "$banner"

exit (0)
594
#-----------------------------------------------------------------------
# Error exits.
#
# no_input_files -- reached when the argument list runs out before an
#                   input file is found; prints a complaint and then
#                   falls through to 'usage'.
# usage          -- prints the option summary and exits with status 1.
#
# The summary is a here-document with a quoted terminator, so the text
# is copied out literally with no substitution of any kind.
#-----------------------------------------------------------------------
no_input_files:
	echo 'cmunge: no input files specified'

usage:
	cat <<'USAGE'

Usage:  cmunge [options] files...

Options:
  -I include-dir        -- Search directory 'include-dir' for #include files.
  -d output-dir         -- Write the output file(s) to directory
                             'output-dir'.  (Default: current directory.)
  -f output-file-rootname -- Output files have names starting with the
                             string 'output-file-rootname' followed by '.c'
                             if there is one output file, or '1.c', '2.c',
                             etc, if there is more than one.  (Default: f).
  -l min-output-linelen -- Make lines of the output file(s) at least
                             'min-output-linelen' characters long.
                             (Default: 90.)
  -p prefix-letter      -- Give the translated C identifiers names
                             consisting of the letter 'prefix-letter'
                             followed by a number.  (Default: l).
  -v version-string     -- Insert string 'version-string' as a C comment
                             in the first line of each output file.
                             (Default: no comment is written.)
  -<anything else>      -- Other arguments are passed directly to the
                             C pre-processor, 'cpp'.
  files...              -- The input C source file(s) to be 'munged'.

Spaces are optional between option letters and their accompanying arguments.

USAGE

# A more compact alternative summary, kept for reference:
#	echo "usage:  cmunge [-I include-dir]... [-d output-dir] [-f output-file-rootname]"
#	echo "               [-l min-output-linelen] [-p prefix-letter] [-v version-string]"
#	echo "               [-<anything-else-is-passed-to-cpp>] files..."

	exit (1)
635