1 /******************************************************************************
2 
3   ####    #####  #####   #          #    #####           #    #
4  #          #    #    #  #          #    #    #          #    #
5   ####      #    #    #  #          #    #####           ######
6       #     #    #####   #          #    #    #   ###    #    #
7  #    #     #    #   #   #          #    #    #   ###    #    #
8   ####      #    #    #  ######     #    #####    ###    #    #
9 
10 ******************************************************************************/
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12    for Biomedical Research. All rights reserved. See READ.ME for license. */
13 
14 /***** String and parsing functions for the helpers library *****/
15 
16 /****************************************************************************
17 Unfortunately, much of the C-library of string functions is not very
18 portable: Most functions are in general the same, although they differ
19 in a number of minor annoying ways. Of particular annoyance are strlen()
20 and the strn... functions (strncmp(), strncpy(), and strncat()).
21 
22 The functions strchr(), strrchr(), strspn(), strcspn(), strpbrk(), and
23 strtok() do not exist in all C implementations. Also, the string <->
24 number conversion routines, including atof(), strtod(), strtol(),
25 atoi(), atol() and strtoul() may have compatibility problems. Some of
26 the helper functions provide similar functionalities while preserving
27 compatibility: use them instead!
28 
29 The functions toupper() etc. mentioned in K&R seem to be portable.
30 However, they are macros in some implementations! Thus, don't try
31 toupper(ptr++), as ptr may get incremented more than once! Note that
32 contrary to what K&R says, it is entirely reasonable to assume ASCII
33 codes are in use.
34 
35 The only library functions known to be portable are strcat(),
36 strcpy(), and strcmp(). Otherwise, you should use the functions
37 provided here. Note that strcat() and strcpy() are inherently
38 dangerous, as they do not provide any bounds checking, and may
39 result in a crashing program. nstrcat() etc. are much preferred!
40 ****************************************************************************/
41 
42 /* Replacements for the C-Library... */
43 
44 /* These will return TRUE if they don't have to truncate, FALSE otherwise. */
45 /* THIS IS NOT IMPLEMENTED YET! (For now all three are declared void, so
   callers get no truncation indication.) */
46 void nstrcpy();	  /* args: char *to, *from; int num; copy <=num chars */
47 void nstrcat();	  /* args: char *to, *from; int num; append <=num chars */
48 void maxstrcat(); /* args: char *to, *from; int max; length kept <=max chars */
49 
50 /* To avoid the ANSI size_t idiocy... */
51 
/* int-based wrappers around the C library, so callers never handle size_t.
   nstrcmp(s1,s2,max_chars): strncmp() over at most max_chars characters;
     the count is converted to int as a whole, so an expression argument
     such as a+b is cast after it is evaluated, not operand by operand.
   len(str): strlen(str) converted to int. */
#define nstrcmp(s1,s2,max_chars) strncmp((s1),(s2),((int)(max_chars)))
#define len(str) ((int) strlen(str))
54 
55 /* Other useful stuff... */
56 
57 char *mkstrcpy(); /* args: char *s; returns an allocated copy */
58 char *ptr_to();   /* args: constant character string - return a ptr to it */
59 void strdel();    /* args: char *s; int num; deletes num chars at s */
/* endof(str) yields a pointer to the terminating '\0' of str (str plus its
   length). NOTE: str is evaluated twice, so avoid side-effecting arguments. */
60 #define endof(str) ((str)+len(str))
61 
62 int strfinder();  /* args: char *str, c; get index of c in str or NOT_FOUND */
63 #define NOT_FOUND (-1)
/* strin(str,chr) is nonzero exactly when strfinder() does not return
   NOT_FOUND, i.e. when chr occurs in str. */
64 #define strin(str,chr) (strfinder(str,chr)!=NOT_FOUND)
65 
/* String insertion, in plain, count-limited and total-length-limited forms. */
66 void strins();    /* args: char *a, *b; insert string b at a */
67 void nstrins();	  /* args: char *a, *b; int num; insert at most num chars */
68 void maxstrins(); /* args: char *a, *b; int max; length kept <= max */
69 
70 bool nullstr();      /* args: char *s; TRUE if s==NULL or is all whitespace */
71 extern char *null_string; /* set to "" */
/* streq(s1,s2) is nonzero when strcmp() reports the strings equal;
   nstreq(s1,s2,n) is the same test limited to n chars via nstrcmp(). */
72 #define streq(s1,s2)    (!strcmp(s1,s2))
73 #define nstreq(s1,s2,n) (!nstrcmp(s1,s2,n))
74 
/* Cw and Ct are scratch globals that let white() and trash() evaluate their
   argument only once; each use of the macros overwrites them. */
75 extern char Cw, Ct;
/* white(chr): nonzero if chr is a space, tab or newline. Assigns chr to Cw. */
76 #define white(chr) ((Cw=(chr))==' ' || Cw=='\t' || Cw=='\n')
/* trash(chr): nonzero if chr is not '\0', lies outside the range ' '..'~',
   and is not whitespace. Assigns chr to Ct, and to Cw when white() is reached. */
77 #define trash(chr) (((Ct=(chr))!='\0') && (Ct<' ' || Ct>'~') && !white(Ct))
78 
79 bool nmatches(); /* args: char *s, *t; int num; */
80 /* returns TRUE if chars in the 1st token in s match those in the token in
81    "template" t, and if the token in s is at least num chars long. For
82    example: "land" matches the template "lander", but not "lampshade".
83    NOTE: s and t should be despace()ed and filter()ed, but not necessarily
84    lowercase()ed (the match is case INSENSITIVE anyway). */
/* matches(s,t) is nmatches() with a minimum token length of 1. */
85 #define matches(s,t) nmatches(s,t,1)
86 
87 int xstreq(); 	/* currently broken? */
88 #define istrlen len /* THIS IS AN OBSOLETE NAME FOR len() - DON'T USE IT */
89 
90 /****************************************************************************
91 Various string crunching routines: despace() changes all globs of
92 whitespace to single spaces and does away with leading and trailing
93 whitespace entirely. filter() removes all non-printing ASCII
94 characters, and lowercase() converts uppercase letters to lowercase().
95 crunch() invokes despace, filter and lowercase.  filter_nonspaces() is
96 like filter, except that in addition tabs and newlines are converted
97 to spaces. uppercase() is the obvious opposite of lowercase().
98 truncstr() limits the length of a string to some number of characters
99 (not including the trailing '\0'). pad_to_len() adds spaces to the end
100 of the string until it is a particular length, while append_spaces()
101 simply adds the requested number of spaces to the string. All
102 side-effect their argument str, and return a pointer to it for yucks.
103 ****************************************************************************/
104 
/* Each routine below modifies its str argument in place and returns a
   pointer to it. Note the filter routine is declared here as _filter(). */
105 char *despace(); 	   /* args: char *str; side-effected */
106 char *lowercase();	   /* args: char *str; side-effected */
107 char *uppercase();	   /* args: char *str; side-effected */
108 char *_filter();		   /* args: char *str; side-effected */
109 char *filter_nonspaces();  /* args: char *str; side-effected */
110 char *crunch(); 	   /* args: char *str; despace(_filter(lowercase())) */
111 
112 char *truncstr();	/* args: char *str; int max_chars; str side-effected */
113 char *pad_to_len();     /* args: char *str; int max_chars; adds spaces */
114 char *append_spaces();  /* args: char *str; int num_spaces; also adds spaces */
115 
116 /****************************************************************************
117 Each of the token-parsing functions work as follows:
118 
119 bool itoken(), ltoken(), rtoken();
120 args: char **p_str; <value type> default_value, *result;
121 
122 bool stoken();
123 args: char **p_str; char *default_value, *result;
124 
125 bool nstoken(), maxstoken();
126 args: char **p_str; char *default_value,*result; int num_chars;
127 
128 bool stokenof();
129 args: char **p_str; char *default_value, *result; char *parsable_chars;
130 
131 bool nstokenof(), maxstokenof();
132 args: char **p_str; char *default_value, *result; int num_chars;
133       char *parsable_chars;
134 
135 If one of these succeeds: *p_str points to the delimiting character which
136 follows the token (which may be '\0'), *result is set, and TRUE is returned.
137 
138 If no token is avail: *p_str points to the '\0' at the end of the
139 string. In this case, if a default is given then TRUE is returned and
140 *result is set.  Otherwise, FALSE is returned, and *result is
141 undefined.
142 
143 If the token is bad: *p_str points to the beginning of the token (so
144 that nullstr(*p_str)==FALSE, see bad_token() below). If a default is
145 available, then *result is set to it, otherwise *result is undefined.
146 FALSE is always returned.
147 
148 Note that for itoken(), rtoken(), ltoken(), stoken(), and stokenof(),
149 the length of a token is limited to TOKLEN chars. (Thus, stoken() etc.
150 should be passed a pointer to a string of at least TOKLEN+1 chars to
151 hold the result.) Longer tokens are truncated (which always makes
152 numbers 'bad').
153 
154 For nstoken(), the user may specify the length of the result string to
155 use instead of TOKLEN. For maxstoken() the length of the token must be
156 <= num_chars characters, otherwise the result string is truncated,
157 *p_str is left untouched, and FALSE is returned.
158 
159 stokenof(), nstokenof(), and maxstokenof() specify the legal
160 characters which may comprise the token. Other characters (excluding
161 the self_delimiting, described below) cause FALSE to be returned, and
162 no token is parsed from the string. ANYCHAR (really NULL,
163 defined below for parse_char()) may be used for
164 parsable_chars to indicate that any character is OK.
165 ****************************************************************************/
166 
/* Maximum token length for itoken(), ltoken(), rtoken(), stoken() and
   stokenof(); string results need room for TOKLEN+1 chars. */
167 #define TOKLEN 40
/* The parsers are described as returning bool but are declared int here. */
168 int itoken();	 /* int token */
169 int ltoken();	 /* long int token */
170 int rtoken();	 /* real token */
171 
172 int stok(); /* INTERNAL USE ONLY! */
173 int stoken(); /* args: p,def,val; does stok(p,def,val,TOKLEN,TRUE,NULL) */
/* The string-token variants are all stok() calls differing in three trailing
   arguments: the length limit (num or TOKLEN), a flag (TRUE for the n...
   forms, FALSE for the max... forms), and the parsable-character set (chrs,
   or NULL for any character).
   NOTE(review): the flag presumably selects "truncate long tokens" (TRUE)
   versus "reject long tokens" (FALSE) - confirm against stok(). */
174 #define nstoken(p_str,def,val,num) stok(p_str,def,val,num,TRUE,NULL)
175 #define maxstoken(p_str,def,val,num) stok(p_str,def,val,num,FALSE,NULL)
176 #define stokenof(p_str,def,val,chrs) stok(p_str,def,val,TOKLEN,TRUE,chrs)
177 #define nstokenof(p_str,def,val,num,chrs)   stok(p_str,def,val,num,TRUE,chrs)
178 #define maxstokenof(p_str,def,val,num,chrs) stok(p_str,def,val,num,FALSE,chrs)
179 
/* Possible default values, one per token type (string, int, long, real).
   NOTE(review): the *REQUIRED names suggest these sentinels mean "no default
   is available" to the token parsers - confirm against their definitions.
   Every negative constant is fully parenthesized so it expands safely inside
   any surrounding expression. */
#define sREQUIRED NULL
#define iREQUIRED (-32768)
#define lREQUIRED (-1073741823L)
#define rREQUIRED ((real)-1.2345e31)
185 
/* To decipher FALSE responses of the token parsers. p_str is the char **
   that was handed to the parser:
     no_token(p_str)  - *p_str points at the terminating '\0' (nothing left);
     bad_token(p_str) - *p_str points at a non-'\0' char (an unparsable token).
   The argument is parenthesized so any pointer expression may be passed. */
#define no_token(p_str)  (**(p_str)=='\0')
#define bad_token(p_str) (**(p_str)!='\0')
189 
190 /*** Usually, tokens are separated by whitespace. Certain characters
191 however, which are listed in the self_delimiting string, will always
192 be parsed as separate tokens, whether surrounded by whitespace or not.
193 For example: the string "53 - (14+2)" with self_delimiting equal to
194 "()-+" will parse into tokens "53","-","(","14","+","2", and ")". ***/
195 extern char *self_delimiting; /* chars that always parse as separate tokens */
196 
197 /*** Other useful stuff for parsing ***/
198 int count_tokens();  /* args: char *str; */
199 bool is_a_token();   /* args: char *str; must be despaced, or from stoken() */
200 bool split_string(); /* args: char *str, **rest, divider; rest side-effected */
201 
202 
203 /****************************************************************************
204 The range functions work as follows:
205 
206    args: <value type> *value, low, high; returns bool;
207 
208    If *value is in the range [low,high] (inclusive), then TRUE is
209 returned.  Otherwise, *value is set to the appropriate limit (low or
210 high) and FALSE is returned.
211 ****************************************************************************/
212 
/* Described above as returning bool; declared int here. */
213 int irange();	 /* integer range; args: int *value, low, high */
214 int lrange(); 	 /* long int range; args: long *value, low, high */
215 int rrange();	 /* real range; args: real *value, low, high */
216 
217 /****************************************************************************
218 Parse_char parsing is similar to (and compatible with) the token
219 parsing routines shown above.
220 
221 bool parse_char(char **p_str,*parsable_chars; int skip_whitespace; char *c;)
222 
223 First, if skip_whitespace is TRUE, *p_str is incremented until a non-
224 whitespace char or the '\0' is encountered.  If **p_str is '\0', FALSE
225 is returned and *c is set to '\0'.  If **p_str is in the
226 parsable_chars string, or if parsable_chars is NULL, then *c=**p_str
227 and *p_str is incremented.  If **p_str is not in the parsable_chars
228 string, then *c=**p_str, *p_str is NOT incremented, and FALSE is
229 returned.
230 
231 parse_whitespace() moves the ptr along until a non-whitespace character
232 is encountered.
233 ****************************************************************************/
234 
235 bool parse_char();	  /* args shown above */
236 void parse_whitespace();  /* args: char **p_str; *p_str is side-effected */
237 
238 /* Arguments to parse_char(): ANYCHAR is passed as parsable_chars to accept
   any character; SKIPWHITE and NOSKIP are the two values of skip_whitespace. */
239 #define ANYCHAR    NULL
240 #define SKIPWHITE  TRUE
241 #define NOSKIP     FALSE
242 
/* To decipher FALSE responses of parse_char. p_str is the char ** that was
   handed to parse_char():
     no_char(p_str)    - *p_str points at the terminating '\0';
     bad_char(p_str)   - *p_str points at a char that is neither '\0' nor
                         whitespace;
     white_char(p_str) - *p_str points at a whitespace char.
   bad_char() and white_char() go through white(), which assigns to the
   global Cw. The argument is parenthesized so any pointer expression may be
   passed. */
#define no_char(p_str) (**(p_str)=='\0')
#define bad_char(p_str) (**(p_str)!='\0' && !white(**(p_str)))
#define white_char(p_str) (white(**(p_str)))
247 
248 
249 /***********************************************************************
250 The global pool of strings for printing things into. These strings are
251 "allocated" from a reusable global pool of strings, which when
252 exhausted begins reusing strings previously allocated. Thus, strings
253 gotten using get_temp_string() should be considered VERY temporary
254 storage: use mkstrcpy() etc. to make more permanent storage. The same
255 caution applies to any routines which return strings "allocated" by
256 this function, including pr(), prn(), prd(), and others. Be sure to
257 document this in your functions which use these or which call
258 get_temp_string() directly.
259 ***********************************************************************/
260 
261 char *get_temp_string(); /* returns the next available string for bashing */
262 
/* NOTE(review): presumably the number of strings in the temporary pool and
   the length of each one - confirm against the pool's definition. */
263 #define NUM_TEMP_STRINGS 50
264 #define TEMP_STRING_LEN 500
265 
266 /************************************************************************
267 rs() etc.: The funky real number printing routines...
268 
269 rs(), meaning "real to string", takes a format number (like one given
270 to sprintf) and a data number (both reals) and returns a string
271 containing a human readable form of the number. Unlike sprintf(), if
272 the number can't fit properly, decimal places are thrown away. If it
273 still can't fit, the string is filled with asterisks. Thus, unlike
274 sprintf()ed strings, the returned string will ALWAYS have the
275 specified length. The format number must be of the form n.m, where n
276 and m are SINGLE DIGITS, m<=n-2, and n.m>0.0. n specifies the printed
277 string length, and m specifies the desired number of decimal places.
278 If the number to be printed is negative, then the minus sign will take
279 one space of the string's length. Note that decimal places which are
280 truncated are simply cut out, not rounded out!
281 
282 rsn(), with 'n' for "negative", is like rs() except that if the number
283 is positive, a leading space is printed where the minus sign will
284 appear for negative numbers. This way columnar output will have
285 the leading digits (rather than leading digit OR minus sign) line up,
286 and positive and negative numbers will both be printed to the same
287 precision.
288 
289 rsd(), with 'd' for "decimal", is also essentially the same as rs() except
290 that it adds spaces to the front rather than rear for shorter numbers,
291 in order to make columns of numbers line up on their decimal points.
292 
293 Each of these routines return a string "allocated" (appropriated) using
294 get_temp_string(). Thus, heed the warnings above.
295 ************************************************************************/
296 
/* Each returns a string taken from get_temp_string() - see the warnings above. */
297 char *rs();   /* args: real format, num_to_print; */
298 char *rsn();  /* args: real format, num_to_print; */
299 char *rsd();  /* args: real format, num_to_print; */
300 
301 /* other output formatting stuff... */
302 #define sf sprintf /* shorthand alias for sprintf() */
303 char *binary(); /* args: int num_to_print, num_bits; char *str; */
304 
/* macro char *maynl();  args: int chars;
   returns a ptr to a string containing only the \n character if the number of
   chars is too many to fit on a screen line (i.e. chars > LINE), and otherwise
   returns a null (i.e. zero length) string. For example:

   sprintf(str,"successfully loaded file: %s%s\n",maynl(len(name)+26),name);

   maynls(str,chars) is the same as maynl(len(str)+chars). The chars argument
   is parenthesized so that expression arguments (e.g. a ? b : c) are added
   to len(str) as a whole. */

#define maynl(chars) ((chars)>LINE ? "\n" : "")
#define maynls(str,chars) ((len(str)+(chars))>LINE ? "\n" : "")

/* macro char *maybe_s(num); returns a string containing "s" if num!=1,
   or a string of "" if num==1. Useful for making words maybe plural. */
#define maybe_s(n) ((n)!=1 ? "s" : "")
/* macro char *maybe_sp(num); returns a string containing "" if num!=1,
   or a string of " " if num==1. Useful with maybe_s for lining things up */
#define maybe_sp(n) ((n)!=1 ? "" : " ")
323 
/* NOTE(review): presumably the one-time initializer for this module's
   globals (e.g. null_string, self_delimiting, the temp-string pool) -
   confirm against the definition. */
324 void str_init();
325 
326 
327 
328 
329 
330 
331 
332