/******************************************************************************

      ####   #####  #####   #       #  #####          #    #
     #         #    #    #  #       #  #    #         #    #
      ####     #    #    #  #       #  #####          ######
          #    #    #####   #       #  #    #  ###    #    #
     #    #    #    #   #   #       #  #    #  ###    #    #
      ####     #    #    #  ######  #  #####   ###    #    #

******************************************************************************/
/* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
   for Biomedical Research. All rights reserved. See READ.ME for license. */

/***** String and parsing functions for the helpers library *****/

/* Notes for readers of this header:

   - Functions are declared with empty parameter lists; the arguments each one
     expects are listed in the comment that follows its declaration.
   - The names bool, real, TRUE, FALSE, NULL and LINE are used below but are
     not defined in this file, and neither are strlen(), strcmp(), strncmp()
     and sprintf(). Whatever includes this header must provide them first.
   - Every function-like macro parenthesizes its parameters wherever an
     operator is applied to them, so an expression argument such as p+1
     expands as intended. */

/****************************************************************************
Unfortunately, much of the C-library of string functions is not very
portable: Most functions are in general the same, although they differ
in a number of minor annoying ways. Of particular annoyance are strlen()
and the strn... functions (strncmp(), strncpy(), and strncat()).

The functions strchr(), strrchr(), strspn(), strcspn(), strpbrk(), and
strtok() do not exist in all C implementations. Also, the string <->
number conversion routines, including atof(), strtod(), strtol(),
atoi(), atol() and strtoul() may have compatibility problems. Some of
the helper functions provide similar functionalities while preserving
compatibility: use them instead!

The functions toupper() etc. mentioned in K&R seem to be portable.
However, they are macros in some implementations! Thus, don't try
toupper(ptr++), as ptr may get incremented more than once! Note that
contrary to what K&R says, it is entirely reasonable to assume ASCII
codes are in use.

The only library functions known to be portable are strcat(),
strcpy(), and strcmp(). Otherwise, you should use the functions
provided here. Note that strcat() and strcpy() are inherently
dangerous, as they do not provide any bounds checking, and may
result in a crashing program. nstrcat() etc. are much preferred!
****************************************************************************/

/* Replacements for the C-Library... */

/* These will return TRUE if they don't have to truncate, FALSE otherwise. */
/* THIS IS NOT IMPLEMENTED YET! */
void nstrcpy();   /* args: char *to, *from; int num; copy <=num chars */
void nstrcat();   /* args: char *to, *from; int num; append <=num chars */
void maxstrcat(); /* args: char *to, *from; int max; length kept <=max chars */

/* To avoid the ANSI size_t idiocy... */

#define nstrcmp(s1,s2,max_chars) strncmp((s1),(s2),((int)(max_chars)))
#define len(str) ((int) strlen(str))

/* Other useful stuff... */

char *mkstrcpy(); /* args: char *s; returns an allocated copy */
char *ptr_to();   /* args: constant character string - return a ptr to it */
void strdel();    /* args: char *s; int num; deletes num chars at s */
#define endof(str) ((str)+len(str))  /* ptr to the '\0'; evaluates str twice */

int strfinder();  /* args: char *str, c; get index of c in str or NOT_FOUND */
#define NOT_FOUND (-1)
#define strin(str,chr) (strfinder(str,chr)!=NOT_FOUND)

void strins();    /* args: char *a, *b; insert string b at a */
void nstrins();   /* args: char *a, *b; int num; insert at most num chars */
void maxstrins(); /* args: char *a, *b; int max; length kept <= max */

bool nullstr();   /* args: char *s; TRUE if s==NULL or is all whitespace */
extern char *null_string; /* set to "" */
#define streq(s1,s2) (!strcmp(s1,s2))
#define nstreq(s1,s2,n) (!nstrcmp(s1,s2,n))

/* white() and trash() evaluate their argument exactly once by first storing
   it in the global scratch variables Cw and Ct (so an argument with side
   effects is safe), but as a result both macros overwrite those globals.
   Note that trash() also overwrites Cw, because it invokes white(). */
extern char Cw, Ct;
#define white(chr) ((Cw=(chr))==' ' || Cw=='\t' || Cw=='\n')
#define trash(chr) (((Ct=(chr))!='\0') && (Ct<' ' || Ct>'~') && !white(Ct))

bool nmatches(); /* args: char *s, *t; int num; */
/* returns TRUE if chars in the 1st token in s match those in the token in
   "template" t, and if the token in s is at least num chars long. For
   example: "land" matches the template "lander", but not "lampshade".
   NOTE: s and t should be despace()ed and _filter()ed, but not necessarily
   lowercase()ed (the match is case INSENSITIVE anyway). */
#define matches(s,t) nmatches(s,t,1)

int xstreq(); /* currently broken? */
#define istrlen len /* THIS IS AN OBSOLETE NAME- DON'T USE IT */

/****************************************************************************
Various string crunching routines: despace() changes all globs of
whitespace to single spaces and does away with leading and trailing
whitespace entirely. _filter() removes all non-printing ASCII
characters, and lowercase() converts uppercase letters to lowercase.
crunch() invokes despace, _filter and lowercase. filter_nonspaces() is
like _filter, except that in addition tabs and newlines are converted
to spaces. uppercase() is the obvious opposite of lowercase().
truncstr() limits the length of a string to some number of characters
(not including the trailing '\0'). pad_to_len() adds spaces to the end
of the string until it is a particular length, while append_spaces()
simply adds the requested number of spaces to the string. All
side-effect their argument str, and return a pointer to it for yucks.
****************************************************************************/

char *despace();          /* args: char *str; side-effected */
char *lowercase();        /* args: char *str; side-effected */
char *uppercase();        /* args: char *str; side-effected */
char *_filter();          /* args: char *str; side-effected */
char *filter_nonspaces(); /* args: char *str; side-effected */
char *crunch();           /* args: char *str; despace(_filter(lowercase())) */

char *truncstr();      /* args: char *str; int max_chars; str side-effected */
char *pad_to_len();    /* args: char *str; int max_chars; adds spaces */
char *append_spaces(); /* args: char *str; int num_spaces; also adds spaces */

/****************************************************************************
Each of the token-parsing functions work as follows:

bool itoken(), ltoken(), rtoken();
   args: char **p_str; <value type> default_value, *result;

bool stoken();
   args: char **p_str; char *default_value, *result;

bool nstoken(), maxstoken();
   args: char **p_str; char *default_value,*result; int num_chars;

bool stokenof();
   args: char **p_str; char *default_value, *result; char *parsable_chars;

bool nstokenof(), maxstokenof();
   args: char **p_str; char *default_value, *result; int num_chars;
         char *parsable_chars;

If one of these succeeds: *p_str points to the delimiting character which
follows the token (which may be '\0'), *result is set, and TRUE is returned.

If no token is avail: *p_str points to the '\0' at the end of the
string. In this case, if a default is given then TRUE is returned and
*result is set. Otherwise, FALSE is returned, and *result is
undefined.

If the token is bad: *p_str points to the beginning of the token (so
that nullstr(*p_str)==FALSE, see bad_token() below). If a default is
available, then *result is set to it, otherwise *result is undefined.
FALSE is always returned.

Note that for itoken(), rtoken(), ltoken(), stoken(), and stokenof(),
the length of a token is limited to TOKLEN chars. (Thus, stoken() etc.
should be passed a pointer to a string of at least TOKLEN+1 chars to
hold the result.) Longer tokens are truncated (which always makes
numbers 'bad').

For nstoken(), the user may specify the length of the result string to
use instead of TOKLEN. For maxstoken() the length of the token must be
<= num_chars characters, otherwise the result string is truncated,
*p_str is left untouched, and FALSE is returned.

stokenof(), nstokenof(), and maxstokenof() specify the legal
characters which may comprise the token. Other characters (excluding
the self_delimiting, described below) cause FALSE to be returned, and
no token is still parsed from the string. ANYCHAR (really NULL,
defined below for parse_char()) may be used for parsable_chars to
indicate that any character is OK.
****************************************************************************/

#define TOKLEN 40

/* NOTE(review): the comment above documents these as returning bool, but
   they are declared int here. The declarations are left as they were,
   presumably to agree with the definitions elsewhere - confirm. */
int itoken(); /* int token */
int ltoken(); /* long int token */
int rtoken(); /* real token */

int stok();   /* INTERNAL USE ONLY! */
int stoken(); /* args: p,def,val; does stok(p,def,val,TOKLEN,TRUE,NULL) */
#define nstoken(p_str,def,val,num)          stok(p_str,def,val,num,TRUE,NULL)
#define maxstoken(p_str,def,val,num)        stok(p_str,def,val,num,FALSE,NULL)
#define stokenof(p_str,def,val,chrs)        stok(p_str,def,val,TOKLEN,TRUE,chrs)
#define nstokenof(p_str,def,val,num,chrs)   stok(p_str,def,val,num,TRUE,chrs)
#define maxstokenof(p_str,def,val,num,chrs) stok(p_str,def,val,num,FALSE,chrs)

/* Possible default values */
#define sREQUIRED NULL
#define iREQUIRED (-32768)
#define lREQUIRED (-1073741823L)
#define rREQUIRED ((real)-1.2345e31)

/* To decipher FALSE responses of the token parsers */
#define no_token(p_str)  (**(p_str)=='\0')
#define bad_token(p_str) (**(p_str)!='\0')

/*** Usually, tokens are separated by whitespace. Certain characters
     however, which are listed in the self_delimiting string, will always
     be parsed as separate tokens, whether surrounded by whitespace or not.
     For example: the string "53 - (14+2)" with self_delimiting equal to
     "()-+" will parse into tokens "53","-","(","14","+","2", and ")". ***/
extern char *self_delimiting;

/*** Other useful stuff for parsing ***/
int count_tokens();  /* args: char *str; */
bool is_a_token();   /* args: char *str; must be despaced, or from stoken() */
bool split_string(); /* args: char *str, **rest, divider; rest side-effected */


/****************************************************************************
The range functions work as follows:

args: <value type> *value, low, high; returns bool;

If *value is in the range [low,high] (inclusive), then TRUE is
returned. Otherwise, *value is set to the appropriate limit (low or
high) and FALSE is returned.
****************************************************************************/

/* NOTE(review): documented above as returning bool, but declared int here;
   left unchanged - confirm against the definitions. */
int irange(); /* integer range */
int lrange(); /* long int range */
int rrange(); /* real range */

/****************************************************************************
Parse_char parsing is similar to (and compatible with) the token
parsing routines shown above.

bool parse_char(char **p_str,*parsable_chars; int skip_whitespace; char *c;)

First, if skip_whitespace is TRUE, *p_str is incremented until a non-
whitespace char or the '\0' is encountered. If **p_str is '\0', FALSE
is returned and *c is set to '\0'. If **p_str is in the
parsable_chars string, or if parsable_chars is NULL, then *c=**p_str
and *p_str is incremented. If **p_str is not in the parsable_chars
string, then *c=**p_str, *p_str is NOT incremented, and FALSE is
returned.

parse_whitespace() moves the ptr along until a non-whitespace character
is encountered.
****************************************************************************/

bool parse_char();       /* args shown above */
void parse_whitespace(); /* args: char **p_str; *p_str is side-effected */

/* Arguments to parse_char() */
#define ANYCHAR   NULL
#define SKIPWHITE TRUE
#define NOSKIP    FALSE

/* To decipher FALSE responses of parse_char. bad_char() and white_char()
   go through white(), and so overwrite the global Cw; bad_char() also
   evaluates its argument twice. */
#define no_char(p_str)    (**(p_str)=='\0')
#define bad_char(p_str)   (**(p_str)!='\0' && !white(**(p_str)))
#define white_char(p_str) (white(**(p_str)))


/***********************************************************************
The global pool of strings for printing things into. These strings are
"allocated" from a reusable global pool of strings, which when
exhausted begins reusing strings previously allocated. Thus, strings
gotten using get_temp_string() should be considered VERY temporary
storage: use mkstrcpy() etc. to make more permanent storage. The same
caution applies to any routines which return strings "allocated" by
this function, including pr(), prn(), prd(), and others. Be sure to
document this in your functions which use these or which call
get_temp_string() directly.
***********************************************************************/

char *get_temp_string(); /* returns the next available string for bashing */

#define NUM_TEMP_STRINGS 50
#define TEMP_STRING_LEN  500

/************************************************************************
rs() etc.: The funky real number printing routines...

rs(), meaning "real to string", takes a format number (like one given
to sprintf) and a data number (both reals) and returns a string
containing a human readable form of the number. Unlike sprintf(), if
the number can't fit properly, decimal places are thrown away. If it
still can't fit, the string is filled with asterisks. Thus, unlike
sprintf()ed strings, the returned string will ALWAYS have the
specified length. The format number must be of the form n.m, where n
and m are SINGLE DIGITS, m<=n-2, and n.m>0.0. n specifies the printed
string length, and m specifies the desired number of decimal places.
If the number to be printed is negative, then the minus sign will take
one space of the string's length. Note that decimal places which are
truncated are simply cut out, not rounded out!

rsn(), with 'n' for "negative", is like rs() except that if the number
is positive, a leading space is printed where the minus sign will
appear for negative numbers. This way columnar output will have
the leading digits (rather than leading digit OR minus sign) line up,
and positive and negative numbers will both be printed to the same
precision.

rsd(), with 'd' for "decimal", is also essentially the same as rs() except
that it adds spaces to the front rather than rear for shorter numbers,
in order to make columns of numbers line up on their decimal points.

Each of these routines return a string "allocated" (appropriated) using
get_temp_string(). Thus, heed the warnings above.
************************************************************************/

char *rs();  /* args: real format, num_to_print; */
char *rsn(); /* args: real format, num_to_print; */
char *rsd(); /* args: real format, num_to_print; */

/* other output formatting stuff... */
#define sf sprintf
char *binary(); /* args: int num_to_print, num_bits; char *str; */

/* macro char *maynl(); args: int chars;
   returns a ptr to a string containing only the \n character if the number of
   chars is too many to fit on a screen line, and otherwise returns a null
   (e.g. zero length) string. For example:

   sprintf(str,"successfully loaded file: %s%s\n",maynl(len(name)+26),name);

   maynls(str,chars) is the same as maynl(len(str)+chars) */

#define maynl(chars)      ((chars)>LINE ? "\n" : "")
#define maynls(str,chars) ((len(str)+(chars))>LINE ? "\n" : "")

/* macro char *maybe_s(num); returns a string containing "s" if num!=1,
   or a string of "" if num==1. Useful for making words maybe plural. */
#define maybe_s(n) ((n)!=1 ? "s" : "")
/* macro char *maybe_sp(num); returns a string containing "" if num!=1,
   or a string of " " if num==1. Useful with maybe_s for lining things up */
#define maybe_sp(n) ((n)!=1 ? "" : " ")

void str_init();