1 /*
2 * NIST Utils Class Library
3 * clutils/Str.cc
4 * April 1997
5 * K. C. Morris
6 * David Sauder
7 
8 * Development of this software was funded by the United States Government,
9 * and is not subject to copyright.
10 */
11 
12 #include "Str.h"
13 #include <sstream>
14 #include <string>
15 
16 /******************************************************************
17  ** Procedure:  string functions
18  ** Description:  These functions take a character or a string and return
19  ** a temporary copy of the string with the function applied to it.
20  ** Parameters:
21  ** Returns:  temporary copy of characters
22  ** Side Effects:
23  ** Status:  complete
24  ******************************************************************/
25 
/**
 * Return the lowercase equivalent of \a c.
 *
 * \param c  character to convert
 * \returns  the lowercase form of \a c when the current C locale defines
 *           one, otherwise \a c unchanged
 */
char ToLower( const char c ) {
    // The <cctype> functions are only defined for values representable as
    // unsigned char (or EOF). A plain char may be negative, so convert it
    // first. tolower() already returns its argument unchanged when there is
    // nothing to convert, so no separate isupper() test is needed.
    return static_cast<char>( tolower( static_cast<unsigned char>( c ) ) );
}
34 
/**
 * Return the uppercase equivalent of \a c.
 *
 * \param c  character to convert
 * \returns  the uppercase form of \a c when the current C locale defines
 *           one, otherwise \a c unchanged
 */
char ToUpper( const char c ) {
    // The <cctype> functions are only defined for values representable as
    // unsigned char (or EOF). A plain char may be negative, so convert it
    // first. toupper() already returns its argument unchanged when there is
    // nothing to convert, so no separate islower() test is needed.
    return static_cast<char>( toupper( static_cast<unsigned char>( c ) ) );
}
42 
/**
 * Place in \a strNew a lowercase version of \a strOld.
 *
 * \param strOld  NUL-terminated source string (must not be null)
 * \param strNew  destination buffer; the caller must provide room for at
 *                least strlen(strOld) + 1 characters. It may be the same
 *                buffer as \a strOld, since each character is read before
 *                the same position is written.
 * \returns  \a strNew
 */
char * StrToLower( const char * strOld, char * strNew ) {
    char * out = strNew;
    for( const char * in = strOld; *in != '\0'; ++in, ++out ) {
        // Convert via unsigned char: passing a negative char to tolower()
        // is undefined behaviour.
        *out = static_cast<char>( tolower( static_cast<unsigned char>( *in ) ) );
    }
    *out = '\0';
    return strNew;
}
54 
/**
 * Store a lowercase copy of \a word in \a s.
 *
 * The input may be of any length; no fixed-size scratch buffer is used.
 *
 * \param word  NUL-terminated source string (must not be null); it may
 *              point into \a s itself
 * \param s     receives the converted text, replacing its previous contents
 * \returns  s.c_str(), which stays valid only until \a s is next modified
 */
const char * StrToLower( const char * word, std::string & s ) {
    // Work on a private copy so that the call remains correct when word
    // aliases the storage of s.
    std::string lowered( word );
    for( std::string::size_type i = 0; i < lowered.size(); ++i ) {
        // Convert via unsigned char: passing a negative char to tolower()
        // is undefined behaviour.
        lowered[i] = static_cast<char>( tolower( static_cast<unsigned char>( lowered[i] ) ) );
    }
    s.swap( lowered );
    return s.c_str();
}
67 
/**
 * Store an uppercase copy of \a word in \a s.
 *
 * The input may be of any length; no fixed-size scratch buffer is used.
 *
 * \param word  NUL-terminated source string (must not be null); it may
 *              point into \a s itself
 * \param s     receives the converted text, replacing its previous contents
 * \returns  s.c_str(), which stays valid only until \a s is next modified
 */
const char * StrToUpper( const char * word, std::string & s ) {
    // Work on a private copy so that the call remains correct when word
    // aliases the storage of s.
    std::string raised( word );
    for( std::string::size_type i = 0; i < raised.size(); ++i ) {
        // Convert via unsigned char: passing a negative char to toupper()
        // is undefined behaviour.
        raised[i] = static_cast<char>( toupper( static_cast<unsigned char>( raised[i] ) ) );
    }
    s.swap( raised );
    return s.c_str();
}
80 
/**
 * Store in \a s a copy of \a word in which every '/' and '.' is replaced
 * by '_' and every other character is converted to uppercase.
 *
 * The input may be of any length; no fixed-size scratch buffer is used.
 *
 * \param word  NUL-terminated source string (must not be null); it may
 *              point into \a s itself
 * \param s     receives the converted text, replacing its previous contents
 * \returns  s.c_str(), which stays valid only until \a s is next modified
 */
const char * StrToConstant( const char * word, std::string & s ) {
    // Work on a private copy so that the call remains correct when word
    // aliases the storage of s.
    std::string constant( word );
    for( std::string::size_type i = 0; i < constant.size(); ++i ) {
        char & ch = constant[i];
        if( ch == '/' || ch == '.' ) {
            ch = '_';
        } else {
            // Convert via unsigned char: passing a negative char to
            // toupper() is undefined behaviour.
            ch = static_cast<char>( toupper( static_cast<unsigned char>( ch ) ) );
        }
    }
    s.swap( constant );
    return s.c_str();
}
97 
/**************************************************************//**
 ** \fn  StrCmpIns (const char * str1, const char * str2)
 ** \param str1  first NUL-terminated string (must not be null)
 ** \param str2  second NUL-terminated string (must not be null)
 ** \returns  Comparison result
 ** Compares two strings case insensitively (both sides are lowercased).
 ** Returns < 0  when str1 is less than str2
 **         == 0 when str1 equals str2
 **         > 0  when str1 is greater than str2
 ** The result is the difference of the first pair of lowercased
 ** characters that differ, taken as plain char values.
 ******************************************************************/
int StrCmpIns( const char * str1, const char * str2 ) {
    char c1, c2;
    for( ;; ++str1, ++str2 ) {
        // Convert via unsigned char: passing a negative char to tolower()
        // is undefined behaviour.
        c1 = static_cast<char>( tolower( static_cast<unsigned char>( *str1 ) ) );
        c2 = static_cast<char>( tolower( static_cast<unsigned char>( *str2 ) ) );
        // Stop at the first mismatch or once both strings have ended.
        if( c1 != c2 || c1 == '\0' ) {
            break;
        }
    }
    return c1 - c2;
}
114 
/**
 * Test if a string ends with the given suffix.
 *
 * \param s    string to examine
 * \param suf  NUL-terminated suffix to look for; may be null
 * \returns  true when the last characters of \a s equal \a suf (an empty
 *           suffix always matches); false when \a suf is null or longer
 *           than \a s
 */
bool StrEndsWith( const std::string & s, const char * suf ) {
    if( suf == NULL ) {
        return false;
    }

    const std::string tail( suf );
    const std::string::size_type tailLen = tail.length();
    const std::string::size_type total = s.length();

    // A suffix longer than the string itself can never match.
    if( tailLen > total ) {
        return false;
    }

    // Compare the trailing tailLen characters of s with the suffix in place.
    return s.compare( total - tailLen, tailLen, tail ) == 0;
}
133 
134 /**
135  *  Extract the next delimited string from the istream.
136  */
GetLiteralStr(istream & in,ErrorDescriptor * err)137 std::string GetLiteralStr( istream & in, ErrorDescriptor * err ) {
138     std::string s;
139     in >> std::ws; // skip whitespace
140 
141     if( in.good() && in.peek() == STRING_DELIM ) {
142         s += in.get();
143         bool allDelimsEscaped = true;
144         while( in.good() ) {
145             if( in.peek() == STRING_DELIM ) {
146                 // A delimiter closes the string unless it's followed by another
147                 // delimiter, in which case it's escaped. \S\ starts an ISO
148                 // 8859 character escape sequence, so we ignore delimiters
149                 // prefixed with \S\.
150                 if( !StrEndsWith( s, "\\S\\" ) ) {
151                     allDelimsEscaped = !allDelimsEscaped;
152                 }
153             } else if( !allDelimsEscaped ) {
154                 // Found normal char after unescaped delim, so last delim
155                 // that was appended terminated the string.
156                 break;
157             }
158             if( !in.eof() ) {
159                 s += in.get();
160             }
161         }
162         if( allDelimsEscaped ) {
163             // Any delimiters found after the opening delimiter were escaped,
164             // so the string is unclosed.
165             // non-recoverable error
166             err->AppendToDetailMsg( "Missing closing quote on string value.\n" );
167             err->AppendToUserMsg( "Missing closing quote on string value.\n" );
168             err->GreaterSeverity( SEVERITY_INPUT_ERROR );
169         }
170     }
171     return s;
172 }
173 
/**************************************************************//**
 ** \fn  PrettyTmpName (const char * oldname)
 ** \param oldname  NUL-terminated name to convert (must not be null)
 ** \returns  the converted name in a static buffer
 ** Capitalizes the first char of the name and the char that directly
 ** follows an underscore; every other char is made lowercase. The
 ** underscores themselves are kept. The char after an underscore is not
 ** treated as a separator again, so "a__b" becomes "A__b".
 ** The result is truncated to BUFSIZ - 1 characters.
 ** Side Effects:  the returned buffer is a function-local static that is
 ** shared by all callers and overwritten by the next call.
 ******************************************************************/
const char * PrettyTmpName( const char * oldname ) {
    static char newname [BUFSIZ];
    const int maxLen = BUFSIZ - 1; // highest index, reserved for the '\0'
    int i = 0;

    // Characters are converted via unsigned char because passing a negative
    // char to tolower()/toupper() is undefined behaviour.
    while( i < maxLen && oldname [i] != '\0' ) {
        newname [i] = static_cast<char>( tolower( static_cast<unsigned char>( oldname [i] ) ) );
        if( oldname [i] == '_' ) { /*  character is '_'   */
            ++i;
            if( i >= maxLen ) {
                // No room left for the capitalized character.
                break;
            }
            newname [i] = static_cast<char>( toupper( static_cast<unsigned char>( oldname [i] ) ) );
        }
        if( oldname [i] != '\0' ) {
            ++i;
        }
    }
    newname [0] = static_cast<char>( toupper( static_cast<unsigned char>( oldname [0] ) ) );
    newname [i] = '\0';
    return newname;
}
198 
199 /**************************************************************//**
200  ** \fn  PrettyNewName (char * oldname)
201  ** \returns  a new capitalized name
202  ** Capitalizes first char of word, rest is lowercase. Removes '_'.
203  ** Side Effects:  allocates memory for the new name
204  ** Status:   OK  7-Oct-1992 kcm
205  ******************************************************************/
PrettyNewName(const char * oldname)206 char * PrettyNewName( const char * oldname ) {
207     char * name = new char [strlen( oldname ) + 1];
208     strcpy( name, PrettyTmpName( oldname ) );
209     return name;
210 }
211 
212 /**
213 *** This function is used to check an input stream following a read.  It writes
214 *** error messages in the 'ErrorDescriptor &err' argument as appropriate.
215 *** 'const char *tokenList' argument contains a string made up of delimiters
216 *** that are used to move the file pointer in the input stream to the end of
217 *** the value you are reading (i.e. the ending marked by the presence of the
218 *** delimiter).  The file pointer is moved just prior to the delimiter.  If the
219 *** tokenList argument is a null pointer then this function expects to find EOF.
220 ***
221 *** If input is being read from a stream then a tokenList should be provided so
222 *** this function can push the file pointer up to but not past the delimiter
223 *** (i.e. not removing the delimiter from the input stream).  If you have a
224 *** string containing a single value and you expect the whole string to contain
225 *** a valid value, you can change the string to an istrstream, read the value
226 *** then send the istrstream to this function with tokenList set to null
227 *** and this function will set an error for you if any input remains following
228 *** the value.
229 
230 *** If the input stream can be readable again then
231 ***  - any error states set for the the stream are cleared.
232 ***  - white space skipped in the input stream
233 ***  - if EOF is encountered it returns
234 ***    otherwise it peeks at the next character
235 ***  - if the tokenList argument exists (i.e. is not null)
236 ***    then if looks to see if the char peeked at is in the tokenList string
237 ***    if it is then no error is set in the ErrorDescriptor
238 ***    if the char peeked at is not in the tokenList string that implies
239 ***       that there is garbage following the value that was successfully
240 ***       or unsuccessfully read.  The garbage is read until EOF or a
241 ***       delimiter in the tokenList is found.
242 ***       - EOF is found you did not recover -> SEVERITY_INPUT_ERROR
243 ***       - delimiter found you recovered successfully => SEVERITY_WARNING
244 ***  - if tokenList does not exist then it expects to find EOF, if it does
245 ***    not then it is an error but the bad chars are not read since you have
246 ***    no way to know when to stop.
247 **/
CheckRemainingInput(istream & in,ErrorDescriptor * err,const char * typeName,const char * delimiterList)248 Severity CheckRemainingInput( istream & in, ErrorDescriptor * err,
249                               const char * typeName, // used in error message
250                               const char * delimiterList ) { // e.g. ",)"
251     string skipBuf;
252     ostringstream errMsg;
253 
254     if( in.eof() ) {
255         // no error
256         return err->severity();
257     } else if( in.bad() ) {
258         // Bad bit must have been set during read. Recovery is impossible.
259         err->GreaterSeverity( SEVERITY_INPUT_ERROR );
260         errMsg << "Invalid " << typeName << " value.\n";
261         err->AppendToUserMsg( errMsg.str().c_str() );
262         err->AppendToDetailMsg( errMsg.str().c_str() );
263     } else {
264         // At most the fail bit is set, so stream can still be read.
265         // Clear errors and skip whitespace.
266         in.clear();
267         in >> ws;
268 
269         if( in.eof() ) {
270             // no error
271             return err->severity();
272         }
273 
274         if( delimiterList != NULL ) {
275             // If the next char is a delimiter then there's no error.
276             char c = in.peek();
277             if( strchr( delimiterList, c ) == NULL ) {
278                 // Error. Extra input is more than just a delimiter and is
279                 // now considered invalid. We'll try to recover by skipping
280                 // to the next delimiter.
281                 for( in.get( c ); in && !strchr( delimiterList, c ); in.get( c ) ) {
282                     skipBuf += c;
283                 }
284 
285                 if( strchr( delimiterList, c ) != NULL ) {
286                     // Delimiter found. Recovery succeeded.
287                     in.putback( c );
288 
289                     errMsg << "\tFound invalid " << typeName << " value...\n";
290                     err->AppendToUserMsg( errMsg.str().c_str() );
291                     err->AppendToDetailMsg( errMsg.str().c_str() );
292                     err->AppendToDetailMsg( "\tdata lost looking for end of "
293                                             "attribute: " );
294                     err->AppendToDetailMsg( skipBuf.c_str() );
295                     err->AppendToDetailMsg( "\n" );
296 
297                     err->GreaterSeverity( SEVERITY_WARNING );
298                 } else {
299                     // No delimiter found. Recovery failed.
300                     errMsg << "Unable to recover from input error while "
301                            << "reading " << typeName << " value.\n";
302                     err->AppendToUserMsg( errMsg.str().c_str() );
303                     err->AppendToDetailMsg( errMsg.str().c_str() );
304 
305                     err->GreaterSeverity( SEVERITY_INPUT_ERROR );
306                 }
307             }
308         } else if( in.good() ) {
309             // Error. Have more input, but lack of delimiter list means we
310             // don't know where we can safely resume. Recovery is impossible.
311             err->GreaterSeverity( SEVERITY_WARNING );
312 
313             errMsg << "Invalid " << typeName << " value.\n";
314 
315             err->AppendToUserMsg( errMsg.str().c_str() );
316             err->AppendToDetailMsg( errMsg.str().c_str() );
317         }
318     }
319     return err->severity();
320 }
321