1 //========= Copyright Valve Corporation ============//
#include "strtools_public.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6 
//-----------------------------------------------------------------------------
// Purpose: Returns true if sString begins with sPrefix, ignoring case.
//          The comparison covers the full length of sPrefix, so an embedded
//          NUL inside either string does not cut the comparison short.
//          An empty prefix matches every string.
//-----------------------------------------------------------------------------
bool StringHasPrefix( const std::string & sString, const std::string & sPrefix )
{
	const size_t cPrefixLen = sPrefix.length();
	if ( cPrefixLen > sString.length() )
		return false;

	for ( size_t i = 0; i < cPrefixLen; ++i )
	{
		// Go through unsigned char: handing tolower() a negative char is undefined.
		const int nStringChar = tolower( static_cast< unsigned char >( sString[ i ] ) );
		const int nPrefixChar = tolower( static_cast< unsigned char >( sPrefix[ i ] ) );
		if ( nStringChar != nPrefixChar )
			return false;
	}

	return true;
}
14 
//-----------------------------------------------------------------------------
// Purpose: Returns true if sString begins with sPrefix (exact, case-sensitive
//          byte comparison). Uses the std::string lengths rather than C-string
//          termination, so embedded NULs take part in the comparison.
//-----------------------------------------------------------------------------
bool StringHasPrefixCaseSensitive( const std::string & sString, const std::string & sPrefix )
{
	// compare() clamps the substring to sString's length, so a string shorter
	// than the prefix simply compares unequal.
	return 0 == sString.compare( 0, sPrefix.length(), sPrefix );
}
19 
20 
//-----------------------------------------------------------------------------
// Purpose: Returns true if sString ends with sSuffix, ignoring case.
//          Compares in place (no temporary substring) over the full length of
//          sSuffix, so embedded NULs do not cut the comparison short.
//          An empty suffix matches every string.
//-----------------------------------------------------------------------------
bool StringHasSuffix( const std::string &sString, const std::string &sSuffix )
{
	const size_t cStrLen = sString.length();
	const size_t cSuffixLen = sSuffix.length();

	if ( cSuffixLen > cStrLen )
		return false;

	// Index in sString where the candidate suffix starts.
	const size_t cOffset = cStrLen - cSuffixLen;
	for ( size_t i = 0; i < cSuffixLen; ++i )
	{
		// Go through unsigned char: handing tolower() a negative char is undefined.
		const int nStringChar = tolower( static_cast< unsigned char >( sString[ cOffset + i ] ) );
		const int nSuffixChar = tolower( static_cast< unsigned char >( sSuffix[ i ] ) );
		if ( nStringChar != nSuffixChar )
			return false;
	}

	return true;
}
33 
//-----------------------------------------------------------------------------
// Purpose: Returns true if sString ends with sSuffix (exact, case-sensitive
//          byte comparison). Compares in place without building a temporary
//          substring and honours the std::string lengths, so embedded NULs
//          take part in the comparison.
//-----------------------------------------------------------------------------
bool StringHasSuffixCaseSensitive( const std::string &sString, const std::string &sSuffix )
{
	const size_t cStrLen = sString.length();
	const size_t cSuffixLen = sSuffix.length();

	// Guard first: the offset below would underflow for a longer suffix.
	if ( cSuffixLen > cStrLen )
		return false;

	return 0 == sString.compare( cStrLen - cSuffixLen, cSuffixLen, sSuffix );
}
46 
//-----------------------------------------------------------------------------
// Purpose: Converts a NUL-terminated UTF-16 wide string to UTF-8.
//          A high surrogate (0xD800-0xDBFF) is remembered and merged with the
//          low surrogate (0xDC00-0xDFFF) that follows it. Input is not
//          validated: a high surrogate that is not followed by a low one is
//          discarded, and a lone low surrogate is encoded from its low 10 bits.
//          A null pointer yields an empty string.
//-----------------------------------------------------------------------------
std::string UTF16to8(const wchar_t * in)
{
	std::string sUtf8;
	if ( !in )
		return sUtf8;

	// Upper bits contributed by a preceding high surrogate, 0 when none is pending.
	unsigned int unPending = 0;

	for ( const wchar_t *pwch = in; *pwch != 0; ++pwch )
	{
		const unsigned int unUnit = *pwch;

		if ( unUnit >= 0xd800 && unUnit <= 0xdbff )
		{
			// High surrogate: stash its contribution and wait for the next unit.
			unPending = ( ( unUnit - 0xd800 ) << 10 ) + 0x10000;
			continue;
		}

		unsigned int unCodepoint = unUnit;
		if ( unUnit >= 0xdc00 && unUnit <= 0xdfff )
			unCodepoint = unPending | ( unUnit - 0xdc00 );
		unPending = 0;

		if ( unCodepoint <= 0x7f )
		{
			sUtf8.push_back( static_cast<char>( unCodepoint ) );
		}
		else if ( unCodepoint <= 0x7ff )
		{
			sUtf8.push_back( static_cast<char>( 0xc0 | ( ( unCodepoint >> 6 ) & 0x1f ) ) );
			sUtf8.push_back( static_cast<char>( 0x80 | ( unCodepoint & 0x3f ) ) );
		}
		else if ( unCodepoint <= 0xffff )
		{
			sUtf8.push_back( static_cast<char>( 0xe0 | ( ( unCodepoint >> 12 ) & 0x0f ) ) );
			sUtf8.push_back( static_cast<char>( 0x80 | ( ( unCodepoint >> 6 ) & 0x3f ) ) );
			sUtf8.push_back( static_cast<char>( 0x80 | ( unCodepoint & 0x3f ) ) );
		}
		else
		{
			sUtf8.push_back( static_cast<char>( 0xf0 | ( ( unCodepoint >> 18 ) & 0x07 ) ) );
			sUtf8.push_back( static_cast<char>( 0x80 | ( ( unCodepoint >> 12 ) & 0x3f ) ) );
			sUtf8.push_back( static_cast<char>( 0x80 | ( ( unCodepoint >> 6 ) & 0x3f ) ) );
			sUtf8.push_back( static_cast<char>( 0x80 | ( unCodepoint & 0x3f ) ) );
		}
	}

	return sUtf8;
}
90 
//-----------------------------------------------------------------------------
// Purpose: Converts a NUL-terminated UTF-8 string to UTF-16 code units.
//          Code points above 0xFFFF are emitted as a surrogate pair, which is
//          the form UTF16to8() merges back into a single code point.
//          Input is not validated: a continuation byte that arrives when none
//          is expected contributes no bits, and a sequence cut short by a new
//          lead byte is discarded. A null pointer yields an empty string.
//-----------------------------------------------------------------------------
std::wstring UTF8to16(const char * in)
{
	std::wstring out;
	unsigned int codepoint = 0;
	int following = 0;	// continuation bytes still expected for the current code point
	for ( ; in && *in != 0; ++in )
	{
		unsigned char ch = *in;
		if (ch <= 0x7f)
		{
			// Single-byte (ASCII) code point.
			codepoint = ch;
			following = 0;
		}
		else if (ch <= 0xbf)
		{
			// Continuation byte: shift in six more payload bits.
			if (following > 0)
			{
				codepoint = (codepoint << 6) | (ch & 0x3f);
				--following;
			}
		}
		else if (ch <= 0xdf)
		{
			// Lead byte of a two-byte sequence.
			codepoint = ch & 0x1f;
			following = 1;
		}
		else if (ch <= 0xef)
		{
			// Lead byte of a three-byte sequence.
			codepoint = ch & 0x0f;
			following = 2;
		}
		else
		{
			// Lead byte of a four-byte sequence.
			codepoint = ch & 0x07;
			following = 3;
		}
		if (following == 0)
		{
			if (codepoint > 0xffff)
			{
				// UTF-16 surrogates encode (codepoint - 0x10000) as two 10-bit halves.
				// Omitting the subtraction yields the wrong high surrogate.
				const unsigned int offset = codepoint - 0x10000;
				out.append(1, static_cast<wchar_t>(0xd800 + (offset >> 10)));
				out.append(1, static_cast<wchar_t>(0xdc00 + (offset & 0x03ff)));
			}
			else
				out.append(1, static_cast<wchar_t>(codepoint));
			codepoint = 0;
		}
	}
	return out;
}
141 
142 
//-----------------------------------------------------------------------------
// Purpose: Copies pchSource into pchBuffer, truncating if necessary, and always
//          leaves the buffer NUL-terminated.
//          unBufferSizeBytes is the total size of pchBuffer including the
//          terminator. A null buffer or a zero size is a no-op; a null source
//          produces an empty string.
//-----------------------------------------------------------------------------
void strcpy_safe( char *pchBuffer, size_t unBufferSizeBytes, const char *pchSource )
{
	// With size 0 the "size - 1" below would wrap to SIZE_MAX and write out of bounds.
	if ( !pchBuffer || unBufferSizeBytes == 0 )
		return;

	if ( !pchSource )
	{
		pchBuffer[ 0 ] = '\0';
		return;
	}

	// Terminate first: strncpy does not add a NUL when the source fills the buffer.
	pchBuffer[ unBufferSizeBytes - 1 ] = '\0';
	strncpy( pchBuffer, pchSource, unBufferSizeBytes - 1 );
}
148 
149 
// --------------------------------------------------------------------
// Purpose: Returns a copy of sString with every character passed through
//          toupper(). Bytes that toupper() does not map are copied unchanged.
// --------------------------------------------------------------------
std::string StringToUpper( const std::string & sString )
{
	std::string sOut;
	sOut.reserve( sString.size() + 1 );
	for( std::string::const_iterator i = sString.begin(); i != sString.end(); ++i )
	{
		// toupper() requires a value representable as unsigned char (or EOF);
		// a plain char with the high bit set may be negative, which is undefined.
		sOut.push_back( static_cast< char >( toupper( static_cast< unsigned char >( *i ) ) ) );
	}

	return sOut;
}
164 
165 
// --------------------------------------------------------------------
// Purpose: Returns a copy of sString with every character passed through
//          tolower(). Bytes that tolower() does not map are copied unchanged.
// --------------------------------------------------------------------
std::string StringToLower( const std::string & sString )
{
	std::string sOut;
	sOut.reserve( sString.size() + 1 );
	for( std::string::const_iterator i = sString.begin(); i != sString.end(); ++i )
	{
		// tolower() requires a value representable as unsigned char (or EOF);
		// a plain char with the high bit set may be negative, which is undefined.
		sOut.push_back( static_cast< char >( tolower( static_cast< unsigned char >( *i ) ) ) );
	}

	return sOut;
}
180 
181 
//-----------------------------------------------------------------------------
// Purpose: Copies sValue, including its NUL terminator, into a caller-supplied
//          buffer. Always returns the number of bytes needed to hold the
//          string plus terminator.
//          If pchBuffer is null or unBufferLen is 0, nothing is written. If the
//          buffer is too small, it is set to the empty string instead of being
//          given a truncated copy.
//-----------------------------------------------------------------------------
uint32_t ReturnStdString( const std::string & sValue, char *pchBuffer, uint32_t unBufferLen )
{
	const uint32_t unRequired = static_cast< uint32_t >( sValue.length() ) + 1;

	if ( pchBuffer && unBufferLen )
	{
		if ( unRequired <= unBufferLen )
			memcpy( pchBuffer, sValue.c_str(), unRequired );
		else
			pchBuffer[0] = '\0';
	}

	return unRequired;
}
199 
//-----------------------------------------------------------------------------
// Purpose: Replaces sDest with the first unBufferLen bytes of pchBuffer followed
//          by one explicit NUL character. The resulting string therefore has
//          size unBufferLen + 1. A null pchBuffer is treated as an empty buffer.
//-----------------------------------------------------------------------------
void BufferToStdString( std::string & sDest, const char *pchBuffer, uint32_t unBufferLen )
{
	// Build the string through its own interface. Writing through a
	// const_cast of c_str() is undefined behaviour.
	if ( pchBuffer && unBufferLen )
		sDest.assign( pchBuffer, unBufferLen );
	else
		sDest.clear();

	// Callers get the trailing NUL as part of the string's contents.
	sDest.push_back( '\0' );
}
206 
207 // Commented out by Mozilla, please see README.mozilla
208 /** Returns a std::string from a uint64_t */
209 /*
210 std::string Uint64ToString( uint64_t ulValue )
211 {
212 	char buf[ 22 ];
213 #if defined( _WIN32 )
214 	sprintf_s( buf, "%llu", ulValue );
215 #else
216     snprintf( buf, sizeof( buf ), "%llu", (long long unsigned int ) ulValue );
217 #endif
218 	return buf;
219 }
220 */
221 
/** Parses sValue as an unsigned 64-bit integer using strtoull() with base 0,
 *  so the numeric base is taken from the string's prefix. The end pointer is
 *  not requested, so any text after the number is ignored. */
uint64_t StringToUint64( const std::string & sValue )
{
	const char *pchText = sValue.c_str();
	return strtoull( pchText, NULL, 0 );
}
227 
//-----------------------------------------------------------------------------
// Purpose: Maps a value in the range 0-15 to its upper-case hexadecimal digit.
//          Only the low four bits of nValue are used, so out-of-range input
//          wraps rather than reading outside the table.
//-----------------------------------------------------------------------------
char cIntToHexDigit( int nValue )
{
	static const char s_rgchHexDigits[] = "0123456789ABCDEF";
	const int iDigit = nValue & 15;
	return s_rgchHexDigits[ iDigit ];
}
236 
//-----------------------------------------------------------------------------
// Purpose: Converts one hexadecimal digit character ('0'-'9', 'a'-'f' or
//          'A'-'F') to its numeric value 0-15. Returns -1 for any other
//          character.
//-----------------------------------------------------------------------------
int iHexCharToInt( char cValue )
{
	if ( cValue >= '0' && cValue <= '9' )
		return cValue - '0';

	if ( cValue >= 'a' && cValue <= 'f' )
		return cValue - 'a' + 10;

	if ( cValue >= 'A' && cValue <= 'F' )
		return cValue - 'A' + 10;

	return -1;
}
253 
//-----------------------------------------------------------------------------
// Purpose: Internal implementation of encode, works in the strict RFC manner, or
//          with spaces turned to + like HTML form encoding.
//
//          Reads exactly nSourceLen bytes from pchSource (embedded NULs are
//          encoded as %00) and writes a NUL-terminated result to pchDest.
//          a-z, A-Z, 0-9, '-', '_' and '.' pass through; every other byte
//          becomes %XX with upper-case hex digits, except that a space becomes
//          '+' when bUsePlusForSpace is set.
//
//          A destination of 3 * nSourceLen + 1 bytes is always large enough.
//          If the destination is too small, pchDest is set to the empty string.
//          If pchDest is null or nDestLen is not positive, nothing is written.
//-----------------------------------------------------------------------------
void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen, bool bUsePlusForSpace )
{
	// Without at least one writable byte we cannot even store an empty string,
	// so the failure paths below must not touch pchDest[0].
	if ( !pchDest || nDestLen <= 0 )
		return;

	static const char s_rgchHexDigits[] = "0123456789ABCDEF";

	int iDestPos = 0;
	for ( int i = 0; i < nSourceLen; ++i )
	{
		// Worst case this character expands to three output chars ("%XX").
		if ( ( iDestPos + 3 ) > nDestLen )
		{
			pchDest[0] = '\0';
			return;
		}

		const char ch = pchSource[i];

		// We allow only a-z, A-Z, 0-9, period, underscore, and hyphen to pass through unescaped.
		// These are the characters allowed by both the original RFC 1738 and the latest RFC 3986.
		// Current specs also allow '~', but that is forbidden under original RFC 1738.
		const bool bPassThrough =
			( ch >= 'a' && ch <= 'z' ) ||
			( ch >= 'A' && ch <= 'Z' ) ||
			( ch >= '0' && ch <= '9' ) ||
			ch == '-' || ch == '_' || ch == '.';

		if ( bPassThrough )
		{
			pchDest[iDestPos++] = ch;
		}
		else if ( bUsePlusForSpace && ch == ' ' )
		{
			pchDest[iDestPos++] = '+';
		}
		else
		{
			// Encode the raw byte value; go through unsigned char so bytes
			// >= 0x80 are not sign-extended.
			const unsigned char uchValue = static_cast< unsigned char >( ch );
			pchDest[iDestPos++] = '%';
			pchDest[iDestPos++] = s_rgchHexDigits[ uchValue >> 4 ];
			pchDest[iDestPos++] = s_rgchHexDigits[ uchValue & 0x0f ];
		}
	}

	// Need one more byte for the terminator.
	if ( ( iDestPos + 1 ) > nDestLen )
	{
		pchDest[0] = '\0';
		return;
	}

	// Null terminate
	pchDest[iDestPos] = 0;
}
319 
320 
321 //-----------------------------------------------------------------------------
322 // Purpose: Internal implementation of decode, works in the strict RFC manner, or
323 //          with spaces turned to + like HTML form encoding.
324 //
325 //			Returns the amount of space used in the output buffer.
326 //-----------------------------------------------------------------------------
V_URLDecodeInternal(char * pchDecodeDest,int nDecodeDestLen,const char * pchEncodedSource,int nEncodedSourceLen,bool bUsePlusForSpace)327 size_t V_URLDecodeInternal( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen, bool bUsePlusForSpace )
328 {
329 	if ( nDecodeDestLen < nEncodedSourceLen )
330 	{
331 		//AssertMsg( false, "V_URLDecode needs a dest buffer at least as large as the source" );
332 		return 0;
333 	}
334 
335 	int iDestPos = 0;
336 	for( int i=0; i < nEncodedSourceLen; ++i )
337 	{
338 		if ( bUsePlusForSpace && pchEncodedSource[i] == '+' )
339 		{
340 			pchDecodeDest[ iDestPos++ ] = ' ';
341 		}
342 		else if ( pchEncodedSource[i] == '%' )
343 		{
344 			// Percent signifies an encoded value, look ahead for the hex code, convert to numeric, and use that
345 
346 			// First make sure we have 2 more chars
347 			if ( i < nEncodedSourceLen - 2 )
348 			{
349 				char cHexDigit1 = pchEncodedSource[i+1];
350 				char cHexDigit2 = pchEncodedSource[i+2];
351 
352 				// Turn the chars into a hex value, if they are not valid, then we'll
353 				// just place the % and the following two chars direct into the string,
354 				// even though this really shouldn't happen, who knows what bad clients
355 				// may do with encoding.
356 				bool bValid = false;
357 				int iValue = iHexCharToInt( cHexDigit1 );
358 				if ( iValue != -1 )
359 				{
360 					iValue *= 16;
361 					int iValue2 = iHexCharToInt( cHexDigit2 );
362 					if ( iValue2 != -1 )
363 					{
364 						iValue += iValue2;
365 						pchDecodeDest[ iDestPos++ ] = (char)iValue;
366 						bValid = true;
367 					}
368 				}
369 
370 				if ( !bValid )
371 				{
372 					pchDecodeDest[ iDestPos++ ] = '%';
373 					pchDecodeDest[ iDestPos++ ] = cHexDigit1;
374 					pchDecodeDest[ iDestPos++ ] = cHexDigit2;
375 				}
376 			}
377 
378 			// Skip ahead
379 			i += 2;
380 		}
381 		else
382 		{
383 			pchDecodeDest[ iDestPos++ ] = pchEncodedSource[i];
384 		}
385 	}
386 
387 	// We may not have extra room to NULL terminate, since this can be used on raw data, but if we do
388 	// go ahead and do it as this can avoid bugs.
389 	if ( iDestPos < nDecodeDestLen )
390 	{
391 		pchDecodeDest[iDestPos] = 0;
392 	}
393 
394 	return (size_t)iDestPos;
395 }
396 
397 //-----------------------------------------------------------------------------
398 // Purpose: Encodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.
399 //          This version of the call isn't a strict RFC implementation, but uses + for space as is
400 //          the standard in HTML form encoding, despite it not being part of the RFC.
401 //
402 //          Dest buffer should be at least as large as source buffer to guarantee room for decode.
403 //-----------------------------------------------------------------------------
V_URLEncode(char * pchDest,int nDestLen,const char * pchSource,int nSourceLen)404 void V_URLEncode( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
405 {
406 	return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, true );
407 }
408 
409 
410 //-----------------------------------------------------------------------------
411 // Purpose: Decodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.
412 //          This version of the call isn't a strict RFC implementation, but uses + for space as is
413 //          the standard in HTML form encoding, despite it not being part of the RFC.
414 //
415 //          Dest buffer should be at least as large as source buffer to guarantee room for decode.
416 //			Dest buffer being the same as the source buffer (decode in-place) is explicitly allowed.
417 //-----------------------------------------------------------------------------
V_URLDecode(char * pchDecodeDest,int nDecodeDestLen,const char * pchEncodedSource,int nEncodedSourceLen)418 size_t V_URLDecode( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen )
419 {
420 	return V_URLDecodeInternal( pchDecodeDest, nDecodeDestLen, pchEncodedSource, nEncodedSourceLen, true );
421 }
422 
//-----------------------------------------------------------------------------
// Purpose: Removes the file extension (the last '.' and everything after it)
//          from a path, in place. The string is left untouched when it has no
//          '.', or when the last '.' comes before the last path separator
//          ('\\' or '/') and so belongs to a directory name.
//-----------------------------------------------------------------------------
void V_StripExtension( std::string &in )
{
	// Find the last dot. If it's followed by a slash, then it's part of a
	// directory specifier like ../../somedir/./blah.
	const std::string::size_type iDot = in.rfind( '.' );
	if ( iDot == std::string::npos )
		return;

	// This handles things like ".\blah" or "c:\my@email.com\abc\def\geh"
	// Which would otherwise wind up with "" and "c:\my@email", respectively.
	const std::string::size_type iSeparator = in.find_last_of( "\\/" );

	// npos means the path has no separator at all. It must be tested
	// explicitly: npos is the largest size_type value, so "npos < iDot" is
	// never true and a bare file name would never be stripped.
	if ( iSeparator == std::string::npos || iSeparator < iDot )
	{
		in.resize( iDot );
	}
}
439 
440