1 //
2 // Copyright (C) 1999-2005 Google, Inc.
3 //
4 
5 #include "strutil.h"
6 
7 #include <ctype.h>
8 #include <errno.h>
9 #include <float.h>          // for DBL_DIG and FLT_DIG
10 #include <math.h>           // for HUGE_VAL
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <time.h>           // for FastTimeToBuffer()
16 
17 #include <algorithm>
18 using std::min;
19 using std::max;
20 using std::swap;
21 using std::reverse;
22 
23 #include "hash.h"
24 
25 #include <iterator>
26 #include <limits>
27 using std::numeric_limits;
28 
29 #include <set>
30 using std::set;
31 using std::multiset;
32 
33 #include <string>
34 using std::string;
35 
36 #include <vector>
37 using std::vector;
38 
39 
40 #include "base/logging.h"
41 #include "base/scoped_ptr.h"
42 #include "split.h"
43 
44 #ifdef OS_WINDOWS
45 #ifdef min  // windows.h defines this to something silly
46 #undef min
47 #endif
48 #endif
49 
50 // ----------------------------------------------------------------------
51 // FpToString()
52 // FloatToString()
53 // IntToString()
54 //    Convert various types to their string representation.  These
55 //    all do the obvious, trivial thing.
56 // ----------------------------------------------------------------------
57 
FpToString(Fprint fp)58 string FpToString(Fprint fp) {
59   char buf[17];
60   snprintf(buf, sizeof(buf), "%016llx", fp);
61   return string(buf);
62 }
63 
FloatToString(float f,const char * format)64 string FloatToString(float f, const char* format) {
65   char buf[80];
66   snprintf(buf, sizeof(buf), format, f);
67   return string(buf);
68 }
69 
IntToString(int i,const char * format)70 string IntToString(int i, const char* format) {
71   char buf[80];
72   snprintf(buf, sizeof(buf), format, i);
73   return string(buf);
74 }
75 
Int64ToString(int64 i64,const char * format)76 string Int64ToString(int64 i64, const char* format) {
77   char buf[80];
78   snprintf(buf, sizeof(buf), format, i64);
79   return string(buf);
80 }
81 
UInt64ToString(uint64 ui64,const char * format)82 string UInt64ToString(uint64 ui64, const char* format) {
83   char buf[80];
84   snprintf(buf, sizeof(buf), format, ui64);
85   return string(buf);
86 }
87 
88 // Default arguments
FloatToString(float f)89 string FloatToString(float f)   { return FloatToString(f, "%7f"); }
IntToString(int i)90 string IntToString(int i)       { return IntToString(i, "%7d"); }
Int64ToString(int64 i64)91 string Int64ToString(int64 i64) {
92   return Int64ToString(i64, "%7" GG_LL_FORMAT "d");
93 }
UInt64ToString(uint64 ui64)94 string UInt64ToString(uint64 ui64) {
95   return UInt64ToString(ui64, "%7" GG_LL_FORMAT "u");
96 }
97 
98 // ----------------------------------------------------------------------
99 // FastIntToBuffer()
100 // FastInt64ToBuffer()
101 // FastHexToBuffer()
102 // FastHex64ToBuffer()
103 // FastHex32ToBuffer()
104 // FastTimeToBuffer()
105 //    These are intended for speed.  FastHexToBuffer() assumes the
106 //    integer is non-negative.  FastHexToBuffer() puts output in
107 //    hex rather than decimal.  FastTimeToBuffer() puts the output
108 //    into RFC822 format.  If time is 0, uses the current time.
109 //
110 //    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
111 //    padded to exactly 16 bytes (plus one byte for '\0')
112 //
113 //    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
114 //    padded to exactly 8 bytes (plus one byte for '\0')
115 //
116 //       All functions take the output buffer as an arg.  FastInt()
117 //    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
118 //    They all return a pointer to the beginning of the output,
119 //    which may not be the beginning of the input buffer.  (Though
120 //    for FastTimeToBuffer(), we guarantee that it is.)
121 // ----------------------------------------------------------------------
122 
FastInt64ToBuffer(int64 i,char * buffer)123 char *FastInt64ToBuffer(int64 i, char* buffer) {
124   FastInt64ToBufferLeft(i, buffer);
125   return buffer;
126 }
127 
128 // Sigh, also not actually defined here, copied from:
129 // https://github.com/splitfeed/android-market-api-php/blob/master/proto/protoc-gen-php/strutil.cc
130 
131 static const char two_ASCII_digits[100][2] = {
132   {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
133   {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
134   {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
135   {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
136   {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
137   {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
138   {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
139   {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
140   {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
141   {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
142   {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
143   {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
144   {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
145   {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
146   {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
147   {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
148   {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
149   {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
150   {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
151   {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
152 };
153 
FastUInt32ToBufferLeft(uint32 u,char * buffer)154 char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
155   int digits;
156   const char *ASCII_digits = NULL;
157   // The idea of this implementation is to trim the number of divides to as few
158   // as possible by using multiplication and subtraction rather than mod (%),
159   // and by outputting two digits at a time rather than one.
160   // The huge-number case is first, in the hopes that the compiler will output
161   // that case in one branch-free block of code, and only output conditional
162   // branches into it from below.
163   if (u >= 1000000000) {  // >= 1,000,000,000
164     digits = u / 100000000;  // 100,000,000
165     ASCII_digits = two_ASCII_digits[digits];
166     buffer[0] = ASCII_digits[0];
167     buffer[1] = ASCII_digits[1];
168     buffer += 2;
169 sublt100_000_000:
170     u -= digits * 100000000;  // 100,000,000
171 lt100_000_000:
172     digits = u / 1000000;  // 1,000,000
173     ASCII_digits = two_ASCII_digits[digits];
174     buffer[0] = ASCII_digits[0];
175     buffer[1] = ASCII_digits[1];
176     buffer += 2;
177 sublt1_000_000:
178     u -= digits * 1000000;  // 1,000,000
179 lt1_000_000:
180     digits = u / 10000;  // 10,000
181     ASCII_digits = two_ASCII_digits[digits];
182     buffer[0] = ASCII_digits[0];
183     buffer[1] = ASCII_digits[1];
184     buffer += 2;
185 sublt10_000:
186     u -= digits * 10000;  // 10,000
187 lt10_000:
188     digits = u / 100;
189     ASCII_digits = two_ASCII_digits[digits];
190     buffer[0] = ASCII_digits[0];
191     buffer[1] = ASCII_digits[1];
192     buffer += 2;
193 sublt100:
194     u -= digits * 100;
195 lt100:
196     digits = u;
197     ASCII_digits = two_ASCII_digits[digits];
198     buffer[0] = ASCII_digits[0];
199     buffer[1] = ASCII_digits[1];
200     buffer += 2;
201 done:
202     *buffer = 0;
203     return buffer;
204   }
205 
206   if (u < 100) {
207     digits = u;
208     if (u >= 10) goto lt100;
209     *buffer++ = '0' + digits;
210     goto done;
211   }
212   if (u  <  10000) {   // 10,000
213     if (u >= 1000) goto lt10_000;
214     digits = u / 100;
215     *buffer++ = '0' + digits;
216     goto sublt100;
217   }
218   if (u  <  1000000) {   // 1,000,000
219     if (u >= 100000) goto lt1_000_000;
220     digits = u / 10000;  //    10,000
221     *buffer++ = '0' + digits;
222     goto sublt10_000;
223   }
224   if (u  <  100000000) {   // 100,000,000
225     if (u >= 10000000) goto lt100_000_000;
226     digits = u / 1000000;  //   1,000,000
227     *buffer++ = '0' + digits;
228     goto sublt1_000_000;
229   }
230   // we already know that u < 1,000,000,000
231   digits = u / 100000000;   // 100,000,000
232   *buffer++ = '0' + digits;
233   goto sublt100_000_000;
234 }
FastUInt64ToBufferLeft(uint64 u64,char * buffer)235 char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
236   int digits;
237   const char *ASCII_digits = NULL;
238 
239   uint32 u = static_cast<uint32>(u64);
240   if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
241 
242   uint64 top_11_digits = u64 / 1000000000;
243   buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
244   u = u64 - (top_11_digits * 1000000000);
245 
246   digits = u / 10000000;  // 10,000,000
247   DCHECK_LT(digits, 100);
248   ASCII_digits = two_ASCII_digits[digits];
249   buffer[0] = ASCII_digits[0];
250   buffer[1] = ASCII_digits[1];
251   buffer += 2;
252   u -= digits * 10000000;  // 10,000,000
253   digits = u / 100000;  // 100,000
254   ASCII_digits = two_ASCII_digits[digits];
255   buffer[0] = ASCII_digits[0];
256   buffer[1] = ASCII_digits[1];
257   buffer += 2;
258   u -= digits * 100000;  // 100,000
259   digits = u / 1000;  // 1,000
260   ASCII_digits = two_ASCII_digits[digits];
261   buffer[0] = ASCII_digits[0];
262   buffer[1] = ASCII_digits[1];
263   buffer += 2;
264   u -= digits * 1000;  // 1,000
265   digits = u / 10;
266   ASCII_digits = two_ASCII_digits[digits];
267   buffer[0] = ASCII_digits[0];
268   buffer[1] = ASCII_digits[1];
269   buffer += 2;
270   u -= digits * 10;
271   digits = u;
272   *buffer++ = '0' + digits;
273   *buffer = 0;
274   return buffer;
275 }
276 
FastInt64ToBufferLeft(int64 i,char * buffer)277 char* FastInt64ToBufferLeft(int64 i, char* buffer) {
278   uint64 u = i;
279   if (i < 0) {
280     *buffer++ = '-';
281     u = -i;
282   }
283   return FastUInt64ToBufferLeft(u, buffer);
284 }
285 
286 // Offset into buffer where FastInt32ToBuffer places the end of string
287 // null character.  Also used by FastInt32ToBufferLeft
288 static const int kFastInt32ToBufferOffset = 11;
289 
290 // This used to call out to FastInt32ToBufferLeft but that wasn't defined.
291 // Copied from http://gears.googlecode.com/svn-history/r395/trunk/gears/base/common/string16.cc
FastInt32ToBuffer(int32 i,char * buffer)292 char *FastInt32ToBuffer(int32 i, char* buffer) {
293   // We could collapse the positive and negative sections, but that
294   // would be slightly slower for positive numbers...
295   // 12 bytes is enough to store -2**32, -4294967296.
296   char* p = buffer + kFastInt32ToBufferOffset;
297   *p-- = '\0';
298   if (i >= 0) {
299     do {
300       *p-- = '0' + i % 10;
301       i /= 10;
302     } while (i > 0);
303     return p + 1;
304   } else {
305     // On different platforms, % and / have different behaviors for
306     // negative numbers, so we need to jump through hoops to make sure
307     // we don't divide negative numbers.
308     if (i > -10) {
309       i = -i;
310       *p-- = '0' + i;
311       *p = '-';
312       return p;
313     } else {
314       // Make sure we aren't at MIN_INT, in which case we can't say i = -i
315       i = i + 10;
316       i = -i;
317       *p-- = '0' + i % 10;
318       // Undo what we did a moment ago
319       i = i / 10 + 1;
320       do {
321         *p-- = '0' + i % 10;
322         i /= 10;
323       } while (i > 0);
324       *p = '-';
325       return p;
326     }
327   }
328 }
329 
FastHexToBuffer(int i,char * buffer)330 char *FastHexToBuffer(int i, char* buffer) {
331   CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
332 
333   static const char *hexdigits = "0123456789abcdef";
334   char *p = buffer + 21;
335   *p-- = '\0';
336   do {
337     *p-- = hexdigits[i & 15];   // mod by 16
338     i >>= 4;                    // divide by 16
339   } while (i > 0);
340   return p + 1;
341 }
342 
InternalFastHexToBuffer(uint64 value,char * buffer,int num_byte)343 char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
344   static const char *hexdigits = "0123456789abcdef";
345   buffer[num_byte] = '\0';
346   for (int i = num_byte - 1; i >= 0; i--) {
347     buffer[i] = hexdigits[uint32(value) & 0xf];
348     value >>= 4;
349   }
350   return buffer;
351 }
352 
FastHex64ToBuffer(uint64 value,char * buffer)353 char *FastHex64ToBuffer(uint64 value, char* buffer) {
354   return InternalFastHexToBuffer(value, buffer, 16);
355 }
356 
FastHex32ToBuffer(uint32 value,char * buffer)357 char *FastHex32ToBuffer(uint32 value, char* buffer) {
358   return InternalFastHexToBuffer(value, buffer, 8);
359 }
360 
PutTwoDigits(int i,char * p)361 static inline void PutTwoDigits(int i, char* p) {
362   DCHECK_GE(i, 0);
363   DCHECK_LT(i, 100);
364   p[0] = two_ASCII_digits[i][0];
365   p[1] = two_ASCII_digits[i][1];
366 }
367 
368 #if 0
369 char* FastTimeToBuffer(time_t s, char* buffer) {
370   if (s == 0) {
371     time(&s);
372   }
373 
374   struct tm tm;
375   if (gmtime_r(&s, &tm) == NULL) {
376     // Error message must fit in 30-char buffer.
377     memcpy(buffer, "Invalid:", sizeof("Invalid:"));
378     FastInt64ToBufferLeft(s, buffer+strlen(buffer));
379     return buffer;
380   }
381 
382   // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
383   // but strftime does locale stuff which we do not want
384   // plus strftime takes > 10x the time of hard code
385 
386   const char* weekday_name = "Xxx";
387   switch (tm.tm_wday) {
388     default: { DCHECK(false); } break;
389     case 0:  weekday_name = "Sun"; break;
390     case 1:  weekday_name = "Mon"; break;
391     case 2:  weekday_name = "Tue"; break;
392     case 3:  weekday_name = "Wed"; break;
393     case 4:  weekday_name = "Thu"; break;
394     case 5:  weekday_name = "Fri"; break;
395     case 6:  weekday_name = "Sat"; break;
396   }
397 
398   const char* month_name = "Xxx";
399   switch (tm.tm_mon) {
400     default:  { DCHECK(false); } break;
401     case 0:   month_name = "Jan"; break;
402     case 1:   month_name = "Feb"; break;
403     case 2:   month_name = "Mar"; break;
404     case 3:   month_name = "Apr"; break;
405     case 4:   month_name = "May"; break;
406     case 5:   month_name = "Jun"; break;
407     case 6:   month_name = "Jul"; break;
408     case 7:   month_name = "Aug"; break;
409     case 8:   month_name = "Sep"; break;
410     case 9:   month_name = "Oct"; break;
411     case 10:  month_name = "Nov"; break;
412     case 11:  month_name = "Dec"; break;
413   }
414 
415   // Write out the buffer.
416 
417   memcpy(buffer+0, weekday_name, 3);
418   buffer[3] = ',';
419   buffer[4] = ' ';
420 
421   PutTwoDigits(tm.tm_mday, buffer+5);
422   buffer[7] = ' ';
423 
424   memcpy(buffer+8, month_name, 3);
425   buffer[11] = ' ';
426 
427   int32 year = tm.tm_year + 1900;
428   PutTwoDigits(year/100, buffer+12);
429   PutTwoDigits(year%100, buffer+14);
430   buffer[16] = ' ';
431 
432   PutTwoDigits(tm.tm_hour, buffer+17);
433   buffer[19] = ':';
434 
435   PutTwoDigits(tm.tm_min, buffer+20);
436   buffer[22] = ':';
437 
438   PutTwoDigits(tm.tm_sec, buffer+23);
439 
440   // includes ending NUL
441   memcpy(buffer+25, " GMT", 5);
442 
443   return buffer;
444 }
445 #endif
446 
447 // ----------------------------------------------------------------------
448 // ParseLeadingUInt64Value
449 // ParseLeadingInt64Value
450 // ParseLeadingHex64Value
451 //    A simple parser for long long values. Returns the parsed value if a
452 //    valid integer is found; else returns deflt
453 //    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
454 // --------------------------------------------------------------------
ParseLeadingUInt64Value(const char * str,uint64 deflt)455 uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
456   char *error = NULL;
457   const uint64 value = strtoull(str, &error, 0);
458   return (error == str) ? deflt : value;
459 }
460 
ParseLeadingInt64Value(const char * str,int64 deflt)461 int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
462   char *error = NULL;
463   const int64 value = strtoll(str, &error, 0);
464   return (error == str) ? deflt : value;
465 }
466 
ParseLeadingHex64Value(const char * str,uint64 deflt)467 uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
468   char *error = NULL;
469   const uint64 value = strtoull(str, &error, 16);
470   return (error == str) ? deflt : value;
471 }
472 
473 // ----------------------------------------------------------------------
474 // ParseLeadingDec64Value
475 // ParseLeadingUDec64Value
476 //    A simple parser for [u]int64 values. Returns the parsed value
477 //    if a valid value is found; else returns deflt
478 //    The string passed in is treated as *10 based*.
479 //    This can handle strings with leading 0s.
480 // --------------------------------------------------------------------
481 
ParseLeadingDec64Value(const char * str,int64 deflt)482 int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
483   char *error = NULL;
484   const int64 value = strtoll(str, &error, 10);
485   return (error == str) ? deflt : value;
486 }
487 
ParseLeadingUDec64Value(const char * str,uint64 deflt)488 uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
489   char *error = NULL;
490   const uint64 value = strtoull(str, &error, 10);
491   return (error == str) ? deflt : value;
492 }
493 
DictionaryParse(const string & encoded_str,vector<pair<string,string>> * items)494 bool DictionaryParse(const string& encoded_str,
495                       vector<pair<string, string> >* items) {
496   vector<string> entries;
497   SplitStringUsing(encoded_str, ",", &entries);
498   for (size_t i = 0; i < entries.size(); ++i) {
499     vector<string> fields;
500     SplitStringAllowEmpty(entries[i], ":", &fields);
501     if (fields.size() != 2) // parsing error
502       return false;
503     items->push_back(make_pair(fields[0], fields[1]));
504   }
505   return true;
506 }
507