1 //
2 // Copyright (C) 1999-2005 Google, Inc.
3 //
4
5 #include "strutil.h"
6
7 #include <ctype.h>
8 #include <errno.h>
9 #include <float.h> // for DBL_DIG and FLT_DIG
10 #include <math.h> // for HUGE_VAL
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <time.h> // for FastTimeToBuffer()
16
17 #include <algorithm>
18 using std::min;
19 using std::max;
20 using std::swap;
21 using std::reverse;
22
23 #include "hash.h"
24
25 #include <iterator>
26 #include <limits>
27 using std::numeric_limits;
28
29 #include <set>
30 using std::set;
31 using std::multiset;
32
33 #include <string>
34 using std::string;
35
36 #include <vector>
37 using std::vector;
38
39
40 #include "base/logging.h"
41 #include "base/scoped_ptr.h"
42 #include "split.h"
43
44 #ifdef OS_WINDOWS
45 #ifdef min // windows.h defines this to something silly
46 #undef min
47 #endif
48 #endif
49
50 // ----------------------------------------------------------------------
51 // FpToString()
52 // FloatToString()
53 // IntToString()
54 // Convert various types to their string representation. These
55 // all do the obvious, trivial thing.
56 // ----------------------------------------------------------------------
57
FpToString(Fprint fp)58 string FpToString(Fprint fp) {
59 char buf[17];
60 snprintf(buf, sizeof(buf), "%016llx", fp);
61 return string(buf);
62 }
63
FloatToString(float f,const char * format)64 string FloatToString(float f, const char* format) {
65 char buf[80];
66 snprintf(buf, sizeof(buf), format, f);
67 return string(buf);
68 }
69
IntToString(int i,const char * format)70 string IntToString(int i, const char* format) {
71 char buf[80];
72 snprintf(buf, sizeof(buf), format, i);
73 return string(buf);
74 }
75
Int64ToString(int64 i64,const char * format)76 string Int64ToString(int64 i64, const char* format) {
77 char buf[80];
78 snprintf(buf, sizeof(buf), format, i64);
79 return string(buf);
80 }
81
UInt64ToString(uint64 ui64,const char * format)82 string UInt64ToString(uint64 ui64, const char* format) {
83 char buf[80];
84 snprintf(buf, sizeof(buf), format, ui64);
85 return string(buf);
86 }
87
88 // Default arguments
FloatToString(float f)89 string FloatToString(float f) { return FloatToString(f, "%7f"); }
IntToString(int i)90 string IntToString(int i) { return IntToString(i, "%7d"); }
Int64ToString(int64 i64)91 string Int64ToString(int64 i64) {
92 return Int64ToString(i64, "%7" GG_LL_FORMAT "d");
93 }
UInt64ToString(uint64 ui64)94 string UInt64ToString(uint64 ui64) {
95 return UInt64ToString(ui64, "%7" GG_LL_FORMAT "u");
96 }
97
98 // ----------------------------------------------------------------------
99 // FastIntToBuffer()
100 // FastInt64ToBuffer()
101 // FastHexToBuffer()
102 // FastHex64ToBuffer()
103 // FastHex32ToBuffer()
104 // FastTimeToBuffer()
105 // These are intended for speed. FastHexToBuffer() assumes the
106 // integer is non-negative. FastHexToBuffer() puts output in
107 // hex rather than decimal. FastTimeToBuffer() puts the output
108 // into RFC822 format. If time is 0, uses the current time.
109 //
110 // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
111 // padded to exactly 16 bytes (plus one byte for '\0')
112 //
113 // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
114 // padded to exactly 8 bytes (plus one byte for '\0')
115 //
116 // All functions take the output buffer as an arg. FastInt()
117 // uses at most 22 bytes, FastTime() uses exactly 30 bytes.
118 // They all return a pointer to the beginning of the output,
119 // which may not be the beginning of the input buffer. (Though
120 // for FastTimeToBuffer(), we guarantee that it is.)
121 // ----------------------------------------------------------------------
122
FastInt64ToBuffer(int64 i,char * buffer)123 char *FastInt64ToBuffer(int64 i, char* buffer) {
124 FastInt64ToBufferLeft(i, buffer);
125 return buffer;
126 }
127
128 // Sigh, also not actually defined here, copied from:
129 // https://github.com/splitfeed/android-market-api-php/blob/master/proto/protoc-gen-php/strutil.cc
130
131 static const char two_ASCII_digits[100][2] = {
132 {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
133 {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
134 {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
135 {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
136 {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
137 {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
138 {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
139 {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
140 {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
141 {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
142 {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
143 {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
144 {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
145 {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
146 {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
147 {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
148 {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
149 {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
150 {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
151 {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
152 };
153
FastUInt32ToBufferLeft(uint32 u,char * buffer)154 char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
155 int digits;
156 const char *ASCII_digits = NULL;
157 // The idea of this implementation is to trim the number of divides to as few
158 // as possible by using multiplication and subtraction rather than mod (%),
159 // and by outputting two digits at a time rather than one.
160 // The huge-number case is first, in the hopes that the compiler will output
161 // that case in one branch-free block of code, and only output conditional
162 // branches into it from below.
163 if (u >= 1000000000) { // >= 1,000,000,000
164 digits = u / 100000000; // 100,000,000
165 ASCII_digits = two_ASCII_digits[digits];
166 buffer[0] = ASCII_digits[0];
167 buffer[1] = ASCII_digits[1];
168 buffer += 2;
169 sublt100_000_000:
170 u -= digits * 100000000; // 100,000,000
171 lt100_000_000:
172 digits = u / 1000000; // 1,000,000
173 ASCII_digits = two_ASCII_digits[digits];
174 buffer[0] = ASCII_digits[0];
175 buffer[1] = ASCII_digits[1];
176 buffer += 2;
177 sublt1_000_000:
178 u -= digits * 1000000; // 1,000,000
179 lt1_000_000:
180 digits = u / 10000; // 10,000
181 ASCII_digits = two_ASCII_digits[digits];
182 buffer[0] = ASCII_digits[0];
183 buffer[1] = ASCII_digits[1];
184 buffer += 2;
185 sublt10_000:
186 u -= digits * 10000; // 10,000
187 lt10_000:
188 digits = u / 100;
189 ASCII_digits = two_ASCII_digits[digits];
190 buffer[0] = ASCII_digits[0];
191 buffer[1] = ASCII_digits[1];
192 buffer += 2;
193 sublt100:
194 u -= digits * 100;
195 lt100:
196 digits = u;
197 ASCII_digits = two_ASCII_digits[digits];
198 buffer[0] = ASCII_digits[0];
199 buffer[1] = ASCII_digits[1];
200 buffer += 2;
201 done:
202 *buffer = 0;
203 return buffer;
204 }
205
206 if (u < 100) {
207 digits = u;
208 if (u >= 10) goto lt100;
209 *buffer++ = '0' + digits;
210 goto done;
211 }
212 if (u < 10000) { // 10,000
213 if (u >= 1000) goto lt10_000;
214 digits = u / 100;
215 *buffer++ = '0' + digits;
216 goto sublt100;
217 }
218 if (u < 1000000) { // 1,000,000
219 if (u >= 100000) goto lt1_000_000;
220 digits = u / 10000; // 10,000
221 *buffer++ = '0' + digits;
222 goto sublt10_000;
223 }
224 if (u < 100000000) { // 100,000,000
225 if (u >= 10000000) goto lt100_000_000;
226 digits = u / 1000000; // 1,000,000
227 *buffer++ = '0' + digits;
228 goto sublt1_000_000;
229 }
230 // we already know that u < 1,000,000,000
231 digits = u / 100000000; // 100,000,000
232 *buffer++ = '0' + digits;
233 goto sublt100_000_000;
234 }
FastUInt64ToBufferLeft(uint64 u64,char * buffer)235 char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
236 int digits;
237 const char *ASCII_digits = NULL;
238
239 uint32 u = static_cast<uint32>(u64);
240 if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
241
242 uint64 top_11_digits = u64 / 1000000000;
243 buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
244 u = u64 - (top_11_digits * 1000000000);
245
246 digits = u / 10000000; // 10,000,000
247 DCHECK_LT(digits, 100);
248 ASCII_digits = two_ASCII_digits[digits];
249 buffer[0] = ASCII_digits[0];
250 buffer[1] = ASCII_digits[1];
251 buffer += 2;
252 u -= digits * 10000000; // 10,000,000
253 digits = u / 100000; // 100,000
254 ASCII_digits = two_ASCII_digits[digits];
255 buffer[0] = ASCII_digits[0];
256 buffer[1] = ASCII_digits[1];
257 buffer += 2;
258 u -= digits * 100000; // 100,000
259 digits = u / 1000; // 1,000
260 ASCII_digits = two_ASCII_digits[digits];
261 buffer[0] = ASCII_digits[0];
262 buffer[1] = ASCII_digits[1];
263 buffer += 2;
264 u -= digits * 1000; // 1,000
265 digits = u / 10;
266 ASCII_digits = two_ASCII_digits[digits];
267 buffer[0] = ASCII_digits[0];
268 buffer[1] = ASCII_digits[1];
269 buffer += 2;
270 u -= digits * 10;
271 digits = u;
272 *buffer++ = '0' + digits;
273 *buffer = 0;
274 return buffer;
275 }
276
FastInt64ToBufferLeft(int64 i,char * buffer)277 char* FastInt64ToBufferLeft(int64 i, char* buffer) {
278 uint64 u = i;
279 if (i < 0) {
280 *buffer++ = '-';
281 u = -i;
282 }
283 return FastUInt64ToBufferLeft(u, buffer);
284 }
285
286 // Offset into buffer where FastInt32ToBuffer places the end of string
287 // null character. Also used by FastInt32ToBufferLeft
288 static const int kFastInt32ToBufferOffset = 11;
289
290 // This used to call out to FastInt32ToBufferLeft but that wasn't defined.
291 // Copied from http://gears.googlecode.com/svn-history/r395/trunk/gears/base/common/string16.cc
FastInt32ToBuffer(int32 i,char * buffer)292 char *FastInt32ToBuffer(int32 i, char* buffer) {
293 // We could collapse the positive and negative sections, but that
294 // would be slightly slower for positive numbers...
295 // 12 bytes is enough to store -2**32, -4294967296.
296 char* p = buffer + kFastInt32ToBufferOffset;
297 *p-- = '\0';
298 if (i >= 0) {
299 do {
300 *p-- = '0' + i % 10;
301 i /= 10;
302 } while (i > 0);
303 return p + 1;
304 } else {
305 // On different platforms, % and / have different behaviors for
306 // negative numbers, so we need to jump through hoops to make sure
307 // we don't divide negative numbers.
308 if (i > -10) {
309 i = -i;
310 *p-- = '0' + i;
311 *p = '-';
312 return p;
313 } else {
314 // Make sure we aren't at MIN_INT, in which case we can't say i = -i
315 i = i + 10;
316 i = -i;
317 *p-- = '0' + i % 10;
318 // Undo what we did a moment ago
319 i = i / 10 + 1;
320 do {
321 *p-- = '0' + i % 10;
322 i /= 10;
323 } while (i > 0);
324 *p = '-';
325 return p;
326 }
327 }
328 }
329
FastHexToBuffer(int i,char * buffer)330 char *FastHexToBuffer(int i, char* buffer) {
331 CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
332
333 static const char *hexdigits = "0123456789abcdef";
334 char *p = buffer + 21;
335 *p-- = '\0';
336 do {
337 *p-- = hexdigits[i & 15]; // mod by 16
338 i >>= 4; // divide by 16
339 } while (i > 0);
340 return p + 1;
341 }
342
InternalFastHexToBuffer(uint64 value,char * buffer,int num_byte)343 char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
344 static const char *hexdigits = "0123456789abcdef";
345 buffer[num_byte] = '\0';
346 for (int i = num_byte - 1; i >= 0; i--) {
347 buffer[i] = hexdigits[uint32(value) & 0xf];
348 value >>= 4;
349 }
350 return buffer;
351 }
352
FastHex64ToBuffer(uint64 value,char * buffer)353 char *FastHex64ToBuffer(uint64 value, char* buffer) {
354 return InternalFastHexToBuffer(value, buffer, 16);
355 }
356
FastHex32ToBuffer(uint32 value,char * buffer)357 char *FastHex32ToBuffer(uint32 value, char* buffer) {
358 return InternalFastHexToBuffer(value, buffer, 8);
359 }
360
PutTwoDigits(int i,char * p)361 static inline void PutTwoDigits(int i, char* p) {
362 DCHECK_GE(i, 0);
363 DCHECK_LT(i, 100);
364 p[0] = two_ASCII_digits[i][0];
365 p[1] = two_ASCII_digits[i][1];
366 }
367
368 #if 0
369 char* FastTimeToBuffer(time_t s, char* buffer) {
370 if (s == 0) {
371 time(&s);
372 }
373
374 struct tm tm;
375 if (gmtime_r(&s, &tm) == NULL) {
376 // Error message must fit in 30-char buffer.
377 memcpy(buffer, "Invalid:", sizeof("Invalid:"));
378 FastInt64ToBufferLeft(s, buffer+strlen(buffer));
379 return buffer;
380 }
381
382 // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
383 // but strftime does locale stuff which we do not want
384 // plus strftime takes > 10x the time of hard code
385
386 const char* weekday_name = "Xxx";
387 switch (tm.tm_wday) {
388 default: { DCHECK(false); } break;
389 case 0: weekday_name = "Sun"; break;
390 case 1: weekday_name = "Mon"; break;
391 case 2: weekday_name = "Tue"; break;
392 case 3: weekday_name = "Wed"; break;
393 case 4: weekday_name = "Thu"; break;
394 case 5: weekday_name = "Fri"; break;
395 case 6: weekday_name = "Sat"; break;
396 }
397
398 const char* month_name = "Xxx";
399 switch (tm.tm_mon) {
400 default: { DCHECK(false); } break;
401 case 0: month_name = "Jan"; break;
402 case 1: month_name = "Feb"; break;
403 case 2: month_name = "Mar"; break;
404 case 3: month_name = "Apr"; break;
405 case 4: month_name = "May"; break;
406 case 5: month_name = "Jun"; break;
407 case 6: month_name = "Jul"; break;
408 case 7: month_name = "Aug"; break;
409 case 8: month_name = "Sep"; break;
410 case 9: month_name = "Oct"; break;
411 case 10: month_name = "Nov"; break;
412 case 11: month_name = "Dec"; break;
413 }
414
415 // Write out the buffer.
416
417 memcpy(buffer+0, weekday_name, 3);
418 buffer[3] = ',';
419 buffer[4] = ' ';
420
421 PutTwoDigits(tm.tm_mday, buffer+5);
422 buffer[7] = ' ';
423
424 memcpy(buffer+8, month_name, 3);
425 buffer[11] = ' ';
426
427 int32 year = tm.tm_year + 1900;
428 PutTwoDigits(year/100, buffer+12);
429 PutTwoDigits(year%100, buffer+14);
430 buffer[16] = ' ';
431
432 PutTwoDigits(tm.tm_hour, buffer+17);
433 buffer[19] = ':';
434
435 PutTwoDigits(tm.tm_min, buffer+20);
436 buffer[22] = ':';
437
438 PutTwoDigits(tm.tm_sec, buffer+23);
439
440 // includes ending NUL
441 memcpy(buffer+25, " GMT", 5);
442
443 return buffer;
444 }
445 #endif
446
447 // ----------------------------------------------------------------------
448 // ParseLeadingUInt64Value
449 // ParseLeadingInt64Value
450 // ParseLeadingHex64Value
451 // A simple parser for long long values. Returns the parsed value if a
452 // valid integer is found; else returns deflt
453 // UInt64 and Int64 cannot handle decimal numbers with leading 0s.
454 // --------------------------------------------------------------------
ParseLeadingUInt64Value(const char * str,uint64 deflt)455 uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
456 char *error = NULL;
457 const uint64 value = strtoull(str, &error, 0);
458 return (error == str) ? deflt : value;
459 }
460
ParseLeadingInt64Value(const char * str,int64 deflt)461 int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
462 char *error = NULL;
463 const int64 value = strtoll(str, &error, 0);
464 return (error == str) ? deflt : value;
465 }
466
ParseLeadingHex64Value(const char * str,uint64 deflt)467 uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
468 char *error = NULL;
469 const uint64 value = strtoull(str, &error, 16);
470 return (error == str) ? deflt : value;
471 }
472
473 // ----------------------------------------------------------------------
474 // ParseLeadingDec64Value
475 // ParseLeadingUDec64Value
476 // A simple parser for [u]int64 values. Returns the parsed value
477 // if a valid value is found; else returns deflt
478 // The string passed in is treated as *10 based*.
479 // This can handle strings with leading 0s.
480 // --------------------------------------------------------------------
481
ParseLeadingDec64Value(const char * str,int64 deflt)482 int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
483 char *error = NULL;
484 const int64 value = strtoll(str, &error, 10);
485 return (error == str) ? deflt : value;
486 }
487
ParseLeadingUDec64Value(const char * str,uint64 deflt)488 uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
489 char *error = NULL;
490 const uint64 value = strtoull(str, &error, 10);
491 return (error == str) ? deflt : value;
492 }
493
DictionaryParse(const string & encoded_str,vector<pair<string,string>> * items)494 bool DictionaryParse(const string& encoded_str,
495 vector<pair<string, string> >* items) {
496 vector<string> entries;
497 SplitStringUsing(encoded_str, ",", &entries);
498 for (size_t i = 0; i < entries.size(); ++i) {
499 vector<string> fields;
500 SplitStringAllowEmpty(entries[i], ":", &fields);
501 if (fields.size() != 2) // parsing error
502 return false;
503 items->push_back(make_pair(fields[0], fields[1]));
504 }
505 return true;
506 }
507