1 /*************************************************************************************************
2 * Utility functions
3 * Copyright (C) 2009-2012 Mikio Hirabayashi
4 * This file is part of Kyoto Cabinet.
5 * This program is free software: you can redistribute it and/or modify it under the terms of
6 * the GNU General Public License as published by the Free Software Foundation, either version
7 * 3 of the License, or any later version.
8 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License for more details.
11 * You should have received a copy of the GNU General Public License along with this program.
12 * If not, see <http://www.gnu.org/licenses/>.
13 *************************************************************************************************/
14
15
16 #ifndef _KCUTIL_H // duplication check
17 #define _KCUTIL_H
18
19 #include <kccommon.h>
20
21 namespace kyotocabinet { // common namespace
22
23
24 /** The maximum value of int8_t. */
25 const int8_t INT8MAX = (std::numeric_limits<int8_t>::max)();
26
27
28 /** The maximum value of int16_t. */
29 const int16_t INT16MAX = (std::numeric_limits<int16_t>::max)();
30
31
32 /** The maximum value of int32_t. */
33 const int32_t INT32MAX = (std::numeric_limits<int32_t>::max)();
34
35
36 /** The maximum value of int64_t. */
37 const int64_t INT64MAX = (std::numeric_limits<int64_t>::max)();
38
39
40 /** The minimum value of int8_t. */
41 const int8_t INT8MIN = (std::numeric_limits<int8_t>::min)();
42
43
44 /** The minimum value of int16_t. */
45 const int16_t INT16MIN = (std::numeric_limits<int16_t>::min)();
46
47
48 /** The minimum value of int32_t. */
49 const int32_t INT32MIN = (std::numeric_limits<int32_t>::min)();
50
51
52 /** The minimum value of int64_t. */
53 const int64_t INT64MIN = (std::numeric_limits<int64_t>::min)();
54
55
56 /** The maximum value of uint8_t. */
57 const uint8_t UINT8MAX = (std::numeric_limits<uint8_t>::max)();
58
59
60 /** The maximum value of uint16_t. */
61 const uint16_t UINT16MAX = (std::numeric_limits<uint16_t>::max)();
62
63
64 /** The maximum value of uint32_t. */
65 const uint32_t UINT32MAX = (std::numeric_limits<uint32_t>::max)();
66
67
68 /** The maximum value of uint64_t. */
69 const uint64_t UINT64MAX = (std::numeric_limits<uint64_t>::max)();
70
71
72 /** The maximum value of size_t. */
73 const size_t SIZEMAX = (std::numeric_limits<size_t>::max)();
74
75
76 /** The maximum value of float. */
77 const float FLTMAX = (std::numeric_limits<float>::max)();
78
79
80 /** The maximum value of double. */
81 const double DBLMAX = (std::numeric_limits<double>::max)();
82
83
84 /** An alias of hash map of strings. */
85 typedef std::unordered_map<std::string, std::string> StringHashMap;
86
87
88 /** An alias of tree map of strings. */
89 typedef std::map<std::string, std::string> StringTreeMap;
90
91
92 /** The package version. */
93 extern const char* const VERSION;
94
95
96 /** The library version. */
97 extern const int32_t LIBVER;
98
99
100 /** The library revision. */
101 extern const int32_t LIBREV;
102
103
104 /** The database format version. */
105 extern const int32_t FMTVER;
106
107
108 /** The system name. */
109 extern const char* const OSNAME;
110
111
112 /** The flag for big endian environments. */
113 extern const bool BIGEND;
114
115
116 /** The clock tick of interruption. */
117 extern const int32_t CLOCKTICK;
118
119
120 /** The size of a page. */
121 extern const int32_t PAGESIZ;
122
123
124 /** The extra feature list. */
125 extern const char* const FEATURES;
126
127
128 /** The buffer size for numeric data. */
129 const size_t NUMBUFSIZ = 32;
130
131
132 /** The maximum memory size for debugging. */
133 const size_t MEMMAXSIZ = INT32MAX / 2;
134
135
136 /**
137 * Convert a decimal string to an integer.
138 * @param str the decimal string.
139 * @return the integer. If the string does not contain numeric expression, 0 is returned.
140 */
141 int64_t atoi(const char* str);
142
143
144 /**
145 * Convert a decimal string with a metric prefix to an integer.
146 * @param str the decimal string, which can be trailed by a binary metric prefix. "K", "M", "G",
147 * "T", "P", and "E" are supported. They are case-insensitive.
148 * @return the integer. If the string does not contain numeric expression, 0 is returned. If
149 * the integer overflows the domain, kyotocabinet::INT64MAX or kyotocabinet::INT64MIN is
150 * returned according to the sign.
151 */
152 int64_t atoix(const char* str);
153
154
155 /**
156 * Convert a hexadecimal string to an integer.
157 * @param str the hexadecimal string.
158 * @return the integer. If the string does not contain numeric expression, 0 is returned.
159 */
160 int64_t atoih(const char* str);
161
162
163 /**
164 * Convert a decimal byte array to an integer.
165 * @param ptr the decimal byte array.
166 * @param size the size of the decimal byte array.
167 * @return the integer. If the string does not contain numeric expression, 0 is returned.
168 */
169 int64_t atoin(const char* ptr, size_t size);
170
171
172 /**
173 * Convert a decimal string to a real number.
174 * @param str the decimal string.
175 * @return the real number. If the string does not contain numeric expression, 0.0 is returned.
176 */
177 double atof(const char* str);
178
179
180 /**
181 * Convert a decimal byte array to a real number.
182 * @param ptr the decimal byte array.
183 * @param size the size of the decimal byte array.
184 * @return the real number. If the string does not contain numeric expression, 0.0 is returned.
185 */
186 double atofn(const char* ptr, size_t size);
187
188
189 /**
190 * Normalize a 16-bit number in the native order into the network byte order.
191 * @param num the 16-bit number in the native order.
192 * @return the number in the network byte order.
193 */
194 uint16_t hton16(uint16_t num);
195
196
197 /**
198 * Normalize a 32-bit number in the native order into the network byte order.
199 * @param num the 32-bit number in the native order.
200 * @return the number in the network byte order.
201 */
202 uint32_t hton32(uint32_t num);
203
204
205 /**
206 * Normalize a 64-bit number in the native order into the network byte order.
207 * @param num the 64-bit number in the native order.
208 * @return the number in the network byte order.
209 */
210 uint64_t hton64(uint64_t num);
211
212
213 /**
214 * Denormalize a 16-bit number in the network byte order into the native order.
215 * @param num the 16-bit number in the network byte order.
216 * @return the converted number in the native order.
217 */
218 uint16_t ntoh16(uint16_t num);
219
220
221 /**
222 * Denormalize a 32-bit number in the network byte order into the native order.
223 * @param num the 32-bit number in the network byte order.
224 * @return the converted number in the native order.
225 */
226 uint32_t ntoh32(uint32_t num);
227
228
229 /**
230 * Denormalize a 64-bit number in the network byte order into the native order.
231 * @param num the 64-bit number in the network byte order.
232 * @return the converted number in the native order.
233 */
234 uint64_t ntoh64(uint64_t num);
235
236
237 /**
238 * Write a number in fixed length format into a buffer.
239 * @param buf the destination buffer.
240 * @param num the number.
241 * @param width the width.
242 */
243 void writefixnum(void* buf, uint64_t num, size_t width);
244
245
246 /**
247 * Read a number in fixed length format from a buffer.
248 * @param buf the source buffer.
249 * @param width the width.
250 * @return the read number.
251 */
252 uint64_t readfixnum(const void* buf, size_t width);
253
254
255 /**
256 * Write a number in variable length format into a buffer.
257 * @param buf the destination buffer.
258 * @param num the number.
259 * @return the length of the written region.
260 */
261 size_t writevarnum(void* buf, uint64_t num);
262
263
264 /**
265 * Read a number in variable length format from a buffer.
266 * @param buf the source buffer.
267 * @param size the size of the source buffer.
268 * @param np the pointer to the variable into which the read number is assigned.
269 * @return the length of the read region, or 0 on failure.
270 */
271 size_t readvarnum(const void* buf, size_t size, uint64_t* np);
272
273
274 /**
275 * Check the size of variable length format of a number.
276 * @return the size of variable length format.
277 */
278 size_t sizevarnum(uint64_t num);
279
280
281 /**
282 * Get the hash value by MurMur hashing.
283 * @param buf the source buffer.
284 * @param size the size of the source buffer.
285 * @return the hash value.
286 */
287 uint64_t hashmurmur(const void* buf, size_t size);
288
289
290 /**
291 * Get the hash value by FNV hashing.
292 * @param buf the source buffer.
293 * @param size the size of the source buffer.
294 * @return the hash value.
295 */
296 uint64_t hashfnv(const void* buf, size_t size);
297
298
299 /**
300 * Get the hash value suitable for a file name.
301 * @param buf the source buffer.
302 * @param size the size of the source buffer.
303 * @param obuf the buffer into which the result hash string is written. It must be more than
304 * NUMBUFSIZ.
305 * @return the auxiliary hash value.
306 */
307 uint32_t hashpath(const void* buf, size_t size, char* obuf);
308
309
310 /**
311 * Get a prime number nearby a number.
312 * @param num a natural number.
313 * @return the result number.
314 */
315 uint64_t nearbyprime(uint64_t num);
316
317
318 /**
319 * Get the quiet Not-a-Number value.
320 * @return the quiet Not-a-Number value.
321 */
322 double nan();
323
324
325 /**
326 * Get the positive infinity value.
327 * @return the positive infinity value.
328 */
329 double inf();
330
331
332 /**
333 * Check a number is a Not-a-Number value.
334 * @return true if the number is a Not-a-Number value, or false if not.
335 */
336 bool chknan(double num);
337
338
339 /**
340 * Check a number is an infinity value.
341 * @return true if the number is an infinity value, or false if not.
342 */
343 bool chkinf(double num);
344
345
346 /**
347 * Append a formatted string at the end of a string.
348 * @param dest the destination string.
349 * @param format the printf-like format string. The conversion character `%' can be used with
350 * such flag characters as `s', `d', `o', `u', `x', `X', `c', `e', `E', `f', `g', `G', and `%'.
351 * @param ap used according to the format string.
352 */
353 void vstrprintf(std::string* dest, const char* format, va_list ap);
354
355
356 /**
357 * Append a formatted string at the end of a string.
358 * @param dest the destination string.
359 * @param format the printf-like format string. The conversion character `%' can be used with
360 * such flag characters as `s', `d', `o', `u', `x', `X', `c', `e', `E', `f', `g', `G', and `%'.
361 * @param ... used according to the format string.
362 */
363 void strprintf(std::string* dest, const char* format, ...);
364
365
366 /**
367 * Generate a formatted string.
368 * @param format the printf-like format string. The conversion character `%' can be used with
369 * such flag characters as `s', `d', `o', `u', `x', `X', `c', `e', `E', `f', `g', `G', and `%'.
370 * @param ... used according to the format string.
371 * @return the result string.
372 */
373 std::string strprintf(const char* format, ...);
374
375
376 /**
377 * Split a string with a delimiter.
378 * @param str the string.
379 * @param delim the delimiter.
380 * @param elems a vector object into which the result elements are pushed.
381 * @return the number of result elements.
382 */
383 size_t strsplit(const std::string& str, char delim, std::vector<std::string>* elems);
384
385
386 /**
387 * Split a string with delimiters.
388 * @param str the string.
389 * @param delims the delimiters.
390 * @param elems a vector object into which the result elements are pushed.
391 * @return the number of result elements.
392 */
393 size_t strsplit(const std::string& str, const std::string& delims,
394 std::vector<std::string>* elems);
395
396
397 /**
398 * Convert the letters of a string into upper case.
399 * @param str the string to convert.
400 * @return the string itself.
401 */
402 std::string* strtoupper(std::string* str);
403
404
405 /**
406 * Convert the letters of a string into lower case.
407 * @param str the string to convert.
408 * @return the string itself.
409 */
410 std::string* strtolower(std::string* str);
411
412
413 /**
414 * Check whether a string begins with a key.
415 * @param str the string.
416 * @param key the forward matching key string.
417 * @return true if the target string begins with the key, else, it is false.
418 */
419 bool strfwm(const std::string& str, const std::string& key);
420
421
422 /**
423 * Check whether a string ends with a key.
424 * @param str the string.
425 * @param key the backward matching key string.
426 * @return true if the target string ends with the key, else, it is false.
427 */
428 bool strbwm(const std::string& str, const std::string& key);
429
430
431 /**
432 * Cut space characters at head or tail of a string.
433 * @param str the string to convert.
434 * @return the string itself.
435 */
436 std::string* strtrim(std::string* str);
437
438
439 /**
440 * Convert a UTF-8 string into a UCS-4 array.
441 * @param src the source object.
442 * @param dest the destination object.
443 */
444 void strutftoucs(const std::string& src, std::vector<uint32_t>* dest);
445
446
447 /**
448 * Convert a UCS-4 array into a UTF-8 string.
449 * @param src the source object.
450 * @param dest the destination object.
451 */
452 void strucstoutf(const std::vector<uint32_t>& src, std::string* dest);
453
454
455 /**
456 * Serialize a string vector object into a string object.
457 * @param src the source object.
458 * @param dest the destination object.
459 */
460 void strvecdump(const std::vector<std::string>& src, std::string* dest);
461
462
463 /**
464 * Deserialize a string object into a string vector object.
465 * @param src the source object.
466 * @param dest the destination object.
467 */
468 void strvecload(const std::string& src, std::vector<std::string>* dest);
469
470
471 /**
472 * Serialize a string map object into a string object.
473 * @param src the source object.
474 * @param dest the destination object.
475 */
476 void strmapdump(const std::map<std::string, std::string>& src, std::string* dest);
477
478
479 /**
480 * Deserialize a string object into a string map object.
481 * @param src the source object.
482 * @param dest the destination object.
483 */
484 void strmapload(const std::string& src, std::map<std::string, std::string>* dest);
485
486
487 /**
488 * Encode a serial object by hexadecimal encoding.
489 * @param buf the pointer to the region.
490 * @param size the size of the region.
491 * @return the result string.
492 * @note Because the region of the return value is allocated with the new[] operator, it
493 * should be released with the delete[] operator when it is no longer in use.
494 */
495 char* hexencode(const void* buf, size_t size);
496
497
498 /**
499 * Decode a string encoded by hexadecimal encoding.
500 * @param str specifies the encoded string.
501 * @param sp the pointer to the variable into which the size of the region of the return value
502 * is assigned.
503 * @return the pointer to the region of the result.
504 * @note Because an additional zero code is appended at the end of the region of the return
505 * value, the return value can be treated as a character string. Because the region of the
506 * return value is allocated with the new[] operator, it should be released with the delete[]
507 * operator when it is no longer in use.
508 */
509 char* hexdecode(const char* str, size_t* sp);
510
511
512 /**
513 * Encode a serial object by URL encoding.
514 * @param buf the pointer to the region.
515 * @param size the size of the region.
516 * @return the result string.
517 * @note Because the region of the return value is allocated with the new[] operator, it
518 * should be released with the delete[] operator when it is no longer in use.
519 */
520 char* urlencode(const void* buf, size_t size);
521
522
523 /**
524 * Decode a string encoded by URL encoding.
525 * @param str specifies the encoded string.
526 * @param sp the pointer to the variable into which the size of the region of the return value
527 * is assigned.
528 * @return the pointer to the region of the result.
529 * @note Because an additional zero code is appended at the end of the region of the return
530 * value, the return value can be treated as a character string. Because the region of the
531 * return value is allocated with the new[] operator, it should be released with the delete[]
532 * operator when it is no longer in use.
533 */
534 char* urldecode(const char* str, size_t* sp);
535
536
537 /**
538 * Encode a serial object by Quoted-printable encoding.
539 * @param buf the pointer to the region.
540 * @param size the size of the region.
541 * @return the result string.
542 * @note Because the region of the return value is allocated with the new[] operator, it
543 * should be released with the delete[] operator when it is no longer in use.
544 */
545 char* quoteencode(const void* buf, size_t size);
546
547
548 /**
549 * Decode a string encoded by Quoted-printable encoding.
550 * @param str specifies the encoded string.
551 * @param sp the pointer to the variable into which the size of the region of the return value
552 * is assigned.
553 * @return the pointer to the region of the result.
554 * @note Because an additional zero code is appended at the end of the region of the return
555 * value, the return value can be treated as a character string. Because the region of the
556 * return value is allocated with the new[] operator, it should be released with the delete[]
557 * operator when it is no longer in use.
558 */
559 char* quotedecode(const char* str, size_t* sp);
560
561
562 /**
563 * Encode a serial object by Base64 encoding.
564 * @param buf the pointer to the region.
565 * @param size the size of the region.
566 * @return the result string.
567 * @note Because the region of the return value is allocated with the new[] operator, it
568 * should be released with the delete[] operator when it is no longer in use.
569 */
570 char* baseencode(const void* buf, size_t size);
571
572
573 /**
574 * Decode a string encoded by Base64 encoding.
575 * @param str specifies the encoded string.
576 * @param sp the pointer to the variable into which the size of the region of the return value
577 * is assigned.
578 * @return the pointer to the region of the result.
579 * @note Because an additional zero code is appended at the end of the region of the return
580 * value, the return value can be treated as a character string. Because the region of the
581 * return value is allocated with the new[] operator, it should be released with the delete[]
582 * operator when it is no longer in use.
583 */
584 char* basedecode(const char* str, size_t* sp);
585
586
587 /**
588 * Cipher or decipher a serial object with the Arcfour stream cipher.
589 * @param ptr the pointer to the region.
590 * @param size the size of the region.
591 * @param kbuf the pointer to the region of the cipher key.
592 * @param ksiz the size of the region of the cipher key.
593 * @param obuf the pointer to the region into which the result data is written. The size of the
594 * buffer should be equal to or more than the input region. The region can be the same as the
595 * source region.
596 */
597 void arccipher(const void* ptr, size_t size, const void* kbuf, size_t ksiz, void* obuf);
598
599
600 /**
601 * Duplicate a region on memory.
602 * @param ptr the source buffer.
603 * @param size the size of the source buffer.
604 * @note Because the region of the return value is allocated with the new[] operator, it
605 * should be released with the delete[] operator when it is no longer in use.
606 */
607 char* memdup(const char* ptr, size_t size);
608
609
610 /**
611 * Compare two regions by case insensitive evaluation.
612 * @param abuf a buffer.
613 * @param bbuf the other buffer.
614 * @param size the size of each buffer.
615 * @return positive if the former is big, negative if the latter is big, 0 if both are
616 * equivalent.
617 */
618 int32_t memicmp(const void* abuf, const void* bbuf, size_t size);
619
620
621 /**
622 * Find the first occurrence of a sub pattern.
623 * @param hbuf the target pattern buffer.
624 * @param hsiz the size of the target pattern buffer.
625 * @param nbuf the sub pattern buffer.
626 * @param nsiz the size of the sub pattern buffer.
627 * @return the pointer to the beginning of the sub pattern in the target pattern buffer, or NULL
628 * if the sub pattern is not found.
629 */
630 void* memmem(const void* hbuf, size_t hsiz, const void* nbuf, size_t nsiz);
631
632
633 /**
634 * Find the first occurrence of a sub pattern by case insensitive evaluation.
635 * @param hbuf the target pattern buffer.
636 * @param hsiz the size of the target pattern buffer.
637 * @param nbuf the sub pattern buffer.
638 * @param nsiz the size of the sub pattern buffer.
639 * @return the pointer to the beginning of the sub pattern in the target pattern buffer, or NULL
640 * if the sub pattern is not found.
641 */
642 void* memimem(const void* hbuf, size_t hsiz, const void* nbuf, size_t nsiz);
643
644
645 /**
646 * Calculate the levenshtein distance of two regions in bytes.
647 * @param abuf the pointer to the region of one buffer.
648 * @param asiz the size of the region of one buffer.
649 * @param bbuf the pointer to the region of the other buffer.
650 * @param bsiz the size of the region of the other buffer.
651 * @return the levenshtein distance of two regions.
652 */
653 size_t memdist(const void* abuf, size_t asiz, const void* bbuf, size_t bsiz);
654
655
656 /**
657 * Duplicate a string on memory.
658 * @param str the source string.
659 * @note Because the region of the return value is allocated with the new[] operator, it
660 * should be released with the delete[] operator when it is no longer in use.
661 */
662 char* strdup(const char* str);
663
664
665 /**
666 * Convert the letters of a string into upper case.
667 * @param str the string to convert.
668 * @return the string itself.
669 */
670 char* strtoupper(char* str);
671
672
673 /**
674 * Convert the letters of a string into lower case.
675 * @param str the string to convert.
676 * @return the string itself.
677 */
678 char* strtolower(char* str);
679
680
681 /**
682 * Cut space characters at head or tail of a string.
683 * @param str the string to convert.
684 * @return the string itself.
685 */
686 char* strtrim(char* str);
687
688
689 /**
690 * Squeeze space characters in a string and trim it.
691 * @param str the string to convert.
692 * @return the string itself.
693 */
694 char* strsqzspc(char* str);
695
696
697 /**
698 * Normalize space characters in a string and trim it.
699 * @param str the string to convert.
700 * @return the string itself.
701 */
702 char* strnrmspc(char* str);
703
704
705 /**
706 * Compare two strings by case insensitive evaluation.
707 * @param astr a string.
708 * @param bstr the other string.
709 * @return positive if the former is big, negative if the latter is big, 0 if both are
710 * equivalent.
711 */
712 int32_t stricmp(const char* astr, const char* bstr);
713
714
715 /**
716 * Find the first occurrence of a substring by case insensitive evaluation.
717 * @param hstr the target string.
718 * @param nstr the substring.
719 * @return the pointer to the beginning of the substring in the target string, or NULL if the
720 * substring is not found.
721 */
722 char* stristr(const char* hstr, const char* nstr);
723
724
725 /**
726 * Check whether a string begins with a key.
727 * @param str the string.
728 * @param key the forward matching key string.
729 * @return true if the target string begins with the key, else, it is false.
730 */
731 bool strfwm(const char* str, const char* key);
732
733
734 /**
735 * Check whether a string begins with a key by case insensitive evaluation.
736 * @param str the string.
737 * @param key the forward matching key string.
738 * @return true if the target string begins with the key, else, it is false.
739 */
740 bool strifwm(const char* str, const char* key);
741
742
743 /**
744 * Check whether a string ends with a key.
745 * @param str the string.
746 * @param key the backward matching key string.
747 * @return true if the target string ends with the key, else, it is false.
748 */
749 bool strbwm(const char* str, const char* key);
750
751
752 /**
753 * Check whether a string ends with a key by case insensitive evaluation.
754 * @param str the string.
755 * @param key the backward matching key string.
756 * @return true if the target string ends with the key, else, it is false.
757 */
758 bool stribwm(const char* str, const char* key);
759
760
761 /**
762 * Get the number of characters in a UTF-8 string.
763 * @param str the UTF-8 string.
764 * @return the number of characters in the string.
765 */
766 size_t strutflen(const char* str);
767
768
769 /**
770 * Convert a UTF-8 string into a UCS-4 array.
771 * @param src the source object.
772 * @param dest the destination object. It must have enough size.
773 * @param np the pointer to the variable into which the number of elements in the destination
774 * object is assigned.
775 */
776 void strutftoucs(const char* src, uint32_t* dest, size_t* np);
777
778
779 /**
780 * Convert a UTF-8 string into a UCS-4 array.
781 * @param src the source object which does not have to be trailed by zero code.
782 * @param slen the length of the source object.
783 * @param dest the destination object. It must have enough size.
784 * @param np the pointer to the variable into which the number of elements in the destination
785 * object is assigned.
786 */
787 void strutftoucs(const char* src, size_t slen, uint32_t* dest, size_t* np);
788
789
790 /**
791 * Convert a UCS-4 array into a UTF-8 string.
792 * @param src the source object.
793 * @param snum the number of elements in the source object.
794 * @param dest the destination object. It must have enough size.
795 * @return the size of the result string.
796 */
797 size_t strucstoutf(const uint32_t* src, size_t snum, char* dest);
798
799
800 /**
801 * Calculate the levenshtein distance of two UTF-8 strings.
802 * @param astr one UTF-8 string.
803 * @param bstr the other UTF-8 string.
804 * @return the levenshtein distance of two strings.
805 */
806 size_t strutfdist(const char* astr, const char* bstr);
807
808
809 /**
810 * Calculate the levenshtein distance of two UCS-4 arrays.
811 * @param aary one UCS-4 array.
812 * @param anum the number of elements of one array.
813 * @param bary the other UCS-4 array.
814 * @param bnum the number of elements of the other array.
815 * @return the levenshtein distance of two arrays.
816 */
817 size_t strucsdist(const uint32_t* aary, size_t anum, const uint32_t* bary, size_t bnum);
818
819
820 /**
821 * Allocate a region on memory.
822 * @param size the size of the region.
823 * @return the pointer to the allocated region.
824 */
825 void* xmalloc(size_t size);
826
827
828 /**
829 * Allocate a nullified region on memory.
830 * @param nmemb the number of elements.
831 * @param size the size of each element.
832 * @return the pointer to the allocated region.
833 */
834 void* xcalloc(size_t nmemb, size_t size);
835
836
837 /**
838 * Re-allocate a region on memory.
839 * @param ptr the pointer to the region.
840 * @param size the size of the region.
841 * @return the pointer to the re-allocated region.
842 */
843 void* xrealloc(void* ptr, size_t size);
844
845
846 /**
847 * Free a region on memory.
848 * @param ptr the pointer to the region.
849 */
850 void xfree(void* ptr);
851
852
853 /**
854 * Allocate a nullified region on mapped memory.
855 * @param size the size of the region.
856 * @return the pointer to the allocated region. It should be released with the mapfree call.
857 */
858 void* mapalloc(size_t size);
859
860
861 /**
862 * Free a region on mapped memory.
863 * @param ptr the pointer to the allocated region.
864 */
865 void mapfree(void* ptr);
866
867
868 /**
869 * Get the time of day in seconds.
870 * @return the time of day in seconds. The accuracy is in microseconds.
871 */
872 double time();
873
874
875 /**
876 * Get the process ID.
877 * @return the process ID.
878 */
879 int64_t getpid();
880
881
882 /**
883 * Get the value of an environment variable.
884 * @return the value of the environment variable, or NULL on failure.
885 */
886 const char* getenv(const char* name);
887
888
889 /**
890 * Get system information of the environment.
891 * @param strmap a string map to contain the result.
892 */
893 void getsysinfo(std::map<std::string, std::string>* strmap);
894
895
896 /**
897 * Set the standard streams into the binary mode.
898 */
899 void setstdiobin();
900
901
902 /**
903 * Dummy test driver.
904 * @return always true.
905 */
906 bool _dummytest();
907
908
909 /**
910 * Convert a decimal string to an integer.
911 */
atoi(const char * str)912 inline int64_t atoi(const char* str) {
913 _assert_(str);
914 while (*str > '\0' && *str <= ' ') {
915 str++;
916 }
917 int32_t sign = 1;
918 int64_t num = 0;
919 if (*str == '-') {
920 str++;
921 sign = -1;
922 } else if (*str == '+') {
923 str++;
924 }
925 while (*str != '\0') {
926 if (*str < '0' || *str > '9') break;
927 num = num * 10 + *str - '0';
928 str++;
929 }
930 return num * sign;
931 }
932
933
934 /**
935 * Convert a decimal string with a metric prefix to an integer.
936 */
atoix(const char * str)937 inline int64_t atoix(const char* str) {
938 _assert_(str);
939 while (*str > '\0' && *str <= ' ') {
940 str++;
941 }
942 int32_t sign = 1;
943 if (*str == '-') {
944 str++;
945 sign = -1;
946 } else if (*str == '+') {
947 str++;
948 }
949 long double num = 0;
950 while (*str != '\0') {
951 if (*str < '0' || *str > '9') break;
952 num = num * 10 + *str - '0';
953 str++;
954 }
955 if (*str == '.') {
956 str++;
957 long double base = 10;
958 while (*str != '\0') {
959 if (*str < '0' || *str > '9') break;
960 num += (*str - '0') / base;
961 str++;
962 base *= 10;
963 }
964 }
965 num *= sign;
966 while (*str > '\0' && *str <= ' ') {
967 str++;
968 }
969 if (*str == 'k' || *str == 'K') {
970 num *= 1LL << 10;
971 } else if (*str == 'm' || *str == 'M') {
972 num *= 1LL << 20;
973 } else if (*str == 'g' || *str == 'G') {
974 num *= 1LL << 30;
975 } else if (*str == 't' || *str == 'T') {
976 num *= 1LL << 40;
977 } else if (*str == 'p' || *str == 'P') {
978 num *= 1LL << 50;
979 } else if (*str == 'e' || *str == 'E') {
980 num *= 1LL << 60;
981 }
982 if (num > INT64MAX) return INT64MAX;
983 if (num < INT64MIN) return INT64MIN;
984 return (int64_t)num;
985 }
986
987
988 /**
989 * Convert a hexadecimal string to an integer.
990 */
atoih(const char * str)991 inline int64_t atoih(const char* str) {
992 _assert_(str);
993 while (*str > '\0' && *str <= ' ') {
994 str++;
995 }
996 if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
997 str += 2;
998 }
999 int64_t num = 0;
1000 while (true) {
1001 if (*str >= '0' && *str <= '9') {
1002 num = num * 0x10 + *str - '0';
1003 } else if (*str >= 'a' && *str <= 'f') {
1004 num = num * 0x10 + *str - 'a' + 10;
1005 } else if (*str >= 'A' && *str <= 'F') {
1006 num = num * 0x10 + *str - 'A' + 10;
1007 } else {
1008 break;
1009 }
1010 str++;
1011 }
1012 return num;
1013 }
1014
1015
1016 /**
1017 * Convert a decimal byte array to an integer.
1018 */
atoin(const char * ptr,size_t size)1019 inline int64_t atoin(const char* ptr, size_t size) {
1020 _assert_(ptr && size <= MEMMAXSIZ);
1021 while (size > 0 && *ptr >= '\0' && *ptr <= ' ') {
1022 ptr++;
1023 size--;
1024 }
1025 int32_t sign = 1;
1026 int64_t num = 0;
1027 if (size > 0) {
1028 if (*ptr == '-') {
1029 ptr++;
1030 size--;
1031 sign = -1;
1032 } else if (*ptr == '+') {
1033 ptr++;
1034 size--;
1035 }
1036 }
1037 while (size > 0) {
1038 if (*ptr < '0' || *ptr > '9') break;
1039 num = num * 10 + *ptr - '0';
1040 ptr++;
1041 size--;
1042 }
1043 return num * sign;
1044 }
1045
1046
1047 /**
1048 * Convert a decimal string to a real number.
1049 */
atof(const char * str)1050 inline double atof(const char* str) {
1051 _assert_(str);
1052 while (*str > '\0' && *str <= ' ') {
1053 str++;
1054 }
1055 int32_t sign = 1;
1056 if (*str == '-') {
1057 str++;
1058 sign = -1;
1059 } else if (*str == '+') {
1060 str++;
1061 }
1062 if ((str[0] == 'i' || str[0] == 'I') && (str[1] == 'n' || str[1] == 'N') &&
1063 (str[2] == 'f' || str[2] == 'F')) return HUGE_VAL * sign;
1064 if ((str[0] == 'n' || str[0] == 'N') && (str[1] == 'a' || str[1] == 'A') &&
1065 (str[2] == 'n' || str[2] == 'N')) return nan();
1066 long double num = 0;
1067 int32_t col = 0;
1068 while (*str != '\0') {
1069 if (*str < '0' || *str > '9') break;
1070 num = num * 10 + *str - '0';
1071 str++;
1072 if (num > 0) col++;
1073 }
1074 if (*str == '.') {
1075 str++;
1076 long double fract = 0.0;
1077 long double base = 10;
1078 while (col < 16 && *str != '\0') {
1079 if (*str < '0' || *str > '9') break;
1080 fract += (*str - '0') / base;
1081 str++;
1082 col++;
1083 base *= 10;
1084 }
1085 num += fract;
1086 }
1087 if (*str == 'e' || *str == 'E') {
1088 str++;
1089 num *= std::pow((long double)10, (long double)atoi(str));
1090 }
1091 return num * sign;
1092 }
1093
1094
1095 /**
1096 * Convert a decimal byte array to a real number.
1097 */
atofn(const char * ptr,size_t size)1098 inline double atofn(const char* ptr, size_t size) {
1099 _assert_(ptr && size <= MEMMAXSIZ);
1100 while (size > 0 && *ptr >= '\0' && *ptr <= ' ') {
1101 ptr++;
1102 size--;
1103 }
1104 int32_t sign = 1;
1105 if (size > 0) {
1106 if (*ptr == '-') {
1107 ptr++;
1108 size--;
1109 sign = -1;
1110 } else if (*ptr == '+') {
1111 ptr++;
1112 size--;
1113 }
1114 }
1115 if (size > 2) {
1116 if ((ptr[0] == 'i' || ptr[0] == 'I') && (ptr[1] == 'n' || ptr[1] == 'N') &&
1117 (ptr[2] == 'f' || ptr[2] == 'F')) return HUGE_VAL * sign;
1118 if ((ptr[0] == 'n' || ptr[0] == 'N') && (ptr[1] == 'a' || ptr[1] == 'A') &&
1119 (ptr[2] == 'n' || ptr[2] == 'N')) return nan();
1120 }
1121 long double num = 0;
1122 int32_t col = 0;
1123 while (size > 0) {
1124 if (*ptr < '0' || *ptr > '9') break;
1125 num = num * 10 + *ptr - '0';
1126 ptr++;
1127 size--;
1128 if (num > 0) col++;
1129 }
1130 if (size > 0 && *ptr == '.') {
1131 ptr++;
1132 size--;
1133 long double fract = 0.0;
1134 long double base = 10;
1135 while (col < 16 && size > 0) {
1136 if (*ptr < '0' || *ptr > '9') break;
1137 fract += (*ptr - '0') / base;
1138 ptr++;
1139 size--;
1140 col++;
1141 base *= 10;
1142 }
1143 num += fract;
1144 }
1145 if (size > 0 && (*ptr == 'e' || *ptr == 'E')) {
1146 ptr++;
1147 size--;
1148 num *= std::pow((long double)10, (long double)atoin(ptr, size));
1149 }
1150 return num * sign;
1151 }
1152
1153
1154
1155 /**
1156 * Normalize a 16-bit number in the native order into the network byte order.
1157 */
hton16(uint16_t num)1158 inline uint16_t hton16(uint16_t num) {
1159 _assert_(true);
1160 if (BIGEND) return num;
1161 return ((num & 0x00ffU) << 8) | ((num & 0xff00U) >> 8);
1162 }
1163
1164
1165 /**
1166 * Normalize a 32-bit number in the native order into the network byte order.
1167 */
hton32(uint32_t num)1168 inline uint32_t hton32(uint32_t num) {
1169 _assert_(true);
1170 if (BIGEND) return num;
1171 return ((num & 0x000000ffUL) << 24) | ((num & 0x0000ff00UL) << 8) | \
1172 ((num & 0x00ff0000UL) >> 8) | ((num & 0xff000000UL) >> 24);
1173 }
1174
1175
1176 /**
1177 * Normalize a 64-bit number in the native order into the network byte order.
1178 */
hton64(uint64_t num)1179 inline uint64_t hton64(uint64_t num) {
1180 _assert_(true);
1181 if (BIGEND) return num;
1182 return ((num & 0x00000000000000ffULL) << 56) | ((num & 0x000000000000ff00ULL) << 40) |
1183 ((num & 0x0000000000ff0000ULL) << 24) | ((num & 0x00000000ff000000ULL) << 8) |
1184 ((num & 0x000000ff00000000ULL) >> 8) | ((num & 0x0000ff0000000000ULL) >> 24) |
1185 ((num & 0x00ff000000000000ULL) >> 40) | ((num & 0xff00000000000000ULL) >> 56);
1186 }
1187
1188
1189 /**
1190 * Denormalize a 16-bit number in the network byte order into the native order.
1191 */
ntoh16(uint16_t num)1192 inline uint16_t ntoh16(uint16_t num) {
1193 _assert_(true);
1194 return hton16(num);
1195 }
1196
1197
1198 /**
1199 * Denormalize a 32-bit number in the network byte order into the native order.
1200 */
ntoh32(uint32_t num)1201 inline uint32_t ntoh32(uint32_t num) {
1202 _assert_(true);
1203 return hton32(num);
1204 }
1205
1206
1207 /**
1208 * Denormalize a 64-bit number in the network byte order into the native order.
1209 */
ntoh64(uint64_t num)1210 inline uint64_t ntoh64(uint64_t num) {
1211 _assert_(true);
1212 return hton64(num);
1213 }
1214
1215
1216 /**
1217 * Write a number in fixed length format into a buffer.
1218 */
writefixnum(void * buf,uint64_t num,size_t width)1219 inline void writefixnum(void* buf, uint64_t num, size_t width) {
1220 _assert_(buf && width <= sizeof(int64_t));
1221 num = hton64(num);
1222 std::memcpy(buf, (const char*)&num + sizeof(num) - width, width);
1223 }
1224
1225
1226 /**
1227 * Read a number in fixed length format from a buffer.
1228 */
readfixnum(const void * buf,size_t width)1229 inline uint64_t readfixnum(const void* buf, size_t width) {
1230 _assert_(buf && width <= sizeof(int64_t));
1231 uint64_t num = 0;
1232 std::memcpy(&num, buf, width);
1233 return ntoh64(num) >> ((sizeof(num) - width) * 8);
1234 }
1235
1236
1237 /**
1238 * Write a number in variable length format into a buffer.
1239 */
writevarnum(void * buf,uint64_t num)1240 inline size_t writevarnum(void* buf, uint64_t num) {
1241 _assert_(buf);
1242 unsigned char* wp = (unsigned char*)buf;
1243 if (num < (1ULL << 7)) {
1244 *(wp++) = num;
1245 } else if (num < (1ULL << 14)) {
1246 *(wp++) = (num >> 7) | 0x80;
1247 *(wp++) = num & 0x7f;
1248 } else if (num < (1ULL << 21)) {
1249 *(wp++) = (num >> 14) | 0x80;
1250 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1251 *(wp++) = num & 0x7f;
1252 } else if (num < (1ULL << 28)) {
1253 *(wp++) = (num >> 21) | 0x80;
1254 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1255 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1256 *(wp++) = num & 0x7f;
1257 } else if (num < (1ULL << 35)) {
1258 *(wp++) = (num >> 28) | 0x80;
1259 *(wp++) = ((num >> 21) & 0x7f) | 0x80;
1260 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1261 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1262 *(wp++) = num & 0x7f;
1263 } else if (num < (1ULL << 42)) {
1264 *(wp++) = (num >> 35) | 0x80;
1265 *(wp++) = ((num >> 28) & 0x7f) | 0x80;
1266 *(wp++) = ((num >> 21) & 0x7f) | 0x80;
1267 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1268 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1269 *(wp++) = num & 0x7f;
1270 } else if (num < (1ULL << 49)) {
1271 *(wp++) = (num >> 42) | 0x80;
1272 *(wp++) = ((num >> 35) & 0x7f) | 0x80;
1273 *(wp++) = ((num >> 28) & 0x7f) | 0x80;
1274 *(wp++) = ((num >> 21) & 0x7f) | 0x80;
1275 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1276 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1277 *(wp++) = num & 0x7f;
1278 } else if (num < (1ULL << 56)) {
1279 *(wp++) = (num >> 49) | 0x80;
1280 *(wp++) = ((num >> 42) & 0x7f) | 0x80;
1281 *(wp++) = ((num >> 35) & 0x7f) | 0x80;
1282 *(wp++) = ((num >> 28) & 0x7f) | 0x80;
1283 *(wp++) = ((num >> 21) & 0x7f) | 0x80;
1284 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1285 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1286 *(wp++) = num & 0x7f;
1287 } else if (num < (1ULL << 63)) {
1288 *(wp++) = (num >> 56) | 0x80;
1289 *(wp++) = ((num >> 49) & 0x7f) | 0x80;
1290 *(wp++) = ((num >> 42) & 0x7f) | 0x80;
1291 *(wp++) = ((num >> 35) & 0x7f) | 0x80;
1292 *(wp++) = ((num >> 28) & 0x7f) | 0x80;
1293 *(wp++) = ((num >> 21) & 0x7f) | 0x80;
1294 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1295 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1296 *(wp++) = num & 0x7f;
1297 } else {
1298 *(wp++) = (num >> 63) | 0x80;
1299 *(wp++) = ((num >> 56) & 0x7f) | 0x80;
1300 *(wp++) = ((num >> 49) & 0x7f) | 0x80;
1301 *(wp++) = ((num >> 42) & 0x7f) | 0x80;
1302 *(wp++) = ((num >> 35) & 0x7f) | 0x80;
1303 *(wp++) = ((num >> 28) & 0x7f) | 0x80;
1304 *(wp++) = ((num >> 21) & 0x7f) | 0x80;
1305 *(wp++) = ((num >> 14) & 0x7f) | 0x80;
1306 *(wp++) = ((num >> 7) & 0x7f) | 0x80;
1307 *(wp++) = num & 0x7f;
1308 }
1309 return wp - (unsigned char*)buf;
1310 }
1311
1312
1313 /**
1314 * Read a number in variable length format from a buffer.
1315 */
readvarnum(const void * buf,size_t size,uint64_t * np)1316 inline size_t readvarnum(const void* buf, size_t size, uint64_t* np) {
1317 _assert_(buf && size <= MEMMAXSIZ && np);
1318 const unsigned char* rp = (const unsigned char*)buf;
1319 const unsigned char* ep = rp + size;
1320 uint64_t num = 0;
1321 uint32_t c;
1322 do {
1323 if (rp >= ep) {
1324 *np = 0;
1325 return 0;
1326 }
1327 c = *rp;
1328 num = (num << 7) + (c & 0x7f);
1329 rp++;
1330 } while (c >= 0x80);
1331 *np = num;
1332 return rp - (const unsigned char*)buf;
1333 }
1334
1335
1336 /**
1337 * Check the size of variable length format of a number.
1338 */
sizevarnum(uint64_t num)1339 inline size_t sizevarnum(uint64_t num) {
1340 _assert_(true);
1341 if (num < (1ULL << 7)) return 1;
1342 if (num < (1ULL << 14)) return 2;
1343 if (num < (1ULL << 21)) return 3;
1344 if (num < (1ULL << 28)) return 4;
1345 if (num < (1ULL << 35)) return 5;
1346 if (num < (1ULL << 42)) return 6;
1347 if (num < (1ULL << 49)) return 7;
1348 if (num < (1ULL << 56)) return 8;
1349 if (num < (1ULL << 63)) return 9;
1350 return 10;
1351 }
1352
1353
1354 /**
1355 * Get the hash value by MurMur hashing.
1356 */
hashmurmur(const void * buf,size_t size)1357 inline uint64_t hashmurmur(const void* buf, size_t size) {
1358 _assert_(buf && size <= MEMMAXSIZ);
1359 const uint64_t mul = 0xc6a4a7935bd1e995ULL;
1360 const int32_t rtt = 47;
1361 uint64_t hash = 19780211ULL ^ (size * mul);
1362 const unsigned char* rp = (const unsigned char*)buf;
1363 while (size >= sizeof(uint64_t)) {
1364 uint64_t num = ((uint64_t)rp[0] << 0) | ((uint64_t)rp[1] << 8) |
1365 ((uint64_t)rp[2] << 16) | ((uint64_t)rp[3] << 24) |
1366 ((uint64_t)rp[4] << 32) | ((uint64_t)rp[5] << 40) |
1367 ((uint64_t)rp[6] << 48) | ((uint64_t)rp[7] << 56);
1368 num *= mul;
1369 num ^= num >> rtt;
1370 num *= mul;
1371 hash *= mul;
1372 hash ^= num;
1373 rp += sizeof(uint64_t);
1374 size -= sizeof(uint64_t);
1375 }
1376 switch (size) {
1377 case 7: hash ^= (uint64_t)rp[6] << 48; // fall through
1378 case 6: hash ^= (uint64_t)rp[5] << 40; // fall through
1379 case 5: hash ^= (uint64_t)rp[4] << 32; // fall through
1380 case 4: hash ^= (uint64_t)rp[3] << 24; // fall through
1381 case 3: hash ^= (uint64_t)rp[2] << 16; // fall through
1382 case 2: hash ^= (uint64_t)rp[1] << 8; // fall through
1383 case 1: hash ^= (uint64_t)rp[0]; hash *= mul; // fall through
1384 };
1385 hash ^= hash >> rtt;
1386 hash *= mul;
1387 hash ^= hash >> rtt;
1388 return hash;
1389 }
1390
1391
1392 /**
1393 * Get the hash value by FNV hashing.
1394 */
hashfnv(const void * buf,size_t size)1395 inline uint64_t hashfnv(const void* buf, size_t size) {
1396 _assert_(buf && size <= MEMMAXSIZ);
1397 uint64_t hash = 14695981039346656037ULL;
1398 const unsigned char* rp = (unsigned char*)buf;
1399 const unsigned char* ep = rp + size;
1400 while (rp < ep) {
1401 hash = (hash ^ *(rp++)) * 109951162811ULL;
1402 }
1403 return hash;
1404 }
1405
1406
1407 /**
1408 * Get the hash value suitable for a file name.
1409 */
hashpath(const void * buf,size_t size,char * obuf)1410 inline uint32_t hashpath(const void* buf, size_t size, char* obuf) {
1411 _assert_(buf && size <= MEMMAXSIZ && obuf);
1412 const unsigned char* rp = (const unsigned char*)buf;
1413 uint32_t rv;
1414 char* wp = obuf;
1415 if (size <= 10) {
1416 if (size > 0) {
1417 const unsigned char* ep = rp + size;
1418 while (rp < ep) {
1419 int32_t num = *rp >> 4;
1420 if (num < 10) {
1421 *(wp++) = '0' + num;
1422 } else {
1423 *(wp++) = 'a' + num - 10;
1424 }
1425 num = *rp & 0x0f;
1426 if (num < 10) {
1427 *(wp++) = '0' + num;
1428 } else {
1429 *(wp++) = 'a' + num - 10;
1430 }
1431 rp++;
1432 }
1433 } else {
1434 *(wp++) = '0';
1435 }
1436 uint64_t hash = hashmurmur(buf, size);
1437 rv = (((hash & 0xffff000000000000ULL) >> 48) | ((hash & 0x0000ffff00000000ULL) >> 16)) ^
1438 (((hash & 0x000000000000ffffULL) << 16) | ((hash & 0x00000000ffff0000ULL) >> 16));
1439 } else {
1440 *(wp++) = 'f' + 1 + (size & 0x0f);
1441 for (int32_t i = 0; i <= 6; i += 3) {
1442 uint32_t num = (rp[i] ^ rp[i+1] ^ rp[i+2] ^
1443 rp[size-i-1] ^ rp[size-i-2] ^ rp[size-i-3]) % 36;
1444 if (num < 10) {
1445 *(wp++) = '0' + num;
1446 } else {
1447 *(wp++) = 'a' + num - 10;
1448 }
1449 }
1450 uint64_t hash = hashmurmur(buf, size);
1451 rv = (((hash & 0xffff000000000000ULL) >> 48) | ((hash & 0x0000ffff00000000ULL) >> 16)) ^
1452 (((hash & 0x000000000000ffffULL) << 16) | ((hash & 0x00000000ffff0000ULL) >> 16));
1453 uint64_t inc = hashfnv(buf, size);
1454 inc = (((inc & 0xffff000000000000ULL) >> 48) | ((inc & 0x0000ffff00000000ULL) >> 16)) ^
1455 (((inc & 0x000000000000ffffULL) << 16) | ((inc & 0x00000000ffff0000ULL) >> 16));
1456 for (size_t i = 0; i < sizeof(hash); i++) {
1457 uint32_t least = hash >> ((sizeof(hash) - 1) * 8);
1458 uint64_t num = least >> 4;
1459 if (inc & 0x01) num += 0x10;
1460 inc = inc >> 1;
1461 if (num < 10) {
1462 *(wp++) = '0' + num;
1463 } else {
1464 *(wp++) = 'a' + num - 10;
1465 }
1466 num = least & 0x0f;
1467 if (inc & 0x01) num += 0x10;
1468 inc = inc >> 1;
1469 if (num < 10) {
1470 *(wp++) = '0' + num;
1471 } else {
1472 *(wp++) = 'a' + num - 10;
1473 }
1474 hash = hash << 8;
1475 }
1476 }
1477 *wp = '\0';
1478 return rv;
1479 }
1480
1481
1482 /**
1483 * Get a prime number nearby a number.
1484 */
nearbyprime(uint64_t num)1485 inline uint64_t nearbyprime(uint64_t num) {
1486 _assert_(true);
1487 static uint64_t table[] = {
1488 2ULL, 3ULL, 5ULL, 7ULL, 11ULL, 13ULL, 17ULL, 19ULL, 23ULL, 29ULL, 31ULL, 37ULL, 41ULL,
1489 43ULL, 47ULL, 53ULL, 59ULL, 61ULL, 67ULL, 71ULL, 79ULL, 97ULL, 107ULL, 131ULL, 157ULL,
1490 181ULL, 223ULL, 257ULL, 307ULL, 367ULL, 431ULL, 521ULL, 613ULL, 727ULL, 863ULL, 1031ULL,
1491 1217ULL, 1451ULL, 1723ULL, 2053ULL, 2437ULL, 2897ULL, 3449ULL, 4099ULL, 4871ULL, 5801ULL,
1492 6899ULL, 8209ULL, 9743ULL, 11587ULL, 13781ULL, 16411ULL, 19483ULL, 23173ULL, 27581ULL,
1493 32771ULL, 38971ULL, 46349ULL, 55109ULL, 65537ULL, 77951ULL, 92681ULL, 110221ULL, 131101ULL,
1494 155887ULL, 185363ULL, 220447ULL, 262147ULL, 311743ULL, 370759ULL, 440893ULL, 524309ULL,
1495 623521ULL, 741457ULL, 881743ULL, 1048583ULL, 1246997ULL, 1482919ULL, 1763491ULL,
1496 2097169ULL, 2493949ULL, 2965847ULL, 3526987ULL, 4194319ULL, 4987901ULL, 5931641ULL,
1497 7053971ULL, 8388617ULL, 9975803ULL, 11863289ULL, 14107921ULL, 16777259ULL, 19951597ULL,
1498 23726569ULL, 28215809ULL, 33554467ULL, 39903197ULL, 47453149ULL, 56431657ULL,
1499 67108879ULL, 79806341ULL, 94906297ULL, 112863217ULL, 134217757ULL, 159612679ULL,
1500 189812533ULL, 225726419ULL, 268435459ULL, 319225391ULL, 379625083ULL, 451452839ULL,
1501 536870923ULL, 638450719ULL, 759250133ULL, 902905657ULL, 1073741827ULL, 1276901429ULL,
1502 1518500279ULL, 1805811341ULL, 2147483659ULL, 2553802871ULL, 3037000507ULL, 3611622607ULL,
1503 4294967311ULL, 5107605691ULL, 6074001001ULL, 7223245229ULL, 8589934609ULL, 10215211387ULL,
1504 12148002047ULL, 14446490449ULL, 17179869209ULL, 20430422699ULL, 24296004011ULL,
1505 28892980877ULL, 34359738421ULL, 40860845437ULL, 48592008053ULL, 57785961671ULL,
1506 68719476767ULL, 81721690807ULL, 97184016049ULL, 115571923303ULL, 137438953481ULL,
1507 163443381347ULL, 194368032011ULL, 231143846587ULL, 274877906951ULL, 326886762733ULL,
1508 388736063999ULL, 462287693167ULL, 549755813911ULL, 653773525393ULL, 777472128049ULL,
1509 924575386373ULL, 1099511627791ULL, 1307547050819ULL, 1554944255989ULL, 1849150772699ULL,
1510 2199023255579ULL, 2615094101561ULL, 3109888512037ULL, 3698301545321ULL,
1511 4398046511119ULL, 5230188203153ULL, 6219777023959ULL, 7396603090651ULL,
1512 8796093022237ULL, 10460376406273ULL, 12439554047911ULL, 14793206181251ULL,
1513 17592186044423ULL, 20920752812471ULL, 24879108095833ULL, 29586412362491ULL,
1514 35184372088891ULL, 41841505624973ULL, 49758216191633ULL, 59172824724919ULL,
1515 70368744177679ULL, 83683011249917ULL, 99516432383281ULL, 118345649449813ULL,
1516 140737488355333ULL, 167366022499847ULL, 199032864766447ULL, 236691298899683ULL,
1517 281474976710677ULL, 334732044999557ULL, 398065729532981ULL, 473382597799229ULL,
1518 562949953421381ULL, 669464089999087ULL, 796131459065743ULL, 946765195598473ULL,
1519 1125899906842679ULL, 1338928179998197ULL, 1592262918131449ULL, 1893530391196921ULL,
1520 2251799813685269ULL, 2677856359996339ULL, 3184525836262943ULL, 3787060782393821ULL,
1521 4503599627370517ULL, 5355712719992603ULL, 6369051672525833ULL, 7574121564787633ULL
1522 };
1523 static const size_t tnum = sizeof(table) / sizeof(table[0]);
1524 uint64_t* ub = std::lower_bound(table, table + tnum, num);
1525 return ub == (uint64_t*)table + tnum ? num : *ub;
1526 }
1527
1528
1529 /**
1530 * Get the quiet Not-a-Number value.
1531 */
nan()1532 inline double nan() {
1533 _assert_(true);
1534 return std::numeric_limits<double>::quiet_NaN();
1535 }
1536
1537
1538 /**
1539 * Get the positive infinity value.
1540 */
inf()1541 inline double inf() {
1542 _assert_(true);
1543 return std::numeric_limits<double>::infinity();
1544 }
1545
1546
1547 /**
1548 * Check a number is a Not-a-Number value.
1549 */
chknan(double num)1550 inline bool chknan(double num) {
1551 _assert_(true);
1552 return num != num;
1553 }
1554
1555
1556 /**
1557 * Check a number is an infinity value.
1558 */
chkinf(double num)1559 inline bool chkinf(double num) {
1560 _assert_(true);
1561 return num == inf() || num == -inf();
1562 }
1563
1564
1565 /**
1566 * Append a formatted string at the end of a string.
1567 */
vstrprintf(std::string * dest,const char * format,va_list ap)1568 inline void vstrprintf(std::string* dest, const char* format, va_list ap) {
1569 _assert_(dest && format);
1570 while (*format != '\0') {
1571 if (*format == '%') {
1572 char cbuf[NUMBUFSIZ];
1573 cbuf[0] = '%';
1574 size_t cbsiz = 1;
1575 int32_t lnum = 0;
1576 format++;
1577 while (std::strchr("0123456789 .+-hlLz", *format) && *format != '\0' &&
1578 cbsiz < NUMBUFSIZ - 1) {
1579 if (*format == 'l' || *format == 'L') lnum++;
1580 cbuf[cbsiz++] = *(format++);
1581 }
1582 cbuf[cbsiz++] = *format;
1583 cbuf[cbsiz] = '\0';
1584 switch (*format) {
1585 case 's': {
1586 const char* tmp = va_arg(ap, const char*);
1587 if (tmp) {
1588 dest->append(tmp);
1589 } else {
1590 dest->append("(null)");
1591 }
1592 break;
1593 }
1594 case 'd': {
1595 char tbuf[NUMBUFSIZ*4];
1596 size_t tsiz;
1597 if (lnum >= 2) {
1598 tsiz = std::sprintf(tbuf, cbuf, va_arg(ap, long long));
1599 } else if (lnum >= 1) {
1600 tsiz = std::sprintf(tbuf, cbuf, va_arg(ap, long));
1601 } else {
1602 tsiz = std::sprintf(tbuf, cbuf, va_arg(ap, int));
1603 }
1604 dest->append(tbuf, tsiz);
1605 break;
1606 }
1607 case 'o': case 'u': case 'x': case 'X': case 'c': {
1608 char tbuf[NUMBUFSIZ*4];
1609 size_t tsiz;
1610 if (lnum >= 2) {
1611 tsiz = std::sprintf(tbuf, cbuf, va_arg(ap, unsigned long long));
1612 } else if (lnum >= 1) {
1613 tsiz = std::sprintf(tbuf, cbuf, va_arg(ap, unsigned long));
1614 } else {
1615 tsiz = std::sprintf(tbuf, cbuf, va_arg(ap, unsigned int));
1616 }
1617 dest->append(tbuf, tsiz);
1618 break;
1619 }
1620 case 'e': case 'E': case 'f': case 'g': case 'G': {
1621 char tbuf[NUMBUFSIZ*4];
1622 size_t tsiz;
1623 if (lnum >= 1) {
1624 tsiz = std::snprintf(tbuf, sizeof(tbuf), cbuf, va_arg(ap, long double));
1625 } else {
1626 tsiz = std::snprintf(tbuf, sizeof(tbuf), cbuf, va_arg(ap, double));
1627 }
1628 if (tsiz > sizeof(tbuf)) {
1629 tbuf[sizeof(tbuf)-1] = '*';
1630 tsiz = sizeof(tbuf);
1631 }
1632 dest->append(tbuf, tsiz);
1633 break;
1634 }
1635 case 'p': {
1636 char tbuf[NUMBUFSIZ*4];
1637 size_t tsiz = std::sprintf(tbuf, "%p", va_arg(ap, void*));
1638 dest->append(tbuf, tsiz);
1639 break;
1640 }
1641 case '%': {
1642 dest->append("%", 1);
1643 break;
1644 }
1645 }
1646 } else {
1647 dest->append(format, 1);
1648 }
1649 format++;
1650 }
1651 }
1652
1653
1654 /**
1655 * Append a formatted string at the end of a string.
1656 */
strprintf(std::string * dest,const char * format,...)1657 inline void strprintf(std::string* dest, const char* format, ...) {
1658 _assert_(dest && format);
1659 va_list ap;
1660 va_start(ap, format);
1661 vstrprintf(dest, format, ap);
1662 va_end(ap);
1663 }
1664
1665
1666 /**
1667 * Generate a formatted string on memory.
1668 */
strprintf(const char * format,...)1669 inline std::string strprintf(const char* format, ...) {
1670 _assert_(format);
1671 std::string str;
1672 va_list ap;
1673 va_start(ap, format);
1674 vstrprintf(&str, format, ap);
1675 va_end(ap);
1676 return str;
1677 }
1678
1679
1680 /**
1681 * Split a string with a delimiter
1682 */
strsplit(const std::string & str,char delim,std::vector<std::string> * elems)1683 inline size_t strsplit(const std::string& str, char delim, std::vector<std::string>* elems) {
1684 _assert_(elems);
1685 elems->clear();
1686 std::string::const_iterator it = str.begin();
1687 std::string::const_iterator pv = it;
1688 while (it != str.end()) {
1689 if (*it == delim) {
1690 std::string col(pv, it);
1691 elems->push_back(col);
1692 pv = it + 1;
1693 }
1694 ++it;
1695 }
1696 std::string col(pv, it);
1697 elems->push_back(col);
1698 return elems->size();
1699 }
1700
1701
1702 /**
1703 * Split a string with delimiters.
1704 */
strsplit(const std::string & str,const std::string & delims,std::vector<std::string> * elems)1705 inline size_t strsplit(const std::string& str, const std::string& delims,
1706 std::vector<std::string>* elems) {
1707 _assert_(elems);
1708 elems->clear();
1709 std::string::const_iterator it = str.begin();
1710 std::string::const_iterator pv = it;
1711 while (it != str.end()) {
1712 while (delims.find(*it, 0) != std::string::npos) {
1713 std::string col(pv, it);
1714 elems->push_back(col);
1715 pv = it + 1;
1716 break;
1717 }
1718 ++it;
1719 }
1720 std::string col(pv, it);
1721 elems->push_back(col);
1722 return elems->size();
1723 }
1724
1725
1726 /**
1727 * Convert the letters of a string into upper case.
1728 */
strtoupper(std::string * str)1729 inline std::string* strtoupper(std::string* str) {
1730 _assert_(str);
1731 size_t size = str->size();
1732 for (size_t i = 0; i < size; i++) {
1733 int32_t c = (unsigned char)(*str)[i];
1734 if (c >= 'a' && c <= 'z') (*str)[i] = c - ('a' - 'A');
1735 }
1736 return str;
1737 }
1738
1739
1740 /**
1741 * Convert the letters of a string into lower case.
1742 */
strtolower(std::string * str)1743 inline std::string* strtolower(std::string* str) {
1744 _assert_(str);
1745 size_t size = str->size();
1746 for (size_t i = 0; i < size; i++) {
1747 int32_t c = (unsigned char)(*str)[i];
1748 if (c >= 'A' && c <= 'Z') (*str)[i] = c + ('a' - 'A');
1749 }
1750 return str;
1751 }
1752
1753
1754 /**
1755 * Check whether a string begins with a key.
1756 */
strfwm(const std::string & str,const std::string & key)1757 inline bool strfwm(const std::string& str, const std::string& key) {
1758 _assert_(true);
1759 size_t ksiz = key.size();
1760 if (ksiz > str.size()) return false;
1761 return !std::memcmp(str.data(), key.data(), ksiz);
1762 }
1763
1764
1765 /**
1766 * Check whether a string ends with a key.
1767 */
strbwm(const std::string & str,const std::string & key)1768 inline bool strbwm(const std::string& str, const std::string& key) {
1769 _assert_(true);
1770 size_t ksiz = key.size();
1771 if (ksiz > str.size()) return false;
1772 return !std::memcmp(str.data() + str.size() - ksiz, key.data(), ksiz);
1773 }
1774
1775
1776 /**
1777 * Cut space characters at head or tail of a string.
1778 */
strtrim(std::string * str)1779 inline std::string* strtrim(std::string* str) {
1780 _assert_(str);
1781 size_t size = str->size();
1782 size_t wi = 0;
1783 size_t li = 0;
1784 for (size_t i = 0; i < size; i++) {
1785 int32_t c = (unsigned char)(*str)[i];
1786 if (c >= '\0' && c <= ' ') {
1787 if (wi > 0) (*str)[wi++] = c;
1788 } else {
1789 (*str)[wi++] = c;
1790 li = wi;
1791 }
1792 }
1793 str->resize(li);
1794 return str;
1795 }
1796
1797
1798 /**
1799 * Convert a UTF-8 string into a UCS-4 array.
1800 */
strutftoucs(const std::string & src,std::vector<uint32_t> * dest)1801 inline void strutftoucs(const std::string& src, std::vector<uint32_t>* dest) {
1802 _assert_(dest);
1803 dest->reserve(dest->size() + src.size());
1804 size_t size = src.size();
1805 size_t ri = 0;
1806 while (ri < size) {
1807 uint32_t c = (unsigned char)src[ri];
1808 if (c < 0x80) {
1809 dest->push_back(c);
1810 } else if (c < 0xe0) {
1811 if (c >= 0xc0 && ri + 1 < size) {
1812 c = ((c & 0x1f) << 6) | (src[ri+1] & 0x3f);
1813 if (c >= 0x80) dest->push_back(c);
1814 ri++;
1815 }
1816 } else if (c < 0xf0) {
1817 if (ri + 2 < size) {
1818 c = ((c & 0x0f) << 12) | ((src[ri+1] & 0x3f) << 6) | (src[ri+2] & 0x3f);
1819 if (c >= 0x800) dest->push_back(c);
1820 ri += 2;
1821 }
1822 } else if (c < 0xf8) {
1823 if (ri + 3 < size) {
1824 c = ((c & 0x07) << 18) | ((src[ri+1] & 0x3f) << 12) | ((src[ri+2] & 0x3f) << 6) |
1825 (src[ri+3] & 0x3f);
1826 if (c >= 0x10000) dest->push_back(c);
1827 ri += 3;
1828 }
1829 } else if (c < 0xfc) {
1830 if (ri + 4 < size) {
1831 c = ((c & 0x03) << 24) | ((src[ri+1] & 0x3f) << 18) | ((src[ri+2] & 0x3f) << 12) |
1832 ((src[ri+3] & 0x3f) << 6) | (src[ri+4] & 0x3f);
1833 if (c >= 0x200000) dest->push_back(c);
1834 ri += 4;
1835 }
1836 } else if (c < 0xfe) {
1837 if (ri + 5 < size) {
1838 c = ((c & 0x01) << 30) | ((src[ri+1] & 0x3f) << 24) | ((src[ri+2] & 0x3f) << 18) |
1839 ((src[ri+3] & 0x3f) << 12) | ((src[ri+4] & 0x3f) << 6) | (src[ri+5] & 0x3f);
1840 if (c >= 0x4000000) dest->push_back(c);
1841 ri += 5;
1842 }
1843 }
1844 ri++;
1845 }
1846 }
1847
1848
1849 /**
1850 * Convert a UCS-4 array into a UTF-8 string.
1851 */
strucstoutf(const std::vector<uint32_t> & src,std::string * dest)1852 inline void strucstoutf(const std::vector<uint32_t>& src, std::string* dest) {
1853 _assert_(dest);
1854 dest->reserve(dest->size() + src.size() * 3);
1855 std::vector<uint32_t>::const_iterator it = src.begin();
1856 std::vector<uint32_t>::const_iterator itend = src.end();
1857 while (it != itend) {
1858 uint32_t c = *it;
1859 if (c < 0x80) {
1860 dest->append(1, c);
1861 } else if (c < 0x800) {
1862 dest->append(1, 0xc0 | (c >> 6));
1863 dest->append(1, 0x80 | (c & 0x3f));
1864 } else if (c < 0x10000) {
1865 dest->append(1, 0xe0 | (c >> 12));
1866 dest->append(1, 0x80 | ((c & 0xfff) >> 6));
1867 dest->append(1, 0x80 | (c & 0x3f));
1868 } else if (c < 0x200000) {
1869 dest->append(1, 0xf0 | (c >> 18));
1870 dest->append(1, 0x80 | ((c & 0x3ffff) >> 12));
1871 dest->append(1, 0x80 | ((c & 0xfff) >> 6));
1872 dest->append(1, 0x80 | (c & 0x3f));
1873 } else if (c < 0x4000000) {
1874 dest->append(1, 0xf8 | (c >> 24));
1875 dest->append(1, 0x80 | ((c & 0xffffff) >> 18));
1876 dest->append(1, 0x80 | ((c & 0x3ffff) >> 12));
1877 dest->append(1, 0x80 | ((c & 0xfff) >> 6));
1878 dest->append(1, 0x80 | (c & 0x3f));
1879 } else if (c < 0x80000000) {
1880 dest->append(1, 0xfc | (c >> 30));
1881 dest->append(1, 0x80 | ((c & 0x3fffffff) >> 24));
1882 dest->append(1, 0x80 | ((c & 0xffffff) >> 18));
1883 dest->append(1, 0x80 | ((c & 0x3ffff) >> 12));
1884 dest->append(1, 0x80 | ((c & 0xfff) >> 6));
1885 dest->append(1, 0x80 | (c & 0x3f));
1886 }
1887 ++it;
1888 }
1889 }
1890
1891
1892 /**
1893 * Serialize a string vector object into a string object.
1894 */
strvecdump(const std::vector<std::string> & src,std::string * dest)1895 inline void strvecdump(const std::vector<std::string>& src, std::string* dest) {
1896 _assert_(dest);
1897 std::vector<std::string>::const_iterator it = src.begin();
1898 std::vector<std::string>::const_iterator itend = src.end();
1899 size_t dsiz = 1;
1900 while (it != itend) {
1901 dsiz += 2 + it->size();
1902 ++it;
1903 }
1904 dest->reserve(dest->size() + dsiz);
1905 it = src.begin();
1906 while (it != itend) {
1907 char nbuf[NUMBUFSIZ];
1908 size_t nsiz = writevarnum(nbuf, it->size());
1909 dest->append(nbuf, nsiz);
1910 dest->append(it->data(), it->size());
1911 ++it;
1912 }
1913 }
1914
1915
1916 /**
1917 * Deserialize a string object into a string vector object.
1918 */
strvecload(const std::string & src,std::vector<std::string> * dest)1919 inline void strvecload(const std::string& src, std::vector<std::string>* dest) {
1920 _assert_(dest);
1921 const char* rp = src.data();
1922 size_t size = src.size();
1923 while (size > 0) {
1924 uint64_t vsiz;
1925 size_t step = readvarnum(rp, size, &vsiz);
1926 rp += step;
1927 size -= step;
1928 if (vsiz > size) break;
1929 dest->push_back(std::string(rp, vsiz));
1930 rp += vsiz;
1931 size -= vsiz;
1932 }
1933 }
1934
1935
1936 /**
1937 * Serialize a string vector object into a string object.
1938 */
strmapdump(const std::map<std::string,std::string> & src,std::string * dest)1939 inline void strmapdump(const std::map<std::string, std::string>& src, std::string* dest) {
1940 _assert_(dest);
1941 std::map<std::string, std::string>::const_iterator it = src.begin();
1942 std::map<std::string, std::string>::const_iterator itend = src.end();
1943 size_t dsiz = 1;
1944 while (it != itend) {
1945 dsiz += 4 + it->first.size() + it->second.size();
1946 ++it;
1947 }
1948 dest->reserve(dest->size() + dsiz);
1949 it = src.begin();
1950 while (it != itend) {
1951 char nbuf[NUMBUFSIZ*2];
1952 size_t nsiz = writevarnum(nbuf, it->first.size());
1953 nsiz += writevarnum(nbuf + nsiz, it->second.size());
1954 dest->append(nbuf, nsiz);
1955 dest->append(it->first.data(), it->first.size());
1956 dest->append(it->second.data(), it->second.size());
1957 ++it;
1958 }
1959 }
1960
1961
1962 /**
1963 * Deserialize a string object into a string map object.
1964 */
strmapload(const std::string & src,std::map<std::string,std::string> * dest)1965 inline void strmapload(const std::string& src, std::map<std::string, std::string>* dest) {
1966 _assert_(dest);
1967 const char* rp = src.data();
1968 int64_t size = src.size();
1969 while (size > 1) {
1970 uint64_t ksiz;
1971 size_t step = readvarnum(rp, size, &ksiz);
1972 rp += step;
1973 size -= step;
1974 if (size < 1) break;
1975 uint64_t vsiz;
1976 step = readvarnum(rp, size, &vsiz);
1977 rp += step;
1978 size -= step;
1979 int64_t rsiz = ksiz + vsiz;
1980 if (rsiz > size) break;
1981 (*dest)[std::string(rp, ksiz)] = std::string(rp + ksiz, vsiz);
1982 rp += rsiz;
1983 size -= rsiz;
1984 }
1985 }
1986
1987
1988 /**
1989 * Encode a serial object by hexadecimal encoding.
1990 */
hexencode(const void * buf,size_t size)1991 inline char* hexencode(const void* buf, size_t size) {
1992 _assert_(buf && size <= MEMMAXSIZ);
1993 const unsigned char* rp = (const unsigned char*)buf;
1994 char* zbuf = new char[size*2+1];
1995 char* wp = zbuf;
1996 for (const unsigned char* ep = rp + size; rp < ep; rp++) {
1997 int32_t num = *rp >> 4;
1998 if (num < 10) {
1999 *(wp++) = '0' + num;
2000 } else {
2001 *(wp++) = 'a' + num - 10;
2002 }
2003 num = *rp & 0x0f;
2004 if (num < 10) {
2005 *(wp++) = '0' + num;
2006 } else {
2007 *(wp++) = 'a' + num - 10;
2008 }
2009 }
2010 *wp = '\0';
2011 return zbuf;
2012 }
2013
2014
/**
 * Decode a string encoded by hexadecimal encoding.
 * @param str the NUL-terminated encoded string.
 * @param sp the pointer to the variable into which the size of the result is assigned.
 * @return the decoded region, with a NUL code appended after the last decoded byte.
 * @note Characters in the range 0x01..0x20 in front of a digit pair are skipped.  A character
 * that is not a hexadecimal digit contributes no value.  If the string ends after the first
 * digit of a pair, that digit alone becomes the last byte.  The result is allocated with the
 * new[] operator, so it should be released with the delete[] operator.
 */
inline char* hexdecode(const char* str, size_t* sp) {
  _assert_(str && sp);
  char* const out = new char[std::strlen(str) + 1];
  char* cur = out;
  for (;;) {
    // Skip blanks and control characters in front of the next pair.
    for (; *str > '\0' && *str <= ' '; str++) {}
    const int32_t hi = *str++;
    if (hi == '\0') break;
    int32_t val = 0;
    if (hi >= '0' && hi <= '9') {
      val = hi - '0';
    } else if (hi >= 'a' && hi <= 'f') {
      val = hi - 'a' + 10;
    } else if (hi >= 'A' && hi <= 'F') {
      val = hi - 'A' + 10;
    }
    const int32_t lo = *str++;
    if (lo >= '0' && lo <= '9') {
      val = val * 0x10 + lo - '0';
    } else if (lo >= 'a' && lo <= 'f') {
      val = val * 0x10 + lo - 'a' + 10;
    } else if (lo >= 'A' && lo <= 'F') {
      val = val * 0x10 + lo - 'A' + 10;
    }
    *cur++ = val;
    // The terminator was consumed as the second digit: nothing is left to read.
    if (lo == '\0') break;
  }
  *cur = '\0';
  *sp = cur - out;
  return out;
}
2054
2055
/**
 * Encode a serial object by URL encoding.
 * @param buf the pointer to the region to encode.
 * @param size the size of the region in bytes.
 * @return a NUL-terminated string in which ASCII letters, digits and the characters
 * '_', '-', '.' and '~' are kept as they are and every other byte is written as '%' followed
 * by two lower-case hexadecimal digits.
 * @note The result is allocated with the new[] operator, so it should be released with the
 * delete[] operator when it is no longer in use.
 */
inline char* urlencode(const void* buf, size_t size) {
  _assert_(buf && size <= MEMMAXSIZ);
  const char* const digits = "0123456789abcdef";
  const unsigned char* src = (const unsigned char*)buf;
  const unsigned char* const end = src + size;
  // Worst case: every byte expands to three characters.
  char* const out = new char[size * 3 + 1];
  char* cur = out;
  while (src < end) {
    const int32_t c = *src++;
    const bool alnum = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
        (c >= '0' && c <= '9');
    const bool mark = c == '_' || c == '-' || c == '.' || c == '~';
    if (alnum || mark) {
      *cur++ = c;
      continue;
    }
    *cur++ = '%';
    *cur++ = digits[c >> 4];
    *cur++ = digits[c & 0x0f];
  }
  *cur = '\0';
  return out;
}
2088
2089
/**
 * Decode a string encoded by URL encoding.
 * @param str the NUL-terminated encoded string.
 * @param sp the pointer to the variable into which the size of the result is assigned.
 * @return the decoded region, with a NUL code appended after the last decoded byte.
 * @note '+' is decoded as a space.  Characters whose value is not above the space character,
 * and the character 0x7f, are dropped; where plain char is signed this also covers bytes of
 * 0x80 and above.  A '%' escape that is cut off by the end of the string stops decoding, and
 * a non-hexadecimal character inside an escape contributes no value.  The result is allocated
 * with the new[] operator, so it should be released with the delete[] operator.
 */
inline char* urldecode(const char* str, size_t* sp) {
  _assert_(str && sp);
  const size_t len = std::strlen(str);
  char* const out = new char[len + 1];
  char* cur = out;
  const char* const end = str + len;
  while (str < end) {
    const int32_t c = *str++;
    if (c == '+') {
      *cur++ = ' ';
      continue;
    }
    if (c != '%') {
      // Plain character: keep it unless it is blank, control or DEL.
      if (c > ' ' && c != 0x7f) *cur++ = c;
      continue;
    }
    // Escape sequence: two more characters are required.
    if (str >= end) break;
    const int32_t hi = *str++;
    int32_t val = 0;
    if (hi >= '0' && hi <= '9') {
      val = hi - '0';
    } else if (hi >= 'a' && hi <= 'f') {
      val = hi - 'a' + 10;
    } else if (hi >= 'A' && hi <= 'F') {
      val = hi - 'A' + 10;
    }
    if (str >= end) break;
    const int32_t lo = *str++;
    if (lo >= '0' && lo <= '9') {
      val = val * 0x10 + lo - '0';
    } else if (lo >= 'a' && lo <= 'f') {
      val = val * 0x10 + lo - 'a' + 10;
    } else if (lo >= 'A' && lo <= 'F') {
      val = val * 0x10 + lo - 'A' + 10;
    }
    *cur++ = val;
  }
  *cur = '\0';
  *sp = cur - out;
  return out;
}
2137
2138
/**
 * Encode a serial object by Quoted-printable encoding.
 * @param buf the pointer to the region to encode.
 * @param size the size of the region in bytes.
 * @return a NUL-terminated string in which '=', bytes below the space character and bytes
 * above 0x7e are written as '=' followed by two upper-case hexadecimal digits; every other
 * byte is copied unchanged.
 * @note The result is allocated with the new[] operator, so it should be released with the
 * delete[] operator when it is no longer in use.
 */
inline char* quoteencode(const void* buf, size_t size) {
  _assert_(buf && size <= MEMMAXSIZ);
  const char* const digits = "0123456789ABCDEF";
  const unsigned char* src = (const unsigned char*)buf;
  const unsigned char* const end = src + size;
  // Worst case: every byte expands to three characters.
  char* const out = new char[size * 3 + 1];
  char* cur = out;
  while (src < end) {
    const int32_t c = *src++;
    const bool literal = c != '=' && c >= ' ' && c <= 0x7e;
    if (literal) {
      *cur++ = c;
      continue;
    }
    *cur++ = '=';
    *cur++ = digits[c >> 4];
    *cur++ = digits[c & 0x0f];
  }
  *cur = '\0';
  return out;
}
2170
2171
/**
 * Decode a string encoded by Quoted-printable encoding.
 * @param str the NUL-terminated encoded string.
 * @param sp the pointer to the variable into which the size of the result is assigned.
 * @return the decoded region, with a NUL code appended after the last decoded byte.
 * @note '=' followed by CR, LF or CR LF is a soft line break and produces nothing.  Any other
 * '=' starts a two-digit hexadecimal escape, in which a non-hexadecimal character contributes
 * no value; an escape cut off by the end of the string stops decoding.  Outside escapes,
 * characters below the space character and the character 0x7f are dropped; where plain char
 * is signed this also covers bytes of 0x80 and above.  The result is allocated with the new[]
 * operator, so it should be released with the delete[] operator.
 */
inline char* quotedecode(const char* str, size_t* sp) {
  _assert_(str && sp);
  const size_t len = std::strlen(str);
  char* const out = new char[len + 1];
  char* cur = out;
  const char* const end = str + len;
  while (str < end) {
    const int32_t c = *str;
    if (c != '=') {
      // Plain character: keep it unless it is a control character or DEL.
      if (c >= ' ' && c != 0x7f) *cur++ = c;
      str++;
      continue;
    }
    if (++str >= end) break;
    const int32_t hi = *str;
    if (hi == '\n') {
      // Soft line break written as "=\n".
      str++;
      continue;
    }
    if (hi == '\r') {
      // Soft line break written as "=\r" or "=\r\n".
      if (++str >= end) break;
      if (*str == '\n') str++;
      continue;
    }
    int32_t val = 0;
    if (hi >= '0' && hi <= '9') {
      val = hi - '0';
    } else if (hi >= 'a' && hi <= 'f') {
      val = hi - 'a' + 10;
    } else if (hi >= 'A' && hi <= 'F') {
      val = hi - 'A' + 10;
    }
    if (++str >= end) break;
    const int32_t lo = *str;
    if (lo >= '0' && lo <= '9') {
      val = val * 0x10 + lo - '0';
    } else if (lo >= 'a' && lo <= 'f') {
      val = val * 0x10 + lo - 'a' + 10;
    } else if (lo >= 'A' && lo <= 'F') {
      val = val * 0x10 + lo - 'A' + 10;
    }
    *cur++ = val;
    str++;
  }
  *cur = '\0';
  *sp = cur - out;
  return out;
}
2223
2224
/**
 * Encode a serial object by Base64 encoding.
 * @param buf the pointer to the region to encode.
 * @param size the size of the region in bytes.
 * @return a NUL-terminated Base64 string using the alphabet A-Z, a-z, 0-9, '+' and '/',
 * padded with '=' so that its length is a multiple of four.
 * @note The result is allocated with the new[] operator, so it should be released with the
 * delete[] operator when it is no longer in use.
 */
inline char* baseencode(const void* buf, size_t size) {
  _assert_(buf && size <= MEMMAXSIZ);
  const char* const alphabet =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const unsigned char* src = (const unsigned char*)buf;
  char* const out = new char[size * 4 / 3 + 5];
  char* cur = out;
  size_t rest = size;
  // Complete groups: three input bytes become four output characters.
  for (; rest >= 3; rest -= 3, src += 3) {
    *cur++ = alphabet[src[0] >> 2];
    *cur++ = alphabet[((src[0] & 3) << 4) + (src[1] >> 4)];
    *cur++ = alphabet[((src[1] & 0xf) << 2) + (src[2] >> 6)];
    *cur++ = alphabet[src[2] & 0x3f];
  }
  // Trailing one or two bytes: emit the partial group and pad it with '='.
  if (rest == 2) {
    *cur++ = alphabet[src[0] >> 2];
    *cur++ = alphabet[((src[0] & 3) << 4) + (src[1] >> 4)];
    *cur++ = alphabet[(src[1] & 0xf) << 2];
    *cur++ = '=';
  } else if (rest == 1) {
    *cur++ = alphabet[src[0] >> 2];
    *cur++ = alphabet[(src[0] & 3) << 4];
    *cur++ = '=';
    *cur++ = '=';
  }
  *cur = '\0';
  return out;
}
2263
2264
/**
 * Decode a string encoded by Base64 encoding.
 * @param str the NUL-terminated encoded string.
 * @param sp the pointer to the variable into which the size of the result is assigned.
 * @return the decoded region, with a NUL code appended after the last decoded byte.
 * @note Characters outside the Base64 alphabet (line breaks, for example) are skipped.
 * Decoding stops after the first group that contains '='.  If the input ends in the middle
 * of a group, the missing characters are treated as padding, so unpadded input such as "Zg"
 * decodes like "Zg==".  The result is allocated with the new[] operator, so it should be
 * released with the delete[] operator when it is no longer in use.
 */
inline char* basedecode(const char* str, size_t* sp) {
  _assert_(str && sp);
  size_t bpos = 0;
  size_t eqcnt = 0;
  size_t len = std::strlen(str);
  unsigned char* zbuf = new unsigned char[len+4];
  unsigned char* wp = zbuf;
  size_t zsiz = 0;
  while (bpos < len && eqcnt == 0) {
    size_t bits = 0;
    size_t i;
    // Collect up to four alphabet characters (six bits each) into `bits`.
    for (i = 0; bpos < len && i < 4; bpos++) {
      if (str[bpos] >= 'A' && str[bpos] <= 'Z') {
        bits = (bits << 6) | (str[bpos] - 'A');
        i++;
      } else if (str[bpos] >= 'a' && str[bpos] <= 'z') {
        bits = (bits << 6) | (str[bpos] - 'a' + 26);
        i++;
      } else if (str[bpos] >= '0' && str[bpos] <= '9') {
        bits = (bits << 6) | (str[bpos] - '0' + 52);
        i++;
      } else if (str[bpos] == '+') {
        bits = (bits << 6) | 62;
        i++;
      } else if (str[bpos] == '/') {
        bits = (bits << 6) | 63;
        i++;
      } else if (str[bpos] == '=') {
        bits <<= 6;
        i++;
        eqcnt++;
      }
    }
    // Only ignorable characters were left before the end of the input.
    if (i == 0 && bpos >= len) continue;
    // The input ended inside a group: left-align what was read and count each missing
    // character as padding.  Without this the bytes would be cut from misaligned bits.
    size_t padcnt = eqcnt;
    for (; i < 4; i++) {
      bits <<= 6;
      padcnt++;
    }
    switch (padcnt) {
      case 0: {
        *wp++ = (bits >> 16) & 0xff;
        *wp++ = (bits >> 8) & 0xff;
        *wp++ = bits & 0xff;
        zsiz += 3;
        break;
      }
      case 1: {
        *wp++ = (bits >> 16) & 0xff;
        *wp++ = (bits >> 8) & 0xff;
        zsiz += 2;
        break;
      }
      case 2: {
        *wp++ = (bits >> 16) & 0xff;
        zsiz += 1;
        break;
      }
      // Three or more padding positions leave fewer than eight data bits: emit nothing.
    }
  }
  zbuf[zsiz] = '\0';
  *sp = zsiz;
  return (char*)zbuf;
}
2327
2328
/**
 * Cipher or decipher a serial object with the Arcfour stream cipher.
 * @param ptr the pointer to the input region.
 * @param size the size of the input region in bytes.
 * @param kbuf the pointer to the key region.
 * @param ksiz the size of the key region; a size of zero is handled as a key made of one
 * zero byte.
 * @param obuf the pointer to the output region, which receives `size` bytes.
 * @note Each output byte is the input byte XORed with one key-stream byte, so running the
 * output through this function again with the same key restores the input.
 */
inline void arccipher(const void* ptr, size_t size, const void* kbuf, size_t ksiz, void* obuf) {
  _assert_(ptr && size <= MEMMAXSIZ && kbuf && ksiz <= MEMMAXSIZ && obuf);
  const uint8_t* key = (const uint8_t*)kbuf;
  size_t klen = ksiz;
  if (klen < 1) {
    key = (const uint8_t*)"";
    klen = 1;
  }
  // Start from the identity permutation.
  uint32_t state[0x100];
  for (uint32_t n = 0; n < 0x100; n++) {
    state[n] = n;
  }
  // Key scheduling: shuffle the permutation, cycling through the key bytes.
  uint32_t mix = 0;
  for (uint32_t n = 0; n < 0x100; n++) {
    mix = (mix + state[n] + key[n % klen]) & 0xff;
    const uint32_t tmp = state[n];
    state[n] = state[mix];
    state[mix] = tmp;
  }
  // Key-stream generation, XORed onto the input one byte at a time.
  const uint8_t* const in = (const uint8_t*)ptr;
  uint8_t* const out = (uint8_t*)obuf;
  uint32_t a = 0;
  uint32_t b = 0;
  for (size_t n = 0; n < size; n++) {
    a = (a + 1) & 0xff;
    b = (b + state[a]) & 0xff;
    const uint32_t tmp = state[a];
    state[a] = state[b];
    state[b] = tmp;
    out[n] = in[n] ^ state[(state[a] + state[b]) & 0xff];
  }
}
2361
2362
/**
 * Duplicate a region on memory.
 * @param ptr the pointer to the source region.
 * @param size the size of the source region in bytes.
 * @return a newly allocated copy of the region, followed by one NUL code.
 * @note The result is allocated with the new[] operator, so it should be released with the
 * delete[] operator when it is no longer in use.
 */
inline char* memdup(const char* ptr, size_t size) {
  _assert_(ptr && size <= MEMMAXSIZ);
  char* obuf = new char[size+1];
  std::memcpy(obuf, ptr, size);
  // The spare byte is allocated for a terminator; set it so that it is never left
  // uninitialized and the copy can be read as a C string.
  obuf[size] = '\0';
  return obuf;
}
2372
2373
/**
 * Compare two regions by case insensitive evaluation.
 * @param abuf the pointer to the first region.
 * @param bbuf the pointer to the second region.
 * @param size the number of bytes to compare in both regions.
 * @return zero if the regions are equal once the ASCII letters 'A'..'Z' are folded to lower
 * case, otherwise the difference between the first pair of folded bytes that differ.
 */
inline int32_t memicmp(const void* abuf, const void* bbuf, size_t size) {
  _assert_(abuf && bbuf && size <= MEMMAXSIZ);
  const unsigned char* const lhs = (const unsigned char*)abuf;
  const unsigned char* const rhs = (const unsigned char*)bbuf;
  for (size_t i = 0; i < size; i++) {
    int32_t lc = lhs[i];
    int32_t rc = rhs[i];
    if (lc >= 'A' && lc <= 'Z') lc += 'a' - 'A';
    if (rc >= 'A' && rc <= 'Z') rc += 'a' - 'A';
    const int32_t diff = lc - rc;
    if (diff != 0) return diff;
  }
  return 0;
}
2393
2394
/**
 * Find the first occurrence of a sub pattern.
 * @param hbuf the pointer to the region to search.
 * @param hsiz the size of the region to search.
 * @param nbuf the pointer to the pattern region.
 * @param nsiz the size of the pattern region.
 * @return the pointer to the first occurrence of the pattern inside the searched region, or
 * NULL if there is none.  An empty pattern matches at the start of the region.
 */
inline void* memmem(const void* hbuf, size_t hsiz, const void* nbuf, size_t nsiz) {
  _assert_(hbuf && hsiz <= MEMMAXSIZ && nbuf && nsiz <= MEMMAXSIZ);
  if (nsiz < 1) return (void*)hbuf;
  if (hsiz < nsiz) return NULL;
  const unsigned char* const hay = (const unsigned char*)hbuf;
  const unsigned char* const pat = (const unsigned char*)nbuf;
  // Last offset at which the whole pattern still fits.
  const size_t last = hsiz - nsiz;
  for (size_t off = 0; off <= last; off++) {
    size_t k = 0;
    while (k < nsiz && hay[off + k] == pat[k]) {
      k++;
    }
    if (k == nsiz) return (void*)(hay + off);
  }
  return NULL;
}
2420
2421
2422 /**
2423 * Find the first occurrence of a sub pattern by case insensitive evaluation.
2424 */
memimem(const void * hbuf,size_t hsiz,const void * nbuf,size_t nsiz)2425 inline void* memimem(const void* hbuf, size_t hsiz, const void* nbuf, size_t nsiz) {
2426 _assert_(hbuf && hsiz <= MEMMAXSIZ && nbuf && nsiz <= MEMMAXSIZ);
2427 if (nsiz < 1) return (void*)hbuf;
2428 if (hsiz < nsiz) return NULL;
2429 int32_t tc = *(unsigned char*)nbuf;
2430 if (tc >= 'A' && tc <= 'Z') tc += 'a' - 'A';
2431 const unsigned char* rp = (unsigned char*)hbuf;
2432 const unsigned char* ep = (unsigned char*)hbuf + hsiz - nsiz;
2433 while (rp <= ep) {
2434 int32_t cc = *rp;
2435 if (cc >= 'A' && cc <= 'Z') cc += 'a' - 'A';
2436 if (cc == tc) {
2437 bool hit = true;
2438 for (size_t i = 1; i < nsiz; i++) {
2439 int32_t hc = rp[i];
2440 if (hc >= 'A' && hc <= 'Z') hc += 'a' - 'A';
2441 int32_t nc = ((unsigned char*)nbuf)[i];
2442 if (nc >= 'A' && nc <= 'Z') nc += 'a' - 'A';
2443 if (hc != nc) {
2444 hit = false;
2445 break;
2446 }
2447 }
2448 if (hit) return (void*)rp;
2449 }
2450 rp++;
2451 }
2452 return NULL;
2453 }
2454
2455
2456 /**
2457 * Duplicate a string on memory.
2458 */
strdup(const char * str)2459 inline char* strdup(const char* str) {
2460 _assert_(str);
2461 size_t size = std::strlen(str);
2462 char* obuf = memdup(str, size);
2463 obuf[size] = '\0';
2464 return obuf;
2465 }
2466
2467
/**
 * Convert the letters of a string into upper case.
 * @param str the NUL-terminated string to convert in place.
 * @return the same pointer as `str`.
 * @note Only the ASCII letters 'a'..'z' are converted.
 */
inline char* strtoupper(char* str) {
  _assert_(str);
  for (char* cur = str; *cur != '\0'; cur++) {
    if (*cur >= 'a' && *cur <= 'z') *cur = *cur - ('a' - 'A');
  }
  return str;
}
2480
2481
/**
 * Convert the letters of a string into lower case.
 * @param str the NUL-terminated string to convert in place.
 * @return the same pointer as `str`.
 * @note Only the ASCII letters 'A'..'Z' are converted.
 */
inline char* strtolower(char* str) {
  _assert_(str);
  for (char* cur = str; *cur != '\0'; cur++) {
    if (*cur >= 'A' && *cur <= 'Z') *cur = *cur + ('a' - 'A');
  }
  return str;
}
2494
2495
/**
 * Cut space characters at head or tail of a string.
 * @param str the NUL-terminated string to trim in place.
 * @return the same pointer as `str`.
 * @note Characters in the range 0x01..0x20 count as space characters.  Space characters
 * between other characters are kept.
 */
inline char* strtrim(char* str) {
  _assert_(str);
  // Skip the leading run of space characters.
  const char* src = str;
  while (*src > '\0' && *src <= ' ') {
    src++;
  }
  // Move the remainder to the front of the buffer.
  char* dst = str;
  while (*src != '\0') {
    *dst++ = *src++;
  }
  *dst = '\0';
  // Erase the trailing run of space characters.
  while (dst > str && dst[-1] > '\0' && dst[-1] <= ' ') {
    *--dst = '\0';
  }
  return str;
}
2519
2520
/**
 * Squeeze space characters in a string and trim it.
 * @param str the NUL-terminated string to process in place.
 * @return the same pointer as `str`.
 * @note Characters in the range 0x01..0x20 count as space characters.  Each run of them
 * inside the string is reduced to its first character; leading and trailing runs are
 * removed.
 */
inline char* strsqzspc(char* str) {
  _assert_(str);
  const char* rp = str;
  char* wp = str;
  bool spc = true;  // true while the previous character was a space (or at the start)
  while (*rp != '\0') {
    if (*rp > '\0' && *rp <= ' ') {
      if (!spc) *(wp++) = *rp;
      spc = true;
    } else {
      *(wp++) = *rp;
      spc = false;
    }
    rp++;
  }
  *wp = '\0';
  // Erase trailing space characters.  Look at wp[-1] rather than stepping wp below `str`:
  // a pointer before the start of the buffer is undefined behaviour, and it would be formed
  // whenever the squeezed result is empty.
  while (wp > str && wp[-1] > '\0' && wp[-1] <= ' ') {
    *(--wp) = '\0';
  }
  return str;
}
2549
2550
/**
 * Normalize space characters in a string and trim it.
 * @param str the NUL-terminated string to process in place.
 * @return the same pointer as `str`.
 * @note Characters in the range 0x01..0x20 and the character 0x7f count as space
 * characters.  Each run of them inside the string is replaced by a single ' '; leading and
 * trailing runs are removed.
 */
inline char* strnrmspc(char* str) {
  _assert_(str);
  const char* rp = str;
  char* wp = str;
  bool spc = true;  // true while the previous character was a space (or at the start)
  while (*rp != '\0') {
    if ((*rp > '\0' && *rp <= ' ') || *rp == 0x7f) {
      if (!spc) *(wp++) = ' ';
      spc = true;
    } else {
      *(wp++) = *rp;
      spc = false;
    }
    rp++;
  }
  *wp = '\0';
  // Erase trailing spaces.  Look at wp[-1] rather than stepping wp below `str`: a pointer
  // before the start of the buffer is undefined behaviour, and it would be formed whenever
  // the normalized result is empty.
  while (wp > str && wp[-1] == ' ') {
    *(--wp) = '\0';
  }
  return str;
}
2579
2580
2581
/**
 * Compare two strings by case insensitive evaluation.
 * @param astr the first NUL-terminated string.
 * @param bstr the second NUL-terminated string.
 * @return zero if the strings are equal once the ASCII letters 'A'..'Z' are folded to lower
 * case; 1 if `bstr` is a proper prefix of `astr`; -1 if `astr` is a proper prefix of `bstr`;
 * otherwise the difference between the first pair of folded bytes that differ.
 */
inline int32_t stricmp(const char* astr, const char* bstr) {
  _assert_(astr && bstr);
  const unsigned char* lhs = (const unsigned char*)astr;
  const unsigned char* rhs = (const unsigned char*)bstr;
  for (; *lhs != '\0'; lhs++, rhs++) {
    if (*rhs == '\0') return 1;
    int32_t lc = *lhs;
    int32_t rc = *rhs;
    if (lc >= 'A' && lc <= 'Z') lc += 'a' - 'A';
    if (rc >= 'A' && rc <= 'Z') rc += 'a' - 'A';
    if (lc != rc) return lc - rc;
  }
  return *rhs == '\0' ? 0 : -1;
}
2599
2600
/**
 * Find the first occurrence of a substring by case insensitive evaluation.
 * @param hstr the NUL-terminated string to search.
 * @param nstr the NUL-terminated substring to look for.
 * @return the pointer to the first occurrence of the substring inside `hstr`, or NULL if
 * there is none.  An empty substring matches at the start of `hstr`.
 * @note Only the ASCII letters 'A'..'Z' are folded to lower case before comparison.
 */
inline char* stristr(const char* hstr, const char* nstr) {
  _assert_(hstr && nstr);
  if (*nstr == '\0') return (char*)hstr;
  for (const char* pos = hstr; *pos != '\0'; pos++) {
    for (size_t k = 0; ; k++) {
      int32_t nc = nstr[k];
      // Reaching the end of the substring means every character matched.
      if (nc == '\0') return (char*)pos;
      int32_t hc = pos[k];
      if (hc >= 'A' && hc <= 'Z') hc += 'a' - 'A';
      if (nc >= 'A' && nc <= 'Z') nc += 'a' - 'A';
      // The terminator of `hstr` never equals a substring character, so the scan cannot
      // run past the end of `hstr`.
      if (hc != nc) break;
    }
  }
  return NULL;
}
2631
2632
/**
 * Check whether a string begins with a key.
 * @param str the NUL-terminated string to examine.
 * @param key the NUL-terminated forward matching key.
 * @return true if `str` starts with `key`, which includes the case of an empty key;
 * otherwise false.
 */
inline bool strfwm(const char* str, const char* key) {
  _assert_(str && key);
  for (; *key != '\0'; str++, key++) {
    // The key character is not NUL here, so this test also fails when `str` has ended.
    if (*str != *key) return false;
  }
  return true;
}
2645
2646
/**
 * Check whether a string begins with a key by case insensitive evaluation.
 * @param str the NUL-terminated string to examine.
 * @param key the NUL-terminated forward matching key.
 * @return true if `str` starts with `key` once the ASCII letters 'A'..'Z' are folded to
 * lower case, which includes the case of an empty key; otherwise false.
 */
inline bool strifwm(const char* str, const char* key) {
  _assert_(str && key);
  for (; *key != '\0'; str++, key++) {
    // `str` ran out before the key did.
    if (*str == '\0') return false;
    int32_t sc = *str;
    int32_t kc = *key;
    if (sc >= 'A' && sc <= 'Z') sc += 'a' - 'A';
    if (kc >= 'A' && kc <= 'Z') kc += 'a' - 'A';
    if (sc != kc) return false;
  }
  return true;
}
2664
2665
/**
 * Check whether a string ends with a key.
 * @param str the NUL-terminated string to examine.
 * @param key the NUL-terminated backward matching key.
 * @return true if `str` ends with `key`, which includes the case of an empty key; otherwise
 * false.
 */
inline bool strbwm(const char* str, const char* key) {
  _assert_(str && key);
  const size_t slen = std::strlen(str);
  const size_t klen = std::strlen(key);
  // A key longer than the string can never match.
  if (klen > slen) return false;
  // Walk both strings backwards from their terminators.
  const char* sp = str + slen;
  const char* kp = key + klen;
  while (kp > key) {
    if (*--sp != *--kp) return false;
  }
  return true;
}
2678
2679
/**
 * Check whether a string ends with a key by case insensitive evaluation.
 * @param str the NUL-terminated string to examine.
 * @param key the NUL-terminated backward matching key.
 * @return true if `str` ends with `key` once the ASCII letters 'A'..'Z' are folded to lower
 * case, which includes the case of an empty key; otherwise false.
 */
inline bool stribwm(const char* str, const char* key) {
  _assert_(str && key);
  const size_t slen = std::strlen(str);
  const size_t klen = std::strlen(key);
  // A key longer than the string can never match.
  if (klen > slen) return false;
  // Walk both strings backwards from their terminators.
  const char* sp = str + slen;
  const char* kp = key + klen;
  while (kp > key) {
    int32_t sc = *--sp;
    int32_t kc = *--kp;
    if (sc >= 'A' && sc <= 'Z') sc += 'a' - 'A';
    if (kc >= 'A' && kc <= 'Z') kc += 'a' - 'A';
    if (sc != kc) return false;
  }
  return true;
}
2697
2698
/**
 * Get the number of characters in a UTF-8 string.
 * @param str the NUL-terminated UTF-8 string.
 * @return the number of bytes in the string that are not of the form 10xxxxxx, that is,
 * every byte except UTF-8 continuation bytes is counted as one character.
 */
inline size_t strutflen(const char* str) {
  _assert_(str);
  size_t count = 0;
  for (const unsigned char* p = (const unsigned char*)str; *p != '\0'; p++) {
    if ((*p & 0xc0) != 0x80) count++;
  }
  return count;
}
2711
2712
/**
 * Convert a UTF-8 string into a UCS-4 array.
 * @param src the NUL-terminated UTF-8 string.
 * @param dest the destination array.  At most one element is stored per source byte, so an
 * array with as many elements as the string has bytes is always large enough.
 * @param np the pointer to the variable into which the number of stored elements is
 * assigned.
 * @note Sequences of up to six bytes are accepted.  A sequence that is cut short by the end
 * of the string, or that encodes a value below the minimum for its length, stores nothing.
 * Continuation bytes are not checked for the 10xxxxxx form, and a lead byte in the range
 * 0x80..0xbf is handled as the start of a two-byte sequence.
 */
inline void strutftoucs(const char* src, uint32_t* dest, size_t* np) {
  _assert_(src && dest && np);
  const unsigned char* rp = (const unsigned char*)src;
  size_t dnum = 0;
  for (; *rp != '\0'; rp++) {
    const uint32_t lead = *rp;
    if (lead < 0x80) {
      dest[dnum++] = lead;
      continue;
    }
    // Classify the lead byte: number of continuation bytes, payload bits carried by the lead
    // byte, and the smallest value a sequence of this length may encode.
    size_t tail;
    uint32_t code;
    uint32_t lowest;
    if (lead < 0xe0) {
      tail = 1;
      code = lead & 0x1f;
      lowest = 0x80;
    } else if (lead < 0xf0) {
      tail = 2;
      code = lead & 0x0f;
      lowest = 0x800;
    } else if (lead < 0xf8) {
      tail = 3;
      code = lead & 0x07;
      lowest = 0x10000;
    } else if (lead < 0xfc) {
      tail = 4;
      code = lead & 0x03;
      lowest = 0x200000;
    } else if (lead < 0xfe) {
      tail = 5;
      code = lead & 0x01;
      lowest = 0x4000000;
    } else {
      // 0xfe and 0xff start no sequence: skip the byte.
      continue;
    }
    // Every continuation byte must exist before the terminator; stop at the first NUL so
    // that nothing beyond the end of the string is read.
    size_t have = 0;
    while (have < tail && rp[have + 1] != '\0') {
      have++;
    }
    if (have < tail) continue;
    for (size_t i = 1; i <= tail; i++) {
      code = (code << 6) | (rp[i] & 0x3f);
    }
    if (code >= lowest) dest[dnum++] = code;
    rp += tail;
  }
  *np = dnum;
}
2762
2763
2764 /**
2765 * Convert a UTF-8 string into a UCS-4 array.
2766 */
strutftoucs(const char * src,size_t slen,uint32_t * dest,size_t * np)2767 inline void strutftoucs(const char* src, size_t slen, uint32_t* dest, size_t* np) {
2768 _assert_(src && slen <= MEMMAXSIZ && dest && np);
2769 const unsigned char* rp = (unsigned char*)src;
2770 const unsigned char* ep = rp + slen;
2771 size_t dnum = 0;
2772 while (rp < ep) {
2773 uint32_t c = *rp;
2774 if (c < 0x80) {
2775 dest[dnum++] = c;
2776 } else if (c < 0xe0) {
2777 if (rp[1] != '\0') {
2778 c = ((c & 0x1f) << 6) | (rp[1] & 0x3f);
2779 if (c >= 0x80) dest[dnum++] = c;
2780 rp++;
2781 }
2782 } else if (c < 0xf0) {
2783 if (rp[1] != '\0' && rp[2] != '\0') {
2784 c = ((c & 0x0f) << 12) | ((rp[1] & 0x3f) << 6) | (rp[2] & 0x3f);
2785 if (c >= 0x800) dest[dnum++] = c;
2786 rp += 2;
2787 }
2788 } else if (c < 0xf8) {
2789 if (rp[1] != '\0' && rp[2] != '\0' && rp[3] != '\0') {
2790 c = ((c & 0x07) << 18) | ((rp[1] & 0x3f) << 12) | ((rp[2] & 0x3f) << 6) |
2791 (rp[3] & 0x3f);
2792 if (c >= 0x10000) dest[dnum++] = c;
2793 rp += 3;
2794 }
2795 } else if (c < 0xfc) {
2796 if (rp[1] != '\0' && rp[2] != '\0' && rp[3] != '\0' && rp[4] != '\0') {
2797 c = ((c & 0x03) << 24) | ((rp[1] & 0x3f) << 18) | ((rp[2] & 0x3f) << 12) |
2798 ((rp[3] & 0x3f) << 6) | (rp[4] & 0x3f);
2799 if (c >= 0x200000) dest[dnum++] = c;
2800 rp += 4;
2801 }
2802 } else if (c < 0xfe) {
2803 if (rp[1] != '\0' && rp[2] != '\0' && rp[3] != '\0' && rp[4] != '\0' && rp[5] != '\0') {
2804 c = ((c & 0x01) << 30) | ((rp[1] & 0x3f) << 24) | ((rp[2] & 0x3f) << 18) |
2805 ((rp[3] & 0x3f) << 12) | ((rp[4] & 0x3f) << 6) | (rp[5] & 0x3f);
2806 if (c >= 0x4000000) dest[dnum++] = c;
2807 rp += 5;
2808 }
2809 }
2810 rp++;
2811 }
2812 *np = dnum;
2813 }
2814
2815
/**
 * Convert a UCS-4 array into a UTF-8 string.
 * @param src the source UCS-4 array.
 * @param snum the number of elements in the source array.
 * @param dest the destination buffer.  Each element produces at most six bytes and a NUL
 * code is appended, so `snum * 6 + 1` bytes are always enough.
 * @return the number of bytes written, not counting the trailing NUL code.
 * @note Elements of 0x80000000 and above produce no output.
 */
inline size_t strucstoutf(const uint32_t* src, size_t snum, char* dest) {
  _assert_(src && snum <= MEMMAXSIZ && dest);
  unsigned char* wp = (unsigned char*)dest;
  for (size_t i = 0; i < snum; i++) {
    const uint32_t c = src[i];
    if (c < 0x80) {
      *wp++ = c;
      continue;
    }
    // Choose the number of continuation bytes and the marker bits of the lead byte.
    int32_t tail;
    uint32_t marker;
    if (c < 0x800) {
      tail = 1;
      marker = 0xc0;
    } else if (c < 0x10000) {
      tail = 2;
      marker = 0xe0;
    } else if (c < 0x200000) {
      tail = 3;
      marker = 0xf0;
    } else if (c < 0x4000000) {
      tail = 4;
      marker = 0xf8;
    } else if (c < 0x80000000) {
      tail = 5;
      marker = 0xfc;
    } else {
      continue;
    }
    // The lead byte carries the bits above the continuation payload; each continuation byte
    // then carries six bits, most significant group first.
    *wp++ = marker | (c >> (6 * tail));
    for (int32_t shift = 6 * (tail - 1); shift >= 0; shift -= 6) {
      *wp++ = 0x80 | ((c >> shift) & 0x3f);
    }
  }
  *wp = '\0';
  return wp - (unsigned char*)dest;
}
2858
2859
/**
 * Allocate a region on memory.
 * @param size the size of the region in bytes.
 * @return the pointer returned by std::malloc.
 * @throws std::bad_alloc if std::malloc returns a null pointer.
 */
inline void* xmalloc(size_t size) {
  _assert_(size <= MEMMAXSIZ);
  void* const region = std::malloc(size);
  if (region) return region;
  throw std::bad_alloc();
}
2869
2870
/**
 * Allocate a nullified region on memory.
 * @param nmemb the number of elements.
 * @param size the size of each element in bytes.
 * @return the pointer returned by std::calloc.
 * @throws std::bad_alloc if std::calloc returns a null pointer.
 */
inline void* xcalloc(size_t nmemb, size_t size) {
  _assert_(nmemb <= MEMMAXSIZ && size <= MEMMAXSIZ);
  void* const region = std::calloc(nmemb, size);
  if (region) return region;
  throw std::bad_alloc();
}
2880
2881
/**
 * Re-allocate a region on memory.
 * @param ptr the pointer to the region to resize; it is passed to std::realloc unchanged.
 * @param size the new size of the region in bytes.
 * @return the pointer returned by std::realloc.
 * @throws std::bad_alloc if std::realloc returns a null pointer.
 */
inline void* xrealloc(void* ptr, size_t size) {
  _assert_(size <= MEMMAXSIZ);
  void* const region = std::realloc(ptr, size);
  if (region) return region;
  throw std::bad_alloc();
}
2891
2892
/**
 * Free a region on memory.
 * @param ptr the pointer to the region to release; it is handed to std::free as it is.
 */
inline void xfree(void* ptr) {
  _assert_(true);
  // No check is made here: the pointer goes to std::free unchanged.
  std::free(ptr);
}
2900
2901
2902 /**
2903 * Dummy test driver.
2904 */
_dummytest()2905 inline bool _dummytest() {
2906 _assert_(true);
2907 std::ostringstream oss;
2908 oss << INT8MAX << INT16MAX << INT32MAX << INT64MAX;
2909 oss << INT8MIN << INT16MIN << INT32MIN << INT64MIN;
2910 oss << UINT8MAX << UINT16MAX << UINT32MAX << UINT64MAX;
2911 oss << SIZEMAX << FLTMAX << DBLMAX;
2912 oss << VERSION << LIBVER << LIBREV << FMTVER << OSNAME;
2913 oss << BIGEND << CLOCKTICK << PAGESIZ << FEATURES;
2914 oss << NUMBUFSIZ << MEMMAXSIZ;
2915 return oss.tellp() > 0;
2916 }
2917
2918
2919 } // common namespace
2920
2921 #endif // duplication check
2922
2923 // END OF FILE
2924