1 // -*- related-file-name: "../include/lcdf/string.hh" -*-
2 /*
3  * string.{cc,hh} -- a String class with shared substrings
4  * Eddie Kohler
5  *
6  * Copyright (c) 1999-2000 Massachusetts Institute of Technology
7  * Copyright (c) 2001-2019 Eddie Kohler
8  * Copyright (c) 2008-2009 Meraki, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, subject to the conditions
13  * listed in the Click LICENSE file. These conditions include: you must
14  * preserve this copyright notice, and you cannot mention the copyright
15  * holders in advertising related to the Software without their permission.
16  * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
17  * notice is a summary of the Click LICENSE file; the license in that file is
18  * legally binding.
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24 #include <lcdf/string.hh>
25 #include <lcdf/straccum.hh>
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <lcdf/inttypes.h>
30 
31 #ifndef likely
32 #define likely(x) (x)
33 #endif
34 #ifndef unlikely
35 #define unlikely(x) (x)
36 #endif
37 
38 /** @file string.hh
39  * @brief The LCDF String class.
40  */
41 
42 /** @class String
43  * @brief A string of characters.
44  *
45  * The String class represents a string of characters.  Strings may be
46  * constructed from C strings, characters, numbers, and so forth.  They may
47  * also be added together.  The underlying character arrays are dynamically
48  * allocated; String operations allocate and free memory as needed.  A String
49  * and its substrings generally share memory.  Accessing a character by index
50  * takes O(1) time; so does creating a substring.
51  *
52  * <h3>Out-of-memory strings</h3>
53  *
54  * When there is not enough memory to create a particular string, a special
55  * "out-of-memory" string is returned instead. Out-of-memory strings are
56  * contagious: the result of any concatenation operation involving an
57  * out-of-memory string is another out-of-memory string. Thus, the final
58  * result of a series of String operations will be an out-of-memory string,
59  * even if the out-of-memory condition occurs in the middle.
60  *
61  * Out-of-memory strings have zero characters, but they aren't equal to other
62  * empty strings.  If @a s is a normal String (even an empty string), and @a
63  * oom is an out-of-memory string, then @a s @< @a oom.
64  *
65  * All out-of-memory strings are equal and share the same data(), which is
66  * different from the data() of any other string.  See
67  * String::out_of_memory_data().  The String::make_out_of_memory() function
68  * returns an out-of-memory string.
69  */
70 
71 const char String::null_data = '\0';
72 const char String::oom_data = '\0';
73 const char String::bool_data[] = "true\0false";
74 const char String::int_data[] = "0\0001\0002\0003\0004\0005\0006\0007\0008\0009";
75 
76 #if HAVE_STRING_PROFILING > 1
77 # define MEMO_INITIALIZER_TAIL , 0, 0
78 #else
79 # define MEMO_INITIALIZER_TAIL
80 #endif
81 
82 const String::rep_t String::null_string_rep = {
83     &null_data, 0, 0
84 };
85 const String::rep_t String::oom_string_rep = {
86     &oom_data, 0, 0
87 };
88 
89 #if HAVE_STRING_PROFILING
90 uint64_t String::live_memo_count;
91 uint64_t String::memo_sizes[55];
92 uint64_t String::live_memo_sizes[55];
93 uint64_t String::live_memo_bytes[55];
94 # if HAVE_STRING_PROFILING > 1
95 String::memo_t *String::live_memos[55];
96 # endif
97 #endif
98 
99 /** @cond never */
100 String::memo_t *
create_memo(char * space,int dirty,int capacity)101 String::create_memo(char *space, int dirty, int capacity)
102 {
103     assert(capacity > 0 && capacity >= dirty);
104     memo_t *memo;
105     if (space)
106 	memo = reinterpret_cast<memo_t *>(space);
107     else
108 	memo = reinterpret_cast<memo_t *>(new char[MEMO_SPACE + capacity]);
109     if (memo) {
110 	memo->capacity = capacity;
111 	memo->dirty = dirty;
112 	memo->refcount = (space ? 0 : 1);
113 #if HAVE_STRING_PROFILING
114 	int bucket = profile_memo_size_bucket(dirty, capacity);
115 	++memo_sizes[bucket];
116 	++live_memo_sizes[bucket];
117 	live_memo_bytes[bucket] += capacity;
118 	++live_memo_count;
119 # if HAVE_STRING_PROFILING > 1
120 	memo->pprev = &live_memos[bucket];
121 	if ((memo->next = *memo->pprev))
122 	    memo->next->pprev = &memo->next;
123 	*memo->pprev = memo;
124 # endif
125 #endif
126     }
127     return memo;
128 }
129 
130 void
delete_memo(memo_t * memo)131 String::delete_memo(memo_t *memo)
132 {
133     assert(memo->capacity > 0);
134     assert(memo->capacity >= memo->dirty);
135 #if HAVE_STRING_PROFILING
136     int bucket = profile_memo_size_bucket(memo->dirty, memo->capacity);
137     --live_memo_sizes[bucket];
138     live_memo_bytes[bucket] -= memo->capacity;
139     --live_memo_count;
140 # if HAVE_STRING_PROFILING > 1
141     if ((*memo->pprev = memo->next))
142 	memo->next->pprev = memo->pprev;
143 # endif
144 #endif
145     delete[] reinterpret_cast<char *>(memo);
146 }
147 
148 
149 #if HAVE_STRING_PROFILING
150 void
one_profile_report(StringAccum & sa,int i,int examples)151 String::one_profile_report(StringAccum &sa, int i, int examples)
152 {
153     if (i <= 16)
154 	sa << "memo_dirty_" << i;
155     else if (i < 25) {
156 	uint32_t s = (i - 17) * 2 + 17;
157 	sa << "memo_cap_" << s << '_' << (s + 1);
158     } else if (i < 29) {
159 	uint32_t s = (i - 25) * 8 + 33;
160 	sa << "memo_cap_" << s << '_' << (s + 7);
161     } else {
162 	uint32_t s1 = (1U << (i - 23)) + 1;
163 	uint32_t s2 = (s1 - 1) << 1;
164 	sa << "memo_cap_" << s1 << '_' << s2;
165     }
166     sa << '\t' << live_memo_sizes[i] << '\t' << memo_sizes[i] << '\t' << live_memo_bytes[i] << '\n';
167     if (examples) {
168 # if HAVE_STRING_PROFILING > 1
169 	for (memo_t *m = live_memos[i]; m; m = m->next) {
170 	    sa << "    [" << m->dirty << "] ";
171 	    uint32_t dirty = m->dirty;
172 	    if (dirty > 0 && m->real_data[dirty - 1] == '\0')
173 		--dirty;
174 	    sa.append(m->real_data, dirty > 128 ? 128 : dirty);
175 	    sa << '\n';
176 	}
177 # endif
178     }
179 }
180 
181 void
profile_report(StringAccum & sa,int examples)182 String::profile_report(StringAccum &sa, int examples)
183 {
184     uint64_t all_live_sizes = 0, all_sizes = 0, all_live_bytes = 0;
185     for (int i = 0; i < 55; ++i) {
186 	if (memo_sizes[i])
187 	    one_profile_report(sa, i, examples);
188 	all_live_sizes += live_memo_sizes[i];
189 	all_sizes += memo_sizes[i];
190 	all_live_bytes += live_memo_bytes[i];
191     }
192     sa << "memo_total\t" << all_live_sizes << '\t' << all_sizes << '\t' << all_live_bytes << '\n';
193 }
194 #endif
195 
196 /** @endcond never */
197 
198 
199 /** @brief Construct a base-10 string representation of @a x. */
String(int x)200 String::String(int x)
201 {
202     if (x >= 0 && x < 10)
203 	assign_memo(int_data + 2 * x, 1, 0);
204     else {
205 	char buf[128];
206 	sprintf(buf, "%d", x);
207 	assign(buf, -1, false);
208     }
209 }
210 
211 /** @overload */
String(unsigned x)212 String::String(unsigned x)
213 {
214     if (x < 10)
215 	assign_memo(int_data + 2 * x, 1, 0);
216     else {
217 	char buf[128];
218 	sprintf(buf, "%u", x);
219 	assign(buf, -1, false);
220     }
221 }
222 
223 /** @overload */
String(long x)224 String::String(long x)
225 {
226     if (x >= 0 && x < 10)
227 	assign_memo(int_data + 2 * x, 1, 0);
228     else {
229 	char buf[128];
230 	sprintf(buf, "%ld", x);
231 	assign(buf, -1, false);
232     }
233 }
234 
235 /** @overload */
String(unsigned long x)236 String::String(unsigned long x)
237 {
238     if (x < 10)
239 	assign_memo(int_data + 2 * x, 1, 0);
240     else {
241 	char buf[128];
242 	sprintf(buf, "%lu", x);
243 	assign(buf, -1, false);
244     }
245 }
246 
String(double x)247 String::String(double x)
248 {
249     char buf[128];
250     int len = sprintf(buf, "%.12g", x);
251     assign(buf, len, false);
252 }
253 
254 String
make_claim(char * str,int len,int capacity)255 String::make_claim(char *str, int len, int capacity)
256 {
257     assert(str && len > 0 && capacity >= len);
258     memo_t *new_memo = create_memo(str - MEMO_SPACE, len, capacity);
259     return String(str, len, new_memo);
260 }
261 
262 String
make_stable(const char * s,int len)263 String::make_stable(const char *s, int len)
264 {
265     if (len < 0)
266 	len = (s ? strlen(s) : 0);
267     return String(s, len, 0);
268 }
269 
270 String
make_fill(int c,int len)271 String::make_fill(int c, int len)
272 {
273     String s;
274     s.append_fill(c, len);
275     return s;
276 }
277 
278 void
assign_out_of_memory()279 String::assign_out_of_memory()
280 {
281     if (_r.memo)
282 	deref();
283     _r.memo = 0;
284     _r.data = &oom_data;
285     _r.length = 0;
286 }
287 
288 void
assign(const char * s,int len,bool need_deref)289 String::assign(const char *s, int len, bool need_deref)
290 {
291     if (!s) {
292 	assert(len <= 0);
293 	len = 0;
294     } else if (len < 0)
295 	len = strlen(s);
296 
297     // need to start with dereference
298     if (need_deref) {
299 	if (unlikely(_r.memo
300 		     && s >= _r.memo->real_data
301 		     && s + len <= _r.memo->real_data + _r.memo->capacity)) {
302 	    // Be careful about "String s = ...; s = s.c_str();"
303 	    _r.data = s;
304 	    _r.length = len;
305 	    return;
306 	} else
307 	    deref();
308     }
309 
310     if (len == 0) {
311 	_r.memo = 0;
312 	_r.data = (s == &oom_data ? s : &null_data);
313 
314     } else {
315 	// Make the memo a multiple of 16 characters and bigger than 'len'.
316 	int memo_capacity = (len + 15 + MEMO_SPACE) & ~15;
317 	_r.memo = create_memo(0, len, memo_capacity - MEMO_SPACE);
318 	if (!_r.memo) {
319 	    assign_out_of_memory();
320 	    return;
321 	}
322 	memcpy(_r.memo->real_data, s, len);
323 	_r.data = _r.memo->real_data;
324     }
325 
326     _r.length = len;
327 }
328 
329 /** @brief Append @a len unknown characters to this string.
330  * @return Modifiable pointer to the appended characters.
331  *
332  * The caller may safely modify the returned memory. Null is returned if
333  * the string becomes out-of-memory. */
334 char *
append_uninitialized(int len)335 String::append_uninitialized(int len)
336 {
337     // Appending anything to "out of memory" leaves it as "out of memory"
338     if (unlikely(len <= 0) || out_of_memory())
339 	return 0;
340 
341     // If we can, append into unused space. First, we check that there's
342     // enough unused space for 'len' characters to fit; then, we check
343     // that the unused space immediately follows the data in '*this'.
344     uint32_t dirty;
345     if (_r.memo
346 	&& ((dirty = _r.memo->dirty), _r.memo->capacity > dirty + len)) {
347 	char *real_dirty = _r.memo->real_data + dirty;
348 	if (real_dirty == _r.data + _r.length) {
349 	    _r.memo->dirty = dirty + len;
350 	    _r.length += len;
351 	    assert(_r.memo->dirty < _r.memo->capacity);
352 #if HAVE_STRING_PROFILING
353 	    profile_update_memo_dirty(_r.memo, dirty, dirty + len, _r.memo->capacity);
354 #endif
355 	    return real_dirty;
356 	}
357     }
358 
359     // Now we have to make new space. Make sure the memo is a multiple of 16
360     // bytes and that it is at least 16. But for large strings, allocate a
361     // power of 2, since power-of-2 sizes minimize waste in frequently-used
362     // allocators, like Linux kmalloc.
363     int want_memo_len = _r.length + len + MEMO_SPACE;
364     int memo_capacity;
365     if (want_memo_len <= 1024)
366 	memo_capacity = (want_memo_len + 15) & ~15;
367     else
368 	for (memo_capacity = 2048; memo_capacity < want_memo_len; )
369 	    memo_capacity *= 2;
370 
371     memo_t *new_memo = create_memo(0, _r.length + len, memo_capacity - MEMO_SPACE);
372     if (!new_memo) {
373 	assign_out_of_memory();
374 	return 0;
375     }
376 
377     char *new_data = new_memo->real_data;
378     memcpy(new_data, _r.data, _r.length);
379 
380     deref();
381     _r.data = new_data;
382     new_data += _r.length;	// now new_data points to the garbage
383     _r.length += len;
384     _r.memo = new_memo;
385     return new_data;
386 }
387 
388 void
append(const char * s,int len,memo_t * memo)389 String::append(const char *s, int len, memo_t *memo)
390 {
391     if (!s) {
392 	assert(len <= 0);
393 	len = 0;
394     } else if (len < 0)
395 	len = strlen(s);
396 
397     if (s == &oom_data)
398 	// Appending "out of memory" to a regular string makes it "out of
399 	// memory"
400 	assign_out_of_memory();
401     else if (len == 0)
402 	/* do nothing */;
403     else if (_r.length == 0 && memo && !out_of_memory()) {
404 	deref();
405 	assign_memo(s, len, memo);
406     } else if (likely(!(_r.memo
407 			&& s >= _r.memo->real_data
408 			&& s + len <= _r.memo->real_data + _r.memo->capacity))) {
409 	if (char *space = append_uninitialized(len))
410 	    memcpy(space, s, len);
411     } else {
412 	String preserve_s(*this);
413 	if (char *space = append_uninitialized(len))
414 	    memcpy(space, s, len);
415     }
416 }
417 
418 /** @brief Append @a len copies of character @a c to this string. */
419 void
append_fill(int c,int len)420 String::append_fill(int c, int len)
421 {
422     assert(len >= 0);
423     if (char *space = append_uninitialized(len))
424 	memset(space, c, len);
425 }
426 
427 /** @brief Ensure the string's data is unshared and return a mutable
428     pointer to it. */
429 char *
mutable_data()430 String::mutable_data()
431 {
432     // If _memo has a capacity (it's not one of the special strings) and it's
433     // uniquely referenced, return _data right away.
434     if (_r.memo && _r.memo->refcount == 1)
435 	return const_cast<char *>(_r.data);
436 
437     // Otherwise, make a copy of it. Rely on: deref() doesn't change _data or
438     // _length; and if _capacity == 0, then deref() doesn't free _real_data.
439     assert(!_r.memo || _r.memo->refcount > 1);
440     // But in multithreaded situations we must hold a local copy of memo!
441     String do_not_delete_underlying_memo(*this);
442     deref();
443     assign(_r.data, _r.length, false);
444     return const_cast<char *>(_r.data);
445 }
446 
447 char *
mutable_c_str()448 String::mutable_c_str()
449 {
450     (void) mutable_data();
451     (void) c_str();
452     return const_cast<char *>(_r.data);
453 }
454 
455 /** @brief Return a substring of this string, consisting of the @a len
456     characters starting at index @a pos.
457     @param pos substring's first position relative to the string
458     @param len length of substring
459 
460     If @a pos is negative, starts that far from the end of the string. If @a
461     len is negative, leaves that many characters off the end of the string.
462     If @a pos and @a len specify a substring that is partly outside the
463     string, only the part within the string is returned. If the substring is
464     beyond either end of the string, returns an empty string (but this
465     should be considered a programming error; a future version may generate
466     a warning for this case).
467 
468     @note String::substring() is intended to behave like Perl's substr(). */
469 String
substring(int pos,int len) const470 String::substring(int pos, int len) const
471 {
472     if (pos < 0)
473 	pos += _r.length;
474 
475     int pos2;
476     if (len < 0)
477 	pos2 = _r.length + len;
478     else if (pos >= 0 && len >= _r.length) // avoid integer overflow
479 	pos2 = _r.length;
480     else
481 	pos2 = pos + len;
482 
483     if (pos < 0)
484 	pos = 0;
485     if (pos2 > _r.length)
486 	pos2 = _r.length;
487 
488     if (pos >= pos2)
489 	return String();
490     else
491 	return String(_r.data + pos, pos2 - pos, _r.memo);
492 }
493 
494 int
find_left(char c,int start) const495 String::find_left(char c, int start) const
496 {
497     if (start < 0)
498 	start = 0;
499     for (int i = start; i < _r.length; i++)
500 	if (_r.data[i] == c)
501 	    return i;
502     return -1;
503 }
504 
505 int
find_left(const String & str,int start) const506 String::find_left(const String &str, int start) const
507 {
508     if (start < 0)
509 	start = 0;
510     int max_pos = length() - str.length();
511     for (int i = start; i <= max_pos; ++i)
512 	if (memcmp(_r.data + i, str.data(), str.length()) == 0)
513 	    return i;
514     return -1;
515 }
516 
517 int
find_right(char c,int start) const518 String::find_right(char c, int start) const
519 {
520     if (start >= _r.length)
521 	start = _r.length - 1;
522     for (int i = start; i >= 0; i--)
523 	if (_r.data[i] == c)
524 	    return i;
525     return -1;
526 }
527 
528 static String
hard_lower(const String & s,int pos)529 hard_lower(const String &s, int pos)
530 {
531     String new_s(s.data(), s.length());
532     char *x = const_cast<char *>(new_s.data()); // know it's mutable
533     int len = s.length();
534     for (; pos < len; pos++)
535 	x[pos] = tolower((unsigned char) x[pos]);
536     return new_s;
537 }
538 
539 /** @brief Return a lowercased version of this string.
540 
541     Translates the ASCII characters 'A' through 'Z' into their lowercase
542     equivalents. */
543 String
lower() const544 String::lower() const
545 {
546     // avoid copies
547     if (!out_of_memory())
548 	for (int i = 0; i < _r.length; i++)
549 	    if (_r.data[i] >= 'A' && _r.data[i] <= 'Z')
550 		return hard_lower(*this, i);
551     return *this;
552 }
553 
554 static String
hard_upper(const String & s,int pos)555 hard_upper(const String &s, int pos)
556 {
557     String new_s(s.data(), s.length());
558     char *x = const_cast<char *>(new_s.data()); // know it's mutable
559     int len = s.length();
560     for (; pos < len; pos++)
561 	x[pos] = toupper((unsigned char) x[pos]);
562     return new_s;
563 }
564 
565 /** @brief Return an uppercased version of this string.
566 
567     Translates the ASCII characters 'a' through 'z' into their uppercase
568     equivalents. */
569 String
upper() const570 String::upper() const
571 {
572     // avoid copies
573     for (int i = 0; i < _r.length; i++)
574 	if (_r.data[i] >= 'a' && _r.data[i] <= 'z')
575 	    return hard_upper(*this, i);
576     return *this;
577 }
578 
579 static String
hard_printable(const String & s,int pos,int type)580 hard_printable(const String &s, int pos, int type)
581 {
582     StringAccum sa(s.length() * 2);
583     sa.append(s.data(), pos);
584     const unsigned char *x = reinterpret_cast<const unsigned char *>(s.data());
585     int len = s.length();
586     for (; pos < len; pos++) {
587 	if (x[pos] >= 32 && x[pos] < 127)
588 	    sa << x[pos];
589 	else if (x[pos] < 32 && type != 1)
590 	    sa << '^' << (unsigned char)(x[pos] + 64);
591 	else if (char *buf = sa.extend(4, 1))
592 	    sprintf(buf, "\\%03o", x[pos]);
593     }
594     return sa.take_string();
595 }
596 
597 /** @brief Return a "printable" version of this string.
598     @param type quoting type
599 
600     The default quoting type (0) translates control characters 0-31 into
601     "control" sequences, such as "^@" for the null character, and characters
602     127-255 into octal escape sequences, such as "\377" for 255. Quoting
603     type 1 translates all characters outside of 32-126 into octal escape
604     sequences. */
605 String
printable(int type) const606 String::printable(int type) const
607 {
608     // avoid copies
609     if (!out_of_memory())
610 	for (int i = 0; i < _r.length; i++)
611 	    if (_r.data[i] < 32 || _r.data[i] > 126)
612 		return hard_printable(*this, i, type);
613     return *this;
614 }
615 
616 hashcode_t
hashcode(const char * begin,const char * end)617 String::hashcode(const char *begin, const char *end)
618 {
619     if (end <= begin)
620 	return 0;
621 
622     uint32_t hash = end - begin;
623     int rem = hash & 3;
624     end -= rem;
625     uint32_t last16;
626 
627 #if !HAVE_INDIFFERENT_ALIGNMENT
628     if (!(reinterpret_cast<uintptr_t>(begin) & 1)) {
629 #endif
630 #define get16(p) (*reinterpret_cast<const uint16_t *>((p)))
631 	for (; begin != end; begin += 4) {
632 	    hash += get16(begin);
633 	    uint32_t tmp = (get16(begin + 2) << 11) ^ hash;
634 	    hash = (hash << 16) ^ tmp;
635 	    hash += hash >> 11;
636 	}
637 	if (rem >= 2) {
638 	    last16 = get16(begin);
639 	    goto rem2;
640 	}
641 #undef get16
642 #if !HAVE_INDIFFERENT_ALIGNMENT
643     } else {
644 # if WORDS_BIGENDIAN
645 #  define get16(p) (((unsigned char) (p)[0] << 8) + (unsigned char) (p)[1])
646 # elif WORDS_BIGENDIAN_SET
647 #  define get16(p) ((unsigned char) (p)[0] + ((unsigned char) (p)[1] << 8))
648 # else
649 #  error "unknown byte order"
650 # endif
651 	// should be exactly the same as the code above
652 	for (; begin != end; begin += 4) {
653 	    hash += get16(begin);
654 	    uint32_t tmp = (get16(begin + 2) << 11) ^ hash;
655 	    hash = (hash << 16) ^ tmp;
656 	    hash += hash >> 11;
657 	}
658 	if (rem >= 2) {
659 	    last16 = get16(begin);
660 	    goto rem2;
661 	}
662 # undef get16
663     }
664 #endif
665 
666     /* Handle end cases */
667     if (0) {			// weird organization avoids uninitialized
668       rem2:			// variable warnings
669 	if (rem == 3) {
670 	    hash += last16;
671 	    hash ^= hash << 16;
672 	    hash ^= ((unsigned char) begin[2]) << 18;
673 	    hash += hash >> 11;
674 	} else {
675 	    hash += last16;
676 	    hash ^= hash << 11;
677 	    hash += hash >> 17;
678 	}
679     } else if (rem == 1) {
680 	hash += (unsigned char) *begin;
681 	hash ^= hash << 10;
682 	hash += hash >> 1;
683     }
684 
685     /* Force "avalanching" of final 127 bits */
686     hash ^= hash << 3;
687     hash += hash >> 5;
688     hash ^= hash << 4;
689     hash += hash >> 17;
690     hash ^= hash << 25;
691     hash += hash >> 6;
692 
693     return hash;
694 }
695 
696 #if 0
697 // 11.Apr.2008 -- This old hash function was swapped out in favor of
698 // SuperFastHash, above.
699 hashcode_t
700 String::hashcode() const
701 {
702     int l = length();
703     const char *d = data();
704     if (!l)
705 	return 0;
706     else if (l == 1)
707 	return d[0] | (d[0] << 8);
708     else if (l < 4)
709 	return d[0] + (d[1] << 3) + (l << 12);
710     else
711 	return d[0] + (d[1] << 8) + (d[2] << 16) + (d[3] << 24)
712 	    + (l << 12) + (d[l-1] << 10);
713 }
714 #endif
715 
716 bool
equals(const char * s,int len) const717 String::equals(const char *s, int len) const
718 {
719     // It'd be nice to make "out-of-memory" strings compare unequal to
720     // anything, even themselves, but this would be a bad idea for Strings
721     // used as (for example) keys in hashtables. Instead, "out-of-memory"
722     // strings compare unequal to other null strings, but equal to each other.
723     if (len < 0)
724 	len = strlen(s);
725     if (_r.length != len)
726 	return false;
727     else if (_r.data == s)
728 	return true;
729     else if (len == 0)
730 	return (s != &oom_data && _r.data != &oom_data);
731     else
732 	return memcmp(_r.data, s, len) == 0;
733 }
734 
735 bool
starts_with(const char * s,int len) const736 String::starts_with(const char *s, int len) const
737 {
738     // See note on equals() re: "out-of-memory" strings.
739     if (len < 0)
740 	len = strlen(s);
741     if (_r.length < len)
742 	return false;
743     else if (_r.data == s)
744 	return true;
745     else if (len == 0)
746 	return (s != &oom_data && _r.data != &oom_data);
747     else
748 	return memcmp(_r.data, s, len) == 0;
749 }
750 
751 int
compare(const char * s,int len) const752 String::compare(const char *s, int len) const
753 {
754     if (len < 0)
755 	len = strlen(s);
756     if (_r.data == s)
757 	return _r.length - len;
758     else if (_r.data == &oom_data)
759 	return 1;
760     else if (s == &oom_data)
761 	return -1;
762     else if (_r.length == len)
763 	return memcmp(_r.data, s, len);
764     else if (_r.length < len) {
765 	int v = memcmp(_r.data, s, _r.length);
766 	return (v ? v : -1);
767     } else {
768 	int v = memcmp(_r.data, s, len);
769 	return (v ? v : 1);
770     }
771 }
772 
773 void
align(int n)774 String::align(int n)
775 {
776     int offset = reinterpret_cast<uintptr_t>(_r.data) % n;
777     if (offset) {
778 	String s;
779 	s.append_uninitialized(_r.length + n + 1);
780 	offset = reinterpret_cast<uintptr_t>(s._r.data) % n;
781 	memcpy((char *)s._r.data + n - offset, _r.data, _r.length);
782 	s._r.data += n - offset;
783 	s._r.length = _r.length;
784 	*this = s;
785     }
786 }
787