1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/cord.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cstddef>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <iomanip>
23 #include <iostream>
24 #include <limits>
25 #include <ostream>
26 #include <sstream>
27 #include <type_traits>
28 #include <unordered_set>
29 #include <vector>
30 
31 #include "absl/base/casts.h"
32 #include "absl/base/internal/raw_logging.h"
33 #include "absl/base/macros.h"
34 #include "absl/base/port.h"
35 #include "absl/container/fixed_array.h"
36 #include "absl/container/inlined_vector.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/internal/cord_internal.h"
39 #include "absl/strings/internal/resize_uninitialized.h"
40 #include "absl/strings/str_cat.h"
41 #include "absl/strings/str_format.h"
42 #include "absl/strings/str_join.h"
43 #include "absl/strings/string_view.h"
44 
45 namespace absl {
46 ABSL_NAMESPACE_BEGIN
47 
48 using ::absl::cord_internal::CordRep;
49 using ::absl::cord_internal::CordRepConcat;
50 using ::absl::cord_internal::CordRepExternal;
51 using ::absl::cord_internal::CordRepSubstring;
52 
53 // Various representations that we allow
54 enum CordRepKind {
55   CONCAT        = 0,
56   EXTERNAL      = 1,
57   SUBSTRING     = 2,
58 
59   // We have different tags for different sized flat arrays,
60   // starting with FLAT
61   FLAT          = 3,
62 };
63 
64 namespace cord_internal {
65 
concat()66 inline CordRepConcat* CordRep::concat() {
67   assert(tag == CONCAT);
68   return static_cast<CordRepConcat*>(this);
69 }
70 
concat() const71 inline const CordRepConcat* CordRep::concat() const {
72   assert(tag == CONCAT);
73   return static_cast<const CordRepConcat*>(this);
74 }
75 
substring()76 inline CordRepSubstring* CordRep::substring() {
77   assert(tag == SUBSTRING);
78   return static_cast<CordRepSubstring*>(this);
79 }
80 
substring() const81 inline const CordRepSubstring* CordRep::substring() const {
82   assert(tag == SUBSTRING);
83   return static_cast<const CordRepSubstring*>(this);
84 }
85 
external()86 inline CordRepExternal* CordRep::external() {
87   assert(tag == EXTERNAL);
88   return static_cast<CordRepExternal*>(this);
89 }
90 
external() const91 inline const CordRepExternal* CordRep::external() const {
92   assert(tag == EXTERNAL);
93   return static_cast<const CordRepExternal*>(this);
94 }
95 
96 }  // namespace cord_internal
97 
98 static const size_t kFlatOverhead = offsetof(CordRep, data);
99 
100 // Largest and smallest flat node lengths we are willing to allocate
101 // Flat allocation size is stored in tag, which currently can encode sizes up
102 // to 4K, encoded as multiple of either 8 or 32 bytes.
103 // If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
104 static constexpr size_t kMaxFlatSize = 4096;
105 static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
106 static constexpr size_t kMinFlatLength = 32 - kFlatOverhead;
107 
108 // Prefer copying blocks of at most this size, otherwise reference count.
109 static const size_t kMaxBytesToCopy = 511;
110 
111 // Helper functions for rounded div, and rounding to exact sizes.
DivUp(size_t n,size_t m)112 static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; }
RoundUp(size_t n,size_t m)113 static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; }
114 
115 // Returns the size to the nearest equal or larger value that can be
116 // expressed exactly as a tag value.
RoundUpForTag(size_t size)117 static size_t RoundUpForTag(size_t size) {
118   return RoundUp(size, (size <= 1024) ? 8 : 32);
119 }
120 
121 // Converts the allocated size to a tag, rounding down if the size
122 // does not exactly match a 'tag expressible' size value. The result is
123 // undefined if the size exceeds the maximum size that can be encoded in
124 // a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
AllocatedSizeToTag(size_t size)125 static uint8_t AllocatedSizeToTag(size_t size) {
126   const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32;
127   assert(tag <= std::numeric_limits<uint8_t>::max());
128   return tag;
129 }
130 
131 // Converts the provided tag to the corresponding allocated size
TagToAllocatedSize(uint8_t tag)132 static constexpr size_t TagToAllocatedSize(uint8_t tag) {
133   return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32);
134 }
135 
136 // Converts the provided tag to the corresponding available data length
TagToLength(uint8_t tag)137 static constexpr size_t TagToLength(uint8_t tag) {
138   return TagToAllocatedSize(tag) - kFlatOverhead;
139 }
140 
141 // Enforce that kMaxFlatSize maps to a well-known exact tag value.
142 static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");
143 
Fibonacci(unsigned char n,uint64_t a=0,uint64_t b=1)144 constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) {
145   return n == 0 ? a : Fibonacci(n - 1, b, a + b);
146 }
147 
148 static_assert(Fibonacci(63) == 6557470319842,
149               "Fibonacci values computed incorrectly");
150 
151 // Minimum length required for a given depth tree -- a tree is considered
152 // balanced if
153 //      length(t) >= min_length[depth(t)]
154 // The root node depth is allowed to become twice as large to reduce rebalancing
155 // for larger strings (see IsRootBalanced).
156 static constexpr uint64_t min_length[] = {
157     Fibonacci(2),          Fibonacci(3),  Fibonacci(4),  Fibonacci(5),
158     Fibonacci(6),          Fibonacci(7),  Fibonacci(8),  Fibonacci(9),
159     Fibonacci(10),         Fibonacci(11), Fibonacci(12), Fibonacci(13),
160     Fibonacci(14),         Fibonacci(15), Fibonacci(16), Fibonacci(17),
161     Fibonacci(18),         Fibonacci(19), Fibonacci(20), Fibonacci(21),
162     Fibonacci(22),         Fibonacci(23), Fibonacci(24), Fibonacci(25),
163     Fibonacci(26),         Fibonacci(27), Fibonacci(28), Fibonacci(29),
164     Fibonacci(30),         Fibonacci(31), Fibonacci(32), Fibonacci(33),
165     Fibonacci(34),         Fibonacci(35), Fibonacci(36), Fibonacci(37),
166     Fibonacci(38),         Fibonacci(39), Fibonacci(40), Fibonacci(41),
167     Fibonacci(42),         Fibonacci(43), Fibonacci(44), Fibonacci(45),
168     Fibonacci(46),         Fibonacci(47),
169     0xffffffffffffffffull,  // Avoid overflow
170 };
171 
172 static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
173 
174 // The inlined size to use with absl::InlinedVector.
175 //
176 // Note: The InlinedVectors in this file (and in cord.h) do not need to use
177 // the same value for their inlined size. The fact that they do is historical.
178 // It may be desirable for each to use a different inlined size optimized for
179 // that InlinedVector's usage.
180 //
181 // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
182 // the inlined vector size (47 exists for backward compatibility).
183 static const int kInlinedVectorSize = 47;
184 
IsRootBalanced(CordRep * node)185 static inline bool IsRootBalanced(CordRep* node) {
186   if (node->tag != CONCAT) {
187     return true;
188   } else if (node->concat()->depth() <= 15) {
189     return true;
190   } else if (node->concat()->depth() > kMinLengthSize) {
191     return false;
192   } else {
193     // Allow depth to become twice as large as implied by fibonacci rule to
194     // reduce rebalancing for larger strings.
195     return (node->length >= min_length[node->concat()->depth() / 2]);
196   }
197 }
198 
199 static CordRep* Rebalance(CordRep* node);
200 static void DumpNode(CordRep* rep, bool include_data, std::ostream* os);
201 static bool VerifyNode(CordRep* root, CordRep* start_node,
202                        bool full_validation);
203 
VerifyTree(CordRep * node)204 static inline CordRep* VerifyTree(CordRep* node) {
205   // Verification is expensive, so only do it in debug mode.
206   // Even in debug mode we normally do only light validation.
207   // If you are debugging Cord itself, you should define the
208   // macro EXTRA_CORD_VALIDATION, e.g. by adding
209   // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
210 #ifdef EXTRA_CORD_VALIDATION
211   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
212 #else   // EXTRA_CORD_VALIDATION
213   assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
214 #endif  // EXTRA_CORD_VALIDATION
215   static_cast<void>(&VerifyNode);
216 
217   return node;
218 }
219 
220 // --------------------------------------------------------------------
221 // Memory management
222 
Ref(CordRep * rep)223 inline CordRep* Ref(CordRep* rep) {
224   if (rep != nullptr) {
225     rep->refcount.Increment();
226   }
227   return rep;
228 }
229 
230 // This internal routine is called from the cold path of Unref below. Keeping it
231 // in a separate routine allows good inlining of Unref into many profitable call
232 // sites. However, the call to this function can be highly disruptive to the
233 // register pressure in those callers. To minimize the cost to callers, we use
234 // a special LLVM calling convention that preserves most registers. This allows
235 // the call to this routine in cold paths to not disrupt the caller's register
236 // pressure. This calling convention is not available on all platforms; we
237 // intentionally allow LLVM to ignore the attribute rather than attempting to
238 // hardcode the list of supported platforms.
239 #if defined(__clang__) && !defined(__i386__)
240 #pragma clang diagnostic push
241 #pragma clang diagnostic ignored "-Wattributes"
242 __attribute__((preserve_most))
243 #pragma clang diagnostic pop
244 #endif
UnrefInternal(CordRep * rep)245 static void UnrefInternal(CordRep* rep) {
246   assert(rep != nullptr);
247 
248   absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
249   while (true) {
250     if (rep->tag == CONCAT) {
251       CordRepConcat* rep_concat = rep->concat();
252       CordRep* right = rep_concat->right;
253       if (!right->refcount.Decrement()) {
254         pending.push_back(right);
255       }
256       CordRep* left = rep_concat->left;
257       delete rep_concat;
258       rep = nullptr;
259       if (!left->refcount.Decrement()) {
260         rep = left;
261         continue;
262       }
263     } else if (rep->tag == EXTERNAL) {
264       CordRepExternal* rep_external = rep->external();
265       rep_external->releaser_invoker(rep_external);
266       rep = nullptr;
267     } else if (rep->tag == SUBSTRING) {
268       CordRepSubstring* rep_substring = rep->substring();
269       CordRep* child = rep_substring->child;
270       delete rep_substring;
271       rep = nullptr;
272       if (!child->refcount.Decrement()) {
273         rep = child;
274         continue;
275       }
276     } else {
277       // Flat CordReps are allocated and constructed with raw ::operator new
278       // and placement new, and must be destructed and deallocated
279       // accordingly.
280 #if defined(__cpp_sized_deallocation)
281       size_t size = TagToAllocatedSize(rep->tag);
282       rep->~CordRep();
283       ::operator delete(rep, size);
284 #else
285       rep->~CordRep();
286       ::operator delete(rep);
287 #endif
288       rep = nullptr;
289     }
290 
291     if (!pending.empty()) {
292       rep = pending.back();
293       pending.pop_back();
294     } else {
295       break;
296     }
297   }
298 }
299 
Unref(CordRep * rep)300 inline void Unref(CordRep* rep) {
301   // Fast-path for two common, hot cases: a null rep and a shared root.
302   if (ABSL_PREDICT_TRUE(rep == nullptr ||
303                         rep->refcount.DecrementExpectHighRefcount())) {
304     return;
305   }
306 
307   UnrefInternal(rep);
308 }
309 
310 // Return the depth of a node
Depth(const CordRep * rep)311 static int Depth(const CordRep* rep) {
312   if (rep->tag == CONCAT) {
313     return rep->concat()->depth();
314   } else {
315     return 0;
316   }
317 }
318 
SetConcatChildren(CordRepConcat * concat,CordRep * left,CordRep * right)319 static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
320                               CordRep* right) {
321   concat->left = left;
322   concat->right = right;
323 
324   concat->length = left->length + right->length;
325   concat->set_depth(1 + std::max(Depth(left), Depth(right)));
326 }
327 
328 // Create a concatenation of the specified nodes.
329 // Does not change the refcounts of "left" and "right".
330 // The returned node has a refcount of 1.
RawConcat(CordRep * left,CordRep * right)331 static CordRep* RawConcat(CordRep* left, CordRep* right) {
332   // Avoid making degenerate concat nodes (one child is empty)
333   if (left == nullptr || left->length == 0) {
334     Unref(left);
335     return right;
336   }
337   if (right == nullptr || right->length == 0) {
338     Unref(right);
339     return left;
340   }
341 
342   CordRepConcat* rep = new CordRepConcat();
343   rep->tag = CONCAT;
344   SetConcatChildren(rep, left, right);
345 
346   return rep;
347 }
348 
Concat(CordRep * left,CordRep * right)349 static CordRep* Concat(CordRep* left, CordRep* right) {
350   CordRep* rep = RawConcat(left, right);
351   if (rep != nullptr && !IsRootBalanced(rep)) {
352     rep = Rebalance(rep);
353   }
354   return VerifyTree(rep);
355 }
356 
357 // Make a balanced tree out of an array of leaf nodes.
MakeBalancedTree(CordRep ** reps,size_t n)358 static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
359   // Make repeated passes over the array, merging adjacent pairs
360   // until we are left with just a single node.
361   while (n > 1) {
362     size_t dst = 0;
363     for (size_t src = 0; src < n; src += 2) {
364       if (src + 1 < n) {
365         reps[dst] = Concat(reps[src], reps[src + 1]);
366       } else {
367         reps[dst] = reps[src];
368       }
369       dst++;
370     }
371     n = dst;
372   }
373 
374   return reps[0];
375 }
376 
377 // Create a new flat node.
NewFlat(size_t length_hint)378 static CordRep* NewFlat(size_t length_hint) {
379   if (length_hint <= kMinFlatLength) {
380     length_hint = kMinFlatLength;
381   } else if (length_hint > kMaxFlatLength) {
382     length_hint = kMaxFlatLength;
383   }
384 
385   // Round size up so it matches a size we can exactly express in a tag.
386   const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
387   void* const raw_rep = ::operator new(size);
388   CordRep* rep = new (raw_rep) CordRep();
389   rep->tag = AllocatedSizeToTag(size);
390   return VerifyTree(rep);
391 }
392 
393 // Create a new tree out of the specified array.
394 // The returned node has a refcount of 1.
NewTree(const char * data,size_t length,size_t alloc_hint)395 static CordRep* NewTree(const char* data,
396                         size_t length,
397                         size_t alloc_hint) {
398   if (length == 0) return nullptr;
399   absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
400   size_t n = 0;
401   do {
402     const size_t len = std::min(length, kMaxFlatLength);
403     CordRep* rep = NewFlat(len + alloc_hint);
404     rep->length = len;
405     memcpy(rep->data, data, len);
406     reps[n++] = VerifyTree(rep);
407     data += len;
408     length -= len;
409   } while (length != 0);
410   return MakeBalancedTree(reps.data(), n);
411 }
412 
413 namespace cord_internal {
414 
InitializeCordRepExternal(absl::string_view data,CordRepExternal * rep)415 void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
416   assert(!data.empty());
417   rep->length = data.size();
418   rep->tag = EXTERNAL;
419   rep->base = data.data();
420   VerifyTree(rep);
421 }
422 
423 }  // namespace cord_internal
424 
NewSubstring(CordRep * child,size_t offset,size_t length)425 static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
426   // Never create empty substring nodes
427   if (length == 0) {
428     Unref(child);
429     return nullptr;
430   } else {
431     CordRepSubstring* rep = new CordRepSubstring();
432     assert((offset + length) <= child->length);
433     rep->length = length;
434     rep->tag = SUBSTRING;
435     rep->start = offset;
436     rep->child = child;
437     return VerifyTree(rep);
438   }
439 }
440 
441 // --------------------------------------------------------------------
442 // Cord::InlineRep functions
443 
444 constexpr unsigned char Cord::InlineRep::kMaxInline;
445 
set_data(const char * data,size_t n,bool nullify_tail)446 inline void Cord::InlineRep::set_data(const char* data, size_t n,
447                                       bool nullify_tail) {
448   static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
449 
450   cord_internal::SmallMemmove(data_, data, n, nullify_tail);
451   data_[kMaxInline] = static_cast<char>(n);
452 }
453 
set_data(size_t n)454 inline char* Cord::InlineRep::set_data(size_t n) {
455   assert(n <= kMaxInline);
456   memset(data_, 0, sizeof(data_));
457   data_[kMaxInline] = static_cast<char>(n);
458   return data_;
459 }
460 
force_tree(size_t extra_hint)461 inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
462   size_t len = data_[kMaxInline];
463   CordRep* result;
464   if (len > kMaxInline) {
465     memcpy(&result, data_, sizeof(result));
466   } else {
467     result = NewFlat(len + extra_hint);
468     result->length = len;
469     memcpy(result->data, data_, len);
470     set_tree(result);
471   }
472   return result;
473 }
474 
reduce_size(size_t n)475 inline void Cord::InlineRep::reduce_size(size_t n) {
476   size_t tag = data_[kMaxInline];
477   assert(tag <= kMaxInline);
478   assert(tag >= n);
479   tag -= n;
480   memset(data_ + tag, 0, n);
481   data_[kMaxInline] = static_cast<char>(tag);
482 }
483 
remove_prefix(size_t n)484 inline void Cord::InlineRep::remove_prefix(size_t n) {
485   cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n);
486   reduce_size(n);
487 }
488 
AppendTree(CordRep * tree)489 void Cord::InlineRep::AppendTree(CordRep* tree) {
490   if (tree == nullptr) return;
491   size_t len = data_[kMaxInline];
492   if (len == 0) {
493     set_tree(tree);
494   } else {
495     set_tree(Concat(force_tree(0), tree));
496   }
497 }
498 
PrependTree(CordRep * tree)499 void Cord::InlineRep::PrependTree(CordRep* tree) {
500   assert(tree != nullptr);
501   size_t len = data_[kMaxInline];
502   if (len == 0) {
503     set_tree(tree);
504   } else {
505     set_tree(Concat(tree, force_tree(0)));
506   }
507 }
508 
509 // Searches for a non-full flat node at the rightmost leaf of the tree. If a
510 // suitable leaf is found, the function will update the length field for all
511 // nodes to account for the size increase. The append region address will be
512 // written to region and the actual size increase will be written to size.
PrepareAppendRegion(CordRep * root,char ** region,size_t * size,size_t max_length)513 static inline bool PrepareAppendRegion(CordRep* root, char** region,
514                                        size_t* size, size_t max_length) {
515   // Search down the right-hand path for a non-full FLAT node.
516   CordRep* dst = root;
517   while (dst->tag == CONCAT && dst->refcount.IsOne()) {
518     dst = dst->concat()->right;
519   }
520 
521   if (dst->tag < FLAT || !dst->refcount.IsOne()) {
522     *region = nullptr;
523     *size = 0;
524     return false;
525   }
526 
527   const size_t in_use = dst->length;
528   const size_t capacity = TagToLength(dst->tag);
529   if (in_use == capacity) {
530     *region = nullptr;
531     *size = 0;
532     return false;
533   }
534 
535   size_t size_increase = std::min(capacity - in_use, max_length);
536 
537   // We need to update the length fields for all nodes, including the leaf node.
538   for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) {
539     rep->length += size_increase;
540   }
541   dst->length += size_increase;
542 
543   *region = dst->data + in_use;
544   *size = size_increase;
545   return true;
546 }
547 
GetAppendRegion(char ** region,size_t * size,size_t max_length)548 void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
549                                       size_t max_length) {
550   if (max_length == 0) {
551     *region = nullptr;
552     *size = 0;
553     return;
554   }
555 
556   // Try to fit in the inline buffer if possible.
557   size_t inline_length = data_[kMaxInline];
558   if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) {
559     *region = data_ + inline_length;
560     *size = max_length;
561     data_[kMaxInline] = static_cast<char>(inline_length + max_length);
562     return;
563   }
564 
565   CordRep* root = force_tree(max_length);
566 
567   if (PrepareAppendRegion(root, region, size, max_length)) {
568     return;
569   }
570 
571   // Allocate new node.
572   CordRep* new_node =
573       NewFlat(std::max(static_cast<size_t>(root->length), max_length));
574   new_node->length =
575       std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length);
576   *region = new_node->data;
577   *size = new_node->length;
578   replace_tree(Concat(root, new_node));
579 }
580 
GetAppendRegion(char ** region,size_t * size)581 void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) {
582   const size_t max_length = std::numeric_limits<size_t>::max();
583 
584   // Try to fit in the inline buffer if possible.
585   size_t inline_length = data_[kMaxInline];
586   if (inline_length < kMaxInline) {
587     *region = data_ + inline_length;
588     *size = kMaxInline - inline_length;
589     data_[kMaxInline] = kMaxInline;
590     return;
591   }
592 
593   CordRep* root = force_tree(max_length);
594 
595   if (PrepareAppendRegion(root, region, size, max_length)) {
596     return;
597   }
598 
599   // Allocate new node.
600   CordRep* new_node = NewFlat(root->length);
601   new_node->length = TagToLength(new_node->tag);
602   *region = new_node->data;
603   *size = new_node->length;
604   replace_tree(Concat(root, new_node));
605 }
606 
607 // If the rep is a leaf, this will increment the value at total_mem_usage and
608 // will return true.
RepMemoryUsageLeaf(const CordRep * rep,size_t * total_mem_usage)609 static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
610   if (rep->tag >= FLAT) {
611     *total_mem_usage += TagToAllocatedSize(rep->tag);
612     return true;
613   }
614   if (rep->tag == EXTERNAL) {
615     *total_mem_usage += sizeof(CordRepConcat) + rep->length;
616     return true;
617   }
618   return false;
619 }
620 
AssignSlow(const Cord::InlineRep & src)621 void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
622   ClearSlow();
623 
624   memcpy(data_, src.data_, sizeof(data_));
625   if (is_tree()) {
626     Ref(tree());
627   }
628 }
629 
ClearSlow()630 void Cord::InlineRep::ClearSlow() {
631   if (is_tree()) {
632     Unref(tree());
633   }
634   memset(data_, 0, sizeof(data_));
635 }
636 
637 // --------------------------------------------------------------------
638 // Constructors and destructors
639 
Cord(const Cord & src)640 Cord::Cord(const Cord& src) : contents_(src.contents_) {
641   Ref(contents_.tree());  // Does nothing if contents_ has embedded data
642 }
643 
Cord(absl::string_view src)644 Cord::Cord(absl::string_view src) {
645   const size_t n = src.size();
646   if (n <= InlineRep::kMaxInline) {
647     contents_.set_data(src.data(), n, false);
648   } else {
649     contents_.set_tree(NewTree(src.data(), n, 0));
650   }
651 }
652 
653 template <typename T, Cord::EnableIfString<T>>
Cord(T && src)654 Cord::Cord(T&& src) {
655   if (
656       // String is short: copy data to avoid external block overhead.
657       src.size() <= kMaxBytesToCopy ||
658       // String is wasteful: copy data to avoid pinning too much unused memory.
659       src.size() < src.capacity() / 2
660   ) {
661     if (src.size() <= InlineRep::kMaxInline) {
662       contents_.set_data(src.data(), src.size(), false);
663     } else {
664       contents_.set_tree(NewTree(src.data(), src.size(), 0));
665     }
666   } else {
667     struct StringReleaser {
668       void operator()(absl::string_view /* data */) {}
669       std::string data;
670     };
671     const absl::string_view original_data = src;
672     auto* rep = static_cast<
673         ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
674         absl::cord_internal::NewExternalRep(
675             original_data, StringReleaser{std::forward<T>(src)}));
676     // Moving src may have invalidated its data pointer, so adjust it.
677     rep->base = rep->template get<0>().data.data();
678     contents_.set_tree(rep);
679   }
680 }
681 
682 template Cord::Cord(std::string&& src);
683 
684 // The destruction code is separate so that the compiler can determine
685 // that it does not need to call the destructor on a moved-from Cord.
DestroyCordSlow()686 void Cord::DestroyCordSlow() {
687   Unref(VerifyTree(contents_.tree()));
688 }
689 
690 // --------------------------------------------------------------------
691 // Mutators
692 
Clear()693 void Cord::Clear() {
694   Unref(contents_.clear());
695 }
696 
operator =(absl::string_view src)697 Cord& Cord::operator=(absl::string_view src) {
698 
699   const char* data = src.data();
700   size_t length = src.size();
701   CordRep* tree = contents_.tree();
702   if (length <= InlineRep::kMaxInline) {
703     // Embed into this->contents_
704     contents_.set_data(data, length, true);
705     Unref(tree);
706     return *this;
707   }
708   if (tree != nullptr && tree->tag >= FLAT &&
709       TagToLength(tree->tag) >= length && tree->refcount.IsOne()) {
710     // Copy in place if the existing FLAT node is reusable.
711     memmove(tree->data, data, length);
712     tree->length = length;
713     VerifyTree(tree);
714     return *this;
715   }
716   contents_.set_tree(NewTree(data, length, 0));
717   Unref(tree);
718   return *this;
719 }
720 
721 template <typename T, Cord::EnableIfString<T>>
operator =(T && src)722 Cord& Cord::operator=(T&& src) {
723   if (src.size() <= kMaxBytesToCopy) {
724     *this = absl::string_view(src);
725   } else {
726     *this = Cord(std::forward<T>(src));
727   }
728   return *this;
729 }
730 
731 template Cord& Cord::operator=(std::string&& src);
732 
733 // TODO(sanjay): Move to Cord::InlineRep section of file.  For now,
734 // we keep it here to make diffs easier.
AppendArray(const char * src_data,size_t src_size)735 void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) {
736   if (src_size == 0) return;  // memcpy(_, nullptr, 0) is undefined.
737   // Try to fit in the inline buffer if possible.
738   size_t inline_length = data_[kMaxInline];
739   if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) {
740     // Append new data to embedded array
741     data_[kMaxInline] = static_cast<char>(inline_length + src_size);
742     memcpy(data_ + inline_length, src_data, src_size);
743     return;
744   }
745 
746   CordRep* root = tree();
747 
748   size_t appended = 0;
749   if (root) {
750     char* region;
751     if (PrepareAppendRegion(root, &region, &appended, src_size)) {
752       memcpy(region, src_data, appended);
753     }
754   } else {
755     // It is possible that src_data == data_, but when we transition from an
756     // InlineRep to a tree we need to assign data_ = root via set_tree. To
757     // avoid corrupting the source data before we copy it, delay calling
758     // set_tree until after we've copied data.
759     // We are going from an inline size to beyond inline size. Make the new size
760     // either double the inlined size, or the added size + 10%.
761     const size_t size1 = inline_length * 2 + src_size;
762     const size_t size2 = inline_length + src_size / 10;
763     root = NewFlat(std::max<size_t>(size1, size2));
764     appended = std::min(src_size, TagToLength(root->tag) - inline_length);
765     memcpy(root->data, data_, inline_length);
766     memcpy(root->data + inline_length, src_data, appended);
767     root->length = inline_length + appended;
768     set_tree(root);
769   }
770 
771   src_data += appended;
772   src_size -= appended;
773   if (src_size == 0) {
774     return;
775   }
776 
777   // Use new block(s) for any remaining bytes that were not handled above.
778   // Alloc extra memory only if the right child of the root of the new tree is
779   // going to be a FLAT node, which will permit further inplace appends.
780   size_t length = src_size;
781   if (src_size < kMaxFlatLength) {
782     // The new length is either
783     // - old size + 10%
784     // - old_size + src_size
785     // This will cause a reasonable conservative step-up in size that is still
786     // large enough to avoid excessive amounts of small fragments being added.
787     length = std::max<size_t>(root->length / 10, src_size);
788   }
789   set_tree(Concat(root, NewTree(src_data, src_size, length - src_size)));
790 }
791 
TakeRep() const792 inline CordRep* Cord::TakeRep() const& {
793   return Ref(contents_.tree());
794 }
795 
TakeRep()796 inline CordRep* Cord::TakeRep() && {
797   CordRep* rep = contents_.tree();
798   contents_.clear();
799   return rep;
800 }
801 
802 template <typename C>
AppendImpl(C && src)803 inline void Cord::AppendImpl(C&& src) {
804   if (empty()) {
805     // In case of an empty destination avoid allocating a new node, do not copy
806     // data.
807     *this = std::forward<C>(src);
808     return;
809   }
810 
811   // For short cords, it is faster to copy data if there is room in dst.
812   const size_t src_size = src.contents_.size();
813   if (src_size <= kMaxBytesToCopy) {
814     CordRep* src_tree = src.contents_.tree();
815     if (src_tree == nullptr) {
816       // src has embedded data.
817       contents_.AppendArray(src.contents_.data(), src_size);
818       return;
819     }
820     if (src_tree->tag >= FLAT) {
821       // src tree just has one flat node.
822       contents_.AppendArray(src_tree->data, src_size);
823       return;
824     }
825     if (&src == this) {
826       // ChunkIterator below assumes that src is not modified during traversal.
827       Append(Cord(src));
828       return;
829     }
830     // TODO(mec): Should we only do this if "dst" has space?
831     for (absl::string_view chunk : src.Chunks()) {
832       Append(chunk);
833     }
834     return;
835   }
836 
837   contents_.AppendTree(std::forward<C>(src).TakeRep());
838 }
839 
Append(const Cord & src)840 void Cord::Append(const Cord& src) { AppendImpl(src); }
841 
Append(Cord && src)842 void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
843 
844 template <typename T, Cord::EnableIfString<T>>
Append(T && src)845 void Cord::Append(T&& src) {
846   if (src.size() <= kMaxBytesToCopy) {
847     Append(absl::string_view(src));
848   } else {
849     Append(Cord(std::forward<T>(src)));
850   }
851 }
852 
853 template void Cord::Append(std::string&& src);
854 
Prepend(const Cord & src)855 void Cord::Prepend(const Cord& src) {
856   CordRep* src_tree = src.contents_.tree();
857   if (src_tree != nullptr) {
858     Ref(src_tree);
859     contents_.PrependTree(src_tree);
860     return;
861   }
862 
863   // `src` cord is inlined.
864   absl::string_view src_contents(src.contents_.data(), src.contents_.size());
865   return Prepend(src_contents);
866 }
867 
Prepend(absl::string_view src)868 void Cord::Prepend(absl::string_view src) {
869   if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
870   size_t cur_size = contents_.size();
871   if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) {
872     // Use embedded storage.
873     char data[InlineRep::kMaxInline + 1] = {0};
874     data[InlineRep::kMaxInline] = cur_size + src.size();  // set size
875     memcpy(data, src.data(), src.size());
876     memcpy(data + src.size(), contents_.data(), cur_size);
877     memcpy(reinterpret_cast<void*>(&contents_), data,
878            InlineRep::kMaxInline + 1);
879   } else {
880     contents_.PrependTree(NewTree(src.data(), src.size(), 0));
881   }
882 }
883 
884 template <typename T, Cord::EnableIfString<T>>
Prepend(T && src)885 inline void Cord::Prepend(T&& src) {
886   if (src.size() <= kMaxBytesToCopy) {
887     Prepend(absl::string_view(src));
888   } else {
889     Prepend(Cord(std::forward<T>(src)));
890   }
891 }
892 
893 template void Cord::Prepend(std::string&& src);
894 
RemovePrefixFrom(CordRep * node,size_t n)895 static CordRep* RemovePrefixFrom(CordRep* node, size_t n) {
896   if (n >= node->length) return nullptr;
897   if (n == 0) return Ref(node);
898   absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack;
899 
900   while (node->tag == CONCAT) {
901     assert(n <= node->length);
902     if (n < node->concat()->left->length) {
903       // Push right to stack, descend left.
904       rhs_stack.push_back(node->concat()->right);
905       node = node->concat()->left;
906     } else {
907       // Drop left, descend right.
908       n -= node->concat()->left->length;
909       node = node->concat()->right;
910     }
911   }
912   assert(n <= node->length);
913 
914   if (n == 0) {
915     Ref(node);
916   } else {
917     size_t start = n;
918     size_t len = node->length - n;
919     if (node->tag == SUBSTRING) {
920       // Consider in-place update of node, similar to in RemoveSuffixFrom().
921       start += node->substring()->start;
922       node = node->substring()->child;
923     }
924     node = NewSubstring(Ref(node), start, len);
925   }
926   while (!rhs_stack.empty()) {
927     node = Concat(node, Ref(rhs_stack.back()));
928     rhs_stack.pop_back();
929   }
930   return node;
931 }
932 
933 // RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the
934 // exception that removing a suffix has an optimization where a node may be
935 // edited in place iff that node and all its ancestors have a refcount of 1.
RemoveSuffixFrom(CordRep * node,size_t n)936 static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) {
937   if (n >= node->length) return nullptr;
938   if (n == 0) return Ref(node);
939   absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack;
940   bool inplace_ok = node->refcount.IsOne();
941 
942   while (node->tag == CONCAT) {
943     assert(n <= node->length);
944     if (n < node->concat()->right->length) {
945       // Push left to stack, descend right.
946       lhs_stack.push_back(node->concat()->left);
947       node = node->concat()->right;
948     } else {
949       // Drop right, descend left.
950       n -= node->concat()->right->length;
951       node = node->concat()->left;
952     }
953     inplace_ok = inplace_ok && node->refcount.IsOne();
954   }
955   assert(n <= node->length);
956 
957   if (n == 0) {
958     Ref(node);
959   } else if (inplace_ok && node->tag != EXTERNAL) {
960     // Consider making a new buffer if the current node capacity is much
961     // larger than the new length.
962     Ref(node);
963     node->length -= n;
964   } else {
965     size_t start = 0;
966     size_t len = node->length - n;
967     if (node->tag == SUBSTRING) {
968       start = node->substring()->start;
969       node = node->substring()->child;
970     }
971     node = NewSubstring(Ref(node), start, len);
972   }
973   while (!lhs_stack.empty()) {
974     node = Concat(Ref(lhs_stack.back()), node);
975     lhs_stack.pop_back();
976   }
977   return node;
978 }
979 
RemovePrefix(size_t n)980 void Cord::RemovePrefix(size_t n) {
981   ABSL_INTERNAL_CHECK(n <= size(),
982                       absl::StrCat("Requested prefix size ", n,
983                                    " exceeds Cord's size ", size()));
984   CordRep* tree = contents_.tree();
985   if (tree == nullptr) {
986     contents_.remove_prefix(n);
987   } else {
988     CordRep* newrep = RemovePrefixFrom(tree, n);
989     Unref(tree);
990     contents_.replace_tree(VerifyTree(newrep));
991   }
992 }
993 
RemoveSuffix(size_t n)994 void Cord::RemoveSuffix(size_t n) {
995   ABSL_INTERNAL_CHECK(n <= size(),
996                       absl::StrCat("Requested suffix size ", n,
997                                    " exceeds Cord's size ", size()));
998   CordRep* tree = contents_.tree();
999   if (tree == nullptr) {
1000     contents_.reduce_size(n);
1001   } else {
1002     CordRep* newrep = RemoveSuffixFrom(tree, n);
1003     Unref(tree);
1004     contents_.replace_tree(VerifyTree(newrep));
1005   }
1006 }
1007 
1008 // Work item for NewSubRange().
1009 struct SubRange {
SubRangeabsl::SubRange1010   SubRange(CordRep* a_node, size_t a_pos, size_t a_n)
1011       : node(a_node), pos(a_pos), n(a_n) {}
1012   CordRep* node;  // nullptr means concat last 2 results.
1013   size_t pos;
1014   size_t n;
1015 };
1016 
NewSubRange(CordRep * node,size_t pos,size_t n)1017 static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) {
1018   absl::InlinedVector<CordRep*, kInlinedVectorSize> results;
1019   absl::InlinedVector<SubRange, kInlinedVectorSize> todo;
1020   todo.push_back(SubRange(node, pos, n));
1021   do {
1022     const SubRange& sr = todo.back();
1023     node = sr.node;
1024     pos = sr.pos;
1025     n = sr.n;
1026     todo.pop_back();
1027 
1028     if (node == nullptr) {
1029       assert(results.size() >= 2);
1030       CordRep* right = results.back();
1031       results.pop_back();
1032       CordRep* left = results.back();
1033       results.pop_back();
1034       results.push_back(Concat(left, right));
1035     } else if (pos == 0 && n == node->length) {
1036       results.push_back(Ref(node));
1037     } else if (node->tag != CONCAT) {
1038       if (node->tag == SUBSTRING) {
1039         pos += node->substring()->start;
1040         node = node->substring()->child;
1041       }
1042       results.push_back(NewSubstring(Ref(node), pos, n));
1043     } else if (pos + n <= node->concat()->left->length) {
1044       todo.push_back(SubRange(node->concat()->left, pos, n));
1045     } else if (pos >= node->concat()->left->length) {
1046       pos -= node->concat()->left->length;
1047       todo.push_back(SubRange(node->concat()->right, pos, n));
1048     } else {
1049       size_t left_n = node->concat()->left->length - pos;
1050       todo.push_back(SubRange(nullptr, 0, 0));  // Concat()
1051       todo.push_back(SubRange(node->concat()->right, 0, n - left_n));
1052       todo.push_back(SubRange(node->concat()->left, pos, left_n));
1053     }
1054   } while (!todo.empty());
1055   assert(results.size() == 1);
1056   return results[0];
1057 }
1058 
Subcord(size_t pos,size_t new_size) const1059 Cord Cord::Subcord(size_t pos, size_t new_size) const {
1060   Cord sub_cord;
1061   size_t length = size();
1062   if (pos > length) pos = length;
1063   if (new_size > length - pos) new_size = length - pos;
1064   CordRep* tree = contents_.tree();
1065   if (tree == nullptr) {
1066     // sub_cord is newly constructed, no need to re-zero-out the tail of
1067     // contents_ memory.
1068     sub_cord.contents_.set_data(contents_.data() + pos, new_size, false);
1069   } else if (new_size == 0) {
1070     // We want to return empty subcord, so nothing to do.
1071   } else if (new_size <= InlineRep::kMaxInline) {
1072     Cord::ChunkIterator it = chunk_begin();
1073     it.AdvanceBytes(pos);
1074     char* dest = sub_cord.contents_.data_;
1075     size_t remaining_size = new_size;
1076     while (remaining_size > it->size()) {
1077       cord_internal::SmallMemmove(dest, it->data(), it->size());
1078       remaining_size -= it->size();
1079       dest += it->size();
1080       ++it;
1081     }
1082     cord_internal::SmallMemmove(dest, it->data(), remaining_size);
1083     sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size;
1084   } else {
1085     sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size));
1086   }
1087   return sub_cord;
1088 }
1089 
1090 // --------------------------------------------------------------------
1091 // Balancing
1092 
1093 class CordForest {
1094  public:
CordForest(size_t length)1095   explicit CordForest(size_t length)
1096       : root_length_(length), trees_(kMinLengthSize, nullptr) {}
1097 
Build(CordRep * cord_root)1098   void Build(CordRep* cord_root) {
1099     std::vector<CordRep*> pending = {cord_root};
1100 
1101     while (!pending.empty()) {
1102       CordRep* node = pending.back();
1103       pending.pop_back();
1104       CheckNode(node);
1105       if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) {
1106         AddNode(node);
1107         continue;
1108       }
1109 
1110       CordRepConcat* concat_node = node->concat();
1111       if (concat_node->depth() >= kMinLengthSize ||
1112           concat_node->length < min_length[concat_node->depth()]) {
1113         pending.push_back(concat_node->right);
1114         pending.push_back(concat_node->left);
1115 
1116         if (concat_node->refcount.IsOne()) {
1117           concat_node->left = concat_freelist_;
1118           concat_freelist_ = concat_node;
1119         } else {
1120           Ref(concat_node->right);
1121           Ref(concat_node->left);
1122           Unref(concat_node);
1123         }
1124       } else {
1125         AddNode(node);
1126       }
1127     }
1128   }
1129 
ConcatNodes()1130   CordRep* ConcatNodes() {
1131     CordRep* sum = nullptr;
1132     for (auto* node : trees_) {
1133       if (node == nullptr) continue;
1134 
1135       sum = PrependNode(node, sum);
1136       root_length_ -= node->length;
1137       if (root_length_ == 0) break;
1138     }
1139     ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node");
1140     return VerifyTree(sum);
1141   }
1142 
1143  private:
AppendNode(CordRep * node,CordRep * sum)1144   CordRep* AppendNode(CordRep* node, CordRep* sum) {
1145     return (sum == nullptr) ? node : MakeConcat(sum, node);
1146   }
1147 
PrependNode(CordRep * node,CordRep * sum)1148   CordRep* PrependNode(CordRep* node, CordRep* sum) {
1149     return (sum == nullptr) ? node : MakeConcat(node, sum);
1150   }
1151 
AddNode(CordRep * node)1152   void AddNode(CordRep* node) {
1153     CordRep* sum = nullptr;
1154 
1155     // Collect together everything with which we will merge with node
1156     int i = 0;
1157     for (; node->length > min_length[i + 1]; ++i) {
1158       auto& tree_at_i = trees_[i];
1159 
1160       if (tree_at_i == nullptr) continue;
1161       sum = PrependNode(tree_at_i, sum);
1162       tree_at_i = nullptr;
1163     }
1164 
1165     sum = AppendNode(node, sum);
1166 
1167     // Insert sum into appropriate place in the forest
1168     for (; sum->length >= min_length[i]; ++i) {
1169       auto& tree_at_i = trees_[i];
1170       if (tree_at_i == nullptr) continue;
1171 
1172       sum = MakeConcat(tree_at_i, sum);
1173       tree_at_i = nullptr;
1174     }
1175 
1176     // min_length[0] == 1, which means sum->length >= min_length[0]
1177     assert(i > 0);
1178     trees_[i - 1] = sum;
1179   }
1180 
1181   // Make concat node trying to resue existing CordRepConcat nodes we
1182   // already collected in the concat_freelist_.
MakeConcat(CordRep * left,CordRep * right)1183   CordRep* MakeConcat(CordRep* left, CordRep* right) {
1184     if (concat_freelist_ == nullptr) return RawConcat(left, right);
1185 
1186     CordRepConcat* rep = concat_freelist_;
1187     if (concat_freelist_->left == nullptr) {
1188       concat_freelist_ = nullptr;
1189     } else {
1190       concat_freelist_ = concat_freelist_->left->concat();
1191     }
1192     SetConcatChildren(rep, left, right);
1193 
1194     return rep;
1195   }
1196 
CheckNode(CordRep * node)1197   static void CheckNode(CordRep* node) {
1198     ABSL_INTERNAL_CHECK(node->length != 0u, "");
1199     if (node->tag == CONCAT) {
1200       ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, "");
1201       ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, "");
1202       ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length +
1203                                            node->concat()->right->length),
1204                           "");
1205     }
1206   }
1207 
1208   size_t root_length_;
1209 
1210   // use an inlined vector instead of a flat array to get bounds checking
1211   absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_;
1212 
1213   // List of concat nodes we can re-use for Cord balancing.
1214   CordRepConcat* concat_freelist_ = nullptr;
1215 };
1216 
Rebalance(CordRep * node)1217 static CordRep* Rebalance(CordRep* node) {
1218   VerifyTree(node);
1219   assert(node->tag == CONCAT);
1220 
1221   if (node->length == 0) {
1222     return nullptr;
1223   }
1224 
1225   CordForest forest(node->length);
1226   forest.Build(node);
1227   return forest.ConcatNodes();
1228 }
1229 
1230 // --------------------------------------------------------------------
1231 // Comparators
1232 
1233 namespace {
1234 
ClampResult(int memcmp_res)1235 int ClampResult(int memcmp_res) {
1236   return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
1237 }
1238 
CompareChunks(absl::string_view * lhs,absl::string_view * rhs,size_t * size_to_compare)1239 int CompareChunks(absl::string_view* lhs, absl::string_view* rhs,
1240                   size_t* size_to_compare) {
1241   size_t compared_size = std::min(lhs->size(), rhs->size());
1242   assert(*size_to_compare >= compared_size);
1243   *size_to_compare -= compared_size;
1244 
1245   int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
1246   if (memcmp_res != 0) return memcmp_res;
1247 
1248   lhs->remove_prefix(compared_size);
1249   rhs->remove_prefix(compared_size);
1250 
1251   return 0;
1252 }
1253 
1254 // This overload set computes comparison results from memcmp result. This
1255 // interface is used inside GenericCompare below. Differet implementations
1256 // are specialized for int and bool. For int we clamp result to {-1, 0, 1}
1257 // set. For bool we just interested in "value == 0".
1258 template <typename ResultType>
ComputeCompareResult(int memcmp_res)1259 ResultType ComputeCompareResult(int memcmp_res) {
1260   return ClampResult(memcmp_res);
1261 }
1262 template <>
ComputeCompareResult(int memcmp_res)1263 bool ComputeCompareResult<bool>(int memcmp_res) {
1264   return memcmp_res == 0;
1265 }
1266 
1267 }  // namespace
1268 
1269 // Helper routine. Locates the first flat chunk of the Cord without
1270 // initializing the iterator.
FindFlatStartPiece() const1271 inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
1272   size_t n = data_[kMaxInline];
1273   if (n <= kMaxInline) {
1274     return absl::string_view(data_, n);
1275   }
1276 
1277   CordRep* node = tree();
1278   if (node->tag >= FLAT) {
1279     return absl::string_view(node->data, node->length);
1280   }
1281 
1282   if (node->tag == EXTERNAL) {
1283     return absl::string_view(node->external()->base, node->length);
1284   }
1285 
1286   // Walk down the left branches until we hit a non-CONCAT node.
1287   while (node->tag == CONCAT) {
1288     node = node->concat()->left;
1289   }
1290 
1291   // Get the child node if we encounter a SUBSTRING.
1292   size_t offset = 0;
1293   size_t length = node->length;
1294   assert(length != 0);
1295 
1296   if (node->tag == SUBSTRING) {
1297     offset = node->substring()->start;
1298     node = node->substring()->child;
1299   }
1300 
1301   if (node->tag >= FLAT) {
1302     return absl::string_view(node->data + offset, length);
1303   }
1304 
1305   assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here");
1306 
1307   return absl::string_view(node->external()->base + offset, length);
1308 }
1309 
CompareSlowPath(absl::string_view rhs,size_t compared_size,size_t size_to_compare) const1310 inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
1311                                  size_t size_to_compare) const {
1312   auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
1313     if (!chunk->empty()) return true;
1314     ++*it;
1315     if (it->bytes_remaining_ == 0) return false;
1316     *chunk = **it;
1317     return true;
1318   };
1319 
1320   Cord::ChunkIterator lhs_it = chunk_begin();
1321 
1322   // compared_size is inside first chunk.
1323   absl::string_view lhs_chunk =
1324       (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
1325   assert(compared_size <= lhs_chunk.size());
1326   assert(compared_size <= rhs.size());
1327   lhs_chunk.remove_prefix(compared_size);
1328   rhs.remove_prefix(compared_size);
1329   size_to_compare -= compared_size;  // skip already compared size.
1330 
1331   while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
1332     int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
1333     if (comparison_result != 0) return comparison_result;
1334     if (size_to_compare == 0) return 0;
1335   }
1336 
1337   return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
1338 }
1339 
CompareSlowPath(const Cord & rhs,size_t compared_size,size_t size_to_compare) const1340 inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
1341                                  size_t size_to_compare) const {
1342   auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) {
1343     if (!chunk->empty()) return true;
1344     ++*it;
1345     if (it->bytes_remaining_ == 0) return false;
1346     *chunk = **it;
1347     return true;
1348   };
1349 
1350   Cord::ChunkIterator lhs_it = chunk_begin();
1351   Cord::ChunkIterator rhs_it = rhs.chunk_begin();
1352 
1353   // compared_size is inside both first chunks.
1354   absl::string_view lhs_chunk =
1355       (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
1356   absl::string_view rhs_chunk =
1357       (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
1358   assert(compared_size <= lhs_chunk.size());
1359   assert(compared_size <= rhs_chunk.size());
1360   lhs_chunk.remove_prefix(compared_size);
1361   rhs_chunk.remove_prefix(compared_size);
1362   size_to_compare -= compared_size;  // skip already compared size.
1363 
1364   while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
1365     int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
1366     if (memcmp_res != 0) return memcmp_res;
1367     if (size_to_compare == 0) return 0;
1368   }
1369 
1370   return static_cast<int>(rhs_chunk.empty()) -
1371          static_cast<int>(lhs_chunk.empty());
1372 }
1373 
GetFirstChunk(const Cord & c)1374 inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
1375   return c.contents_.FindFlatStartPiece();
1376 }
GetFirstChunk(absl::string_view sv)1377 inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
1378   return sv;
1379 }
1380 
1381 // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
1382 // that 'size_to_compare' is greater that size of smallest of first chunks.
1383 template <typename ResultType, typename RHS>
1384 ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
1385                           size_t size_to_compare) {
1386   absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
1387   absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);
1388 
1389   size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
1390   assert(size_to_compare >= compared_size);
1391   int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
1392   if (compared_size == size_to_compare || memcmp_res != 0) {
1393     return ComputeCompareResult<ResultType>(memcmp_res);
1394   }
1395 
1396   return ComputeCompareResult<ResultType>(
1397       lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
1398 }
1399 
EqualsImpl(absl::string_view rhs,size_t size_to_compare) const1400 bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
1401   return GenericCompare<bool>(*this, rhs, size_to_compare);
1402 }
1403 
EqualsImpl(const Cord & rhs,size_t size_to_compare) const1404 bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
1405   return GenericCompare<bool>(*this, rhs, size_to_compare);
1406 }
1407 
1408 template <typename RHS>
SharedCompareImpl(const Cord & lhs,const RHS & rhs)1409 inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
1410   size_t lhs_size = lhs.size();
1411   size_t rhs_size = rhs.size();
1412   if (lhs_size == rhs_size) {
1413     return GenericCompare<int>(lhs, rhs, lhs_size);
1414   }
1415   if (lhs_size < rhs_size) {
1416     auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
1417     return data_comp_res == 0 ? -1 : data_comp_res;
1418   }
1419 
1420   auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
1421   return data_comp_res == 0 ? +1 : data_comp_res;
1422 }
1423 
Compare(absl::string_view rhs) const1424 int Cord::Compare(absl::string_view rhs) const {
1425   return SharedCompareImpl(*this, rhs);
1426 }
1427 
CompareImpl(const Cord & rhs) const1428 int Cord::CompareImpl(const Cord& rhs) const {
1429   return SharedCompareImpl(*this, rhs);
1430 }
1431 
EndsWith(absl::string_view rhs) const1432 bool Cord::EndsWith(absl::string_view rhs) const {
1433   size_t my_size = size();
1434   size_t rhs_size = rhs.size();
1435 
1436   if (my_size < rhs_size) return false;
1437 
1438   Cord tmp(*this);
1439   tmp.RemovePrefix(my_size - rhs_size);
1440   return tmp.EqualsImpl(rhs, rhs_size);
1441 }
1442 
EndsWith(const Cord & rhs) const1443 bool Cord::EndsWith(const Cord& rhs) const {
1444   size_t my_size = size();
1445   size_t rhs_size = rhs.size();
1446 
1447   if (my_size < rhs_size) return false;
1448 
1449   Cord tmp(*this);
1450   tmp.RemovePrefix(my_size - rhs_size);
1451   return tmp.EqualsImpl(rhs, rhs_size);
1452 }
1453 
1454 // --------------------------------------------------------------------
1455 // Misc.
1456 
operator std::string() const1457 Cord::operator std::string() const {
1458   std::string s;
1459   absl::CopyCordToString(*this, &s);
1460   return s;
1461 }
1462 
CopyCordToString(const Cord & src,std::string * dst)1463 void CopyCordToString(const Cord& src, std::string* dst) {
1464   if (!src.contents_.is_tree()) {
1465     src.contents_.CopyTo(dst);
1466   } else {
1467     absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
1468     src.CopyToArraySlowPath(&(*dst)[0]);
1469   }
1470 }
1471 
CopyToArraySlowPath(char * dst) const1472 void Cord::CopyToArraySlowPath(char* dst) const {
1473   assert(contents_.is_tree());
1474   absl::string_view fragment;
1475   if (GetFlatAux(contents_.tree(), &fragment)) {
1476     memcpy(dst, fragment.data(), fragment.size());
1477     return;
1478   }
1479   for (absl::string_view chunk : Chunks()) {
1480     memcpy(dst, chunk.data(), chunk.size());
1481     dst += chunk.size();
1482   }
1483 }
1484 
operator ++()1485 Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
1486   ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 &&
1487                         "Attempted to iterate past `end()`");
1488   assert(bytes_remaining_ >= current_chunk_.size());
1489   bytes_remaining_ -= current_chunk_.size();
1490 
1491   if (stack_of_right_children_.empty()) {
1492     assert(!current_chunk_.empty());  // Called on invalid iterator.
1493     // We have reached the end of the Cord.
1494     return *this;
1495   }
1496 
1497   // Process the next node on the stack.
1498   CordRep* node = stack_of_right_children_.back();
1499   stack_of_right_children_.pop_back();
1500 
1501   // Walk down the left branches until we hit a non-CONCAT node. Save the
1502   // right children to the stack for subsequent traversal.
1503   while (node->tag == CONCAT) {
1504     stack_of_right_children_.push_back(node->concat()->right);
1505     node = node->concat()->left;
1506   }
1507 
1508   // Get the child node if we encounter a SUBSTRING.
1509   size_t offset = 0;
1510   size_t length = node->length;
1511   if (node->tag == SUBSTRING) {
1512     offset = node->substring()->start;
1513     node = node->substring()->child;
1514   }
1515 
1516   assert(node->tag == EXTERNAL || node->tag >= FLAT);
1517   assert(length != 0);
1518   const char* data =
1519       node->tag == EXTERNAL ? node->external()->base : node->data;
1520   current_chunk_ = absl::string_view(data + offset, length);
1521   current_leaf_ = node;
1522   return *this;
1523 }
1524 
AdvanceAndReadBytes(size_t n)1525 Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
1526   ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
1527                         "Attempted to iterate past `end()`");
1528   Cord subcord;
1529 
1530   if (n <= InlineRep::kMaxInline) {
1531     // Range to read fits in inline data. Flatten it.
1532     char* data = subcord.contents_.set_data(n);
1533     while (n > current_chunk_.size()) {
1534       memcpy(data, current_chunk_.data(), current_chunk_.size());
1535       data += current_chunk_.size();
1536       n -= current_chunk_.size();
1537       ++*this;
1538     }
1539     memcpy(data, current_chunk_.data(), n);
1540     if (n < current_chunk_.size()) {
1541       RemoveChunkPrefix(n);
1542     } else if (n > 0) {
1543       ++*this;
1544     }
1545     return subcord;
1546   }
1547   if (n < current_chunk_.size()) {
1548     // Range to read is a proper subrange of the current chunk.
1549     assert(current_leaf_ != nullptr);
1550     CordRep* subnode = Ref(current_leaf_);
1551     const char* data =
1552         subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
1553     subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
1554     subcord.contents_.set_tree(VerifyTree(subnode));
1555     RemoveChunkPrefix(n);
1556     return subcord;
1557   }
1558 
1559   // Range to read begins with a proper subrange of the current chunk.
1560   assert(!current_chunk_.empty());
1561   assert(current_leaf_ != nullptr);
1562   CordRep* subnode = Ref(current_leaf_);
1563   if (current_chunk_.size() < subnode->length) {
1564     const char* data =
1565         subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data;
1566     subnode = NewSubstring(subnode, current_chunk_.data() - data,
1567                            current_chunk_.size());
1568   }
1569   n -= current_chunk_.size();
1570   bytes_remaining_ -= current_chunk_.size();
1571 
1572   // Process the next node(s) on the stack, reading whole subtrees depending on
1573   // their length and how many bytes we are advancing.
1574   CordRep* node = nullptr;
1575   while (!stack_of_right_children_.empty()) {
1576     node = stack_of_right_children_.back();
1577     stack_of_right_children_.pop_back();
1578     if (node->length > n) break;
1579     // TODO(qrczak): This might unnecessarily recreate existing concat nodes.
1580     // Avoiding that would need pretty complicated logic (instead of
1581     // current_leaf_, keep current_subtree_ which points to the highest node
1582     // such that the current leaf can be found on the path of left children
1583     // starting from current_subtree_; delay creating subnode while node is
1584     // below current_subtree_; find the proper node along the path of left
1585     // children starting from current_subtree_ if this loop exits while staying
1586     // below current_subtree_; etc.; alternatively, push parents instead of
1587     // right children on the stack).
1588     subnode = Concat(subnode, Ref(node));
1589     n -= node->length;
1590     bytes_remaining_ -= node->length;
1591     node = nullptr;
1592   }
1593 
1594   if (node == nullptr) {
1595     // We have reached the end of the Cord.
1596     assert(bytes_remaining_ == 0);
1597     subcord.contents_.set_tree(VerifyTree(subnode));
1598     return subcord;
1599   }
1600 
1601   // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
1602   // right children to the stack for subsequent traversal.
1603   while (node->tag == CONCAT) {
1604     if (node->concat()->left->length > n) {
1605       // Push right, descend left.
1606       stack_of_right_children_.push_back(node->concat()->right);
1607       node = node->concat()->left;
1608     } else {
1609       // Read left, descend right.
1610       subnode = Concat(subnode, Ref(node->concat()->left));
1611       n -= node->concat()->left->length;
1612       bytes_remaining_ -= node->concat()->left->length;
1613       node = node->concat()->right;
1614     }
1615   }
1616 
1617   // Get the child node if we encounter a SUBSTRING.
1618   size_t offset = 0;
1619   size_t length = node->length;
1620   if (node->tag == SUBSTRING) {
1621     offset = node->substring()->start;
1622     node = node->substring()->child;
1623   }
1624 
1625   // Range to read ends with a proper (possibly empty) subrange of the current
1626   // chunk.
1627   assert(node->tag == EXTERNAL || node->tag >= FLAT);
1628   assert(length > n);
1629   if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n));
1630   const char* data =
1631       node->tag == EXTERNAL ? node->external()->base : node->data;
1632   current_chunk_ = absl::string_view(data + offset + n, length - n);
1633   current_leaf_ = node;
1634   bytes_remaining_ -= n;
1635   subcord.contents_.set_tree(VerifyTree(subnode));
1636   return subcord;
1637 }
1638 
AdvanceBytesSlowPath(size_t n)1639 void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
1640   assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
1641   assert(n >= current_chunk_.size());  // This should only be called when
1642                                        // iterating to a new node.
1643 
1644   n -= current_chunk_.size();
1645   bytes_remaining_ -= current_chunk_.size();
1646 
1647   // Process the next node(s) on the stack, skipping whole subtrees depending on
1648   // their length and how many bytes we are advancing.
1649   CordRep* node = nullptr;
1650   while (!stack_of_right_children_.empty()) {
1651     node = stack_of_right_children_.back();
1652     stack_of_right_children_.pop_back();
1653     if (node->length > n) break;
1654     n -= node->length;
1655     bytes_remaining_ -= node->length;
1656     node = nullptr;
1657   }
1658 
1659   if (node == nullptr) {
1660     // We have reached the end of the Cord.
1661     assert(bytes_remaining_ == 0);
1662     return;
1663   }
1664 
1665   // Walk down the appropriate branches until we hit a non-CONCAT node. Save the
1666   // right children to the stack for subsequent traversal.
1667   while (node->tag == CONCAT) {
1668     if (node->concat()->left->length > n) {
1669       // Push right, descend left.
1670       stack_of_right_children_.push_back(node->concat()->right);
1671       node = node->concat()->left;
1672     } else {
1673       // Skip left, descend right.
1674       n -= node->concat()->left->length;
1675       bytes_remaining_ -= node->concat()->left->length;
1676       node = node->concat()->right;
1677     }
1678   }
1679 
1680   // Get the child node if we encounter a SUBSTRING.
1681   size_t offset = 0;
1682   size_t length = node->length;
1683   if (node->tag == SUBSTRING) {
1684     offset = node->substring()->start;
1685     node = node->substring()->child;
1686   }
1687 
1688   assert(node->tag == EXTERNAL || node->tag >= FLAT);
1689   assert(length > n);
1690   const char* data =
1691       node->tag == EXTERNAL ? node->external()->base : node->data;
1692   current_chunk_ = absl::string_view(data + offset + n, length - n);
1693   current_leaf_ = node;
1694   bytes_remaining_ -= n;
1695 }
1696 
operator [](size_t i) const1697 char Cord::operator[](size_t i) const {
1698   ABSL_HARDENING_ASSERT(i < size());
1699   size_t offset = i;
1700   const CordRep* rep = contents_.tree();
1701   if (rep == nullptr) {
1702     return contents_.data()[i];
1703   }
1704   while (true) {
1705     assert(rep != nullptr);
1706     assert(offset < rep->length);
1707     if (rep->tag >= FLAT) {
1708       // Get the "i"th character directly from the flat array.
1709       return rep->data[offset];
1710     } else if (rep->tag == EXTERNAL) {
1711       // Get the "i"th character from the external array.
1712       return rep->external()->base[offset];
1713     } else if (rep->tag == CONCAT) {
1714       // Recursively branch to the side of the concatenation that the "i"th
1715       // character is on.
1716       size_t left_length = rep->concat()->left->length;
1717       if (offset < left_length) {
1718         rep = rep->concat()->left;
1719       } else {
1720         offset -= left_length;
1721         rep = rep->concat()->right;
1722       }
1723     } else {
1724       // This must be a substring a node, so bypass it to get to the child.
1725       assert(rep->tag == SUBSTRING);
1726       offset += rep->substring()->start;
1727       rep = rep->substring()->child;
1728     }
1729   }
1730 }
1731 
FlattenSlowPath()1732 absl::string_view Cord::FlattenSlowPath() {
1733   size_t total_size = size();
1734   CordRep* new_rep;
1735   char* new_buffer;
1736 
1737   // Try to put the contents into a new flat rep. If they won't fit in the
1738   // biggest possible flat node, use an external rep instead.
1739   if (total_size <= kMaxFlatLength) {
1740     new_rep = NewFlat(total_size);
1741     new_rep->length = total_size;
1742     new_buffer = new_rep->data;
1743     CopyToArraySlowPath(new_buffer);
1744   } else {
1745     new_buffer = std::allocator<char>().allocate(total_size);
1746     CopyToArraySlowPath(new_buffer);
1747     new_rep = absl::cord_internal::NewExternalRep(
1748         absl::string_view(new_buffer, total_size), [](absl::string_view s) {
1749           std::allocator<char>().deallocate(const_cast<char*>(s.data()),
1750                                             s.size());
1751         });
1752   }
1753   Unref(contents_.tree());
1754   contents_.set_tree(new_rep);
1755   return absl::string_view(new_buffer, total_size);
1756 }
1757 
GetFlatAux(CordRep * rep,absl::string_view * fragment)1758 /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
1759   assert(rep != nullptr);
1760   if (rep->tag >= FLAT) {
1761     *fragment = absl::string_view(rep->data, rep->length);
1762     return true;
1763   } else if (rep->tag == EXTERNAL) {
1764     *fragment = absl::string_view(rep->external()->base, rep->length);
1765     return true;
1766   } else if (rep->tag == SUBSTRING) {
1767     CordRep* child = rep->substring()->child;
1768     if (child->tag >= FLAT) {
1769       *fragment =
1770           absl::string_view(child->data + rep->substring()->start, rep->length);
1771       return true;
1772     } else if (child->tag == EXTERNAL) {
1773       *fragment = absl::string_view(
1774           child->external()->base + rep->substring()->start, rep->length);
1775       return true;
1776     }
1777   }
1778   return false;
1779 }
1780 
ForEachChunkAux(absl::cord_internal::CordRep * rep,absl::FunctionRef<void (absl::string_view)> callback)1781 /* static */ void Cord::ForEachChunkAux(
1782     absl::cord_internal::CordRep* rep,
1783     absl::FunctionRef<void(absl::string_view)> callback) {
1784   assert(rep != nullptr);
1785   int stack_pos = 0;
1786   constexpr int stack_max = 128;
1787   // Stack of right branches for tree traversal
1788   absl::cord_internal::CordRep* stack[stack_max];
1789   absl::cord_internal::CordRep* current_node = rep;
1790   while (true) {
1791     if (current_node->tag == CONCAT) {
1792       if (stack_pos == stack_max) {
1793         // There's no more room on our stack array to add another right branch,
1794         // and the idea is to avoid allocations, so call this function
1795         // recursively to navigate this subtree further.  (This is not something
1796         // we expect to happen in practice).
1797         ForEachChunkAux(current_node, callback);
1798 
1799         // Pop the next right branch and iterate.
1800         current_node = stack[--stack_pos];
1801         continue;
1802       } else {
1803         // Save the right branch for later traversal and continue down the left
1804         // branch.
1805         stack[stack_pos++] = current_node->concat()->right;
1806         current_node = current_node->concat()->left;
1807         continue;
1808       }
1809     }
1810     // This is a leaf node, so invoke our callback.
1811     absl::string_view chunk;
1812     bool success = GetFlatAux(current_node, &chunk);
1813     assert(success);
1814     if (success) {
1815       callback(chunk);
1816     }
1817     if (stack_pos == 0) {
1818       // end of traversal
1819       return;
1820     }
1821     current_node = stack[--stack_pos];
1822   }
1823 }
1824 
DumpNode(CordRep * rep,bool include_data,std::ostream * os)1825 static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) {
1826   const int kIndentStep = 1;
1827   int indent = 0;
1828   absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
1829   absl::InlinedVector<int, kInlinedVectorSize> indents;
1830   for (;;) {
1831     *os << std::setw(3) << rep->refcount.Get();
1832     *os << " " << std::setw(7) << rep->length;
1833     *os << " [";
1834     if (include_data) *os << static_cast<void*>(rep);
1835     *os << "]";
1836     *os << " " << (IsRootBalanced(rep) ? 'b' : 'u');
1837     *os << " " << std::setw(indent) << "";
1838     if (rep->tag == CONCAT) {
1839       *os << "CONCAT depth=" << Depth(rep) << "\n";
1840       indent += kIndentStep;
1841       indents.push_back(indent);
1842       stack.push_back(rep->concat()->right);
1843       rep = rep->concat()->left;
1844     } else if (rep->tag == SUBSTRING) {
1845       *os << "SUBSTRING @ " << rep->substring()->start << "\n";
1846       indent += kIndentStep;
1847       rep = rep->substring()->child;
1848     } else {  // Leaf
1849       if (rep->tag == EXTERNAL) {
1850         *os << "EXTERNAL [";
1851         if (include_data)
1852           *os << absl::CEscape(std::string(rep->external()->base, rep->length));
1853         *os << "]\n";
1854       } else {
1855         *os << "FLAT cap=" << TagToLength(rep->tag) << " [";
1856         if (include_data)
1857           *os << absl::CEscape(std::string(rep->data, rep->length));
1858         *os << "]\n";
1859       }
1860       if (stack.empty()) break;
1861       rep = stack.back();
1862       stack.pop_back();
1863       indent = indents.back();
1864       indents.pop_back();
1865     }
1866   }
1867   ABSL_INTERNAL_CHECK(indents.empty(), "");
1868 }
1869 
ReportError(CordRep * root,CordRep * node)1870 static std::string ReportError(CordRep* root, CordRep* node) {
1871   std::ostringstream buf;
1872   buf << "Error at node " << node << " in:";
1873   DumpNode(root, true, &buf);
1874   return buf.str();
1875 }
1876 
VerifyNode(CordRep * root,CordRep * start_node,bool full_validation)1877 static bool VerifyNode(CordRep* root, CordRep* start_node,
1878                        bool full_validation) {
1879   absl::InlinedVector<CordRep*, 2> worklist;
1880   worklist.push_back(start_node);
1881   do {
1882     CordRep* node = worklist.back();
1883     worklist.pop_back();
1884 
1885     ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
1886     if (node != root) {
1887       ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
1888     }
1889 
1890     if (node->tag == CONCAT) {
1891       ABSL_INTERNAL_CHECK(node->concat()->left != nullptr,
1892                           ReportError(root, node));
1893       ABSL_INTERNAL_CHECK(node->concat()->right != nullptr,
1894                           ReportError(root, node));
1895       ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length +
1896                                                node->concat()->right->length),
1897                           ReportError(root, node));
1898       if (full_validation) {
1899         worklist.push_back(node->concat()->right);
1900         worklist.push_back(node->concat()->left);
1901       }
1902     } else if (node->tag >= FLAT) {
1903       ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag),
1904                           ReportError(root, node));
1905     } else if (node->tag == EXTERNAL) {
1906       ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
1907                           ReportError(root, node));
1908     } else if (node->tag == SUBSTRING) {
1909       ABSL_INTERNAL_CHECK(
1910           node->substring()->start < node->substring()->child->length,
1911           ReportError(root, node));
1912       ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
1913                               node->substring()->child->length,
1914                           ReportError(root, node));
1915     }
1916   } while (!worklist.empty());
1917   return true;
1918 }
1919 
1920 // Traverses the tree and computes the total memory allocated.
MemoryUsageAux(const CordRep * rep)1921 /* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) {
1922   size_t total_mem_usage = 0;
1923 
1924   // Allow a quick exit for the common case that the root is a leaf.
1925   if (RepMemoryUsageLeaf(rep, &total_mem_usage)) {
1926     return total_mem_usage;
1927   }
1928 
1929   // Iterate over the tree. cur_node is never a leaf node and leaf nodes will
1930   // never be appended to tree_stack. This reduces overhead from manipulating
1931   // tree_stack.
1932   absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack;
1933   const CordRep* cur_node = rep;
1934   while (true) {
1935     const CordRep* next_node = nullptr;
1936 
1937     if (cur_node->tag == CONCAT) {
1938       total_mem_usage += sizeof(CordRepConcat);
1939       const CordRep* left = cur_node->concat()->left;
1940       if (!RepMemoryUsageLeaf(left, &total_mem_usage)) {
1941         next_node = left;
1942       }
1943 
1944       const CordRep* right = cur_node->concat()->right;
1945       if (!RepMemoryUsageLeaf(right, &total_mem_usage)) {
1946         if (next_node) {
1947           tree_stack.push_back(next_node);
1948         }
1949         next_node = right;
1950       }
1951     } else {
1952       // Since cur_node is not a leaf or a concat node it must be a substring.
1953       assert(cur_node->tag == SUBSTRING);
1954       total_mem_usage += sizeof(CordRepSubstring);
1955       next_node = cur_node->substring()->child;
1956       if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) {
1957         next_node = nullptr;
1958       }
1959     }
1960 
1961     if (!next_node) {
1962       if (tree_stack.empty()) {
1963         return total_mem_usage;
1964       }
1965       next_node = tree_stack.back();
1966       tree_stack.pop_back();
1967     }
1968     cur_node = next_node;
1969   }
1970 }
1971 
operator <<(std::ostream & out,const Cord & cord)1972 std::ostream& operator<<(std::ostream& out, const Cord& cord) {
1973   for (absl::string_view chunk : cord.Chunks()) {
1974     out.write(chunk.data(), chunk.size());
1975   }
1976   return out;
1977 }
1978 
1979 namespace strings_internal {
FlatOverhead()1980 size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; }
MaxFlatLength()1981 size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; }
FlatTagToLength(uint8_t tag)1982 size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
1983   return TagToLength(tag);
1984 }
LengthToTag(size_t s)1985 uint8_t CordTestAccess::LengthToTag(size_t s) {
1986   ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
1987   return AllocatedSizeToTag(s + kFlatOverhead);
1988 }
SizeofCordRepConcat()1989 size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); }
SizeofCordRepExternal()1990 size_t CordTestAccess::SizeofCordRepExternal() {
1991   return sizeof(CordRepExternal);
1992 }
SizeofCordRepSubstring()1993 size_t CordTestAccess::SizeofCordRepSubstring() {
1994   return sizeof(CordRepSubstring);
1995 }
1996 }  // namespace strings_internal
1997 ABSL_NAMESPACE_END
1998 }  // namespace absl
1999