1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/cord_rep_btree.h"
16 
17 #include <cassert>
18 #include <cstdint>
19 #include <iostream>
20 #include <string>
21 
22 #include "absl/base/attributes.h"
23 #include "absl/base/config.h"
24 #include "absl/base/internal/raw_logging.h"
25 #include "absl/strings/internal/cord_internal.h"
26 #include "absl/strings/internal/cord_rep_consume.h"
27 #include "absl/strings/internal/cord_rep_flat.h"
28 #include "absl/strings/str_cat.h"
29 #include "absl/strings/string_view.h"
30 
31 namespace absl {
32 ABSL_NAMESPACE_BEGIN
33 namespace cord_internal {
34 
35 constexpr size_t CordRepBtree::kMaxCapacity;  // NOLINT: needed for c++ < c++17
36 
37 namespace {
38 
39 using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
40 using EdgeType = CordRepBtree::EdgeType;
41 using OpResult = CordRepBtree::OpResult;
42 using CopyResult = CordRepBtree::CopyResult;
43 
44 constexpr auto kFront = CordRepBtree::kFront;
45 constexpr auto kBack = CordRepBtree::kBack;
46 
exhaustive_validation()47 inline bool exhaustive_validation() {
48   return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
49 }
50 
51 // Implementation of the various 'Dump' functions.
52 // Prints the entire tree structure or 'rep'. External callers should
53 // not specify 'depth' and leave it to its default (0) value.
54 // Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
DumpAll(const CordRep * rep,bool include_contents,std::ostream & stream,int depth=0)55 void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
56              int depth = 0) {
57   // Allow for full height trees + substring -> flat / external nodes.
58   assert(depth <= CordRepBtree::kMaxDepth + 2);
59   std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
60                             ? std::string("Private")
61                             : absl::StrCat("Shared(", rep->refcount.Get(), ")");
62   std::string sptr = absl::StrCat("0x", absl::Hex(rep));
63 
64   // Dumps the data contents of `rep` if `include_contents` is true.
65   // Always emits a new line character.
66   auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
67     if (include_contents) {
68       // Allow for up to 60 wide display of content data, which with some
69       // indentation and prefix / labels keeps us within roughly 80-100 wide.
70       constexpr size_t kMaxDataLength = 60;
71       stream << ", data = \""
72              << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
73              << (r->length > kMaxDataLength ? "\"..." : "\"");
74     }
75     stream << '\n';
76   };
77 
78   // For each level, we print the 'shared/private' state and the rep pointer,
79   // indented by two spaces per recursive depth.
80   stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
81 
82   if (rep->IsBtree()) {
83     const CordRepBtree* node = rep->btree();
84     std::string label =
85         node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
86     stream << label << ", len = " << node->length
87            << ", begin = " << node->begin() << ", end = " << node->end()
88            << "\n";
89     for (CordRep* edge : node->Edges()) {
90       DumpAll(edge, include_contents, stream, depth + 1);
91     }
92   } else if (rep->tag == SUBSTRING) {
93     const CordRepSubstring* substring = rep->substring();
94     stream << "Substring, len = " << rep->length
95            << ", start = " << substring->start;
96     maybe_dump_data(rep);
97     DumpAll(substring->child, include_contents, stream, depth + 1);
98   } else if (rep->tag >= FLAT) {
99     stream << "Flat, len = " << rep->length
100            << ", cap = " << rep->flat()->Capacity();
101     maybe_dump_data(rep);
102   } else if (rep->tag == EXTERNAL) {
103     stream << "Extn, len = " << rep->length;
104     maybe_dump_data(rep);
105   }
106 }
107 
108 // TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
109 // small data out of large reps, and general efficiency of 'always copy small
110 // data'. Consider making this a cord rep internal library function.
CreateSubstring(CordRep * rep,size_t offset,size_t n)111 CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
112   assert(n != 0);
113   assert(offset + n <= rep->length);
114   assert(offset != 0 || n != rep->length);
115 
116   if (rep->tag == SUBSTRING) {
117     CordRepSubstring* substring = rep->substring();
118     offset += substring->start;
119     rep = CordRep::Ref(substring->child);
120     CordRep::Unref(substring);
121   }
122   CordRepSubstring* substring = new CordRepSubstring();
123   substring->length = n;
124   substring->tag = SUBSTRING;
125   substring->start = offset;
126   substring->child = rep;
127   return substring;
128 }
129 
130 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset,size_t n)131 inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
132   if (n == rep->length) return rep;
133   if (n == 0) return CordRep::Unref(rep), nullptr;
134   return CreateSubstring(rep, offset, n);
135 }
136 
137 // TODO(b/192061034): consider making this a cord rep library function.
MakeSubstring(CordRep * rep,size_t offset)138 inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
139   if (offset == 0) return rep;
140   return CreateSubstring(rep, offset, rep->length - offset);
141 }
142 
143 // Resizes `edge` to the provided `length`. Adopts a reference on `edge`.
144 // This method directly returns `edge` if `length` equals `edge->length`.
145 // If `is_mutable` is set to true, this function may return `edge` with
146 // `edge->length` set to the new length depending on the type and size of
147 // `edge`. Otherwise, this function returns a new CordRepSubstring value.
148 // Requires `length > 0 && length <= edge->length`.
ResizeEdge(CordRep * edge,size_t length,bool is_mutable)149 CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) {
150   assert(length > 0);
151   assert(length <= edge->length);
152   assert(CordRepBtree::IsDataEdge(edge));
153   if (length >= edge->length) return edge;
154 
155   if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) {
156     edge->length = length;
157     return edge;
158   }
159 
160   return CreateSubstring(edge, 0, length);
161 }
162 
163 template <EdgeType edge_type>
Consume(absl::string_view s,size_t n)164 inline absl::string_view Consume(absl::string_view s, size_t n) {
165   return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
166 }
167 
168 template <EdgeType edge_type>
Consume(char * dst,absl::string_view s,size_t n)169 inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
170   if (edge_type == kBack) {
171     memcpy(dst, s.data(), n);
172     return s.substr(n);
173   } else {
174     const size_t offset = s.size() - n;
175     memcpy(dst, s.data() + offset, n);
176     return s.substr(0, offset);
177   }
178 }
179 
180 // Known issue / optimization weirdness: the store associated with the
181 // decrement introduces traffic between cpus (even if the result of that
182 // traffic does nothing), making this faster than a single call to
183 // refcount.Decrement() checking the zero refcount condition.
184 template <typename R, typename Fn>
FastUnref(R * r,Fn && fn)185 inline void FastUnref(R* r, Fn&& fn) {
186   if (r->refcount.IsOne()) {
187     fn(r);
188   } else if (!r->refcount.DecrementExpectHighRefcount()) {
189     fn(r);
190   }
191 }
192 
193 // Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
194 // node, or a SUBSTRING of an EXTERNAL or FLAT node.
DeleteLeafEdge(CordRep * rep)195 void DeleteLeafEdge(CordRep* rep) {
196   for (;;) {
197     if (rep->tag >= FLAT) {
198       CordRepFlat::Delete(rep->flat());
199       return;
200     }
201     if (rep->tag == EXTERNAL) {
202       CordRepExternal::Delete(rep->external());
203       return;
204     }
205     assert(rep->tag == SUBSTRING);
206     CordRepSubstring* substring = rep->substring();
207     rep = substring->child;
208     assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
209     delete substring;
210     if (rep->refcount.Decrement()) return;
211   }
212 }
213 
214 // StackOperations contains the logic to build a left-most or right-most stack
215 // (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
216 // propagate node changes up the stack.
217 template <EdgeType edge_type>
218 struct StackOperations {
219   // Returns true if the node at 'depth' is mutable, i.e. has a refcount
220   // of one, carries no CRC, and all of its parent nodes have a refcount of one.
ownedabsl::cord_internal::__anond84a6a070111::StackOperations221   inline bool owned(int depth) const { return depth < share_depth; }
222 
223   // Returns the node at 'depth'.
nodeabsl::cord_internal::__anond84a6a070111::StackOperations224   inline CordRepBtree* node(int depth) const { return stack[depth]; }
225 
226   // Builds a `depth` levels deep stack starting at `tree` recording which nodes
227   // are private in the form of the 'share depth' where nodes are shared.
BuildStackabsl::cord_internal::__anond84a6a070111::StackOperations228   inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
229     assert(depth <= tree->height());
230     int current_depth = 0;
231     while (current_depth < depth && tree->refcount.IsMutable()) {
232       stack[current_depth++] = tree;
233       tree = tree->Edge(edge_type)->btree();
234     }
235     share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0);
236     while (current_depth < depth) {
237       stack[current_depth++] = tree;
238       tree = tree->Edge(edge_type)->btree();
239     }
240     return tree;
241   }
242 
243   // Builds a stack with the invariant that all nodes are private owned / not
244   // shared and carry no CRC data. This is used in iterative updates where a
245   // previous propagation guaranteed all nodes have this property.
BuildOwnedStackabsl::cord_internal::__anond84a6a070111::StackOperations246   inline void BuildOwnedStack(CordRepBtree* tree, int height) {
247     assert(height <= CordRepBtree::kMaxHeight);
248     int depth = 0;
249     while (depth < height) {
250       assert(tree->refcount.IsMutable());
251       stack[depth++] = tree;
252       tree = tree->Edge(edge_type)->btree();
253     }
254     assert(tree->refcount.IsMutable());
255     share_depth = depth + 1;
256   }
257 
258   // Processes the final 'top level' result action for the tree.
259   // See the 'Action' enum for the various action implications.
Finalizeabsl::cord_internal::__anond84a6a070111::StackOperations260   static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
261     switch (result.action) {
262       case CordRepBtree::kPopped:
263         tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree)
264                                   : CordRepBtree::New(result.tree, tree);
265         if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) {
266           tree = CordRepBtree::Rebuild(tree);
267           ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight,
268                          "Max height exceeded");
269         }
270         return tree;
271       case CordRepBtree::kCopied:
272         CordRep::Unref(tree);
273         ABSL_FALLTHROUGH_INTENDED;
274       case CordRepBtree::kSelf:
275         return result.tree;
276     }
277     ABSL_INTERNAL_UNREACHABLE;
278     return result.tree;
279   }
280 
281   // Propagate the action result in 'result' up into all nodes of the stack
282   // starting at depth 'depth'. 'length' contains the extra length of data that
283   // was added at the lowest level, and is updated into all nodes of the stack.
284   // See the 'Action' enum for the various action implications.
285   // If 'propagate' is true, then any copied node values are updated into the
286   // stack, which is used for iterative processing on the same stack.
287   template <bool propagate = false>
Unwindabsl::cord_internal::__anond84a6a070111::StackOperations288   inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
289                               OpResult result) {
290     // TODO(mvels): revisit the below code to check if 3 loops with 3
291     // (incremental) conditions is faster than 1 loop with a switch.
292     // Benchmarking and perf recordings indicate the loop with switch is
293     // fastest, likely because of indirect jumps on the tight case values and
294     // dense branches. But it's worth considering 3 loops, as the `action`
295     // transitions are mono directional. E.g.:
296     //   while (action == kPopped) {
297     //     ...
298     //   }
299     //   while (action == kCopied) {
300     //     ...
301     //   }
302     //   ...
303     // We also  found that an "if () do {}" loop here seems faster, possibly
304     // because it allows the branch predictor more granular heuristics on
305     // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
306     // which appear to be the most common use cases.
307     if (depth != 0) {
308       do {
309         CordRepBtree* node = stack[--depth];
310         const bool owned = depth < share_depth;
311         switch (result.action) {
312           case CordRepBtree::kPopped:
313             assert(!propagate);
314             result = node->AddEdge<edge_type>(owned, result.tree, length);
315             break;
316           case CordRepBtree::kCopied:
317             result = node->SetEdge<edge_type>(owned, result.tree, length);
318             if (propagate) stack[depth] = result.tree;
319             break;
320           case CordRepBtree::kSelf:
321             node->length += length;
322             while (depth > 0) {
323               node = stack[--depth];
324               node->length += length;
325             }
326             return node;
327         }
328       } while (depth > 0);
329     }
330     return Finalize(tree, result);
331   }
332 
333   // Invokes `Unwind` with `propagate=true` to update the stack node values.
Propagateabsl::cord_internal::__anond84a6a070111::StackOperations334   inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
335                                  OpResult result) {
336     return Unwind</*propagate=*/true>(tree, depth, length, result);
337   }
338 
339   // `share_depth` contains the depth at which the nodes in the stack cannot
340   // be mutated. I.e., if the top most level is shared (i.e.:
341   // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node
342   // is shared (and implicitly all nodes below that) then `share_depth` is 1,
343   // etc. A `share_depth` greater than the depth of the stack indicates that
344   // none of the nodes in the stack are shared.
345   int share_depth;
346 
347   NodeStack stack;
348 };
349 
350 }  // namespace
351 
Dump(const CordRep * rep,absl::string_view label,bool include_contents,std::ostream & stream)352 void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
353                         bool include_contents, std::ostream& stream) {
354   stream << "===================================\n";
355   if (!label.empty()) {
356     stream << label << '\n';
357     stream << "-----------------------------------\n";
358   }
359   if (rep) {
360     DumpAll(rep, include_contents, stream);
361   } else {
362     stream << "NULL\n";
363   }
364 }
365 
Dump(const CordRep * rep,absl::string_view label,std::ostream & stream)366 void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
367                         std::ostream& stream) {
368   Dump(rep, label, false, stream);
369 }
370 
Dump(const CordRep * rep,std::ostream & stream)371 void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
372   Dump(rep, absl::string_view(), false, stream);
373 }
374 
DestroyLeaf(CordRepBtree * tree,size_t begin,size_t end)375 void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
376   for (CordRep* edge : tree->Edges(begin, end)) {
377     FastUnref(edge, DeleteLeafEdge);
378   }
379   Delete(tree);
380 }
381 
DestroyNonLeaf(CordRepBtree * tree,size_t begin,size_t end)382 void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
383                                   size_t end) {
384   for (CordRep* edge : tree->Edges(begin, end)) {
385     FastUnref(edge->btree(), Destroy);
386   }
387   Delete(tree);
388 }
389 
IsValid(const CordRepBtree * tree,bool shallow)390 bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
391 #define NODE_CHECK_VALID(x)                                           \
392   if (!(x)) {                                                         \
393     ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
394     return false;                                                     \
395   }
396 #define NODE_CHECK_EQ(x, y)                                                    \
397   if ((x) != (y)) {                                                            \
398     ABSL_RAW_LOG(ERROR,                                                        \
399                  "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
400                  #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str());        \
401     return false;                                                              \
402   }
403 
404   NODE_CHECK_VALID(tree != nullptr);
405   NODE_CHECK_VALID(tree->IsBtree());
406   NODE_CHECK_VALID(tree->height() <= kMaxHeight);
407   NODE_CHECK_VALID(tree->begin() < tree->capacity());
408   NODE_CHECK_VALID(tree->end() <= tree->capacity());
409   NODE_CHECK_VALID(tree->begin() <= tree->end());
410   size_t child_length = 0;
411   for (CordRep* edge : tree->Edges()) {
412     NODE_CHECK_VALID(edge != nullptr);
413     if (tree->height() > 0) {
414       NODE_CHECK_VALID(edge->IsBtree());
415       NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
416     } else {
417       NODE_CHECK_VALID(IsDataEdge(edge));
418     }
419     child_length += edge->length;
420   }
421   NODE_CHECK_EQ(child_length, tree->length);
422   if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
423     for (CordRep* edge : tree->Edges()) {
424       if (!IsValid(edge->btree(), shallow)) return false;
425     }
426   }
427   return true;
428 
429 #undef NODE_CHECK_VALID
430 #undef NODE_CHECK_EQ
431 }
432 
433 #ifndef NDEBUG
434 
AssertValid(CordRepBtree * tree,bool shallow)435 CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
436   if (!IsValid(tree, shallow)) {
437     Dump(tree, "CordRepBtree validation failed:", false, std::cout);
438     ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
439   }
440   return tree;
441 }
442 
AssertValid(const CordRepBtree * tree,bool shallow)443 const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
444                                               bool shallow) {
445   if (!IsValid(tree, shallow)) {
446     Dump(tree, "CordRepBtree validation failed:", false, std::cout);
447     ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
448   }
449   return tree;
450 }
451 
452 #endif  // NDEBUG
453 
454 template <EdgeType edge_type>
AddEdge(bool owned,CordRep * edge,size_t delta)455 inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
456   if (size() >= kMaxCapacity) return {New(edge), kPopped};
457   OpResult result = ToOpResult(owned);
458   result.tree->Add<edge_type>(edge);
459   result.tree->length += delta;
460   return result;
461 }
462 
463 template <EdgeType edge_type>
SetEdge(bool owned,CordRep * edge,size_t delta)464 OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
465   OpResult result;
466   const size_t idx = index(edge_type);
467   if (owned) {
468     result = {this, kSelf};
469     CordRep::Unref(edges_[idx]);
470   } else {
471     // Create a copy containing all unchanged edges. Unchanged edges are the
472     // open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
473     // We conveniently cover both case using a constexpr `shift` being 0 or 1
474     // as `end :== back + 1`.
475     result = {CopyRaw(), kCopied};
476     constexpr int shift = edge_type == kFront ? 1 : 0;
477     for (CordRep* r : Edges(begin() + shift, back() + shift)) {
478       CordRep::Ref(r);
479     }
480   }
481   result.tree->edges_[idx] = edge;
482   result.tree->length += delta;
483   return result;
484 }
485 
486 template <EdgeType edge_type>
AddCordRep(CordRepBtree * tree,CordRep * rep)487 CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
488   const int depth = tree->height();
489   const size_t length = rep->length;
490   StackOperations<edge_type> ops;
491   CordRepBtree* leaf = ops.BuildStack(tree, depth);
492   const OpResult result =
493       leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
494   return ops.Unwind(tree, depth, length, result);
495 }
496 
497 template <>
NewLeaf(absl::string_view data,size_t extra)498 CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
499                                            size_t extra) {
500   CordRepBtree* leaf = CordRepBtree::New(0);
501   size_t length = 0;
502   size_t end = 0;
503   const size_t cap = leaf->capacity();
504   while (!data.empty() && end != cap) {
505     auto* flat = CordRepFlat::New(data.length() + extra);
506     flat->length = (std::min)(data.length(), flat->Capacity());
507     length += flat->length;
508     leaf->edges_[end++] = flat;
509     data = Consume<kBack>(flat->Data(), data, flat->length);
510   }
511   leaf->length = length;
512   leaf->set_end(end);
513   return leaf;
514 }
515 
516 template <>
NewLeaf(absl::string_view data,size_t extra)517 CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
518                                             size_t extra) {
519   CordRepBtree* leaf = CordRepBtree::New(0);
520   size_t length = 0;
521   size_t begin = leaf->capacity();
522   leaf->set_end(leaf->capacity());
523   while (!data.empty() && begin != 0) {
524     auto* flat = CordRepFlat::New(data.length() + extra);
525     flat->length = (std::min)(data.length(), flat->Capacity());
526     length += flat->length;
527     leaf->edges_[--begin] = flat;
528     data = Consume<kFront>(flat->Data(), data, flat->length);
529   }
530   leaf->length = length;
531   leaf->set_begin(begin);
532   return leaf;
533 }
534 
535 template <>
AddData(absl::string_view data,size_t extra)536 absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
537                                                size_t extra) {
538   assert(!data.empty());
539   assert(size() < capacity());
540   AlignBegin();
541   const size_t cap = capacity();
542   do {
543     CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
544     const size_t n = (std::min)(data.length(), flat->Capacity());
545     flat->length = n;
546     edges_[fetch_add_end(1)] = flat;
547     data = Consume<kBack>(flat->Data(), data, n);
548   } while (!data.empty() && end() != cap);
549   return data;
550 }
551 
552 template <>
AddData(absl::string_view data,size_t extra)553 absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
554                                                 size_t extra) {
555   assert(!data.empty());
556   assert(size() < capacity());
557   AlignEnd();
558   do {
559     CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
560     const size_t n = (std::min)(data.length(), flat->Capacity());
561     flat->length = n;
562     edges_[sub_fetch_begin(1)] = flat;
563     data = Consume<kFront>(flat->Data(), data, n);
564   } while (!data.empty() && begin() != 0);
565   return data;
566 }
567 
568 template <EdgeType edge_type>
AddData(CordRepBtree * tree,absl::string_view data,size_t extra)569 CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
570                                     size_t extra) {
571   if (ABSL_PREDICT_FALSE(data.empty())) return tree;
572 
573   const size_t original_data_size = data.size();
574   int depth = tree->height();
575   StackOperations<edge_type> ops;
576   CordRepBtree* leaf = ops.BuildStack(tree, depth);
577 
578   // If there is capacity in the last edge, append as much data
579   // as possible into this last edge.
580   if (leaf->size() < leaf->capacity()) {
581     OpResult result = leaf->ToOpResult(ops.owned(depth));
582     data = result.tree->AddData<edge_type>(data, extra);
583     if (data.empty()) {
584       result.tree->length += original_data_size;
585       return ops.Unwind(tree, depth, original_data_size, result);
586     }
587 
588     // We added some data into this leaf, but not all. Propagate the added
589     // length to the top most node, and rebuild the stack with any newly copied
590     // or updated nodes. From this point on, the path (leg) from the top most
591     // node to the right-most node towards the leaf node is privately owned.
592     size_t delta = original_data_size - data.size();
593     assert(delta > 0);
594     result.tree->length += delta;
595     tree = ops.Propagate(tree, depth, delta, result);
596     ops.share_depth = depth + 1;
597   }
598 
599   // We were unable to append all data into the existing right-most leaf node.
600   // This means all remaining data must be put into (a) new leaf node(s) which
601   // we append to the tree. To make this efficient, we iteratively build full
602   // leaf nodes from `data` until the created leaf contains all remaining data.
603   // We utilize the `Unwind` method to merge the created leaf into the first
604   // level towards root that has capacity. On each iteration with remaining
605   // data, we rebuild the stack in the knowledge that right-most nodes are
606   // privately owned after the first `Unwind` completes.
607   for (;;) {
608     OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
609     if (result.tree->length == data.size()) {
610       return ops.Unwind(tree, depth, result.tree->length, result);
611     }
612     data = Consume<edge_type>(data, result.tree->length);
613     tree = ops.Unwind(tree, depth, result.tree->length, result);
614     depth = tree->height();
615     ops.BuildOwnedStack(tree, depth);
616   }
617 }
618 
619 template <EdgeType edge_type>
Merge(CordRepBtree * dst,CordRepBtree * src)620 CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
621   assert(dst->height() >= src->height());
622 
623   // Capture source length as we may consume / destroy `src`.
624   const size_t length = src->length;
625 
626   // We attempt to merge `src` at its corresponding height in `dst`.
627   const int depth = dst->height() - src->height();
628   StackOperations<edge_type> ops;
629   CordRepBtree* merge_node = ops.BuildStack(dst, depth);
630 
631   // If there is enough space in `merge_node` for all edges from `src`, add all
632   // edges to this node, making a fresh copy as needed if not privately owned.
633   // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
634   // `Finalize` to merge `src` into the first level towards `root` where there
635   // is capacity for another edge, or create a new top level node.
636   OpResult result;
637   if (merge_node->size() + src->size() <= kMaxCapacity) {
638     result = merge_node->ToOpResult(ops.owned(depth));
639     result.tree->Add<edge_type>(src->Edges());
640     result.tree->length += src->length;
641     if (src->refcount.IsOne()) {
642       Delete(src);
643     } else {
644       for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
645       CordRepBtree::Unref(src);
646     }
647   } else {
648     result = {src, kPopped};
649   }
650 
651   // Unless we merged at the top level (i.e.: src and dst are equal height),
652   // unwind the result towards the top level, and finalize the result.
653   if (depth) {
654     return ops.Unwind(dst, depth, length, result);
655   }
656   return ops.Finalize(dst, result);
657 }
658 
CopySuffix(size_t offset)659 CopyResult CordRepBtree::CopySuffix(size_t offset) {
660   assert(offset < this->length);
661 
662   // As long as `offset` starts inside the last edge, we can 'drop' the current
663   // depth. For the most extreme example: if offset references the last data
664   // edge in the tree, there is only a single edge / path from the top of the
665   // tree to that last edge, so we can drop all the nodes except that edge.
666   // The fast path check for this is `back->length >= length - offset`.
667   int height = this->height();
668   CordRepBtree* node = this;
669   size_t len = node->length - offset;
670   CordRep* back = node->Edge(kBack);
671   while (back->length >= len) {
672     offset = back->length - len;
673     if (--height < 0) {
674       return {MakeSubstring(CordRep::Ref(back), offset), height};
675     }
676     node = back->btree();
677     back = node->Edge(kBack);
678   }
679   if (offset == 0) return {CordRep::Ref(node), height};
680 
681   // Offset does not point into the last edge, so we span at least two edges.
682   // Find the index of offset with `IndexBeyond` which provides us the edge
683   // 'beyond' the offset if offset is not a clean starting point of an edge.
684   Position pos = node->IndexBeyond(offset);
685   CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
686   const CopyResult result = {sub, height};
687 
688   // `pos.n` contains a non zero value if the offset is not an exact starting
689   // point of an edge. In this case, `pos.n` contains the 'trailing' amount of
690   // bytes of the edge preceding that in `pos.index`. We need to iteratively
691   // adjust the preceding edge with the 'broken' offset until we have a perfect
692   // start of the edge.
693   while (pos.n != 0) {
694     assert(pos.index >= 1);
695     const size_t begin = pos.index - 1;
696     sub->set_begin(begin);
697     CordRep* const edge = node->Edge(begin);
698 
699     len = pos.n;
700     offset = edge->length - len;
701 
702     if (--height < 0) {
703       sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
704       return result;
705     }
706 
707     node = edge->btree();
708     pos = node->IndexBeyond(offset);
709 
710     CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
711     sub->edges_[begin] = nsub;
712     sub = nsub;
713   }
714   sub->set_begin(pos.index);
715   return result;
716 }
717 
CopyPrefix(size_t n,bool allow_folding)718 CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) {
719   assert(n > 0);
720   assert(n <= this->length);
721 
722   // As long as `n` does not exceed the length of the first edge, we can 'drop'
723   // the current depth. For the most extreme example: if we'd copy a 1 byte
724   // prefix from a tree, there is only a single edge / path from the top of the
725   // tree to the single data edge containing this byte, so we can drop all the
726   // nodes except the data node.
727   int height = this->height();
728   CordRepBtree* node = this;
729   CordRep* front = node->Edge(kFront);
730   if (allow_folding) {
731     while (front->length >= n) {
732       if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
733       node = front->btree();
734       front = node->Edge(kFront);
735     }
736   }
737   if (node->length == n) return {CordRep::Ref(node), height};
738 
739   // `n` spans at least two nodes, find the end point of the span.
740   Position pos = node->IndexOf(n);
741 
742   // Create a partial copy of the node up to `pos.index`, with a defined length
743   // of `n`. Any 'partial last edge' is added further below as needed.
744   CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
745   const CopyResult result = {sub, height};
746 
747   // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
748   // this is not zero, we don't have a 'clean cut', so we need to make a
749   // (partial) copy of that last edge, and repeat this until pos.n is zero.
750   while (pos.n != 0) {
751     size_t end = pos.index;
752     n = pos.n;
753 
754     CordRep* edge = node->Edge(pos.index);
755     if (--height < 0) {
756       sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
757       sub->set_end(end);
758       AssertValid(result.edge->btree());
759       return result;
760     }
761 
762     node = edge->btree();
763     pos = node->IndexOf(n);
764     CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
765     sub->edges_[end++] = nsub;
766     sub->set_end(end);
767     sub = nsub;
768   }
769   sub->set_end(pos.index);
770   AssertValid(result.edge->btree());
771   return result;
772 }
773 
ExtractFront(CordRepBtree * tree)774 CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) {
775   CordRep* front = tree->Edge(tree->begin());
776   if (tree->refcount.IsMutable()) {
777     Unref(tree->Edges(tree->begin() + 1, tree->end()));
778     CordRepBtree::Delete(tree);
779   } else {
780     CordRep::Ref(front);
781     CordRep::Unref(tree);
782   }
783   return front;
784 }
785 
ConsumeBeginTo(CordRepBtree * tree,size_t end,size_t new_length)786 CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end,
787                                            size_t new_length) {
788   assert(end <= tree->end());
789   if (tree->refcount.IsMutable()) {
790     Unref(tree->Edges(end, tree->end()));
791     tree->set_end(end);
792     tree->length = new_length;
793   } else {
794     CordRepBtree* old = tree;
795     tree = tree->CopyBeginTo(end, new_length);
796     CordRep::Unref(old);
797   }
798   return tree;
799 }
800 
RemoveSuffix(CordRepBtree * tree,size_t n)801 CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) {
802   // Check input and deal with trivial cases 'Remove all/none'
803   assert(tree != nullptr);
804   assert(n <= tree->length);
805   const size_t len = tree->length;
806   if (ABSL_PREDICT_FALSE(n == 0)) {
807     return tree;
808   }
809   if (ABSL_PREDICT_FALSE(n >= len)) {
810     CordRepBtree::Unref(tree);
811     return nullptr;
812   }
813 
814   size_t length = len - n;
815   int height = tree->height();
816   bool is_mutable = tree->refcount.IsMutable();
817 
818   // Extract all top nodes which are reduced to size = 1
819   Position pos = tree->IndexOfLength(length);
820   while (pos.index == tree->begin()) {
821     CordRep* edge = ExtractFront(tree);
822     is_mutable &= edge->refcount.IsMutable();
823     if (height-- == 0) return ResizeEdge(edge, length, is_mutable);
824     tree = edge->btree();
825     pos = tree->IndexOfLength(length);
826   }
827 
828   // Repeat the following sequence traversing down the tree:
829   // - Crop the top node to the 'last remaining edge' adjusting length.
830   // - Set the length for down edges to the partial length in that last edge.
831   // - Repeat this until the last edge is 'included in full'
832   // - If we hit the data edge level, resize and return the last data edge
833   CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length);
834   CordRep* edge = tree->Edge(pos.index);
835   length = pos.n;
836   while (length != edge->length) {
837     // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy.
838     assert(tree->refcount.IsMutable());
839     const bool edge_is_mutable = edge->refcount.IsMutable();
840 
841     if (height-- == 0) {
842       tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable);
843       return AssertValid(top);
844     }
845 
846     if (!edge_is_mutable) {
847       // We can't 'in place' remove any suffixes down this edge.
848       // Replace this edge with a prefix copy instead.
849       tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge;
850       CordRep::Unref(edge);
851       return AssertValid(top);
852     }
853 
854     // Move down one level, rinse repeat.
855     tree = edge->btree();
856     pos = tree->IndexOfLength(length);
857     tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length);
858     edge = tree->Edge(pos.index);
859     length = pos.n;
860   }
861 
862   return AssertValid(top);
863 }
864 
SubTree(size_t offset,size_t n)865 CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
866   assert(n <= this->length);
867   assert(offset <= this->length - n);
868   if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
869 
870   CordRepBtree* node = this;
871   int height = node->height();
872   Position front = node->IndexOf(offset);
873   CordRep* left = node->edges_[front.index];
874   while (front.n + n <= left->length) {
875     if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
876     node = left->btree();
877     front = node->IndexOf(front.n);
878     left = node->edges_[front.index];
879   }
880 
881   const Position back = node->IndexBefore(front, n);
882   CordRep* const right = node->edges_[back.index];
883   assert(back.index > front.index);
884 
885   // Get partial suffix and prefix entries.
886   CopyResult prefix;
887   CopyResult suffix;
888   if (height > 0) {
889     // Copy prefix and suffix of the boundary nodes.
890     prefix = left->btree()->CopySuffix(front.n);
891     suffix = right->btree()->CopyPrefix(back.n);
892 
893     // If there is an edge between the prefix and suffix edges, then the tree
894     // must remain at its previous (full) height. If we have no edges between
895     // prefix and suffix edges, then the tree must be as high as either the
896     // suffix or prefix edges (which are collapsed to their minimum heights).
897     if (front.index + 1 == back.index) {
898       height = (std::max)(prefix.height, suffix.height) + 1;
899     }
900 
901     // Raise prefix and suffixes to the new tree height.
902     for (int h = prefix.height + 1; h < height; ++h) {
903       prefix.edge = CordRepBtree::New(prefix.edge);
904     }
905     for (int h = suffix.height + 1; h < height; ++h) {
906       suffix.edge = CordRepBtree::New(suffix.edge);
907     }
908   } else {
909     // Leaf node, simply take substrings for prefix and suffix.
910     prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
911     suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
912   }
913 
914   // Compose resulting tree.
915   CordRepBtree* sub = CordRepBtree::New(height);
916   size_t end = 0;
917   sub->edges_[end++] = prefix.edge;
918   for (CordRep* r : node->Edges(front.index + 1, back.index)) {
919     sub->edges_[end++] = CordRep::Ref(r);
920   }
921   sub->edges_[end++] = suffix.edge;
922   sub->set_end(end);
923   sub->length = n;
924   return AssertValid(sub);
925 }
926 
MergeTrees(CordRepBtree * left,CordRepBtree * right)927 CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
928                                        CordRepBtree* right) {
929   return left->height() >= right->height() ? Merge<kBack>(left, right)
930                                            : Merge<kFront>(right, left);
931 }
932 
IsFlat(absl::string_view * fragment) const933 bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
934   if (height() == 0 && size() == 1) {
935     if (fragment) *fragment = Data(begin());
936     return true;
937   }
938   return false;
939 }
940 
IsFlat(size_t offset,const size_t n,absl::string_view * fragment) const941 bool CordRepBtree::IsFlat(size_t offset, const size_t n,
942                           absl::string_view* fragment) const {
943   assert(n <= this->length);
944   assert(offset <= this->length - n);
945   if (ABSL_PREDICT_FALSE(n == 0)) return false;
946   int height = this->height();
947   const CordRepBtree* node = this;
948   for (;;) {
949     const Position front = node->IndexOf(offset);
950     const CordRep* edge = node->Edge(front.index);
951     if (edge->length < front.n + n) return false;
952     if (--height < 0) {
953       if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
954       return true;
955     }
956     offset = front.n;
957     node = node->Edge(front.index)->btree();
958   }
959 }
960 
GetCharacter(size_t offset) const961 char CordRepBtree::GetCharacter(size_t offset) const {
962   assert(offset < length);
963   const CordRepBtree* node = this;
964   int height = node->height();
965   for (;;) {
966     Position front = node->IndexOf(offset);
967     if (--height < 0) return node->Data(front.index)[front.n];
968     offset = front.n;
969     node = node->Edge(front.index)->btree();
970   }
971 }
972 
GetAppendBufferSlow(size_t size)973 Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
974   // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
975   assert(height() >= 4);
976   assert(refcount.IsMutable());
977 
978   // Build a stack of nodes we may potentially need to update if we find a
979   // non-shared FLAT with capacity at the leaf level.
980   const int depth = height();
981   CordRepBtree* node = this;
982   CordRepBtree* stack[kMaxDepth];
983   for (int i = 0; i < depth; ++i) {
984     node = node->Edge(kBack)->btree();
985     if (!node->refcount.IsMutable()) return {};
986     stack[i] = node;
987   }
988 
989   // Must be a privately owned, mutable flat.
990   CordRep* const edge = node->Edge(kBack);
991   if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {};
992 
993   // Must have capacity.
994   const size_t avail = edge->flat()->Capacity() - edge->length;
995   if (avail == 0) return {};
996 
997   // Build span on remaining capacity.
998   size_t delta = (std::min)(size, avail);
999   Span<char> span = {edge->flat()->Data() + edge->length, delta};
1000   edge->length += delta;
1001   this->length += delta;
1002   for (int i = 0; i < depth; ++i) {
1003     stack[i]->length += delta;
1004   }
1005   return span;
1006 }
1007 
CreateSlow(CordRep * rep)1008 CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
1009   if (rep->IsBtree()) return rep->btree();
1010 
1011   CordRepBtree* node = nullptr;
1012   auto consume = [&node](CordRep* r, size_t offset, size_t length) {
1013     r = MakeSubstring(r, offset, length);
1014     if (node == nullptr) {
1015       node = New(r);
1016     } else {
1017       node = CordRepBtree::AddCordRep<kBack>(node, r);
1018     }
1019   };
1020   Consume(rep, consume);
1021   return node;
1022 }
1023 
AppendSlow(CordRepBtree * tree,CordRep * rep)1024 CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
1025   if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
1026     return MergeTrees(tree, rep->btree());
1027   }
1028   auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
1029     r = MakeSubstring(r, offset, length);
1030     tree = CordRepBtree::AddCordRep<kBack>(tree, r);
1031   };
1032   Consume(rep, consume);
1033   return tree;
1034 }
1035 
PrependSlow(CordRepBtree * tree,CordRep * rep)1036 CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
1037   if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
1038     return MergeTrees(rep->btree(), tree);
1039   }
1040   auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
1041     r = MakeSubstring(r, offset, length);
1042     tree = CordRepBtree::AddCordRep<kFront>(tree, r);
1043   };
1044   ReverseConsume(rep, consume);
1045   return tree;
1046 }
1047 
Append(CordRepBtree * tree,absl::string_view data,size_t extra)1048 CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
1049                                    size_t extra) {
1050   return CordRepBtree::AddData<kBack>(tree, data, extra);
1051 }
1052 
Prepend(CordRepBtree * tree,absl::string_view data,size_t extra)1053 CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
1054                                     size_t extra) {
1055   return CordRepBtree::AddData<kFront>(tree, data, extra);
1056 }
1057 
1058 template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
1059                                                         CordRep* rep);
1060 template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
1061                                                        CordRep* rep);
1062 template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
1063                                                      absl::string_view data,
1064                                                      size_t extra);
1065 template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
1066                                                     absl::string_view data,
1067                                                     size_t extra);
1068 
Rebuild(CordRepBtree ** stack,CordRepBtree * tree,bool consume)1069 void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree,
1070                            bool consume) {
1071   bool owned = consume && tree->refcount.IsOne();
1072   if (tree->height() == 0) {
1073     for (CordRep* edge : tree->Edges()) {
1074       if (!owned) edge = CordRep::Ref(edge);
1075       size_t height = 0;
1076       size_t length = edge->length;
1077       CordRepBtree* node = stack[0];
1078       OpResult result = node->AddEdge<kBack>(true, edge, length);
1079       while (result.action == CordRepBtree::kPopped) {
1080         stack[height] = result.tree;
1081         if (stack[++height] == nullptr) {
1082           result.action = CordRepBtree::kSelf;
1083           stack[height] = CordRepBtree::New(node, result.tree);
1084         } else {
1085           node = stack[height];
1086           result = node->AddEdge<kBack>(true, result.tree, length);
1087         }
1088       }
1089       while (stack[++height] != nullptr) {
1090         stack[height]->length += length;
1091       }
1092     }
1093   } else {
1094     for (CordRep* rep : tree->Edges()) {
1095       Rebuild(stack, rep->btree(), owned);
1096     }
1097   }
1098   if (consume) {
1099     if (owned) {
1100       CordRepBtree::Delete(tree);
1101     } else {
1102       CordRepBtree::Unref(tree);
1103     }
1104   }
1105 }
1106 
Rebuild(CordRepBtree * tree)1107 CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) {
1108   // Set up initial stack with empty leaf node.
1109   CordRepBtree* node = CordRepBtree::New();
1110   CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node};
1111 
1112   // Recursively build the tree, consuming the input tree.
1113   Rebuild(stack, tree, /* consume reference */ true);
1114 
1115   // Return top most node
1116   for (CordRepBtree* parent : stack) {
1117     if (parent == nullptr) return node;
1118     node = parent;
1119   }
1120 
1121   // Unreachable
1122   assert(false);
1123   return nullptr;
1124 }
1125 
1126 }  // namespace cord_internal
1127 ABSL_NAMESPACE_END
1128 }  // namespace absl
1129