1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/cord_rep_btree_navigator.h"
16 
17 #include <cassert>
18 
19 #include "absl/strings/internal/cord_internal.h"
20 #include "absl/strings/internal/cord_rep_btree.h"
21 
22 namespace absl {
23 ABSL_NAMESPACE_BEGIN
24 namespace cord_internal {
25 
26 using ReadResult = CordRepBtreeNavigator::ReadResult;
27 
28 namespace {
29 
30 // Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`.
31 // If `rep` is already a `CordRepSubstring` instance, an adjusted instance is
32 // created based on the old offset and new offset.
33 // Adopts a reference on `rep`. Rep must be a valid data edge. Returns
34 // nullptr if `n == 0`, `rep` if `n == rep->length`.
35 // Requires `offset < rep->length` and `offset + n <= rep->length`.
36 // TODO(192061034): move to utility library in internal and optimize for small
37 // substrings of larger reps.
Substring(CordRep * rep,size_t offset,size_t n)38 inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) {
39   assert(n <= rep->length);
40   assert(offset < rep->length);
41   assert(offset <= rep->length - n);
42   assert(CordRepBtree::IsDataEdge(rep));
43 
44   if (n == 0) return nullptr;
45   if (n == rep->length) return CordRep::Ref(rep);
46 
47   if (rep->tag == SUBSTRING) {
48     offset += rep->substring()->start;
49     rep = rep->substring()->child;
50   }
51 
52   CordRepSubstring* substring = new CordRepSubstring();
53   substring->length = n;
54   substring->tag = SUBSTRING;
55   substring->start = offset;
56   substring->child = CordRep::Ref(rep);
57   return substring;
58 }
59 
Substring(CordRep * rep,size_t offset)60 inline CordRep* Substring(CordRep* rep, size_t offset) {
61   return Substring(rep, offset, rep->length - offset);
62 }
63 
64 }  // namespace
65 
Skip(size_t n)66 CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) {
67   int height = 0;
68   size_t index = index_[0];
69   CordRepBtree* node = node_[0];
70   CordRep* edge = node->Edge(index);
71 
72   // Overall logic: Find an edge of at least the length we need to skip.
73   // We consume all edges which are smaller (i.e., must be 100% skipped).
74   // If we exhausted all edges on the current level, we move one level
75   // up the tree, and repeat until we either find the edge, or until we hit
76   // the top of the tree meaning the skip exceeds tree->length.
77   while (n >= edge->length) {
78     n -= edge->length;
79     while (++index == node->end()) {
80       if (++height > height_) return {nullptr, n};
81       node = node_[height];
82       index = index_[height];
83     }
84     edge = node->Edge(index);
85   }
86 
87   // If we moved up the tree, descend down to the leaf level, consuming all
88   // edges that must be skipped.
89   while (height > 0) {
90     node = edge->btree();
91     index_[height] = index;
92     node_[--height] = node;
93     index = node->begin();
94     edge = node->Edge(index);
95     while (n >= edge->length) {
96       n -= edge->length;
97       ++index;
98       assert(index != node->end());
99       edge = node->Edge(index);
100     }
101   }
102   index_[0] = index;
103   return {edge, n};
104 }
105 
Read(size_t edge_offset,size_t n)106 ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) {
107   int height = 0;
108   size_t length = edge_offset + n;
109   size_t index = index_[0];
110   CordRepBtree* node = node_[0];
111   CordRep* edge = node->Edge(index);
112   assert(edge_offset < edge->length);
113 
114   if (length < edge->length) {
115     return {Substring(edge, edge_offset, n), length};
116   }
117 
118   // Similar to 'Skip', we consume all edges that are inside the 'length' of
119   // data that needs to be read. If we exhaust the current level, we move one
120   // level up the tree and repeat until we hit the final edge that must be
121   // (partially) read. We consume all edges into `subtree`.
122   CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset));
123   size_t subtree_end = 1;
124   do {
125     length -= edge->length;
126     while (++index == node->end()) {
127       index_[height] = index;
128       if (++height > height_) {
129         subtree->set_end(subtree_end);
130         if (length == 0) return {subtree, 0};
131         CordRep::Unref(subtree);
132         return {nullptr, length};
133       }
134       if (length != 0) {
135         subtree->set_end(subtree_end);
136         subtree = CordRepBtree::New(subtree);
137         subtree_end = 1;
138       }
139       node = node_[height];
140       index = index_[height];
141     }
142     edge = node->Edge(index);
143     if (length >= edge->length) {
144       subtree->length += edge->length;
145       subtree->edges_[subtree_end++] = CordRep::Ref(edge);
146     }
147   } while (length >= edge->length);
148   CordRepBtree* tree = subtree;
149   subtree->length += length;
150 
151   // If we moved up the tree, descend down to the leaf level, consuming all
152   // edges that must be read, adding 'down' nodes to `subtree`.
153   while (height > 0) {
154     node = edge->btree();
155     index_[height] = index;
156     node_[--height] = node;
157     index = node->begin();
158     edge = node->Edge(index);
159 
160     if (length != 0) {
161       CordRepBtree* right = CordRepBtree::New(height);
162       right->length = length;
163       subtree->edges_[subtree_end++] = right;
164       subtree->set_end(subtree_end);
165       subtree = right;
166       subtree_end = 0;
167       while (length >= edge->length) {
168         subtree->edges_[subtree_end++] = CordRep::Ref(edge);
169         length -= edge->length;
170         edge = node->Edge(++index);
171       }
172     }
173   }
174   // Add any (partial) edge still remaining at the leaf level.
175   if (length != 0) {
176     subtree->edges_[subtree_end++] = Substring(edge, 0, length);
177   }
178   subtree->set_end(subtree_end);
179   index_[0] = index;
180   return {tree, length};
181 }
182 
183 }  // namespace cord_internal
184 ABSL_NAMESPACE_END
185 }  // namespace absl
186