1 // Copyright 2021 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/cord_rep_btree_navigator.h"
16
17 #include <cassert>
18
19 #include "absl/strings/internal/cord_internal.h"
20 #include "absl/strings/internal/cord_rep_btree.h"
21
22 namespace absl {
23 ABSL_NAMESPACE_BEGIN
24 namespace cord_internal {
25
26 using ReadResult = CordRepBtreeNavigator::ReadResult;
27
28 namespace {
29
30 // Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`.
31 // If `rep` is already a `CordRepSubstring` instance, an adjusted instance is
32 // created based on the old offset and new offset.
33 // Adopts a reference on `rep`. Rep must be a valid data edge. Returns
34 // nullptr if `n == 0`, `rep` if `n == rep->length`.
35 // Requires `offset < rep->length` and `offset + n <= rep->length`.
36 // TODO(192061034): move to utility library in internal and optimize for small
37 // substrings of larger reps.
Substring(CordRep * rep,size_t offset,size_t n)38 inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) {
39 assert(n <= rep->length);
40 assert(offset < rep->length);
41 assert(offset <= rep->length - n);
42 assert(CordRepBtree::IsDataEdge(rep));
43
44 if (n == 0) return nullptr;
45 if (n == rep->length) return CordRep::Ref(rep);
46
47 if (rep->tag == SUBSTRING) {
48 offset += rep->substring()->start;
49 rep = rep->substring()->child;
50 }
51
52 CordRepSubstring* substring = new CordRepSubstring();
53 substring->length = n;
54 substring->tag = SUBSTRING;
55 substring->start = offset;
56 substring->child = CordRep::Ref(rep);
57 return substring;
58 }
59
Substring(CordRep * rep,size_t offset)60 inline CordRep* Substring(CordRep* rep, size_t offset) {
61 return Substring(rep, offset, rep->length - offset);
62 }
63
64 } // namespace
65
Skip(size_t n)66 CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) {
67 int height = 0;
68 size_t index = index_[0];
69 CordRepBtree* node = node_[0];
70 CordRep* edge = node->Edge(index);
71
72 // Overall logic: Find an edge of at least the length we need to skip.
73 // We consume all edges which are smaller (i.e., must be 100% skipped).
74 // If we exhausted all edges on the current level, we move one level
75 // up the tree, and repeat until we either find the edge, or until we hit
76 // the top of the tree meaning the skip exceeds tree->length.
77 while (n >= edge->length) {
78 n -= edge->length;
79 while (++index == node->end()) {
80 if (++height > height_) return {nullptr, n};
81 node = node_[height];
82 index = index_[height];
83 }
84 edge = node->Edge(index);
85 }
86
87 // If we moved up the tree, descend down to the leaf level, consuming all
88 // edges that must be skipped.
89 while (height > 0) {
90 node = edge->btree();
91 index_[height] = index;
92 node_[--height] = node;
93 index = node->begin();
94 edge = node->Edge(index);
95 while (n >= edge->length) {
96 n -= edge->length;
97 ++index;
98 assert(index != node->end());
99 edge = node->Edge(index);
100 }
101 }
102 index_[0] = index;
103 return {edge, n};
104 }
105
Read(size_t edge_offset,size_t n)106 ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) {
107 int height = 0;
108 size_t length = edge_offset + n;
109 size_t index = index_[0];
110 CordRepBtree* node = node_[0];
111 CordRep* edge = node->Edge(index);
112 assert(edge_offset < edge->length);
113
114 if (length < edge->length) {
115 return {Substring(edge, edge_offset, n), length};
116 }
117
118 // Similar to 'Skip', we consume all edges that are inside the 'length' of
119 // data that needs to be read. If we exhaust the current level, we move one
120 // level up the tree and repeat until we hit the final edge that must be
121 // (partially) read. We consume all edges into `subtree`.
122 CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset));
123 size_t subtree_end = 1;
124 do {
125 length -= edge->length;
126 while (++index == node->end()) {
127 index_[height] = index;
128 if (++height > height_) {
129 subtree->set_end(subtree_end);
130 if (length == 0) return {subtree, 0};
131 CordRep::Unref(subtree);
132 return {nullptr, length};
133 }
134 if (length != 0) {
135 subtree->set_end(subtree_end);
136 subtree = CordRepBtree::New(subtree);
137 subtree_end = 1;
138 }
139 node = node_[height];
140 index = index_[height];
141 }
142 edge = node->Edge(index);
143 if (length >= edge->length) {
144 subtree->length += edge->length;
145 subtree->edges_[subtree_end++] = CordRep::Ref(edge);
146 }
147 } while (length >= edge->length);
148 CordRepBtree* tree = subtree;
149 subtree->length += length;
150
151 // If we moved up the tree, descend down to the leaf level, consuming all
152 // edges that must be read, adding 'down' nodes to `subtree`.
153 while (height > 0) {
154 node = edge->btree();
155 index_[height] = index;
156 node_[--height] = node;
157 index = node->begin();
158 edge = node->Edge(index);
159
160 if (length != 0) {
161 CordRepBtree* right = CordRepBtree::New(height);
162 right->length = length;
163 subtree->edges_[subtree_end++] = right;
164 subtree->set_end(subtree_end);
165 subtree = right;
166 subtree_end = 0;
167 while (length >= edge->length) {
168 subtree->edges_[subtree_end++] = CordRep::Ref(edge);
169 length -= edge->length;
170 edge = node->Edge(++index);
171 }
172 }
173 }
174 // Add any (partial) edge still remaining at the leaf level.
175 if (length != 0) {
176 subtree->edges_[subtree_end++] = Substring(edge, 0, length);
177 }
178 subtree->set_end(subtree_end);
179 index_[0] = index;
180 return {tree, length};
181 }
182
183 } // namespace cord_internal
184 ABSL_NAMESPACE_END
185 } // namespace absl
186