1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/sync/base/unique_position.h"
6
7 #include <algorithm>
8 #include <limits>
9
10 #include "base/logging.h"
11 #include "base/notreached.h"
12 #include "base/rand_util.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/trace_event/memory_usage_estimator.h"
16 #include "components/sync/protocol/unique_position.pb.h"
17 #include "third_party/zlib/zlib.h"
18
19 namespace syncer {
20
21 constexpr size_t UniquePosition::kSuffixLength;
22 constexpr size_t UniquePosition::kCompressBytesThreshold;
23
24 // static.
IsValidSuffix(const std::string & suffix)25 bool UniquePosition::IsValidSuffix(const std::string& suffix) {
26 // The suffix must be exactly the specified length, otherwise unique suffixes
27 // are not sufficient to guarantee unique positions (because prefix + suffix
28 // == p + refixsuffix).
29 return suffix.length() == kSuffixLength && suffix[kSuffixLength - 1] != 0;
30 }
31
32 // static.
IsValidBytes(const std::string & bytes)33 bool UniquePosition::IsValidBytes(const std::string& bytes) {
34 // The first condition ensures that our suffix uniqueness is sufficient to
35 // guarantee position uniqueness. Otherwise, it's possible the end of some
36 // prefix + some short suffix == some long suffix.
37 // The second condition ensures that FindSmallerWithSuffix can always return a
38 // result.
39 return bytes.length() >= kSuffixLength && bytes[bytes.length() - 1] != 0;
40 }
41
42 // static.
RandomSuffix()43 std::string UniquePosition::RandomSuffix() {
44 // Users random data for all but the last byte. The last byte must not be
45 // zero. We arbitrarily set it to 0x7f.
46 return base::RandBytesAsString(kSuffixLength - 1) + "\x7f";
47 }
48
49 // static.
CreateInvalid()50 UniquePosition UniquePosition::CreateInvalid() {
51 UniquePosition pos;
52 DCHECK(!pos.IsValid());
53 return pos;
54 }
55
56 // static.
FromProto(const sync_pb::UniquePosition & proto)57 UniquePosition UniquePosition::FromProto(const sync_pb::UniquePosition& proto) {
58 if (proto.has_custom_compressed_v1()) {
59 return UniquePosition(proto.custom_compressed_v1());
60 } else if (proto.has_value() && !proto.value().empty()) {
61 return UniquePosition(Compress(proto.value()));
62 } else if (proto.has_compressed_value() && proto.has_uncompressed_length()) {
63 uLongf uncompressed_len = proto.uncompressed_length();
64 std::string un_gzipped;
65
66 un_gzipped.resize(uncompressed_len);
67 int result = uncompress(
68 reinterpret_cast<Bytef*>(base::data(un_gzipped)), &uncompressed_len,
69 reinterpret_cast<const Bytef*>(proto.compressed_value().data()),
70 proto.compressed_value().size());
71 if (result != Z_OK) {
72 DLOG(ERROR) << "Unzip failed " << result;
73 return UniquePosition::CreateInvalid();
74 }
75 if (uncompressed_len != proto.uncompressed_length()) {
76 DLOG(ERROR) << "Uncompressed length " << uncompressed_len
77 << " did not match specified length "
78 << proto.uncompressed_length();
79 return UniquePosition::CreateInvalid();
80 }
81 return UniquePosition(Compress(un_gzipped));
82 } else {
83 return UniquePosition::CreateInvalid();
84 }
85 }
86
87 // static.
FromInt64(int64_t x,const std::string & suffix)88 UniquePosition UniquePosition::FromInt64(int64_t x, const std::string& suffix) {
89 uint64_t y = static_cast<uint64_t>(x);
90 y ^= 0x8000000000000000ULL; // Make it non-negative.
91 std::string bytes(8, 0);
92 for (int i = 7; i >= 0; --i) {
93 bytes[i] = static_cast<uint8_t>(y);
94 y >>= 8;
95 }
96 return UniquePosition(bytes + suffix, suffix);
97 }
98
99 // static.
InitialPosition(const std::string & suffix)100 UniquePosition UniquePosition::InitialPosition(const std::string& suffix) {
101 DCHECK(IsValidSuffix(suffix));
102 return UniquePosition(suffix, suffix);
103 }
104
105 // static.
Before(const UniquePosition & x,const std::string & suffix)106 UniquePosition UniquePosition::Before(const UniquePosition& x,
107 const std::string& suffix) {
108 DCHECK(IsValidSuffix(suffix));
109 DCHECK(x.IsValid());
110 const std::string& before =
111 FindSmallerWithSuffix(Uncompress(x.compressed_), suffix);
112 return UniquePosition(before + suffix, suffix);
113 }
114
115 // static.
After(const UniquePosition & x,const std::string & suffix)116 UniquePosition UniquePosition::After(const UniquePosition& x,
117 const std::string& suffix) {
118 DCHECK(IsValidSuffix(suffix));
119 DCHECK(x.IsValid());
120 const std::string& after =
121 FindGreaterWithSuffix(Uncompress(x.compressed_), suffix);
122 return UniquePosition(after + suffix, suffix);
123 }
124
125 // static.
Between(const UniquePosition & before,const UniquePosition & after,const std::string & suffix)126 UniquePosition UniquePosition::Between(const UniquePosition& before,
127 const UniquePosition& after,
128 const std::string& suffix) {
129 DCHECK(before.IsValid());
130 DCHECK(after.IsValid());
131 DCHECK(before.LessThan(after));
132 DCHECK(IsValidSuffix(suffix));
133 const std::string& mid = FindBetweenWithSuffix(
134 Uncompress(before.compressed_), Uncompress(after.compressed_), suffix);
135 return UniquePosition(mid + suffix, suffix);
136 }
137
UniquePosition()138 UniquePosition::UniquePosition() : is_valid_(false) {}
139
LessThan(const UniquePosition & other) const140 bool UniquePosition::LessThan(const UniquePosition& other) const {
141 DCHECK(this->IsValid());
142 DCHECK(other.IsValid());
143
144 return compressed_ < other.compressed_;
145 }
146
Equals(const UniquePosition & other) const147 bool UniquePosition::Equals(const UniquePosition& other) const {
148 if (!this->IsValid() && !other.IsValid())
149 return true;
150
151 return compressed_ == other.compressed_;
152 }
153
ToProto() const154 sync_pb::UniquePosition UniquePosition::ToProto() const {
155 sync_pb::UniquePosition proto;
156
157 // This is the current preferred foramt.
158 proto.set_custom_compressed_v1(compressed_);
159 return proto;
160
161 // Older clients used to write other formats. We don't bother doing that
162 // anymore because that form of backwards compatibility is expensive. We no
163 // longer want to pay that price just too support clients that have been
164 // obsolete for a long time. See the proto definition for details.
165 }
166
SerializeToString(std::string * blob) const167 void UniquePosition::SerializeToString(std::string* blob) const {
168 DCHECK(blob);
169 ToProto().SerializeToString(blob);
170 }
171
ToInt64() const172 int64_t UniquePosition::ToInt64() const {
173 uint64_t y = 0;
174 const std::string& s = Uncompress(compressed_);
175 size_t l = sizeof(int64_t);
176 if (s.length() < l) {
177 NOTREACHED();
178 l = s.length();
179 }
180 for (size_t i = 0; i < l; ++i) {
181 const uint8_t byte = s[l - i - 1];
182 y |= static_cast<uint64_t>(byte) << (i * 8);
183 }
184 y ^= 0x8000000000000000ULL;
185 // This is technically implementation-defined if y > INT64_MAX, so
186 // we're assuming that we're on a twos-complement machine.
187 return static_cast<int64_t>(y);
188 }
189
IsValid() const190 bool UniquePosition::IsValid() const {
191 return is_valid_;
192 }
193
ToDebugString() const194 std::string UniquePosition::ToDebugString() const {
195 const std::string bytes = Uncompress(compressed_);
196 if (bytes.empty())
197 return std::string("INVALID[]");
198
199 std::string debug_string = base::HexEncode(bytes.data(), bytes.length());
200 if (!IsValid()) {
201 debug_string = "INVALID[" + debug_string + "]";
202 }
203
204 std::string compressed_string =
205 base::HexEncode(compressed_.data(), compressed_.length());
206 debug_string.append(", compressed: " + compressed_string);
207 return debug_string;
208 }
209
GetSuffixForTest() const210 std::string UniquePosition::GetSuffixForTest() const {
211 const std::string bytes = Uncompress(compressed_);
212 const size_t prefix_len = bytes.length() - kSuffixLength;
213 return bytes.substr(prefix_len, std::string::npos);
214 }
215
FindSmallerWithSuffix(const std::string & reference,const std::string & suffix)216 std::string UniquePosition::FindSmallerWithSuffix(const std::string& reference,
217 const std::string& suffix) {
218 size_t ref_zeroes = reference.find_first_not_of('\0');
219 size_t suffix_zeroes = suffix.find_first_not_of('\0');
220
221 // Neither of our inputs are allowed to have trailing zeroes, so the following
222 // must be true.
223 DCHECK_NE(ref_zeroes, std::string::npos);
224 DCHECK_NE(suffix_zeroes, std::string::npos);
225
226 if (suffix_zeroes > ref_zeroes) {
227 // Implies suffix < ref.
228 return std::string();
229 }
230
231 if (suffix.substr(suffix_zeroes) < reference.substr(ref_zeroes)) {
232 // Prepend zeroes so the result has as many zero digits as |reference|.
233 return std::string(ref_zeroes - suffix_zeroes, '\0');
234 } else if (suffix_zeroes > 1) {
235 // Prepend zeroes so the result has one more zero digit than |reference|.
236 // We could also take the "else" branch below, but taking this branch will
237 // give us a smaller result.
238 return std::string(ref_zeroes - suffix_zeroes + 1, '\0');
239 } else {
240 // Prepend zeroes to match those in the |reference|, then something smaller
241 // than the first non-zero digit in |reference|.
242 char lt_digit = static_cast<uint8_t>(reference[ref_zeroes]) / 2;
243 return std::string(ref_zeroes, '\0') + lt_digit;
244 }
245 }
246
247 // static
FindGreaterWithSuffix(const std::string & reference,const std::string & suffix)248 std::string UniquePosition::FindGreaterWithSuffix(const std::string& reference,
249 const std::string& suffix) {
250 size_t ref_FFs =
251 reference.find_first_not_of(std::numeric_limits<uint8_t>::max());
252 size_t suffix_FFs =
253 suffix.find_first_not_of(std::numeric_limits<uint8_t>::max());
254
255 if (ref_FFs == std::string::npos) {
256 ref_FFs = reference.length();
257 }
258 if (suffix_FFs == std::string::npos) {
259 suffix_FFs = suffix.length();
260 }
261
262 if (suffix_FFs > ref_FFs) {
263 // Implies suffix > reference.
264 return std::string();
265 }
266
267 if (suffix.substr(suffix_FFs) > reference.substr(ref_FFs)) {
268 // Prepend FF digits to match those in |reference|.
269 return std::string(ref_FFs - suffix_FFs,
270 std::numeric_limits<uint8_t>::max());
271 } else if (suffix_FFs > 1) {
272 // Prepend enough leading FF digits so result has one more of them than
273 // |reference| does. We could also take the "else" branch below, but this
274 // gives us a smaller result.
275 return std::string(ref_FFs - suffix_FFs + 1,
276 std::numeric_limits<uint8_t>::max());
277 } else {
278 // Prepend FF digits to match those in |reference|, then something larger
279 // than the first non-FF digit in |reference|.
280 char gt_digit = static_cast<uint8_t>(reference[ref_FFs]) +
281 (std::numeric_limits<uint8_t>::max() -
282 static_cast<uint8_t>(reference[ref_FFs]) + 1) /
283 2;
284 return std::string(ref_FFs, std::numeric_limits<uint8_t>::max()) + gt_digit;
285 }
286 }
287
288 // static
FindBetweenWithSuffix(const std::string & before,const std::string & after,const std::string & suffix)289 std::string UniquePosition::FindBetweenWithSuffix(const std::string& before,
290 const std::string& after,
291 const std::string& suffix) {
292 DCHECK(IsValidSuffix(suffix));
293 DCHECK_NE(before, after);
294 DCHECK_LT(before, after);
295
296 std::string mid;
297
298 // Sometimes our suffix puts us where we want to be.
299 if (before < suffix && suffix < after) {
300 return std::string();
301 }
302
303 size_t i = 0;
304 for (; i < std::min(before.length(), after.length()); ++i) {
305 uint8_t a_digit = before[i];
306 uint8_t b_digit = after[i];
307
308 if (b_digit - a_digit >= 2) {
309 mid.push_back(a_digit + (b_digit - a_digit) / 2);
310 return mid;
311 } else if (a_digit == b_digit) {
312 mid.push_back(a_digit);
313
314 // Both strings are equal so far. Will appending the suffix at this point
315 // give us the comparison we're looking for?
316 if (before.substr(i + 1) < suffix && suffix < after.substr(i + 1)) {
317 return mid;
318 }
319 } else {
320 DCHECK_EQ(b_digit - a_digit, 1); // Implied by above if branches.
321
322 // The two options are off by one digit. The choice of whether to round
323 // up or down here will have consequences on what we do with the remaining
324 // digits. Exploring both options is an optimization and is not required
325 // for the correctness of this algorithm.
326
327 // Option A: Round down the current digit. This makes our |mid| <
328 // |after|, no matter what we append afterwards. We then focus on
329 // appending digits until |mid| > |before|.
330 std::string mid_a = mid;
331 mid_a.push_back(a_digit);
332 mid_a.append(FindGreaterWithSuffix(before.substr(i + 1), suffix));
333
334 // Option B: Round up the current digit. This makes our |mid| > |before|,
335 // no matter what we append afterwards. We then focus on appending digits
336 // until |mid| < |after|. Note that this option may not be viable if the
337 // current digit is the last one in |after|, so we skip the option in that
338 // case.
339 if (after.length() > i + 1) {
340 std::string mid_b = mid;
341 mid_b.push_back(b_digit);
342 mid_b.append(FindSmallerWithSuffix(after.substr(i + 1), suffix));
343
344 // Does this give us a shorter position value? If so, use it.
345 if (mid_b.length() < mid_a.length()) {
346 return mid_b;
347 }
348 }
349 return mid_a;
350 }
351 }
352
353 // If we haven't found a midpoint yet, the following must be true:
354 DCHECK_EQ(before.substr(0, i), after.substr(0, i));
355 DCHECK_EQ(before, mid);
356 DCHECK_LT(before.length(), after.length());
357
358 // We know that we'll need to append at least one more byte to |mid| in the
359 // process of making it < |after|. Appending any digit, regardless of the
360 // value, will make |before| < |mid|. Therefore, the following will get us a
361 // valid position.
362
363 mid.append(FindSmallerWithSuffix(after.substr(i), suffix));
364 return mid;
365 }
366
UniquePosition(const std::string & internal_rep)367 UniquePosition::UniquePosition(const std::string& internal_rep)
368 : compressed_(internal_rep),
369 is_valid_(IsValidBytes(Uncompress(internal_rep))) {}
370
UniquePosition(const std::string & uncompressed,const std::string & suffix)371 UniquePosition::UniquePosition(const std::string& uncompressed,
372 const std::string& suffix)
373 : compressed_(Compress(uncompressed)),
374 is_valid_(IsValidBytes(uncompressed)) {
375 DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length());
376 DCHECK(IsValidSuffix(suffix));
377 DCHECK(IsValid());
378 }
379
380 // On custom compression:
381 //
382 // Let C(x) be the compression function and U(x) be the uncompression function.
383 //
384 // This compression scheme has a few special properties. For one, it is
385 // order-preserving. For any two valid position strings x and y:
386 // x < y <=> C(x) < C(y)
387 // This allows us keep the position strings compressed as we sort them.
388 //
389 // The compressed format and the decode algorithm:
390 //
391 // The compressed string is a series of blocks, almost all of which are 8 bytes
392 // in length. The only exception is the last block in the compressed string,
393 // which may be a remainder block, which has length no greater than 7. The
394 // full-length blocks are either repeated character blocks or plain data blocks.
395 // All blocks are entirely self-contained. Their decoded values are independent
396 // from that of their neighbours.
397 //
398 // A repeated character block is encoded into eight bytes and represents between
399 // 4 and 2^31 repeated instances of a given character in the unencoded stream.
400 // The encoding consists of a single character repeated four times, followed by
401 // an encoded count. The encoded count is stored as a big-endian 32 bit
402 // integer. There are 2^31 possible count values, and two encodings for each.
403 // The high encoding is 'enc = kuint32max - count'; the low encoding is 'enc =
404 // count'. At compression time, the algorithm will choose between the two
405 // encodings based on which of the two will maintain the appropriate sort
406 // ordering (by a process which will be described below). The decompression
407 // algorithm need not concern itself with which encoding was used; it needs only
408 // to decode it. The decoded value of this block is "count" instances of the
409 // character that was repeated four times in the first half of this block.
410 //
411 // A plain data block is encoded into eight bytes and represents exactly eight
412 // bytes of data in the unencoded stream. The plain data block must not begin
413 // with the same character repeated four times. It is allowed to contain such a
414 // four-character sequence, just not at the start of the block. The decoded
415 // value of a plain data block is identical to its encoded value.
416 //
417 // A remainder block has length of at most seven. It is a shorter version of
418 // the plain data block. It occurs only at the end of the encoded stream and
419 // represents exactly as many bytes of unencoded data as its own length. Like a
420 // plain data block, the remainder block never begins with the same character
421 // repeated four times. The decoded value of this block is identical to its
422 // encoded value.
423 //
424 // The encode algorithm:
425 //
426 // From the above description, it can be seen that there may be more than one
427 // way to encode a given input string. The encoder must be careful to choose
428 // the encoding that guarantees sort ordering.
429 //
430 // The rules for the encoder are as follows:
431 // 1. Iterate through the input string and produce output blocks one at a time.
432 // 2. Where possible (ie. where the next four bytes of input consist of the
433 // same character repeated four times), produce a repeated data block of
434 // maximum possible length.
435 // 3. If there is at least 8 bytes of data remaining and it is not possible
436 // to produce a repeated character block, produce a plain data block.
437 // 4. If there are less than 8 bytes of data remaining and it is not possible
438 // to produce a repeated character block, produce a remainder block.
439 // 5. When producing a repeated character block, the count encoding must be
440 // chosen in such a way that the sort ordering is maintained. The choice is
441 // best illustrated by way of example:
442 //
443 // When comparing two strings, the first of which begins with of 8
444 // instances of the letter 'B' and the second with 10 instances of the
445 // letter 'B', which of the two should compare lower? The result depends
446 // on the 9th character of the first string, since it will be compared
447 // against the 9th 'B' in the second string. If that character is an 'A',
448 // then the first string will compare lower. If it is a 'C', then the
449 // first string will compare higher.
450 //
451 // The key insight is that the comparison value of a repeated character block
452 // depends on the value of the character that follows it. If the character
453 // follows the repeated character has a value greater than the repeated
454 // character itself, then a shorter run length should translate to a higher
455 // comparison value. Therefore, we encode its count using the low encoding.
456 // Similarly, if the following character is lower, we use the high encoding.
457
458 namespace {
459
460 // Appends an encoded run length to |output_str|.
WriteEncodedRunLength(uint32_t length,bool high_encoding,std::string * output_str)461 static void WriteEncodedRunLength(uint32_t length,
462 bool high_encoding,
463 std::string* output_str) {
464 CHECK_GE(length, 4U);
465 CHECK_LT(length, 0x80000000);
466
467 // Step 1: Invert the count, if necessary, to account for the following digit.
468 uint32_t encoded_length;
469 if (high_encoding) {
470 encoded_length = 0xffffffff - length;
471 } else {
472 encoded_length = length;
473 }
474
475 // Step 2: Write it as big-endian so it compares correctly with memcmp(3).
476 output_str->append(1, 0xff & (encoded_length >> 24U));
477 output_str->append(1, 0xff & (encoded_length >> 16U));
478 output_str->append(1, 0xff & (encoded_length >> 8U));
479 output_str->append(1, 0xff & (encoded_length >> 0U));
480 }
481
482 // Reads an encoded run length for |str| at position |i|.
ReadEncodedRunLength(const std::string & str,size_t i)483 static uint32_t ReadEncodedRunLength(const std::string& str, size_t i) {
484 DCHECK_LE(i + 4, str.length());
485
486 // Step 1: Extract the big-endian count.
487 uint32_t encoded_length =
488 ((uint8_t)(str[i + 3]) << 0) | ((uint8_t)(str[i + 2]) << 8) |
489 ((uint8_t)(str[i + 1]) << 16) | ((uint8_t)(str[i + 0]) << 24);
490
491 // Step 2: If this was an inverted count, un-invert it.
492 uint32_t length;
493 if (encoded_length & 0x80000000) {
494 length = 0xffffffff - encoded_length;
495 } else {
496 length = encoded_length;
497 }
498
499 return length;
500 }
501
502 // A series of four identical chars at the beginning of a block indicates
503 // the beginning of a repeated character block.
IsRepeatedCharPrefix(const std::string & chars,size_t start_index)504 static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) {
505 return chars[start_index] == chars[start_index + 1] &&
506 chars[start_index] == chars[start_index + 2] &&
507 chars[start_index] == chars[start_index + 3];
508 }
509
510 } // namespace
511
512 // static
513 // Wraps the CompressImpl function with a bunch of DCHECKs.
Compress(const std::string & str)514 std::string UniquePosition::Compress(const std::string& str) {
515 DCHECK(IsValidBytes(str));
516 std::string compressed = CompressImpl(str);
517 DCHECK(IsValidCompressed(compressed));
518 DCHECK_EQ(str, Uncompress(compressed));
519 return compressed;
520 }
521
522 // static
523 // Performs the order preserving run length compression of a given input string.
CompressImpl(const std::string & str)524 std::string UniquePosition::CompressImpl(const std::string& str) {
525 std::string output;
526
527 // The compressed length will usually be at least as long as the suffix (28),
528 // since the suffix bytes are mostly random. Most are a few bytes longer; a
529 // small few are tens of bytes longer. Some early tests indicated that
530 // roughly 99% had length 40 or smaller. We guess that pre-sizing for 48 is a
531 // good trade-off, but that has not been confirmed through profiling.
532 output.reserve(48);
533
534 // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the
535 // length of a string of identical digits starting at i.
536 for (size_t i = 0; i < str.length();) {
537 if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) {
538 // Four identical bytes in a row at this position means that we must start
539 // a repeated character block. Begin by outputting those four bytes.
540 output.append(str, i, 4);
541
542 // Determine the size of the run.
543 const char rep_digit = str[i];
544 const size_t runs_until = str.find_first_not_of(rep_digit, i + 4);
545
546 // Handle the 'runs until end' special case specially.
547 size_t run_length;
548 bool encode_high; // True if the next byte is greater than |rep_digit|.
549 if (runs_until == std::string::npos) {
550 run_length = str.length() - i;
551 encode_high = false;
552 } else {
553 run_length = runs_until - i;
554 encode_high = static_cast<uint8_t>(str[runs_until]) >
555 static_cast<uint8_t>(rep_digit);
556 }
557 DCHECK_LT(run_length,
558 static_cast<size_t>(std::numeric_limits<int32_t>::max()))
559 << "This implementation can't encode run-lengths greater than 2^31.";
560
561 WriteEncodedRunLength(run_length, encode_high, &output);
562 i += run_length; // Jump forward by the size of the run length.
563 } else {
564 // Output up to eight bytes without any encoding.
565 const size_t len = std::min(static_cast<size_t>(8), str.length() - i);
566 output.append(str, i, len);
567 i += len; // Jump forward by the amount of input consumed (usually 8).
568 }
569 }
570
571 return output;
572 }
573
574 // static
575 // Uncompresses strings that were compresed with UniquePosition::Compress.
Uncompress(const std::string & str)576 std::string UniquePosition::Uncompress(const std::string& str) {
577 std::string output;
578 size_t i = 0;
579 // Iterate through the compressed string one block at a time.
580 for (i = 0; i + 8 <= str.length(); i += 8) {
581 if (IsRepeatedCharPrefix(str, i)) {
582 // Found a repeated character block. Expand it.
583 const char rep_digit = str[i];
584 uint32_t length = ReadEncodedRunLength(str, i + 4);
585 output.append(length, rep_digit);
586 } else {
587 // Found a regular block. Copy it.
588 output.append(str, i, 8);
589 }
590 }
591 // Copy the remaining bytes that were too small to form a block.
592 output.append(str, i, std::string::npos);
593 return output;
594 }
595
IsValidCompressed(const std::string & str)596 bool UniquePosition::IsValidCompressed(const std::string& str) {
597 for (size_t i = 0; i + 8 <= str.length(); i += 8) {
598 if (IsRepeatedCharPrefix(str, i)) {
599 uint32_t count = ReadEncodedRunLength(str, i + 4);
600 if (count < 4) {
601 // A repeated character block should at least represent the four
602 // characters that started it.
603 return false;
604 }
605 if (str[i] == str[i + 4]) {
606 // Does the next digit after a count match the repeated character? Then
607 // this is not the highest possible count.
608 return false;
609 }
610 }
611 }
612 // We don't bother looking for the existence or checking the validity of
613 // any partial blocks. There's no way they could be invalid anyway.
614 return true;
615 }
616
EstimateMemoryUsage() const617 size_t UniquePosition::EstimateMemoryUsage() const {
618 using base::trace_event::EstimateMemoryUsage;
619 return EstimateMemoryUsage(compressed_);
620 }
621
622 } // namespace syncer
623