1 /* packed_data.c : implement the packed binary stream data structure
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22
23 #include <apr_tables.h>
24
25 #include "svn_string.h"
26 #include "svn_sorts.h"
27 #include "private/svn_string_private.h"
28 #include "private/svn_subr_private.h"
29 #include "private/svn_delta_private.h"
30 #include "private/svn_packed_data.h"
31
32 #include "svn_private_config.h"
33
34
35
36 /* Private int stream data referenced by svn_packed__int_stream_t.
37 */
38 typedef struct packed_int_private_t
39 {
40 /* First sub-stream, if any. NULL otherwise. */
41 svn_packed__int_stream_t *first_substream;
42
43 /* Last sub-stream, if any. NULL otherwise. */
44 svn_packed__int_stream_t *last_substream;
45
46 /* Current sub-stream to read from / write to, if any. NULL otherwise.
47 This will be initialized to FIRST_SUBSTREAM and then advanced in a
48 round-robin scheme after each number being read. */
49 svn_packed__int_stream_t *current_substream;
50
51 /* Number of sub-streams. */
52 apr_size_t substream_count;
53
54 /* Next (sibling) integer stream. If this is the last one, points to
55 the first in the list (i.e. this forms a ring list). Never NULL. */
56 svn_packed__int_stream_t *next;
57
58 /* 7b/8b encoded integer values (previously diff'ed and sign-handled,
59 if indicated by the flags below). The contents are disjoint from
60 the unparsed number buffer. May be NULL while not written to. */
61 svn_stringbuf_t *packed;
62
63 /* Initialized to 0. Latest value written to / read from PACKED.
64 Undefined if DIFF is FALSE. */
65 apr_uint64_t last_value;
66
67 /* Deltify data before storing it in PACKED. */
68 svn_boolean_t diff;
69
70 /* Numbers are likely to contain negative values with small absolutes.
71 If TRUE, store the signed bit in LSB before encoding. */
72 svn_boolean_t is_signed;
73
74 /* Number of integers in this stream. */
75 apr_size_t item_count;
76
77 /* TRUE for the last stream in a list of siblings. */
78 svn_boolean_t is_last;
79
80 /* Pool to use for allocations. */
81 apr_pool_t *pool;
82 } packed_int_private_t;
83
84 /* A byte sequence stream. Please note that NEXT is defined different
85 * from the NEXT member in integer streams.
86 */
87 struct svn_packed__byte_stream_t
88 {
89 /* First sub-stream, if any. NULL otherwise. */
90 svn_packed__byte_stream_t *first_substream;
91
92 /* Last sub-stream, if any. NULL otherwise. */
93 svn_packed__byte_stream_t *last_substream;
94
95 /* Next (sibling) byte sequence stream, if any. NULL otherwise. */
96 svn_packed__byte_stream_t *next;
97
98 /* Stream to store the sequence lengths. */
99 svn_packed__int_stream_t *lengths_stream;
100
101 /* It's index (relative to its parent). */
102 apr_size_t lengths_stream_index;
103
104 /* Concatenated byte sequences. */
105 svn_stringbuf_t *packed;
106
107 /* Pool to use for allocations. */
108 apr_pool_t *pool;
109 };
110
111 /* The serialization root object. It references the top-level streams.
112 */
113 struct svn_packed__data_root_t
114 {
115 /* First top-level integer stream, if any. NULL otherwise. */
116 svn_packed__int_stream_t *first_int_stream;
117
118 /* Last top-level integer stream, if any. NULL otherwise. */
119 svn_packed__int_stream_t *last_int_stream;
120
121 /* Number of top-level integer streams. */
122 apr_size_t int_stream_count;
123
124 /* First top-level byte sequence stream, if any. NULL otherwise. */
125 svn_packed__byte_stream_t *first_byte_stream;
126
127 /* Last top-level byte sequence stream, if any. NULL otherwise. */
128 svn_packed__byte_stream_t *last_byte_stream;
129
130 /* Number of top-level byte sequence streams. */
131 apr_size_t byte_stream_count;
132
133 /* Pool to use for allocations. */
134 apr_pool_t *pool;
135 };
136
137 /* Write access. */
138
139 svn_packed__data_root_t *
svn_packed__data_create_root(apr_pool_t * pool)140 svn_packed__data_create_root(apr_pool_t *pool)
141 {
142 svn_packed__data_root_t *root = apr_pcalloc(pool, sizeof(*root));
143 root->pool = pool;
144
145 return root;
146 }
147
148 svn_packed__int_stream_t *
svn_packed__create_int_stream(svn_packed__data_root_t * root,svn_boolean_t diff,svn_boolean_t signed_ints)149 svn_packed__create_int_stream(svn_packed__data_root_t *root,
150 svn_boolean_t diff,
151 svn_boolean_t signed_ints)
152 {
153 /* allocate and initialize the stream node */
154 packed_int_private_t *private_data
155 = apr_pcalloc(root->pool, sizeof(*private_data));
156 svn_packed__int_stream_t *stream
157 = apr_palloc(root->pool, sizeof(*stream));
158
159 private_data->diff = diff;
160 private_data->is_signed = signed_ints;
161 private_data->is_last = TRUE;
162 private_data->pool = root->pool;
163
164 stream->buffer_used = 0;
165 stream->private_data = private_data;
166
167 /* maintain the ring list */
168 if (root->last_int_stream)
169 {
170 packed_int_private_t *previous_private_data
171 = root->last_int_stream->private_data;
172 previous_private_data->next = stream;
173 previous_private_data->is_last = FALSE;
174 }
175 else
176 {
177 root->first_int_stream = stream;
178 }
179
180 root->last_int_stream = stream;
181 root->int_stream_count++;
182
183 return stream;
184 }
185
186 svn_packed__int_stream_t *
svn_packed__create_int_substream(svn_packed__int_stream_t * parent,svn_boolean_t diff,svn_boolean_t signed_ints)187 svn_packed__create_int_substream(svn_packed__int_stream_t *parent,
188 svn_boolean_t diff,
189 svn_boolean_t signed_ints)
190 {
191 packed_int_private_t *parent_private = parent->private_data;
192
193 /* allocate and initialize the stream node */
194 packed_int_private_t *private_data
195 = apr_pcalloc(parent_private->pool, sizeof(*private_data));
196 svn_packed__int_stream_t *stream
197 = apr_palloc(parent_private->pool, sizeof(*stream));
198
199 private_data->diff = diff;
200 private_data->is_signed = signed_ints;
201 private_data->is_last = TRUE;
202 private_data->pool = parent_private->pool;
203
204 stream->buffer_used = 0;
205 stream->private_data = private_data;
206
207 /* maintain the ring list */
208 if (parent_private->last_substream)
209 {
210 packed_int_private_t *previous_private_data
211 = parent_private->last_substream->private_data;
212 previous_private_data->next = stream;
213 previous_private_data->is_last = FALSE;
214 }
215 else
216 {
217 parent_private->first_substream = stream;
218 parent_private->current_substream = stream;
219 }
220
221 parent_private->last_substream = stream;
222 parent_private->substream_count++;
223 private_data->next = parent_private->first_substream;
224
225 return stream;
226 }
227
228 /* Returns a new top-level byte sequence stream for ROOT but does not
229 * initialize the LENGTH_STREAM member.
230 */
231 static svn_packed__byte_stream_t *
create_bytes_stream_body(svn_packed__data_root_t * root)232 create_bytes_stream_body(svn_packed__data_root_t *root)
233 {
234 svn_packed__byte_stream_t *stream
235 = apr_pcalloc(root->pool, sizeof(*stream));
236
237 stream->packed = svn_stringbuf_create_empty(root->pool);
238
239 if (root->last_byte_stream)
240 root->last_byte_stream->next = stream;
241 else
242 root->first_byte_stream = stream;
243
244 root->last_byte_stream = stream;
245 root->byte_stream_count++;
246
247 return stream;
248 }
249
250 svn_packed__byte_stream_t *
svn_packed__create_bytes_stream(svn_packed__data_root_t * root)251 svn_packed__create_bytes_stream(svn_packed__data_root_t *root)
252 {
253 svn_packed__byte_stream_t *stream
254 = create_bytes_stream_body(root);
255
256 stream->lengths_stream_index = root->int_stream_count;
257 stream->lengths_stream = svn_packed__create_int_stream(root, FALSE, FALSE);
258
259 return stream;
260 }
261
262 /* Write the 7b/8b representation of VALUE into BUFFER. BUFFER must
263 * provide at least 10 bytes space.
264 * Returns the first position behind the written data.
265 */
266 static unsigned char *
write_packed_uint_body(unsigned char * buffer,apr_uint64_t value)267 write_packed_uint_body(unsigned char *buffer, apr_uint64_t value)
268 {
269 while (value >= 0x80)
270 {
271 *(buffer++) = (unsigned char)((value % 0x80) + 0x80);
272 value /= 0x80;
273 }
274
275 *(buffer++) = (unsigned char)value;
276 return buffer;
277 }
278
279 /* Return remapped VALUE.
280 *
281 * Due to sign conversion and diff underflow, values close to UINT64_MAX
282 * are almost as frequent as those close to 0. Remap them such that the
283 * MSB is stored in the LSB and the remainder stores the absolute distance
284 * to 0.
285 *
286 * This minimizes the absolute value to store in many scenarios.
287 * Hence, the variable-length representation on disk is shorter, too.
288 */
289 static apr_uint64_t
remap_uint(apr_uint64_t value)290 remap_uint(apr_uint64_t value)
291 {
292 return value & APR_UINT64_C(0x8000000000000000)
293 ? APR_UINT64_MAX - (2 * value)
294 : 2 * value;
295 }
296
297 /* Invert remap_uint. */
298 static apr_uint64_t
unmap_uint(apr_uint64_t value)299 unmap_uint(apr_uint64_t value)
300 {
301 return value & 1
302 ? (APR_UINT64_MAX - value / 2)
303 : value / 2;
304 }
305
306 /* Empty the unprocessed integer buffer in STREAM by either pushing the
307 * data to the sub-streams or writing to the packed data (in case there
308 * are no sub-streams).
309 */
310 static void
data_flush_buffer(svn_packed__int_stream_t * stream)311 data_flush_buffer(svn_packed__int_stream_t *stream)
312 {
313 packed_int_private_t *private_data = stream->private_data;
314 apr_size_t i;
315
316 /* if we have sub-streams, push the data down to them */
317 if (private_data->current_substream)
318 for (i = 0; i < stream->buffer_used; ++i)
319 {
320 packed_int_private_t *current_private_data
321 = private_data->current_substream->private_data;
322
323 svn_packed__add_uint(private_data->current_substream,
324 stream->buffer[i]);
325 private_data->current_substream = current_private_data->next;
326 }
327 else
328 {
329 /* pack the numbers into our local PACKED buffer */
330
331 /* temporary buffer, max 10 bytes required per 7b/8b encoded number */
332 unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE];
333 unsigned char *p = local_buffer;
334
335 /* if configured, deltify numbers before packing them.
336 Since delta may be negative, always use the 'signed' encoding. */
337 if (private_data->diff)
338 {
339 apr_uint64_t last_value = private_data->last_value;
340 for (i = 0; i < stream->buffer_used; ++i)
341 {
342 apr_uint64_t temp = stream->buffer[i];
343 stream->buffer[i] = remap_uint(temp - last_value);
344 last_value = temp;
345 }
346
347 private_data->last_value = last_value;
348 }
349
350 /* if configured and not already done by the deltification above,
351 transform to 'signed' encoding. Store the sign in the LSB and
352 the absolute value (-1 for negative values) in the remaining
353 63 bits. */
354 if (!private_data->diff && private_data->is_signed)
355 for (i = 0; i < stream->buffer_used; ++i)
356 stream->buffer[i] = remap_uint(stream->buffer[i]);
357
358 /* auto-create packed data buffer. Give it some reasonable initial
359 size - just enough for a few tens of values. */
360 if (private_data->packed == NULL)
361 private_data->packed
362 = svn_stringbuf_create_ensure(256, private_data->pool);
363
364 /* encode numbers into our temp buffer. */
365 for (i = 0; i < stream->buffer_used; ++i)
366 p = write_packed_uint_body(p, stream->buffer[i]);
367
368 /* append them to the final packed data */
369 svn_stringbuf_appendbytes(private_data->packed,
370 (char *)local_buffer,
371 p - local_buffer);
372 }
373
374 /* maintain counters */
375 private_data->item_count += stream->buffer_used;
376 stream->buffer_used = 0;
377 }
378
379 void
svn_packed__add_uint(svn_packed__int_stream_t * stream,apr_uint64_t value)380 svn_packed__add_uint(svn_packed__int_stream_t *stream,
381 apr_uint64_t value)
382 {
383 stream->buffer[stream->buffer_used] = value;
384 if (++stream->buffer_used == SVN__PACKED_DATA_BUFFER_SIZE)
385 data_flush_buffer(stream);
386 }
387
388 void
svn_packed__add_int(svn_packed__int_stream_t * stream,apr_int64_t value)389 svn_packed__add_int(svn_packed__int_stream_t *stream,
390 apr_int64_t value)
391 {
392 svn_packed__add_uint(stream, (apr_uint64_t)value);
393 }
394
395 void
svn_packed__add_bytes(svn_packed__byte_stream_t * stream,const char * data,apr_size_t len)396 svn_packed__add_bytes(svn_packed__byte_stream_t *stream,
397 const char *data,
398 apr_size_t len)
399 {
400 svn_packed__add_uint(stream->lengths_stream, len);
401 svn_stringbuf_appendbytes(stream->packed, data, len);
402 }
403
404 /* Append the 7b/8b encoded representation of VALUE to PACKED.
405 */
406 static void
write_packed_uint(svn_stringbuf_t * packed,apr_uint64_t value)407 write_packed_uint(svn_stringbuf_t* packed, apr_uint64_t value)
408 {
409 if (value < 0x80)
410 {
411 svn_stringbuf_appendbyte(packed, (char)value);
412 }
413 else
414 {
415 unsigned char buffer[10];
416 unsigned char *p = write_packed_uint_body(buffer, value);
417
418 svn_stringbuf_appendbytes(packed, (char *)buffer, p - buffer);
419 }
420 }
421
422 /* Recursively write the structure (config parameters, sub-streams, data
423 * sizes) of the STREAM and all its siblings to the TREE_STRUCT buffer.
424 */
425 static void
write_int_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__int_stream_t * stream)426 write_int_stream_structure(svn_stringbuf_t* tree_struct,
427 svn_packed__int_stream_t* stream)
428 {
429 while (stream)
430 {
431 /* store config parameters and number of sub-streams in 1 number */
432 packed_int_private_t *private_data = stream->private_data;
433 write_packed_uint(tree_struct, (private_data->substream_count << 2)
434 + (private_data->diff ? 1 : 0)
435 + (private_data->is_signed ? 2 : 0));
436
437 /* store item count and length their of packed representation */
438 data_flush_buffer(stream);
439
440 write_packed_uint(tree_struct, private_data->item_count);
441 write_packed_uint(tree_struct, private_data->packed
442 ? private_data->packed->len
443 : 0);
444
445 /* append all sub-stream structures */
446 write_int_stream_structure(tree_struct, private_data->first_substream);
447
448 /* continue with next sibling */
449 stream = private_data->is_last ? NULL : private_data->next;
450 }
451 }
452
453 /* Recursively write the structure (sub-streams, data sizes) of the STREAM
454 * and all its siblings to the TREE_STRUCT buffer.
455 */
456 static void
write_byte_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__byte_stream_t * stream)457 write_byte_stream_structure(svn_stringbuf_t* tree_struct,
458 svn_packed__byte_stream_t* stream)
459 {
460 /* for this and all siblings */
461 for (; stream; stream = stream->next)
462 {
463 /* this stream's structure and size */
464 write_packed_uint(tree_struct, 0);
465 write_packed_uint(tree_struct, stream->lengths_stream_index);
466 write_packed_uint(tree_struct, stream->packed->len);
467
468 /* followed by all its sub-streams */
469 write_byte_stream_structure(tree_struct, stream->first_substream);
470 }
471 }
472
473 /* Write the 7b/8b encoded representation of VALUE to STREAM.
474 */
475 static svn_error_t *
write_stream_uint(svn_stream_t * stream,apr_uint64_t value)476 write_stream_uint(svn_stream_t *stream,
477 apr_uint64_t value)
478 {
479 unsigned char buffer[10];
480 apr_size_t count = write_packed_uint_body(buffer, value) - buffer;
481
482 SVN_ERR(svn_stream_write(stream, (char *)buffer, &count));
483
484 return SVN_NO_ERROR;
485 }
486
487 /* Return the total size of all packed data in STREAM, its siblings and
488 * all sub-streams. To get an accurate value, flush all buffers prior to
489 * calling this function.
490 */
491 static apr_size_t
packed_int_stream_length(svn_packed__int_stream_t * stream)492 packed_int_stream_length(svn_packed__int_stream_t *stream)
493 {
494 packed_int_private_t *private_data = stream->private_data;
495 apr_size_t result = private_data->packed ? private_data->packed->len : 0;
496
497 stream = private_data->first_substream;
498 while (stream)
499 {
500 private_data = stream->private_data;
501 result += packed_int_stream_length(stream);
502 stream = private_data->is_last ? NULL : private_data->next;
503 }
504
505 return result;
506 }
507
508 /* Return the total size of all byte sequences data in STREAM, its siblings
509 * and all sub-streams.
510 */
511 static apr_size_t
packed_byte_stream_length(svn_packed__byte_stream_t * stream)512 packed_byte_stream_length(svn_packed__byte_stream_t *stream)
513 {
514 apr_size_t result = stream->packed->len;
515
516 for (stream = stream->first_substream; stream; stream = stream->next)
517 result += packed_byte_stream_length(stream);
518
519 return result;
520 }
521
522 /* Append all packed data in STREAM, its siblings and all sub-streams to
523 * COMBINED.
524 */
525 static void
append_int_stream(svn_packed__int_stream_t * stream,svn_stringbuf_t * combined)526 append_int_stream(svn_packed__int_stream_t *stream,
527 svn_stringbuf_t *combined)
528 {
529 packed_int_private_t *private_data = stream->private_data;
530 if (private_data->packed)
531 svn_stringbuf_appendstr(combined, private_data->packed);
532
533 stream = private_data->first_substream;
534 while (stream)
535 {
536 private_data = stream->private_data;
537 append_int_stream(stream, combined);
538 stream = private_data->is_last ? NULL : private_data->next;
539 }
540 }
541
542 /* Append all byte sequences in STREAM, its siblings and all sub-streams
543 * to COMBINED.
544 */
545 static void
append_byte_stream(svn_packed__byte_stream_t * stream,svn_stringbuf_t * combined)546 append_byte_stream(svn_packed__byte_stream_t *stream,
547 svn_stringbuf_t *combined)
548 {
549 svn_stringbuf_appendstr(combined, stream->packed);
550
551 for (stream = stream->first_substream; stream; stream = stream->next)
552 append_byte_stream(stream, combined);
553 }
554
555 /* Take the binary data in UNCOMPRESSED, zip it into COMPRESSED and write
556 * it to STREAM. COMPRESSED simply acts as a re-usable memory buffer.
557 * Clear all buffers (COMPRESSED, UNCOMPRESSED) at the end of the function.
558 */
559 static svn_error_t *
write_stream_data(svn_stream_t * stream,svn_stringbuf_t * uncompressed,svn_stringbuf_t * compressed)560 write_stream_data(svn_stream_t *stream,
561 svn_stringbuf_t *uncompressed,
562 svn_stringbuf_t *compressed)
563 {
564 SVN_ERR(svn__compress_zlib(uncompressed->data, uncompressed->len,
565 compressed,
566 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT));
567
568 SVN_ERR(write_stream_uint(stream, compressed->len));
569 SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len));
570
571 svn_stringbuf_setempty(uncompressed);
572 svn_stringbuf_setempty(compressed);
573
574 return SVN_NO_ERROR;
575 }
576
577 svn_error_t *
svn_packed__data_write(svn_stream_t * stream,svn_packed__data_root_t * root,apr_pool_t * scratch_pool)578 svn_packed__data_write(svn_stream_t *stream,
579 svn_packed__data_root_t *root,
580 apr_pool_t *scratch_pool)
581 {
582 svn_packed__int_stream_t *int_stream;
583 svn_packed__byte_stream_t *byte_stream;
584
585 /* re-usable data buffers */
586 svn_stringbuf_t *compressed
587 = svn_stringbuf_create_ensure(1024, scratch_pool);
588 svn_stringbuf_t *uncompressed
589 = svn_stringbuf_create_ensure(1024, scratch_pool);
590
591 /* write tree structure */
592 svn_stringbuf_t *tree_struct
593 = svn_stringbuf_create_ensure(127, scratch_pool);
594
595 write_packed_uint(tree_struct, root->int_stream_count);
596 write_int_stream_structure(tree_struct, root->first_int_stream);
597
598 write_packed_uint(tree_struct, root->byte_stream_count);
599 write_byte_stream_structure(tree_struct, root->first_byte_stream);
600
601 SVN_ERR(write_stream_uint(stream, tree_struct->len));
602 SVN_ERR(svn_stream_write(stream, tree_struct->data, &tree_struct->len));
603
604 /* flatten sub-streams, zip them and write them to disk */
605
606 for (int_stream = root->first_int_stream;
607 int_stream;
608 int_stream = ((packed_int_private_t*)int_stream->private_data)->next)
609 {
610 apr_size_t len = packed_int_stream_length(int_stream);
611 svn_stringbuf_ensure(uncompressed, len);
612
613 append_int_stream(int_stream, uncompressed);
614 SVN_ERR(write_stream_data(stream, uncompressed, compressed));
615 }
616
617 for (byte_stream = root->first_byte_stream;
618 byte_stream;
619 byte_stream = byte_stream->next)
620 {
621 apr_size_t len = packed_byte_stream_length(byte_stream);
622 svn_stringbuf_ensure(uncompressed, len);
623
624 append_byte_stream(byte_stream, uncompressed);
625 SVN_ERR(write_stream_data(stream, uncompressed, compressed));
626 }
627
628 return SVN_NO_ERROR;
629 }
630
631
632 /* Read access. */
633
634 svn_packed__int_stream_t *
svn_packed__first_int_stream(svn_packed__data_root_t * root)635 svn_packed__first_int_stream(svn_packed__data_root_t *root)
636 {
637 return root->first_int_stream;
638 }
639
640 svn_packed__byte_stream_t *
svn_packed__first_byte_stream(svn_packed__data_root_t * root)641 svn_packed__first_byte_stream(svn_packed__data_root_t *root)
642 {
643 return root->first_byte_stream;
644 }
645
646 svn_packed__int_stream_t *
svn_packed__next_int_stream(svn_packed__int_stream_t * stream)647 svn_packed__next_int_stream(svn_packed__int_stream_t *stream)
648 {
649 packed_int_private_t *private_data = stream->private_data;
650 return private_data->is_last ? NULL : private_data->next;
651 }
652
653 svn_packed__byte_stream_t *
svn_packed__next_byte_stream(svn_packed__byte_stream_t * stream)654 svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream)
655 {
656 return stream->next;
657 }
658
659 svn_packed__int_stream_t *
svn_packed__first_int_substream(svn_packed__int_stream_t * stream)660 svn_packed__first_int_substream(svn_packed__int_stream_t *stream)
661 {
662 packed_int_private_t *private_data = stream->private_data;
663 return private_data->first_substream;
664 }
665
666 apr_size_t
svn_packed__int_count(svn_packed__int_stream_t * stream)667 svn_packed__int_count(svn_packed__int_stream_t *stream)
668 {
669 packed_int_private_t *private_data = stream->private_data;
670 return private_data->item_count + stream->buffer_used;
671 }
672
673 apr_size_t
svn_packed__byte_count(svn_packed__byte_stream_t * stream)674 svn_packed__byte_count(svn_packed__byte_stream_t *stream)
675 {
676 return stream->packed->len;
677 }
678
679 apr_size_t
svn_packed__byte_block_count(svn_packed__byte_stream_t * stream)680 svn_packed__byte_block_count(svn_packed__byte_stream_t *stream)
681 {
682 return svn_packed__int_count(stream->lengths_stream);
683 }
684
685 /* Read one 7b/8b encoded value from *P and return it in *RESULT. Returns
686 * the first position after the parsed data.
687 *
688 * Overflows will be detected in the sense that it will end parsing the
689 * input but the result is undefined.
690 */
691 static unsigned char *
read_packed_uint_body(unsigned char * p,apr_uint64_t * result)692 read_packed_uint_body(unsigned char *p, apr_uint64_t *result)
693 {
694 if (*p < 0x80)
695 {
696 *result = *p;
697 }
698 else
699 {
700 apr_uint64_t shift = 0;
701 apr_uint64_t value = 0;
702 while (*p >= 0x80)
703 {
704 value += (apr_uint64_t)(*p & 0x7f) << shift;
705 ++p;
706
707 shift += 7;
708 if (shift > 64)
709 {
710 /* a definite overflow. Note, that numbers of 65 .. 70
711 bits will not be detected as an overflow as they don't
712 threaten to exceed the input buffer. */
713 *result = 0;
714 return p;
715 }
716 }
717
718 *result = value + ((apr_uint64_t)*p << shift);
719 }
720
721 return ++p;
722 }
723
724 /* Read one 7b/8b encoded value from STREAM and return it in *RESULT.
725 *
726 * Overflows will be detected in the sense that it will end parsing the
727 * input but the result is undefined.
728 */
729 static svn_error_t *
read_stream_uint(svn_stream_t * stream,apr_uint64_t * result)730 read_stream_uint(svn_stream_t *stream, apr_uint64_t *result)
731 {
732 apr_uint64_t shift = 0;
733 apr_uint64_t value = 0;
734 unsigned char c;
735
736 do
737 {
738 apr_size_t len = 1;
739 SVN_ERR(svn_stream_read_full(stream, (char *)&c, &len));
740 if (len != 1)
741 return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
742 _("Unexpected end of stream"));
743
744 value += (apr_uint64_t)(c & 0x7f) << shift;
745 shift += 7;
746 if (shift > 64)
747 return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
748 _("Integer representation too long"));
749 }
750 while (c >= 0x80);
751
752 *result = value;
753 return SVN_NO_ERROR;
754 }
755
756 /* Extract and return the next integer from PACKED and make PACKED point
757 * to the next integer.
758 */
759 static apr_uint64_t
read_packed_uint(svn_stringbuf_t * packed)760 read_packed_uint(svn_stringbuf_t *packed)
761 {
762 apr_uint64_t result = 0;
763 unsigned char *p = (unsigned char *)packed->data;
764 apr_size_t read = read_packed_uint_body(p, &result) - p;
765
766 if (read > packed->len)
767 read = packed->len;
768
769 packed->data += read;
770 packed->blocksize -= read;
771 packed->len -= read;
772
773 return result;
774 }
775
776 /* Ensure that STREAM contains at least one item in its buffer.
777 */
778 static void
svn_packed__data_fill_buffer(svn_packed__int_stream_t * stream)779 svn_packed__data_fill_buffer(svn_packed__int_stream_t *stream)
780 {
781 packed_int_private_t *private_data = stream->private_data;
782 apr_size_t i;
783 apr_size_t end = MIN(SVN__PACKED_DATA_BUFFER_SIZE,
784 private_data->item_count);
785
786 /* in case, some user calls us explicitly without a good reason ... */
787 if (stream->buffer_used)
788 return;
789
790 /* can we get data from the sub-streams or do we have to decode it from
791 our local packed container? */
792 if (private_data->current_substream)
793 for (i = end; i > 0; --i)
794 {
795 packed_int_private_t *current_private_data
796 = private_data->current_substream->private_data;
797 stream->buffer[i-1]
798 = svn_packed__get_uint(private_data->current_substream);
799 private_data->current_substream = current_private_data->next;
800 }
801 else
802 {
803 /* use this local buffer only if the packed data is shorter than this.
804 The goal is that read_packed_uint_body doesn't need check for
805 overflows. */
806 unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE];
807 unsigned char *p;
808 unsigned char *start;
809 apr_size_t packed_read;
810
811 if (private_data->packed->len < sizeof(local_buffer))
812 {
813 apr_size_t trail = sizeof(local_buffer) - private_data->packed->len;
814 memcpy(local_buffer,
815 private_data->packed->data,
816 private_data->packed->len);
817 memset(local_buffer + private_data->packed->len, 0, MIN(trail, end));
818
819 p = local_buffer;
820 }
821 else
822 p = (unsigned char *)private_data->packed->data;
823
824 /* unpack numbers */
825 start = p;
826 for (i = end; i > 0; --i)
827 p = read_packed_uint_body(p, &stream->buffer[i-1]);
828
829 /* adjust remaining packed data buffer */
830 packed_read = p - start;
831 private_data->packed->data += packed_read;
832 private_data->packed->len -= packed_read;
833 private_data->packed->blocksize -= packed_read;
834
835 /* undeltify numbers, if configured */
836 if (private_data->diff)
837 {
838 apr_uint64_t last_value = private_data->last_value;
839 for (i = end; i > 0; --i)
840 {
841 last_value += unmap_uint(stream->buffer[i-1]);
842 stream->buffer[i-1] = last_value;
843 }
844
845 private_data->last_value = last_value;
846 }
847
848 /* handle signed values, if configured and not handled already */
849 if (!private_data->diff && private_data->is_signed)
850 for (i = 0; i < end; ++i)
851 stream->buffer[i] = unmap_uint(stream->buffer[i]);
852 }
853
854 stream->buffer_used = end;
855 private_data->item_count -= end;
856 }
857
858 apr_uint64_t
svn_packed__get_uint(svn_packed__int_stream_t * stream)859 svn_packed__get_uint(svn_packed__int_stream_t *stream)
860 {
861 if (stream->buffer_used == 0)
862 svn_packed__data_fill_buffer(stream);
863
864 return stream->buffer_used ? stream->buffer[--stream->buffer_used] : 0;
865 }
866
867 apr_int64_t
svn_packed__get_int(svn_packed__int_stream_t * stream)868 svn_packed__get_int(svn_packed__int_stream_t *stream)
869 {
870 return (apr_int64_t)svn_packed__get_uint(stream);
871 }
872
873 const char *
svn_packed__get_bytes(svn_packed__byte_stream_t * stream,apr_size_t * len)874 svn_packed__get_bytes(svn_packed__byte_stream_t *stream,
875 apr_size_t *len)
876 {
877 const char *result = stream->packed->data;
878 apr_size_t count = (apr_size_t)svn_packed__get_uint(stream->lengths_stream);
879
880 if (count > stream->packed->len)
881 count = stream->packed->len;
882
883 /* advance packed buffer */
884 stream->packed->data += count;
885 stream->packed->len -= count;
886 stream->packed->blocksize -= count;
887
888 *len = count;
889 return result;
890 }
891
892 /* Read the integer stream structure and recreate it in STREAM, including
893 * sub-streams, from TREE_STRUCT.
894 */
895 static void
read_int_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__int_stream_t * stream)896 read_int_stream_structure(svn_stringbuf_t *tree_struct,
897 svn_packed__int_stream_t *stream)
898 {
899 packed_int_private_t *private_data = stream->private_data;
900 apr_uint64_t value = read_packed_uint(tree_struct);
901 apr_size_t substream_count;
902 apr_size_t i;
903
904 /* extract local parameters */
905 private_data->diff = (value & 1) != 0;
906 private_data->is_signed = (value & 2) != 0;
907 substream_count = (apr_size_t)(value >> 2);
908
909 /* read item count & packed size; allocate packed data buffer */
910 private_data->item_count = (apr_size_t)read_packed_uint(tree_struct);
911 value = read_packed_uint(tree_struct);
912 if (value)
913 {
914 private_data->packed = svn_stringbuf_create_ensure((apr_size_t)value,
915 private_data->pool);
916 private_data->packed->len = (apr_size_t)value;
917 }
918
919 /* add sub-streams and read their config, too */
920 for (i = 0; i < substream_count; ++i)
921 read_int_stream_structure(tree_struct,
922 svn_packed__create_int_substream(stream,
923 FALSE,
924 FALSE));
925 }
926
927 /* Read the integer stream structure and recreate it in STREAM, including
928 * sub-streams, from TREE_STRUCT. FIRST_INT_STREAM is the integer stream
929 * that would correspond to lengths_stream_index 0.
930 */
931 static void
read_byte_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__byte_stream_t * stream,svn_packed__int_stream_t * first_int_stream)932 read_byte_stream_structure(svn_stringbuf_t *tree_struct,
933 svn_packed__byte_stream_t *stream,
934 svn_packed__int_stream_t *first_int_stream)
935 {
936 apr_size_t lengths_stream_index;
937 apr_size_t packed_size;
938 apr_size_t i;
939
940 /* read parameters from the TREE_STRUCT buffer */
941 (void) (apr_size_t)read_packed_uint(tree_struct); /* discard first uint */
942 lengths_stream_index = (apr_size_t)read_packed_uint(tree_struct);
943 packed_size = (apr_size_t)read_packed_uint(tree_struct);
944
945 /* allocate byte sequence buffer size */
946 svn_stringbuf_ensure(stream->packed, packed_size);
947 stream->packed->len = packed_size;
948
949 /* navigate to the (already existing) lengths_stream */
950 stream->lengths_stream_index = lengths_stream_index;
951 stream->lengths_stream = first_int_stream;
952 for (i = 0; i < lengths_stream_index; ++i)
953 {
954 packed_int_private_t *length_private
955 = stream->lengths_stream->private_data;
956 stream->lengths_stream = length_private->next;
957 }
958 }
959
960 /* Read a compressed block from STREAM and uncompress it into UNCOMPRESSED.
961 * UNCOMPRESSED_LEN is the expected size of the stream. COMPRESSED is a
962 * re-used buffer for temporary data.
963 */
964 static svn_error_t *
read_stream_data(svn_stream_t * stream,apr_size_t uncompressed_len,svn_stringbuf_t * uncompressed,svn_stringbuf_t * compressed)965 read_stream_data(svn_stream_t *stream,
966 apr_size_t uncompressed_len,
967 svn_stringbuf_t *uncompressed,
968 svn_stringbuf_t *compressed)
969 {
970 apr_uint64_t len;
971 apr_size_t compressed_len;
972
973 SVN_ERR(read_stream_uint(stream, &len));
974 compressed_len = (apr_size_t)len;
975
976 svn_stringbuf_ensure(compressed, compressed_len);
977 compressed->len = compressed_len;
978 SVN_ERR(svn_stream_read_full(stream, compressed->data, &compressed->len));
979 compressed->data[compressed_len] = '\0';
980
981 SVN_ERR(svn__decompress_zlib(compressed->data, compressed->len,
982 uncompressed, uncompressed_len));
983
984 return SVN_NO_ERROR;
985 }
986
987 /* Read the packed contents from COMBINED, starting at *OFFSET and store
988 * it in STREAM. Update *OFFSET to point to the next stream's data and
989 * continue with the sub-streams.
990 */
991 static void
unflatten_int_stream(svn_packed__int_stream_t * stream,svn_stringbuf_t * combined,apr_size_t * offset)992 unflatten_int_stream(svn_packed__int_stream_t *stream,
993 svn_stringbuf_t *combined,
994 apr_size_t *offset)
995 {
996 packed_int_private_t *private_data = stream->private_data;
997 if (private_data->packed)
998 {
999 memcpy(private_data->packed->data,
1000 combined->data + *offset,
1001 private_data->packed->len);
1002
1003 private_data->packed->data[private_data->packed->len] = '\0';
1004 *offset += private_data->packed->len;
1005 }
1006
1007 stream = private_data->first_substream;
1008 while (stream)
1009 {
1010 private_data = stream->private_data;
1011 unflatten_int_stream(stream, combined, offset);
1012 stream = private_data->is_last ? NULL : private_data->next;
1013 }
1014 }
1015
1016 /* Read the packed contents from COMBINED, starting at *OFFSET and store
1017 * it in STREAM. Update *OFFSET to point to the next stream's data and
1018 * continue with the sub-streams.
1019 */
1020 static void
unflatten_byte_stream(svn_packed__byte_stream_t * stream,svn_stringbuf_t * combined,apr_size_t * offset)1021 unflatten_byte_stream(svn_packed__byte_stream_t *stream,
1022 svn_stringbuf_t *combined,
1023 apr_size_t *offset)
1024 {
1025 memcpy(stream->packed->data,
1026 combined->data + *offset,
1027 stream->packed->len);
1028 stream->packed->data[stream->packed->len] = '\0';
1029
1030 *offset += stream->packed->len;
1031 for (stream = stream->first_substream; stream; stream = stream->next)
1032 unflatten_byte_stream(stream, combined, offset);
1033 }
1034
1035 svn_error_t *
svn_packed__data_read(svn_packed__data_root_t ** root_p,svn_stream_t * stream,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1036 svn_packed__data_read(svn_packed__data_root_t **root_p,
1037 svn_stream_t *stream,
1038 apr_pool_t *result_pool,
1039 apr_pool_t *scratch_pool)
1040 {
1041 apr_uint64_t i;
1042 apr_uint64_t count;
1043
1044 svn_packed__int_stream_t *int_stream;
1045 svn_packed__byte_stream_t *byte_stream;
1046 svn_packed__data_root_t *root = svn_packed__data_create_root(result_pool);
1047
1048 svn_stringbuf_t *compressed
1049 = svn_stringbuf_create_ensure(1024, scratch_pool);
1050 svn_stringbuf_t *uncompressed
1051 = svn_stringbuf_create_ensure(1024, scratch_pool);
1052
1053 /* read tree structure */
1054
1055 apr_uint64_t tree_struct_size;
1056 svn_stringbuf_t *tree_struct;
1057
1058 SVN_ERR(read_stream_uint(stream, &tree_struct_size));
1059 tree_struct
1060 = svn_stringbuf_create_ensure((apr_size_t)tree_struct_size, scratch_pool);
1061 tree_struct->len = (apr_size_t)tree_struct_size;
1062
1063 SVN_ERR(svn_stream_read_full(stream, tree_struct->data, &tree_struct->len));
1064 tree_struct->data[tree_struct->len] = '\0';
1065
1066 /* reconstruct tree structure */
1067
1068 count = read_packed_uint(tree_struct);
1069 for (i = 0; i < count; ++i)
1070 read_int_stream_structure(tree_struct,
1071 svn_packed__create_int_stream(root, FALSE,
1072 FALSE));
1073
1074 count = read_packed_uint(tree_struct);
1075 for (i = 0; i < count; ++i)
1076 read_byte_stream_structure(tree_struct,
1077 create_bytes_stream_body(root),
1078 root->first_int_stream);
1079
1080 /* read sub-stream data from disk, unzip it and buffer it */
1081
1082 for (int_stream = root->first_int_stream;
1083 int_stream;
1084 int_stream = ((packed_int_private_t*)int_stream->private_data)->next)
1085 {
1086 apr_size_t offset = 0;
1087 SVN_ERR(read_stream_data(stream,
1088 packed_int_stream_length(int_stream),
1089 uncompressed, compressed));
1090 unflatten_int_stream(int_stream, uncompressed, &offset);
1091 }
1092
1093 for (byte_stream = root->first_byte_stream;
1094 byte_stream;
1095 byte_stream = byte_stream->next)
1096 {
1097 apr_size_t offset = 0;
1098 SVN_ERR(read_stream_data(stream,
1099 packed_byte_stream_length(byte_stream),
1100 uncompressed, compressed));
1101 unflatten_byte_stream(byte_stream, uncompressed, &offset);
1102 }
1103
1104 *root_p = root;
1105 return SVN_NO_ERROR;
1106 }
1107