1 /* reps.c --- FSX representation container
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22
23 #include "reps.h"
24
25 #include "svn_sorts.h"
26 #include "private/svn_string_private.h"
27 #include "private/svn_packed_data.h"
28 #include "private/svn_temp_serializer.h"
29
30 #include "svn_private_config.h"
31
32 #include "cached_data.h"
33
34 /* Length of the text chunks we hash and match. The algorithm will find
35 * most matches with a length of 2 * MATCH_BLOCKSIZE and only specific
36 * ones that are shorter than MATCH_BLOCKSIZE.
37 *
38 * This should be a power of two and must be a multiple of 8.
39 * Good choices are 32, 64 and 128.
40 */
41 #define MATCH_BLOCKSIZE 64
42
43 /* Limit the total text body within a container to 16MB. Larger values
44 * of up to 2GB are possible but become increasingly impractical as the
45 * container has to be loaded in its entirety before any of it can be read.
46 */
47 #define MAX_TEXT_BODY 0x1000000
48
49 /* Limit the size of the instructions stream. This should not exceed the
50 * text body size limit. */
51 #define MAX_INSTRUCTIONS (MAX_TEXT_BODY / 8)
52
53 /* value of unused hash buckets */
54 #define NO_OFFSET ((apr_uint32_t)(-1))
55
56 /* Byte strings are described by a series of copy instructions that each
57 * do one of the following
58 *
59 * - copy a given number of bytes from the text corpus starting at a
60 * given offset
61 * - reference other instruction and specify how many of instructions of
62 * that sequence shall be executed (i.e. a sub-sequence)
63 * - copy a number of bytes from the base representation buffer starting
64 * at a given offset
65 */
66
67 /* The contents of a fulltext / representation is defined by its first
68 * instruction and the number of instructions to execute.
69 */
70 typedef struct rep_t
71 {
72 apr_uint32_t first_instruction;
73 apr_uint32_t instruction_count;
74 } rep_t;
75
76 /* A single instruction. The instruction type is being encoded in OFFSET.
77 */
78 typedef struct instruction_t
79 {
80 /* Instruction type and offset.
81 * - offset < 0
82 * reference to instruction sub-sequence starting with
83 * container->instructions[-offset].
84 * - 0 <= offset < container->base_text_len
85 * reference to the base text corpus;
86 * start copy at offset
87 * - offset >= container->base_text_len
88 * reference to the text corpus;
89 * start copy at offset-container->base_text_len
90 */
91 apr_int32_t offset;
92
93 /* Number of bytes to copy / instructions to execute
94 */
95 apr_uint32_t count;
96 } instruction_t;
97
98 /* Describe a base fulltext.
99 */
100 typedef struct base_t
101 {
102 /* Revision */
103 svn_revnum_t revision;
104
105 /* Item within that revision */
106 apr_uint64_t item_index;
107
108 /* Priority with which to use this base over others */
109 int priority;
110
111 /* Index into builder->representations that identifies the copy
112 * instructions for this base. */
113 apr_uint32_t rep;
114 } base_t;
115
116 /* Yet another hash data structure. This one tries to be more cache
117 * friendly by putting the first byte of each hashed sequence in a
118 * common array. This array will often fit into L1 or L2 at least and
119 * give a 99% accurate test for a match without giving false negatives.
120 */
121 typedef struct hash_t
122 {
123 /* for used entries i, prefixes[i] == text[offsets[i]]; 0 otherwise.
124 * This allows for a quick check without resolving the double
125 * indirection. */
126 char *prefixes;
127
128 /* for used entries i, offsets[i] is start offset in the text corpus;
129 * NO_OFFSET otherwise.
130 */
131 apr_uint32_t *offsets;
132
133 /* to be used later for optimizations. */
134 apr_uint32_t *last_matches;
135
136 /* number of buckets in this hash, i.e. elements in each array above.
137 * Must be 1 << (8 * sizeof(hash_key_t) - shift) */
138 apr_size_t size;
139
140 /* number of buckets actually in use. Must be <= size. */
141 apr_size_t used;
142
143 /* number of bits to shift right to map a hash_key_t to a bucket index */
144 apr_size_t shift;
145
146 /* pool to use when growing the hash */
147 apr_pool_t *pool;
148 } hash_t;
149
150 /* Hash key type. 32 bits for pseudo-Adler32 hash sums.
151 */
152 typedef apr_uint32_t hash_key_t;
153
154 /* Constructor data structure.
155 */
156 struct svn_fs_x__reps_builder_t
157 {
158 /* file system to read base representations from */
159 svn_fs_t *fs;
160
161 /* text corpus */
162 svn_stringbuf_t *text;
163
164 /* text block hash */
165 hash_t hash;
166
167 /* array of base_t objects describing all bases defined so far */
168 apr_array_header_t *bases;
169
170 /* array of rep_t objects describing all fulltexts (including bases)
171 * added so far */
172 apr_array_header_t *reps;
173
174 /* array of instruction_t objects describing all instructions */
175 apr_array_header_t *instructions;
176
177 /* number of bytes in the text corpus that belongs to bases */
178 apr_size_t base_text_len;
179 };
180
181 /* R/o container.
182 */
183 struct svn_fs_x__reps_t
184 {
185 /* text corpus */
186 const char *text;
187
188 /* length of the text corpus in bytes */
189 apr_size_t text_len;
190
191 /* bases used */
192 const base_t *bases;
193
194 /* number of bases used */
195 apr_size_t base_count;
196
197 /* fulltext i can be reconstructed by executing instructions
198 * first_instructions[i] .. first_instructions[i+1]-1
199 * (this array has one extra element at the end)
200 */
201 const apr_uint32_t *first_instructions;
202
203 /* number of fulltexts (no bases) */
204 apr_size_t rep_count;
205
206 /* instructions */
207 const instruction_t *instructions;
208
209 /* total number of instructions */
210 apr_size_t instruction_count;
211
212 /* offsets > 0 but smaller that this are considered base references */
213 apr_size_t base_text_len;
214 };
215
216 /* describe a section in the extractor's result string that is not filled
217 * yet (but already exists).
218 */
219 typedef struct missing_t
220 {
221 /* start offset within the result string */
222 apr_uint32_t start;
223
224 /* number of bytes to write */
225 apr_uint32_t count;
226
227 /* index into extractor->bases selecting the base representation to
228 * copy from */
229 apr_uint32_t base;
230
231 /* copy source offset within that base representation */
232 apr_uint32_t offset;
233 } missing_t;
234
235 /* Fulltext extractor data structure.
236 */
237 struct svn_fs_x__rep_extractor_t
238 {
239 /* filesystem to read the bases from */
240 svn_fs_t *fs;
241
242 /* fulltext being constructed */
243 svn_stringbuf_t *result;
244
245 /* bases (base_t) yet to process (not used ATM) */
246 apr_array_header_t *bases;
247
248 /* missing sections (missing_t) in result->data that need to be filled,
249 * yet */
250 apr_array_header_t *missing;
251
252 /* pool to use for allocating the above arrays */
253 apr_pool_t *pool;
254 };
255
256 /* Given the ADLER32 checksum for a certain range of MATCH_BLOCKSIZE
257 * bytes, return the checksum for the range excluding the first byte
258 * C_OUT and appending C_IN.
259 */
260 static hash_key_t
hash_key_replace(hash_key_t adler32,const char c_out,const char c_in)261 hash_key_replace(hash_key_t adler32, const char c_out, const char c_in)
262 {
263 adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));
264
265 adler32 -= (unsigned char)c_out;
266 adler32 += (unsigned char)c_in;
267
268 return adler32 + adler32 * 0x10000;
269 }
270
271 /* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
272 at DATA. Return the checksum value. */
273 static hash_key_t
hash_key(const char * data)274 hash_key(const char *data)
275 {
276 const unsigned char *input = (const unsigned char *)data;
277 const unsigned char *last = input + MATCH_BLOCKSIZE;
278
279 hash_key_t s1 = 0;
280 hash_key_t s2 = 0;
281
282 for (; input < last; input += 8)
283 {
284 s1 += input[0]; s2 += s1;
285 s1 += input[1]; s2 += s1;
286 s1 += input[2]; s2 += s1;
287 s1 += input[3]; s2 += s1;
288 s1 += input[4]; s2 += s1;
289 s1 += input[5]; s2 += s1;
290 s1 += input[6]; s2 += s1;
291 s1 += input[7]; s2 += s1;
292 }
293
294 return s2 * 0x10000 + s1;
295 }
296
297 /* Map the ADLER32 key to a bucket index in HASH and return that index.
298 */
299 static apr_size_t
hash_to_index(hash_t * hash,hash_key_t adler32)300 hash_to_index(hash_t *hash, hash_key_t adler32)
301 {
302 return (adler32 * 0xd1f3da69) >> hash->shift;
303 }
304
305 /* Allocate and initialized SIZE buckets in RESULT_POOL.
306 * Assign them to HASH.
307 */
308 static void
allocate_hash_members(hash_t * hash,apr_size_t size,apr_pool_t * result_pool)309 allocate_hash_members(hash_t *hash,
310 apr_size_t size,
311 apr_pool_t *result_pool)
312 {
313 apr_size_t i;
314
315 hash->pool = result_pool;
316 hash->size = size;
317
318 hash->prefixes = apr_pcalloc(result_pool, size);
319 hash->last_matches = apr_pcalloc(result_pool,
320 sizeof(*hash->last_matches) * size);
321 hash->offsets = apr_palloc(result_pool, sizeof(*hash->offsets) * size);
322
323 for (i = 0; i < size; ++i)
324 hash->offsets[i] = NO_OFFSET;
325 }
326
327 /* Initialize the HASH data structure with 2**TWOPOWER buckets allocated
328 * in RESULT_POOL.
329 */
330 static void
init_hash(hash_t * hash,apr_size_t twoPower,apr_pool_t * result_pool)331 init_hash(hash_t *hash,
332 apr_size_t twoPower,
333 apr_pool_t *result_pool)
334 {
335 hash->used = 0;
336 hash->shift = sizeof(hash_key_t) * 8 - twoPower;
337
338 allocate_hash_members(hash, 1 << twoPower, result_pool);
339 }
340
341 /* Make HASH have at least MIN_SIZE buckets but at least double the number
342 * of buckets in HASH by rehashing it based TEXT.
343 */
344 static void
grow_hash(hash_t * hash,svn_stringbuf_t * text,apr_size_t min_size)345 grow_hash(hash_t *hash,
346 svn_stringbuf_t *text,
347 apr_size_t min_size)
348 {
349 hash_t copy;
350 apr_size_t i;
351
352 /* determine the new hash size */
353 apr_size_t new_size = hash->size * 2;
354 apr_size_t new_shift = hash->shift - 1;
355 while (new_size < min_size)
356 {
357 new_size *= 2;
358 --new_shift;
359 }
360
361 /* allocate new hash */
362 allocate_hash_members(©, new_size, hash->pool);
363 copy.used = 0;
364 copy.shift = new_shift;
365
366 /* copy / translate data */
367 for (i = 0; i < hash->size; ++i)
368 {
369 apr_uint32_t offset = hash->offsets[i];
370 if (offset != NO_OFFSET)
371 {
372 hash_key_t key = hash_key(text->data + offset);
373 size_t idx = hash_to_index(©, key);
374
375 if (copy.offsets[idx] == NO_OFFSET)
376 copy.used++;
377
378 copy.prefixes[idx] = hash->prefixes[i];
379 copy.offsets[idx] = offset;
380 copy.last_matches[idx] = hash->last_matches[i];
381 }
382 }
383
384 *hash = copy;
385 }
386
387 svn_fs_x__reps_builder_t *
svn_fs_x__reps_builder_create(svn_fs_t * fs,apr_pool_t * result_pool)388 svn_fs_x__reps_builder_create(svn_fs_t *fs,
389 apr_pool_t *result_pool)
390 {
391 svn_fs_x__reps_builder_t *result = apr_pcalloc(result_pool,
392 sizeof(*result));
393
394 result->fs = fs;
395 result->text = svn_stringbuf_create_empty(result_pool);
396 init_hash(&result->hash, 4, result_pool);
397
398 result->bases = apr_array_make(result_pool, 0, sizeof(base_t));
399 result->reps = apr_array_make(result_pool, 0, sizeof(rep_t));
400 result->instructions = apr_array_make(result_pool, 0,
401 sizeof(instruction_t));
402
403 return result;
404 }
405
406 svn_error_t *
svn_fs_x__reps_add_base(svn_fs_x__reps_builder_t * builder,svn_fs_x__representation_t * rep,int priority,apr_pool_t * scratch_pool)407 svn_fs_x__reps_add_base(svn_fs_x__reps_builder_t *builder,
408 svn_fs_x__representation_t *rep,
409 int priority,
410 apr_pool_t *scratch_pool)
411 {
412 base_t base;
413 apr_size_t text_start_offset = builder->text->len;
414
415 svn_stream_t *stream;
416 svn_string_t *contents;
417 apr_size_t idx;
418 SVN_ERR(svn_fs_x__get_contents(&stream, builder->fs, rep, FALSE,
419 scratch_pool));
420 SVN_ERR(svn_string_from_stream2(&contents, stream, SVN__STREAM_CHUNK_SIZE,
421 scratch_pool));
422 SVN_ERR(svn_fs_x__reps_add(&idx, builder, contents));
423
424 base.revision = svn_fs_x__get_revnum(rep->id.change_set);
425 base.item_index = rep->id.number;
426 base.priority = priority;
427 base.rep = (apr_uint32_t)idx;
428
429 APR_ARRAY_PUSH(builder->bases, base_t) = base;
430 builder->base_text_len += builder->text->len - text_start_offset;
431
432 return SVN_NO_ERROR;
433 }
434
435 /* Add LEN bytes from DATA to BUILDER's text corpus. Also, add a copy
436 * operation for that text fragment.
437 */
438 static void
add_new_text(svn_fs_x__reps_builder_t * builder,const char * data,apr_size_t len)439 add_new_text(svn_fs_x__reps_builder_t *builder,
440 const char *data,
441 apr_size_t len)
442 {
443 instruction_t instruction;
444 apr_size_t offset;
445 apr_size_t buckets_required;
446
447 if (len == 0)
448 return;
449
450 /* new instruction */
451 instruction.offset = (apr_int32_t)builder->text->len;
452 instruction.count = (apr_uint32_t)len;
453 APR_ARRAY_PUSH(builder->instructions, instruction_t) = instruction;
454
455 /* add to text corpus */
456 svn_stringbuf_appendbytes(builder->text, data, len);
457
458 /* expand the hash upfront to minimize the chances of collisions */
459 buckets_required = builder->hash.used + len / MATCH_BLOCKSIZE;
460 if (buckets_required * 3 >= builder->hash.size * 2)
461 grow_hash(&builder->hash, builder->text, 2 * buckets_required);
462
463 /* add hash entries for the new sequence */
464 for (offset = instruction.offset;
465 offset + MATCH_BLOCKSIZE <= builder->text->len;
466 offset += MATCH_BLOCKSIZE)
467 {
468 hash_key_t key = hash_key(builder->text->data + offset);
469 size_t idx = hash_to_index(&builder->hash, key);
470
471 /* Don't replace hash entries that stem from the current text.
472 * This makes early matches more likely. */
473 if (builder->hash.offsets[idx] == NO_OFFSET)
474 ++builder->hash.used;
475 else if (builder->hash.offsets[idx] >= instruction.offset)
476 continue;
477
478 builder->hash.offsets[idx] = (apr_uint32_t)offset;
479 builder->hash.prefixes[idx] = builder->text->data[offset];
480 }
481 }
482
483 svn_error_t *
svn_fs_x__reps_add(apr_size_t * rep_idx,svn_fs_x__reps_builder_t * builder,const svn_string_t * contents)484 svn_fs_x__reps_add(apr_size_t *rep_idx,
485 svn_fs_x__reps_builder_t *builder,
486 const svn_string_t *contents)
487 {
488 rep_t rep;
489 const char *current = contents->data;
490 const char *processed = current;
491 const char *end = current + contents->len;
492 const char *last_to_test = end - MATCH_BLOCKSIZE - 1;
493
494 if (builder->text->len + contents->len > MAX_TEXT_BODY)
495 return svn_error_create(SVN_ERR_FS_CONTAINER_SIZE, NULL,
496 _("Text body exceeds star delta container capacity"));
497
498 if ( builder->instructions->nelts + 2 * contents->len / MATCH_BLOCKSIZE
499 > MAX_INSTRUCTIONS)
500 return svn_error_create(SVN_ERR_FS_CONTAINER_SIZE, NULL,
501 _("Instruction count exceeds star delta container capacity"));
502
503 rep.first_instruction = (apr_uint32_t)builder->instructions->nelts;
504 while (current < last_to_test)
505 {
506 hash_key_t key = hash_key(current);
507 size_t offset;
508 size_t idx;
509
510 /* search for the next matching sequence */
511
512 for (; current < last_to_test; ++current)
513 {
514 idx = hash_to_index(&builder->hash, key);
515 if (builder->hash.prefixes[idx] == current[0])
516 {
517 offset = builder->hash.offsets[idx];
518 if ( (offset != NO_OFFSET)
519 && (memcmp(&builder->text->data[offset], current,
520 MATCH_BLOCKSIZE) == 0))
521 break;
522 }
523 key = hash_key_replace(key, current[0], current[MATCH_BLOCKSIZE]);
524 }
525
526 /* found it? */
527
528 if (current < last_to_test)
529 {
530 instruction_t instruction;
531
532 /* extend the match */
533
534 size_t prefix_match
535 = svn_cstring__reverse_match_length(current,
536 builder->text->data + offset,
537 MIN(offset, current - processed));
538 size_t postfix_match
539 = svn_cstring__match_length(current + MATCH_BLOCKSIZE,
540 builder->text->data + offset + MATCH_BLOCKSIZE,
541 MIN(builder->text->len - offset - MATCH_BLOCKSIZE,
542 end - current - MATCH_BLOCKSIZE));
543
544 /* non-matched section */
545
546 size_t new_copy = (current - processed) - prefix_match;
547 if (new_copy)
548 add_new_text(builder, processed, new_copy);
549
550 /* add instruction for matching section */
551
552 instruction.offset = (apr_int32_t)(offset - prefix_match);
553 instruction.count = (apr_uint32_t)(prefix_match + postfix_match +
554 MATCH_BLOCKSIZE);
555 APR_ARRAY_PUSH(builder->instructions, instruction_t) = instruction;
556
557 processed = current + MATCH_BLOCKSIZE + postfix_match;
558 current = processed;
559 }
560 }
561
562 add_new_text(builder, processed, end - processed);
563 rep.instruction_count = (apr_uint32_t)builder->instructions->nelts
564 - rep.first_instruction;
565 APR_ARRAY_PUSH(builder->reps, rep_t) = rep;
566
567 *rep_idx = (apr_size_t)(builder->reps->nelts - 1);
568 return SVN_NO_ERROR;
569 }
570
571 apr_size_t
svn_fs_x__reps_estimate_size(const svn_fs_x__reps_builder_t * builder)572 svn_fs_x__reps_estimate_size(const svn_fs_x__reps_builder_t *builder)
573 {
574 /* approx: size of the text exclusive to us @ 50% compression rate
575 * + 2 bytes per instruction
576 * + 2 bytes per representation
577 * + 8 bytes per base representation
578 * + 1:8 inefficiency in using the base representations
579 * + 100 bytes static overhead
580 */
581 return (builder->text->len - builder->base_text_len) / 2
582 + builder->instructions->nelts * 2
583 + builder->reps->nelts * 2
584 + builder->bases->nelts * 8
585 + builder->base_text_len / 8
586 + 100;
587 }
588
589 /* Execute COUNT instructions starting at INSTRUCTION_IDX in CONTAINER
590 * and fill the parts of EXTRACTOR->RESULT that we can from this container.
591 * Record the remainder in EXTRACTOR->MISSING.
592 *
593 * This function will recurse for instructions that reference other
594 * instruction sequences. COUNT refers to the top-level instructions only.
595 */
596 static void
get_text(svn_fs_x__rep_extractor_t * extractor,const svn_fs_x__reps_t * container,apr_size_t instruction_idx,apr_size_t count)597 get_text(svn_fs_x__rep_extractor_t *extractor,
598 const svn_fs_x__reps_t *container,
599 apr_size_t instruction_idx,
600 apr_size_t count)
601 {
602 const instruction_t *instruction;
603 const char *offset_0 = container->text - container->base_text_len;
604
605 for (instruction = container->instructions + instruction_idx;
606 instruction < container->instructions + instruction_idx + count;
607 instruction++)
608 if (instruction->offset < 0)
609 {
610 /* instruction sub-sequence */
611 get_text(extractor, container, -instruction->offset,
612 instruction->count);
613 }
614 else if (instruction->offset >= container->base_text_len)
615 {
616 /* direct copy instruction */
617 svn_stringbuf_appendbytes(extractor->result,
618 offset_0 + instruction->offset,
619 instruction->count);
620 }
621 else
622 {
623 /* a section that we need to fill from some external base rep. */
624 missing_t missing;
625 missing.base = 0;
626 missing.start = (apr_uint32_t)extractor->result->len;
627 missing.count = instruction->count;
628 missing.offset = instruction->offset;
629 svn_stringbuf_appendfill(extractor->result, 0, instruction->count);
630
631 if (extractor->missing == NULL)
632 extractor->missing = apr_array_make(extractor->pool, 1,
633 sizeof(missing));
634
635 APR_ARRAY_PUSH(extractor->missing, missing_t) = missing;
636 }
637 }
638
639 svn_error_t *
svn_fs_x__reps_get(svn_fs_x__rep_extractor_t ** extractor,svn_fs_t * fs,const svn_fs_x__reps_t * container,apr_size_t idx,apr_pool_t * result_pool)640 svn_fs_x__reps_get(svn_fs_x__rep_extractor_t **extractor,
641 svn_fs_t *fs,
642 const svn_fs_x__reps_t *container,
643 apr_size_t idx,
644 apr_pool_t *result_pool)
645 {
646 apr_uint32_t first = container->first_instructions[idx];
647 apr_uint32_t last = container->first_instructions[idx + 1];
648
649 /* create the extractor object */
650 svn_fs_x__rep_extractor_t *result = apr_pcalloc(result_pool,
651 sizeof(*result));
652 result->fs = fs;
653 result->result = svn_stringbuf_create_empty(result_pool);
654 result->pool = result_pool;
655
656 /* fill all the bits of the result that we can, i.e. all but bits coming
657 * from base representations */
658 get_text(result, container, first, last - first);
659 *extractor = result;
660 return SVN_NO_ERROR;
661 }
662
663 svn_error_t *
svn_fs_x__extractor_drive(svn_stringbuf_t ** contents,svn_fs_x__rep_extractor_t * extractor,apr_size_t start_offset,apr_size_t size,apr_pool_t * result_pool,apr_pool_t * scratch_pool)664 svn_fs_x__extractor_drive(svn_stringbuf_t **contents,
665 svn_fs_x__rep_extractor_t *extractor,
666 apr_size_t start_offset,
667 apr_size_t size,
668 apr_pool_t *result_pool,
669 apr_pool_t *scratch_pool)
670 {
671 /* we don't support base reps right now */
672 SVN_ERR_ASSERT(extractor->missing == NULL);
673
674 if (size == 0)
675 {
676 *contents = svn_stringbuf_dup(extractor->result, result_pool);
677 }
678 else
679 {
680 /* clip the selected range */
681 if (start_offset > extractor->result->len)
682 start_offset = extractor->result->len;
683
684 if (size > extractor->result->len - start_offset)
685 size = extractor->result->len - start_offset;
686
687 *contents = svn_stringbuf_ncreate(extractor->result->data + start_offset,
688 size, result_pool);
689 }
690
691 return SVN_NO_ERROR;
692 }
693
694 svn_error_t *
svn_fs_x__write_reps_container(svn_stream_t * stream,const svn_fs_x__reps_builder_t * builder,apr_pool_t * scratch_pool)695 svn_fs_x__write_reps_container(svn_stream_t *stream,
696 const svn_fs_x__reps_builder_t *builder,
697 apr_pool_t *scratch_pool)
698 {
699 int i;
700 svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
701
702 /* one top-level stream for each array */
703 svn_packed__int_stream_t *bases_stream
704 = svn_packed__create_int_stream(root, FALSE, FALSE);
705 svn_packed__int_stream_t *reps_stream
706 = svn_packed__create_int_stream(root, TRUE, FALSE);
707 svn_packed__int_stream_t *instructions_stream
708 = svn_packed__create_int_stream(root, FALSE, FALSE);
709
710 /* for misc stuff */
711 svn_packed__int_stream_t *misc_stream
712 = svn_packed__create_int_stream(root, FALSE, FALSE);
713
714 /* TEXT will be just a single string */
715 svn_packed__byte_stream_t *text_stream
716 = svn_packed__create_bytes_stream(root);
717
718 /* structure the struct streams such we can extract much of the redundancy
719 */
720 svn_packed__create_int_substream(bases_stream, TRUE, TRUE);
721 svn_packed__create_int_substream(bases_stream, TRUE, FALSE);
722 svn_packed__create_int_substream(bases_stream, TRUE, FALSE);
723 svn_packed__create_int_substream(bases_stream, TRUE, FALSE);
724
725 svn_packed__create_int_substream(instructions_stream, TRUE, TRUE);
726 svn_packed__create_int_substream(instructions_stream, FALSE, FALSE);
727
728 /* text */
729 svn_packed__add_bytes(text_stream, builder->text->data, builder->text->len);
730
731 /* serialize bases */
732 for (i = 0; i < builder->bases->nelts; ++i)
733 {
734 const base_t *base = &APR_ARRAY_IDX(builder->bases, i, base_t);
735 svn_packed__add_int(bases_stream, base->revision);
736 svn_packed__add_uint(bases_stream, base->item_index);
737 svn_packed__add_uint(bases_stream, base->priority);
738 svn_packed__add_uint(bases_stream, base->rep);
739 }
740
741 /* serialize reps */
742 for (i = 0; i < builder->reps->nelts; ++i)
743 {
744 const rep_t *rep = &APR_ARRAY_IDX(builder->reps, i, rep_t);
745 svn_packed__add_uint(reps_stream, rep->first_instruction);
746 }
747
748 svn_packed__add_uint(reps_stream, builder->instructions->nelts);
749
750 /* serialize instructions */
751 for (i = 0; i < builder->instructions->nelts; ++i)
752 {
753 const instruction_t *instruction
754 = &APR_ARRAY_IDX(builder->instructions, i, instruction_t);
755 svn_packed__add_int(instructions_stream, instruction->offset);
756 svn_packed__add_uint(instructions_stream, instruction->count);
757 }
758
759 /* other elements */
760 svn_packed__add_uint(misc_stream, 0);
761
762 /* write to stream */
763 SVN_ERR(svn_packed__data_write(stream, root, scratch_pool));
764
765 return SVN_NO_ERROR;
766 }
767
768 svn_error_t *
svn_fs_x__read_reps_container(svn_fs_x__reps_t ** container,svn_stream_t * stream,apr_pool_t * result_pool,apr_pool_t * scratch_pool)769 svn_fs_x__read_reps_container(svn_fs_x__reps_t **container,
770 svn_stream_t *stream,
771 apr_pool_t *result_pool,
772 apr_pool_t *scratch_pool)
773 {
774 apr_size_t i;
775
776 base_t *bases;
777 apr_uint32_t *first_instructions;
778 instruction_t *instructions;
779
780 svn_fs_x__reps_t *reps = apr_pcalloc(result_pool, sizeof(*reps));
781
782 svn_packed__data_root_t *root;
783 svn_packed__int_stream_t *bases_stream;
784 svn_packed__int_stream_t *reps_stream;
785 svn_packed__int_stream_t *instructions_stream;
786 svn_packed__int_stream_t *misc_stream;
787 svn_packed__byte_stream_t *text_stream;
788
789 /* read from disk */
790 SVN_ERR(svn_packed__data_read(&root, stream, result_pool, scratch_pool));
791
792 bases_stream = svn_packed__first_int_stream(root);
793 reps_stream = svn_packed__next_int_stream(bases_stream);
794 instructions_stream = svn_packed__next_int_stream(reps_stream);
795 misc_stream = svn_packed__next_int_stream(instructions_stream);
796 text_stream = svn_packed__first_byte_stream(root);
797
798 /* text */
799 reps->text = svn_packed__get_bytes(text_stream, &reps->text_len);
800 reps->text = apr_pmemdup(result_pool, reps->text, reps->text_len);
801
802 /* de-serialize bases */
803 reps->base_count
804 = svn_packed__int_count(svn_packed__first_int_substream(bases_stream));
805 bases = apr_palloc(result_pool, reps->base_count * sizeof(*bases));
806 reps->bases = bases;
807
808 for (i = 0; i < reps->base_count; ++i)
809 {
810 base_t *base = bases + i;
811 base->revision = (svn_revnum_t)svn_packed__get_int(bases_stream);
812 base->item_index = svn_packed__get_uint(bases_stream);
813 base->priority = (int)svn_packed__get_uint(bases_stream);
814 base->rep = (apr_uint32_t)svn_packed__get_uint(bases_stream);
815 }
816
817 /* de-serialize instructions */
818 reps->instruction_count
819 = svn_packed__int_count
820 (svn_packed__first_int_substream(instructions_stream));
821 instructions
822 = apr_palloc(result_pool,
823 reps->instruction_count * sizeof(*instructions));
824 reps->instructions = instructions;
825
826 for (i = 0; i < reps->instruction_count; ++i)
827 {
828 instruction_t *instruction = instructions + i;
829 instruction->offset
830 = (apr_int32_t)svn_packed__get_int(instructions_stream);
831 instruction->count
832 = (apr_uint32_t)svn_packed__get_uint(instructions_stream);
833 }
834
835 /* de-serialize reps */
836 reps->rep_count = svn_packed__int_count(reps_stream);
837 first_instructions
838 = apr_palloc(result_pool,
839 (reps->rep_count + 1) * sizeof(*first_instructions));
840 reps->first_instructions = first_instructions;
841
842 for (i = 0; i < reps->rep_count; ++i)
843 first_instructions[i]
844 = (apr_uint32_t)svn_packed__get_uint(reps_stream);
845 first_instructions[reps->rep_count] = (apr_uint32_t)reps->instruction_count;
846
847 /* other elements */
848 reps->base_text_len = (apr_size_t)svn_packed__get_uint(misc_stream);
849
850 /* return result */
851 *container = reps;
852
853 return SVN_NO_ERROR;
854 }
855
856 svn_error_t *
svn_fs_x__serialize_reps_container(void ** data,apr_size_t * data_len,void * in,apr_pool_t * pool)857 svn_fs_x__serialize_reps_container(void **data,
858 apr_size_t *data_len,
859 void *in,
860 apr_pool_t *pool)
861 {
862 svn_fs_x__reps_t *reps = in;
863 svn_stringbuf_t *serialized;
864
865 /* make a guesstimate on the size of the serialized data. Erring on the
866 * low side will cause the serializer to re-alloc its buffer. */
867 apr_size_t size
868 = reps->text_len
869 + reps->base_count * sizeof(*reps->bases)
870 + reps->rep_count * sizeof(*reps->first_instructions)
871 + reps->instruction_count * sizeof(*reps->instructions)
872 + 100;
873
874 /* serialize array header and all its elements */
875 svn_temp_serializer__context_t *context
876 = svn_temp_serializer__init(reps, sizeof(*reps), size, pool);
877
878 /* serialize sub-structures */
879 svn_temp_serializer__add_leaf(context, (const void **)&reps->text,
880 reps->text_len);
881 svn_temp_serializer__add_leaf(context, (const void **)&reps->bases,
882 reps->base_count * sizeof(*reps->bases));
883 svn_temp_serializer__add_leaf(context,
884 (const void **)&reps->first_instructions,
885 reps->rep_count *
886 sizeof(*reps->first_instructions));
887 svn_temp_serializer__add_leaf(context, (const void **)&reps->instructions,
888 reps->instruction_count *
889 sizeof(*reps->instructions));
890
891 /* return the serialized result */
892 serialized = svn_temp_serializer__get(context);
893
894 *data = serialized->data;
895 *data_len = serialized->len;
896
897 return SVN_NO_ERROR;
898 }
899
900 svn_error_t *
svn_fs_x__deserialize_reps_container(void ** out,void * data,apr_size_t data_len,apr_pool_t * result_pool)901 svn_fs_x__deserialize_reps_container(void **out,
902 void *data,
903 apr_size_t data_len,
904 apr_pool_t *result_pool)
905 {
906 svn_fs_x__reps_t *reps = (svn_fs_x__reps_t *)data;
907
908 /* de-serialize sub-structures */
909 svn_temp_deserializer__resolve(reps, (void **)&reps->text);
910 svn_temp_deserializer__resolve(reps, (void **)&reps->bases);
911 svn_temp_deserializer__resolve(reps, (void **)&reps->first_instructions);
912 svn_temp_deserializer__resolve(reps, (void **)&reps->instructions);
913
914 /* done */
915 *out = reps;
916
917 return SVN_NO_ERROR;
918 }
919
920 svn_error_t *
svn_fs_x__reps_get_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * pool)921 svn_fs_x__reps_get_func(void **out,
922 const void *data,
923 apr_size_t data_len,
924 void *baton,
925 apr_pool_t *pool)
926 {
927 svn_fs_x__reps_baton_t *reps_baton = baton;
928
929 /* get a usable reps structure */
930 const svn_fs_x__reps_t *cached = data;
931 svn_fs_x__reps_t *reps = apr_pmemdup(pool, cached, sizeof(*reps));
932
933 reps->text
934 = svn_temp_deserializer__ptr(cached, (const void **)&cached->text);
935 reps->bases
936 = svn_temp_deserializer__ptr(cached, (const void **)&cached->bases);
937 reps->first_instructions
938 = svn_temp_deserializer__ptr(cached,
939 (const void **)&cached->first_instructions);
940 reps->instructions
941 = svn_temp_deserializer__ptr(cached,
942 (const void **)&cached->instructions);
943
944 /* return an extractor for the selected item */
945 SVN_ERR(svn_fs_x__reps_get((svn_fs_x__rep_extractor_t **)out,
946 reps_baton->fs, reps, reps_baton->idx, pool));
947
948 return SVN_NO_ERROR;
949 }
950