1 /* pack.c --- FSFS shard packing functionality
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22 #include <assert.h>
23 #include <string.h>
24
25 #include "svn_pools.h"
26 #include "svn_dirent_uri.h"
27 #include "svn_sorts.h"
28 #include "private/svn_temp_serializer.h"
29 #include "private/svn_sorts_private.h"
30 #include "private/svn_subr_private.h"
31 #include "private/svn_string_private.h"
32 #include "private/svn_io_private.h"
33
34 #include "fs_fs.h"
35 #include "pack.h"
36 #include "util.h"
37 #include "id.h"
38 #include "index.h"
39 #include "low_level.h"
40 #include "revprops.h"
41 #include "transaction.h"
42
43 #include "../libsvn_fs/fs-loader.h"
44
45 #include "svn_private_config.h"
46 #include "temp_serializer.h"
47
48 /* Logical addressing packing logic:
49 *
50 * We pack files on a pack file basis (e.g. 1000 revs) without changing
51 * existing pack files nor the revision files outside the range to pack.
52 *
53 * First, we will scan the revision file indexes to determine the number
54 * of items to "place" (i.e. determine their optimal position within the
55 * future pack file). For each item, we will need a constant amount of
56 * memory to track it. A MAX_MEM parameter sets a limit to the number of
57 * items we may place in one go. That means, we may not be able to add
58 * all revisions at once. Instead, we will run the placement for a subset
59 * of revisions at a time. The very unlikely worst case will simply append
60 * all revision data with just a little reshuffling inside each revision.
61 *
62 * In a second step, we read all revisions in the selected range, build
63 * the item tracking information and copy the items themselves from the
64 * revision files to temporary files. The latter serve as buckets for a
65 * very coarse bucket presort: Separate change lists, file properties,
66 * directory properties and noderevs + representations from one another.
67 *
68 * The third step will determine an optimized placement for the items in
69 * each of the 4 buckets separately. The first three will simply order
70 * their items by revision, starting with the newest once. Placing rep
71 * and noderev items is a more elaborate process documented in the code.
72 *
73 * In short, we store items in the following order:
74 * - changed paths lists
75 * - node property
76 * - directory properties
77 * - directory representations corresponding noderevs, lexical path order
78 * with special treatment of "trunk" and "branches"
79 * - same for file representations
80 *
81 * Step 4 copies the items from the temporary buckets into the final
82 * pack file and writes the temporary index files.
83 *
84 * Finally, after the last range of revisions, create the final indexes.
85 */
86
87 /* Maximum amount of memory we allocate for placement information during
88 * the pack process.
89 */
90 #define DEFAULT_MAX_MEM (64 * 1024 * 1024)
91
92 /* Data structure describing a node change at PATH, REVISION.
93 * We will sort these instances by PATH and NODE_ID such that we can combine
94 * similar nodes in the same reps container and store containers in path
95 * major order.
96 */
97 typedef struct path_order_t
98 {
99 /* changed path */
100 svn_prefix_string__t *path;
101
102 /* node ID for this PATH in REVISION */
103 svn_fs_fs__id_part_t node_id;
104
105 /* when this change happened */
106 svn_revnum_t revision;
107
108 /* noderev predecessor count */
109 int predecessor_count;
110
111 /* this is a node is the latest for this PATH in this rev / pack file */
112 svn_boolean_t is_head;
113
114 /* length of the expanded representation content */
115 apr_int64_t expanded_size;
116
117 /* item ID of the noderev linked to the change. May be (0, 0). */
118 svn_fs_fs__id_part_t noderev_id;
119
120 /* item ID of the representation containing the new data. May be (0, 0). */
121 svn_fs_fs__id_part_t rep_id;
122 } path_order_t;
123
124 /* Represents a reference from item FROM to item TO. FROM may be a noderev
125 * or rep_id while TO is (currently) always a representation. We will sort
126 * them by TO which allows us to collect all dependent items.
127 */
128 typedef struct reference_t
129 {
130 svn_fs_fs__id_part_t to;
131 svn_fs_fs__id_part_t from;
132 } reference_t;
133
134 /* This structure keeps track of all the temporary data and status that
135 * needs to be kept around during the creation of one pack file. After
136 * each revision range (in case we can't process all revs at once due to
137 * memory restrictions), parts of the data will get re-initialized.
138 */
139 typedef struct pack_context_t
140 {
141 /* file system that we operate on */
142 svn_fs_t *fs;
143
144 /* cancel function to invoke at regular intervals. May be NULL */
145 svn_cancel_func_t cancel_func;
146
147 /* baton to pass to CANCEL_FUNC */
148 void *cancel_baton;
149
150 /* first revision in the shard (and future pack file) */
151 svn_revnum_t shard_rev;
152
153 /* first revision in the range to process (>= SHARD_REV) */
154 svn_revnum_t start_rev;
155
156 /* first revision after the range to process (<= SHARD_END_REV) */
157 svn_revnum_t end_rev;
158
159 /* first revision after the current shard */
160 svn_revnum_t shard_end_rev;
161
162 /* log-to-phys proto index for the whole pack file */
163 apr_file_t *proto_l2p_index;
164
165 /* phys-to-log proto index for the whole pack file */
166 apr_file_t *proto_p2l_index;
167
168 /* full shard directory path (containing the unpacked revisions) */
169 const char *shard_dir;
170
171 /* full packed shard directory path (containing the pack file + indexes) */
172 const char *pack_file_dir;
173
174 /* full pack file path (including PACK_FILE_DIR) */
175 const char *pack_file_path;
176
177 /* current write position (i.e. file length) in the pack file */
178 apr_off_t pack_offset;
179
180 /* the pack file to ultimately write all data to */
181 apr_file_t *pack_file;
182
183 /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
184 * Will be filled in phase 2 and be cleared after each revision range. */
185 apr_array_header_t *changes;
186
187 /* temp file receiving all change list items (referenced by CHANGES).
188 * Will be filled in phase 2 and be cleared after each revision range. */
189 apr_file_t *changes_file;
190
191 /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
192 * Will be filled in phase 2 and be cleared after each revision range. */
193 apr_array_header_t *file_props;
194
195 /* temp file receiving all file prop items (referenced by FILE_PROPS).
196 * Will be filled in phase 2 and be cleared after each revision range.*/
197 apr_file_t *file_props_file;
198
199 /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
200 * Will be filled in phase 2 and be cleared after each revision range. */
201 apr_array_header_t *dir_props;
202
203 /* temp file receiving all directory prop items (referenced by DIR_PROPS).
204 * Will be filled in phase 2 and be cleared after each revision range.*/
205 apr_file_t *dir_props_file;
206
207 /* container for all PATH members in PATH_ORDER. */
208 svn_prefix_tree__t *paths;
209
210 /* array of path_order_t *. Will be filled in phase 2 and be cleared
211 * after each revision range. Sorted by PATH, NODE_ID. */
212 apr_array_header_t *path_order;
213
214 /* array of reference_t* linking representations to their delta bases.
215 * Will be filled in phase 2 and be cleared after each revision range.
216 * It will be sorted by the FROM members (for rep->base rep lookup). */
217 apr_array_header_t *references;
218
219 /* array of svn_fs_fs__p2l_entry_t*. Will be filled in phase 2 and be
220 * cleared after each revision range. During phase 3, we will set items
221 * to NULL that we already processed. */
222 apr_array_header_t *reps;
223
224 /* array of int, marking for each revision, at which offset their items
225 * begin in REPS. Will be filled in phase 2 and be cleared after
226 * each revision range. */
227 apr_array_header_t *rev_offsets;
228
229 /* temp file receiving all items referenced by REPS.
230 * Will be filled in phase 2 and be cleared after each revision range.*/
231 apr_file_t *reps_file;
232
233 /* pool used for temporary data structures that will be cleaned up when
234 * the next range of revisions is being processed */
235 apr_pool_t *info_pool;
236
237 /* ensure that all filesystem changes are written to disk. */
238 svn_boolean_t flush_to_disk;
239 } pack_context_t;
240
241 /* Create and initialize a new pack context for packing shard SHARD_REV in
242 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL
243 * and return the structure in *CONTEXT.
244 *
245 * Limit the number of items being copied per iteration to MAX_ITEMS.
246 * Set FLUSH_TO_DISK, CANCEL_FUNC and CANCEL_BATON as well.
247 */
248 static svn_error_t *
initialize_pack_context(pack_context_t * context,svn_fs_t * fs,const char * pack_file_dir,const char * shard_dir,svn_revnum_t shard_rev,int max_items,svn_boolean_t flush_to_disk,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)249 initialize_pack_context(pack_context_t *context,
250 svn_fs_t *fs,
251 const char *pack_file_dir,
252 const char *shard_dir,
253 svn_revnum_t shard_rev,
254 int max_items,
255 svn_boolean_t flush_to_disk,
256 svn_cancel_func_t cancel_func,
257 void *cancel_baton,
258 apr_pool_t *pool)
259 {
260 fs_fs_data_t *ffd = fs->fsap_data;
261 const char *temp_dir;
262 int max_revs = MIN(ffd->max_files_per_dir, max_items);
263
264 SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT);
265 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
266
267 /* where we will place our various temp files */
268 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
269
270 /* store parameters */
271 context->fs = fs;
272 context->cancel_func = cancel_func;
273 context->cancel_baton = cancel_baton;
274
275 context->shard_rev = shard_rev;
276 context->start_rev = shard_rev;
277 context->end_rev = shard_rev;
278 context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
279
280 /* the pool used for temp structures */
281 context->info_pool = svn_pool_create(pool);
282 context->paths = svn_prefix_tree__create(context->info_pool);
283
284 context->flush_to_disk = flush_to_disk;
285
286 /* Create the new directory and pack file. */
287 context->shard_dir = shard_dir;
288 context->pack_file_dir = pack_file_dir;
289 context->pack_file_path
290 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
291 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
292 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
293 | APR_CREATE, APR_OS_DEFAULT, pool));
294
295 /* Proto index files */
296 SVN_ERR(svn_fs_fs__l2p_proto_index_open(
297 &context->proto_l2p_index,
298 svn_dirent_join(pack_file_dir,
299 PATH_INDEX PATH_EXT_L2P_INDEX,
300 pool),
301 pool));
302 SVN_ERR(svn_fs_fs__p2l_proto_index_open(
303 &context->proto_p2l_index,
304 svn_dirent_join(pack_file_dir,
305 PATH_INDEX PATH_EXT_P2L_INDEX,
306 pool),
307 pool));
308
309 /* item buckets: one item info array and one temp file per bucket */
310 context->changes = apr_array_make(pool, max_items,
311 sizeof(svn_fs_fs__p2l_entry_t *));
312 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
313 svn_io_file_del_on_close,
314 context->info_pool, pool));
315 context->file_props = apr_array_make(pool, max_items,
316 sizeof(svn_fs_fs__p2l_entry_t *));
317 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
318 svn_io_file_del_on_close,
319 context->info_pool, pool));
320 context->dir_props = apr_array_make(pool, max_items,
321 sizeof(svn_fs_fs__p2l_entry_t *));
322 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
323 svn_io_file_del_on_close,
324 context->info_pool, pool));
325
326 /* noderev and representation item bucket */
327 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
328 context->path_order = apr_array_make(pool, max_items,
329 sizeof(path_order_t *));
330 context->references = apr_array_make(pool, max_items,
331 sizeof(reference_t *));
332 context->reps = apr_array_make(pool, max_items,
333 sizeof(svn_fs_fs__p2l_entry_t *));
334 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
335 svn_io_file_del_on_close, pool, pool));
336
337 return SVN_NO_ERROR;
338 }
339
340 /* Clean up / free all revision range specific data and files in CONTEXT.
341 * Use POOL for temporary allocations.
342 */
343 static svn_error_t *
reset_pack_context(pack_context_t * context,apr_pool_t * pool)344 reset_pack_context(pack_context_t *context,
345 apr_pool_t *pool)
346 {
347 const char *temp_dir;
348
349 apr_array_clear(context->changes);
350 SVN_ERR(svn_io_file_close(context->changes_file, pool));
351 apr_array_clear(context->file_props);
352 SVN_ERR(svn_io_file_close(context->file_props_file, pool));
353 apr_array_clear(context->dir_props);
354 SVN_ERR(svn_io_file_close(context->dir_props_file, pool));
355
356 apr_array_clear(context->rev_offsets);
357 apr_array_clear(context->path_order);
358 apr_array_clear(context->references);
359 apr_array_clear(context->reps);
360 SVN_ERR(svn_io_file_close(context->reps_file, pool));
361
362 svn_pool_clear(context->info_pool);
363
364 /* The new temporary files must live at least as long as any other info
365 * object in CONTEXT. */
366 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
367 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
368 svn_io_file_del_on_close,
369 context->info_pool, pool));
370 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
371 svn_io_file_del_on_close,
372 context->info_pool, pool));
373 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
374 svn_io_file_del_on_close,
375 context->info_pool, pool));
376 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
377 svn_io_file_del_on_close,
378 context->info_pool, pool));
379 context->paths = svn_prefix_tree__create(context->info_pool);
380
381 return SVN_NO_ERROR;
382 }
383
384 /* Call this after the last revision range. It will finalize all index files
385 * for CONTEXT and close any open files. Use POOL for temporary allocations.
386 */
387 static svn_error_t *
close_pack_context(pack_context_t * context,apr_pool_t * pool)388 close_pack_context(pack_context_t *context,
389 apr_pool_t *pool)
390 {
391 const char *proto_l2p_index_path;
392 const char *proto_p2l_index_path;
393
394 /* need the file names for the actual index creation call further down */
395 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
396 context->proto_l2p_index, pool));
397 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
398 context->proto_p2l_index, pool));
399
400 /* finalize proto index files */
401 SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
402 SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));
403
404 /* Append the actual index data to the pack file. */
405 SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file,
406 proto_l2p_index_path,
407 proto_p2l_index_path,
408 context->shard_rev,
409 pool));
410
411 /* remove proto index files */
412 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
413 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));
414
415 /* Ensure that packed file is written to disk.*/
416 if (context->flush_to_disk)
417 SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool));
418 SVN_ERR(svn_io_file_close(context->pack_file, pool));
419
420 return SVN_NO_ERROR;
421 }
422
423 /* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC
424 * from CONTEXT at regular intervals. Use POOL for allocations.
425 */
426 static svn_error_t *
copy_file_data(pack_context_t * context,apr_file_t * dest,apr_file_t * source,apr_off_t size,apr_pool_t * pool)427 copy_file_data(pack_context_t *context,
428 apr_file_t *dest,
429 apr_file_t *source,
430 apr_off_t size,
431 apr_pool_t *pool)
432 {
433 /* most non-representation items will be small. Minimize the buffer
434 * and infrastructure overhead in that case. */
435 enum { STACK_BUFFER_SIZE = 1024 };
436
437 if (size < STACK_BUFFER_SIZE)
438 {
439 /* copy small data using a fixed-size buffer on stack */
440 char buffer[STACK_BUFFER_SIZE];
441 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
442 NULL, NULL, pool));
443 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
444 NULL, pool));
445 }
446 else
447 {
448 /* use streaming copies for larger data blocks. That may require
449 * the allocation of larger buffers and we should make sure that
450 * this extra memory is released asap. */
451 fs_fs_data_t *ffd = context->fs->fsap_data;
452 apr_pool_t *copypool = svn_pool_create(pool);
453 char *buffer = apr_palloc(copypool, ffd->block_size);
454
455 while (size)
456 {
457 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
458 if (context->cancel_func)
459 SVN_ERR(context->cancel_func(context->cancel_baton));
460
461 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
462 NULL, NULL, pool));
463 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
464 NULL, pool));
465
466 size -= to_copy;
467 }
468
469 svn_pool_destroy(copypool);
470 }
471
472 return SVN_NO_ERROR;
473 }
474
475 /* Writes SIZE bytes, all 0, to DEST. Uses POOL for allocations.
476 */
477 static svn_error_t *
write_null_bytes(apr_file_t * dest,apr_off_t size,apr_pool_t * pool)478 write_null_bytes(apr_file_t *dest,
479 apr_off_t size,
480 apr_pool_t *pool)
481 {
482 /* Have a collection of high-quality, easy to access NUL bytes handy. */
483 enum { BUFFER_SIZE = 1024 };
484 static const char buffer[BUFFER_SIZE] = { 0 };
485
486 /* copy SIZE of them into the file's buffer */
487 while (size)
488 {
489 apr_size_t to_write = MIN(size, BUFFER_SIZE);
490 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool));
491 size -= to_write;
492 }
493
494 return SVN_NO_ERROR;
495 }
496
497 /* Copy the "simple" item (changed paths list or property representation)
498 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add
499 * a copy of ENTRY to ENTRIES but with an updated offset value that points
500 * to the copy destination in TEMP_FILE. Use POOL for allocations.
501 */
502 static svn_error_t *
copy_item_to_temp(pack_context_t * context,apr_array_header_t * entries,apr_file_t * temp_file,apr_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_pool_t * pool)503 copy_item_to_temp(pack_context_t *context,
504 apr_array_header_t *entries,
505 apr_file_t *temp_file,
506 apr_file_t *rev_file,
507 svn_fs_fs__p2l_entry_t *entry,
508 apr_pool_t *pool)
509 {
510 svn_fs_fs__p2l_entry_t *new_entry
511 = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
512
513 SVN_ERR(svn_io_file_get_offset(&new_entry->offset, temp_file, pool));
514 APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry;
515
516 SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));
517
518 return SVN_NO_ERROR;
519 }
520
521 /* Return the offset within CONTEXT->REPS that corresponds to item
522 * ITEM_INDEX in REVISION.
523 */
524 static int
get_item_array_index(pack_context_t * context,svn_revnum_t revision,apr_int64_t item_index)525 get_item_array_index(pack_context_t *context,
526 svn_revnum_t revision,
527 apr_int64_t item_index)
528 {
529 assert(revision >= context->start_rev);
530 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
531 revision - context->start_rev,
532 int);
533 }
534
535 /* Write INFO to the correct position in CONTEXT->REP_INFOS. The latter
536 * may need auto-expanding. Overwriting an array element is not allowed.
537 */
538 static void
add_item_rep_mapping(pack_context_t * context,svn_fs_fs__p2l_entry_t * entry)539 add_item_rep_mapping(pack_context_t *context,
540 svn_fs_fs__p2l_entry_t *entry)
541 {
542 int idx;
543
544 /* index of INFO */
545 idx = get_item_array_index(context,
546 entry->item.revision,
547 entry->item.number);
548
549 /* make sure the index exists in the array */
550 while (context->reps->nelts <= idx)
551 APR_ARRAY_PUSH(context->reps, void *) = NULL;
552
553 /* set the element. If there is already an entry, there are probably
554 * two items claiming to be the same -> bail out */
555 assert(!APR_ARRAY_IDX(context->reps, idx, void *));
556 APR_ARRAY_IDX(context->reps, idx, void *) = entry;
557 }
558
559 /* Return the P2L entry from CONTEXT->REPS for the given ID. If there is
560 * none (or not anymore), return NULL. If RESET has been specified, set
561 * the array entry to NULL after returning the entry.
562 */
563 static svn_fs_fs__p2l_entry_t *
get_item(pack_context_t * context,const svn_fs_fs__id_part_t * id,svn_boolean_t reset)564 get_item(pack_context_t *context,
565 const svn_fs_fs__id_part_t *id,
566 svn_boolean_t reset)
567 {
568 svn_fs_fs__p2l_entry_t *result = NULL;
569 if (id->number && id->revision >= context->start_rev)
570 {
571 int idx = get_item_array_index(context, id->revision, id->number);
572 if (context->reps->nelts > idx)
573 {
574 result = APR_ARRAY_IDX(context->reps, idx, void *);
575 if (result && reset)
576 APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
577 }
578 }
579
580 return result;
581 }
582
583 /* Copy representation item identified by ENTRY from the current position
584 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
585 * our placement algorithm to CONTEXT. Use POOL for temporary allocations.
586 */
587 static svn_error_t *
copy_rep_to_temp(pack_context_t * context,apr_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_pool_t * pool)588 copy_rep_to_temp(pack_context_t *context,
589 apr_file_t *rev_file,
590 svn_fs_fs__p2l_entry_t *entry,
591 apr_pool_t *pool)
592 {
593 svn_fs_fs__rep_header_t *rep_header;
594 svn_stream_t *stream;
595 apr_off_t source_offset = entry->offset;
596
597 /* create a copy of ENTRY, make it point to the copy destination and
598 * store it in CONTEXT */
599 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
600 SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file, pool));
601 add_item_rep_mapping(context, entry);
602
603 /* read & parse the representation header */
604 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
605 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool));
606 SVN_ERR(svn_stream_close(stream));
607
608 /* if the representation is a delta against some other rep, link the two */
609 if ( rep_header->type == svn_fs_fs__rep_delta
610 && rep_header->base_revision >= context->start_rev)
611 {
612 reference_t *reference = apr_pcalloc(context->info_pool,
613 sizeof(*reference));
614 reference->from = entry->item;
615 reference->to.revision = rep_header->base_revision;
616 reference->to.number = rep_header->base_item_index;
617 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
618 }
619
620 /* copy the whole rep (including header!) to our temp file */
621 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
622 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
623 pool));
624
625 return SVN_NO_ERROR;
626 }
627
628 /* Directories first, dirs / files sorted by name in reverse lexical order.
629 * This maximizes the chance of two items being located close to one another
630 * in *all* pack files independent of their change order. It also groups
631 * multi-project repos nicely according to their sub-projects. The reverse
632 * order aspect gives "trunk" preference over "tags" and "branches", so
633 * trunk-related items are more likely to be contiguous.
634 */
635 static int
compare_dir_entries_format7(const svn_sort__item_t * a,const svn_sort__item_t * b)636 compare_dir_entries_format7(const svn_sort__item_t *a,
637 const svn_sort__item_t *b)
638 {
639 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
640 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
641
642 return strcmp(lhs->name, rhs->name);
643 }
644
645 /* Directories entries sorted by revision (decreasing - to max cache hits)
646 * and offset (increasing - to max benefit from APR file buffering).
647 */
648 static int
compare_dir_entries_format6(const svn_sort__item_t * a,const svn_sort__item_t * b)649 compare_dir_entries_format6(const svn_sort__item_t *a,
650 const svn_sort__item_t *b)
651 {
652 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
653 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
654
655 const svn_fs_fs__id_part_t *lhs_rev_item
656 = svn_fs_fs__id_rev_item(lhs->id);
657 const svn_fs_fs__id_part_t *rhs_rev_item
658 = svn_fs_fs__id_rev_item(rhs->id);
659
660 /* decreasing ("reverse") order on revs */
661 if (lhs_rev_item->revision != rhs_rev_item->revision)
662 return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1;
663
664 /* increasing order on offsets */
665 if (lhs_rev_item->number != rhs_rev_item->number)
666 return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1;
667
668 return 0;
669 }
670
671 apr_array_header_t *
svn_fs_fs__order_dir_entries(svn_fs_t * fs,apr_hash_t * directory,apr_pool_t * result_pool,apr_pool_t * scratch_pool)672 svn_fs_fs__order_dir_entries(svn_fs_t *fs,
673 apr_hash_t *directory,
674 apr_pool_t *result_pool,
675 apr_pool_t *scratch_pool)
676 {
677 apr_array_header_t *ordered
678 = svn_sort__hash(directory,
679 svn_fs_fs__use_log_addressing(fs)
680 ? compare_dir_entries_format7
681 : compare_dir_entries_format6,
682 scratch_pool);
683
684 apr_array_header_t *result
685 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
686
687 int i;
688 for (i = 0; i < ordered->nelts; ++i)
689 APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
690 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
691
692 return result;
693 }
694
695 /* Return a duplicate of the ORIGINAL path and with special sub-strings
696 * (e.g. "trunk") modified in such a way that have a lower lexicographic
697 * value than any other "normal" file name.
698 */
699 static const char *
tweak_path_for_ordering(const char * original,apr_pool_t * pool)700 tweak_path_for_ordering(const char *original,
701 apr_pool_t *pool)
702 {
703 /* We may add further special cases as needed. */
704 enum {SPECIAL_COUNT = 2};
705 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
706 char *pos;
707 char *path = apr_pstrdup(pool, original);
708 int i;
709
710 /* Replace the first char of any "special" sub-string we find by
711 * a control char, i.e. '\1' .. '\31'. In the rare event that this
712 * would clash with existing paths, no data will be lost but merely
713 * the node ordering will be sub-optimal.
714 */
715 for (i = 0; i < SPECIAL_COUNT; ++i)
716 for (pos = strstr(path, special[i]);
717 pos;
718 pos = strstr(pos + 1, special[i]))
719 {
720 *pos = (char)(i + '\1');
721 }
722
723 return path;
724 }
725
726 /* Copy node revision item identified by ENTRY from the current position
727 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
728 * our placement algorithm to CONTEXT. Use POOL for temporary allocations.
729 */
730 static svn_error_t *
copy_node_to_temp(pack_context_t * context,svn_fs_fs__revision_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_pool_t * pool)731 copy_node_to_temp(pack_context_t *context,
732 svn_fs_fs__revision_file_t *rev_file,
733 svn_fs_fs__p2l_entry_t *entry,
734 apr_pool_t *pool)
735 {
736 path_order_t *path_order = apr_pcalloc(context->info_pool,
737 sizeof(*path_order));
738 node_revision_t *noderev;
739 const char *sort_path;
740 apr_off_t source_offset = entry->offset;
741
742 /* read & parse noderev */
743 SVN_ERR(svn_fs_fs__read_noderev(&noderev, rev_file->stream, pool, pool));
744
745 /* create a copy of ENTRY, make it point to the copy destination and
746 * store it in CONTEXT */
747 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
748 SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file,
749 pool));
750 add_item_rep_mapping(context, entry);
751
752 /* copy the noderev to our temp file */
753 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset, pool));
754 SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file,
755 entry->size, pool));
756
757 /* if the node has a data representation, make that the node's "base".
758 * This will (often) cause the noderev to be placed right in front of
759 * its data representation. */
760
761 if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
762 {
763 path_order->rep_id.revision = noderev->data_rep->revision;
764 path_order->rep_id.number = noderev->data_rep->item_index;
765 path_order->expanded_size = noderev->data_rep->expanded_size;
766 }
767
768 /* Sort path is the key used for ordering noderevs and associated reps.
769 * It will not be stored in the final pack file. */
770 sort_path = tweak_path_for_ordering(noderev->created_path, pool);
771 path_order->path = svn_prefix_string__create(context->paths, sort_path);
772 path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
773 path_order->revision = svn_fs_fs__id_rev(noderev->id);
774 path_order->predecessor_count = noderev->predecessor_count;
775 path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
776 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
777
778 return SVN_NO_ERROR;
779 }
780
781 /* implements compare_fn_t. Bring all directories in front of the files
782 and sort descendingly by PATH, NODE_ID and REVISION.
783 */
784 static int
compare_path_order(const path_order_t * const * lhs_p,const path_order_t * const * rhs_p)785 compare_path_order(const path_order_t * const * lhs_p,
786 const path_order_t * const * rhs_p)
787 {
788 const path_order_t * lhs = *lhs_p;
789 const path_order_t * rhs = *rhs_p;
790
791 /* lexicographic order on path and node (i.e. latest first) */
792 int diff = svn_prefix_string__compare(lhs->path, rhs->path);
793 if (diff)
794 return diff;
795
796 /* reverse order on node (i.e. latest first) */
797 diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
798 if (diff)
799 return diff;
800
801 /* reverse order on revision (i.e. latest first) */
802 if (lhs->revision != rhs->revision)
803 return lhs->revision < rhs->revision ? 1 : -1;
804
805 return 0;
806 }
807
808 /* implements compare_fn_t. Sort ascendingly by FROM, TO.
809 */
810 static int
compare_references(const reference_t * const * lhs_p,const reference_t * const * rhs_p)811 compare_references(const reference_t * const * lhs_p,
812 const reference_t * const * rhs_p)
813 {
814 const reference_t * lhs = *lhs_p;
815 const reference_t * rhs = *rhs_p;
816
817 int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
818 return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
819 }
820
821 /* implements compare_fn_t. Assume ascending order by FROM.
822 */
823 static int
compare_ref_to_item(const reference_t * const * lhs_p,const svn_fs_fs__id_part_t * rhs_p)824 compare_ref_to_item(const reference_t * const * lhs_p,
825 const svn_fs_fs__id_part_t * rhs_p)
826 {
827 return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
828 }
829
830 /* Look for the least significant bit set in VALUE and return the smallest
831 * number with the same property, i.e. the largest power of 2 that is a
832 * factor in VALUE. Edge case: roundness(0) := 0 . */
833 static int
roundness(int value)834 roundness(int value)
835 {
836 return value - (value & (value - 1));
837 }
838
839 /* For all paths in first COUNT entries in PATH_ORDER, mark their latest
840 * node as "HEAD". PATH_ORDER must be ordered by path, revision.
841 */
842 static void
classify_nodes(path_order_t ** path_order,int count)843 classify_nodes(path_order_t **path_order,
844 int count)
845 {
846 const svn_prefix_string__t *path;
847 int i;
848
849 /* The logic below would fail for empty ranges. */
850 if (count == 0)
851 return;
852
853 /* All entries are sorted by path, followed by revision.
854 * So, the first index is also HEAD for the first path.
855 */
856 path = path_order[0]->path;
857 path_order[0]->is_head = TRUE;
858
859 /* Since the sorting implicitly groups all entries by path and then sorts
860 * by descending revision within the group, whenever we encounter a new
861 * path, the first entry is "HEAD" for that path.
862 */
863 for (i = 1; i < count; ++i)
864 {
865 /* New path? */
866 if (svn_prefix_string__compare(path, path_order[i]->path))
867 {
868 path = path_order[i]->path;
869 path_order[i]->is_head = TRUE;
870 }
871 }
872 }
873
874 /* Order a range of data collected in CONTEXT such that we can place them
875 * in the desired order. The input is taken from *PATH_ORDER, offsets FIRST
876 * to LAST and then written in the final order to the same range in *TEMP.
877 */
878 static void
sort_reps_range(pack_context_t * context,path_order_t ** path_order,path_order_t ** temp,int first,int last)879 sort_reps_range(pack_context_t *context,
880 path_order_t **path_order,
881 path_order_t **temp,
882 int first,
883 int last)
884 {
885 const svn_prefix_string__t *path;
886 int i, dest;
887 svn_fs_fs__id_part_t rep_id;
888 fs_fs_data_t *ffd = context->fs->fsap_data;
889
890 /* The logic below would fail for empty ranges. */
891 if (first == last)
892 return;
893
894 /* Re-order noderevs like this:
895 *
896 * (1) Most likely to be referenced by future pack files, in path order.
897 * (2) highest revision rep per path + dependency chain
898 * (3) Remaining reps in path, rev order
899 *
900 * We simply pick & chose from the existing path, rev order.
901 */
902 dest = first;
903
904 /* (1) There are two classes of representations that are likely to be
905 * referenced from future shards. These form a "hot zone" of mostly
906 * relevant data, i.e. we try to include as many reps as possible that
907 * are needed for future checkouts while trying to exclude as many as
908 * possible that are likely not needed in future checkouts.
909 *
910 * First, "very round" representations from frequently changing nodes.
911 * That excludes many in-between representations not accessed from HEAD.
912 *
913 * The second class are infrequently changing nodes. Because they are
914 * unlikely to change often in the future, they will remain relevant for
915 * HEAD even over long spans of revisions. They are most likely the only
916 * thing we need from very old pack files.
917 */
918 for (i = first; i < last; ++i)
919 {
920 int round = roundness(path_order[i]->predecessor_count);
921
922 /* Class 1:
923 * Pretty round _and_ a significant stop in the node's delta chain.
924 * This may pick up more than one representation from the same chain
925 * but that's rare and not a problem. Prefer simple checks here.
926 *
927 * The divider of 4 is arbitrary but seems to work well in practice.
928 * Larger values increase the number of items in the "hot zone".
929 * Smaller values make delta chains at HEAD more likely to contain
930 * "cold zone" representations. */
931 svn_boolean_t likely_target
932 = (round >= ffd->max_linear_deltification)
933 && (round >= path_order[i]->predecessor_count / 4);
934
935 /* Class 2:
936 * Anything from short node chains. The default of 16 is generous
937 * but we'd rather include too many than too few nodes here to keep
938 * seeks between different regions of this pack file at a minimum. */
939 svn_boolean_t likely_head
940 = path_order[i]->predecessor_count
941 < ffd->max_linear_deltification;
942
943 /* Pick any node that from either class. */
944 if (likely_target || likely_head)
945 {
946 temp[dest++] = path_order[i];
947 path_order[i] = NULL;
948 }
949 }
950
951 /* (2) For each (remaining) path, pick the nodes along the delta chain
952 * for the highest revision. Due to our ordering, this is the first
953 * node we encounter for any path.
954 *
955 * Most references that don't hit a delta base picked in (1), will
956 * access HEAD of the respective path. Keeping all its dependency chain
957 * in one place turns reconstruction into a linear scan of minimal length.
958 */
959 for (i = first; i < last; ++i)
960 if (path_order[i])
961 {
962 /* This is the first path we still have to handle. */
963 path = path_order[i]->path;
964 rep_id = path_order[i]->rep_id;
965 break;
966 }
967
968 for (i = first; i < last; ++i)
969 if (path_order[i])
970 {
971 /* New path? */
972 if (svn_prefix_string__compare(path, path_order[i]->path))
973 {
974 path = path_order[i]->path;
975 rep_id = path_order[i]->rep_id;
976 }
977
978 /* Pick nodes along the deltification chain. Skip side-branches. */
979 if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id))
980 {
981 reference_t **reference;
982
983 temp[dest++] = path_order[i];
984 path_order[i] = NULL;
985
986 reference = svn_sort__array_lookup(context->references,
987 &rep_id, NULL,
988 (int (*)(const void *, const void *))compare_ref_to_item);
989 if (reference)
990 rep_id = (*reference)->to;
991 }
992 }
993
994 /* (3) All remaining nodes in path, rev order. Linear deltification
995 * makes HEAD delta chains from (2) cover all or most of their deltas
996 * in a given pack file. So, this is just a few remnants that we put
997 * at the end of the pack file.
998 */
999 for (i = first; i < last; ++i)
1000 if (path_order[i])
1001 temp[dest++] = path_order[i];
1002
1003 /* We now know the final ordering. */
1004 assert(dest == last);
1005 }
1006
1007 /* Order the data collected in CONTEXT such that we can place them in the
1008 * desired order.
1009 */
1010 static void
sort_reps(pack_context_t * context)1011 sort_reps(pack_context_t *context)
1012 {
1013 apr_pool_t *temp_pool;
1014 path_order_t **temp, **path_order;
1015 int i, count;
1016
1017 /* We will later assume that there is at least one node / path.
1018 */
1019 if (context->path_order->nelts == 0)
1020 {
1021 assert(context->references->nelts == 0);
1022 return;
1023 }
1024
1025 /* Sort containers by path and IDs, respectively.
1026 */
1027 svn_sort__array(context->path_order,
1028 (int (*)(const void *, const void *))compare_path_order);
1029 svn_sort__array(context->references,
1030 (int (*)(const void *, const void *))compare_references);
1031
1032 /* Directories are already in front; sort directories section and files
1033 * section separately but use the same heuristics (see sub-function).
1034 */
1035 temp_pool = svn_pool_create(context->info_pool);
1036 count = context->path_order->nelts;
1037 temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
1038 path_order = (void *)context->path_order->elts;
1039
1040 /* Mark nodes depending on what other nodes exist for the same path etc. */
1041 classify_nodes(path_order, count);
1042
1043 /* Rearrange those sub-sections separately. */
1044 sort_reps_range(context, path_order, temp, 0, count);
1045
1046 /* We now know the final ordering. */
1047 for (i = 0; i < count; ++i)
1048 path_order[i] = temp[i];
1049
1050 svn_pool_destroy(temp_pool);
1051 }
1052
1053 /* implements compare_fn_t. Place LHS before RHS, if the latter is older.
1054 */
1055 static int
compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,const svn_fs_fs__p2l_entry_t * const * rhs)1056 compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
1057 const svn_fs_fs__p2l_entry_t * const * rhs)
1058 {
1059 assert(*lhs != *rhs);
1060
1061 if ((*lhs)->item.revision == (*rhs)->item.revision)
1062 return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1;
1063
1064 return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1;
1065 }
1066
1067 /* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age. Place the latest
1068 * items first.
1069 */
1070 static void
sort_items(apr_array_header_t * entries)1071 sort_items(apr_array_header_t *entries)
1072 {
1073 svn_sort__array(entries,
1074 (int (*)(const void *, const void *))compare_p2l_info);
1075 }
1076
1077 /* Return the remaining unused bytes in the current block in CONTEXT's
1078 * pack file.
1079 */
1080 static apr_off_t
get_block_left(pack_context_t * context)1081 get_block_left(pack_context_t *context)
1082 {
1083 fs_fs_data_t *ffd = context->fs->fsap_data;
1084 return ffd->block_size - (context->pack_offset % ffd->block_size);
1085 }
1086
1087 /* To prevent items from overlapping a block boundary, we will usually
1088 * put them into the next block and top up the old one with NUL bytes.
1089 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
1090 * not fit into the current block and the padding is short enough.
1091 * Use POOL for allocations.
1092 */
1093 static svn_error_t *
auto_pad_block(pack_context_t * context,apr_off_t to_add,apr_pool_t * pool)1094 auto_pad_block(pack_context_t *context,
1095 apr_off_t to_add,
1096 apr_pool_t *pool)
1097 {
1098 fs_fs_data_t *ffd = context->fs->fsap_data;
1099
1100 /* This is the maximum number of bytes "wasted" that way per block.
1101 * Larger items will cross the block boundaries. */
1102 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
1103
1104 /* Is wasted space small enough to align the current item to the next
1105 * block? */
1106 apr_off_t padding = get_block_left(context);
1107
1108 if (padding < to_add && padding < max_padding)
1109 {
1110 /* Yes. To up with NUL bytes and don't forget to create
1111 * an P2L index entry marking this section as unused. */
1112 svn_fs_fs__p2l_entry_t null_entry;
1113
1114 null_entry.offset = context->pack_offset;
1115 null_entry.size = padding;
1116 null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
1117 null_entry.item.revision = SVN_INVALID_REVNUM;
1118 null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
1119 null_entry.fnv1_checksum = 0;
1120
1121 SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
1122 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1123 context->proto_p2l_index, &null_entry, pool));
1124 context->pack_offset += padding;
1125 }
1126
1127 return SVN_NO_ERROR;
1128 }
1129
1130 /* Read the contents of ITEM, if not empty, from TEMP_FILE and write it
1131 * to CONTEXT->PACK_FILE. Use POOL for allocations.
1132 */
1133 static svn_error_t *
store_item(pack_context_t * context,apr_file_t * temp_file,svn_fs_fs__p2l_entry_t * item,apr_pool_t * pool)1134 store_item(pack_context_t *context,
1135 apr_file_t *temp_file,
1136 svn_fs_fs__p2l_entry_t *item,
1137 apr_pool_t *pool)
1138 {
1139 apr_off_t safety_margin;
1140
1141 /* skip empty entries */
1142 if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
1143 return SVN_NO_ERROR;
1144
1145 /* If the next item does not fit into the current block, auto-pad it.
1146 Take special care of textual noderevs since their parsers may
1147 prefetch up to 80 bytes and we don't want them to cross block
1148 boundaries. */
1149 safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV
1150 ? SVN__LINE_CHUNK_SIZE
1151 : 0;
1152 SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool));
1153
1154 /* select the item in the source file and copy it into the target
1155 * pack file */
1156 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool));
1157 SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1158 item->size, pool));
1159
1160 /* write index entry and update current position */
1161 item->offset = context->pack_offset;
1162 context->pack_offset += item->size;
1163
1164 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index,
1165 item, pool));
1166
1167 APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item;
1168
1169 return SVN_NO_ERROR;
1170 }
1171
1172 /* Read the contents of the non-empty items in ITEMS from TEMP_FILE and
1173 * write them to CONTEXT->PACK_FILE. Use POOL for allocations.
1174 */
1175 static svn_error_t *
store_items(pack_context_t * context,apr_file_t * temp_file,apr_array_header_t * items,apr_pool_t * pool)1176 store_items(pack_context_t *context,
1177 apr_file_t *temp_file,
1178 apr_array_header_t *items,
1179 apr_pool_t *pool)
1180 {
1181 int i;
1182 apr_pool_t *iterpool = svn_pool_create(pool);
1183
1184 /* copy all items in strict order */
1185 for (i = 0; i < items->nelts; ++i)
1186 {
1187 svn_pool_clear(iterpool);
1188 SVN_ERR(store_item(context, temp_file,
1189 APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *),
1190 iterpool));
1191 }
1192
1193 svn_pool_destroy(iterpool);
1194
1195 return SVN_NO_ERROR;
1196 }
1197
1198 /* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
1199 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1200 * Use POOL for temporary allocations.
1201 */
1202 static svn_error_t *
copy_reps_from_temp(pack_context_t * context,apr_file_t * temp_file,apr_pool_t * pool)1203 copy_reps_from_temp(pack_context_t *context,
1204 apr_file_t *temp_file,
1205 apr_pool_t *pool)
1206 {
1207 apr_pool_t *iterpool = svn_pool_create(pool);
1208 apr_array_header_t *path_order = context->path_order;
1209 int i;
1210
1211 /* copy items in path order. Exclude the non-HEAD noderevs. */
1212 for (i = 0; i < path_order->nelts; ++i)
1213 {
1214 path_order_t *current_path;
1215 svn_fs_fs__p2l_entry_t *node_part;
1216 svn_fs_fs__p2l_entry_t *rep_part;
1217
1218 svn_pool_clear(iterpool);
1219
1220 current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
1221 if (current_path->is_head)
1222 {
1223 node_part = get_item(context, ¤t_path->noderev_id, TRUE);
1224 if (node_part)
1225 SVN_ERR(store_item(context, temp_file, node_part, iterpool));
1226 }
1227
1228 rep_part = get_item(context, ¤t_path->rep_id, TRUE);
1229 if (rep_part)
1230 SVN_ERR(store_item(context, temp_file, rep_part, iterpool));
1231 }
1232
1233 /* copy the remaining non-head noderevs. */
1234 for (i = 0; i < path_order->nelts; ++i)
1235 {
1236 path_order_t *current_path;
1237 svn_fs_fs__p2l_entry_t *node_part;
1238
1239 svn_pool_clear(iterpool);
1240
1241 current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
1242 node_part = get_item(context, ¤t_path->noderev_id, TRUE);
1243 if (node_part)
1244 SVN_ERR(store_item(context, temp_file, node_part, iterpool));
1245 }
1246
1247 svn_pool_destroy(iterpool);
1248
1249 return SVN_NO_ERROR;
1250 }
1251
1252 /* implements compare_fn_t. Place LHS before RHS, if the latter belongs to
1253 * a newer revision.
1254 */
1255 static int
compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,const svn_fs_fs__p2l_entry_t * const * rhs_p)1256 compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
1257 const svn_fs_fs__p2l_entry_t * const * rhs_p)
1258 {
1259 const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
1260 const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;
1261
1262 if (lhs->item.revision == rhs->item.revision)
1263 return 0;
1264
1265 return lhs->item.revision < rhs->item.revision ? -1 : 1;
1266 }
1267
1268 /* Write the log-to-phys proto index file for CONTEXT and use POOL for
1269 * temporary allocations. All items in all buckets must have been placed
1270 * by now.
1271 */
1272 static svn_error_t *
write_l2p_index(pack_context_t * context,apr_pool_t * pool)1273 write_l2p_index(pack_context_t *context,
1274 apr_pool_t *pool)
1275 {
1276 apr_pool_t *iterpool = svn_pool_create(pool);
1277 svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
1278 int i, dest;
1279
1280 /* eliminate empty entries from CONTEXT->REPS */
1281 for (i = 0, dest = 0; i < context->reps->nelts; ++i)
1282 {
1283 svn_fs_fs__p2l_entry_t *entry
1284 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1285 if (entry)
1286 APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *)
1287 = entry;
1288 }
1289 context->reps->nelts = dest;
1290
1291 /* we need to write the l2p index revision by revision */
1292 svn_sort__array(context->reps,
1293 (int (*)(const void *, const void *))compare_p2l_info_rev);
1294
1295 /* write index entries */
1296 for (i = 0; i < context->reps->nelts; ++i)
1297 {
1298 svn_fs_fs__p2l_entry_t *p2l_entry
1299 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1300 if (p2l_entry == NULL)
1301 continue;
1302
1303 /* next revision? */
1304 if (prev_rev != p2l_entry->item.revision)
1305 {
1306 prev_rev = p2l_entry->item.revision;
1307 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(
1308 context->proto_l2p_index, iterpool));
1309 }
1310
1311 /* add entry */
1312 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index,
1313 p2l_entry->offset,
1314 p2l_entry->item.number,
1315 iterpool));
1316
1317 /* keep memory usage in check */
1318 if (i % 256 == 0)
1319 svn_pool_clear(iterpool);
1320 }
1321
1322 svn_pool_destroy(iterpool);
1323
1324 return SVN_NO_ERROR;
1325 }
1326
1327 /* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1328 * to 4. Use POOL for allocations.
1329 */
1330 static svn_error_t *
pack_range(pack_context_t * context,apr_pool_t * pool)1331 pack_range(pack_context_t *context,
1332 apr_pool_t *pool)
1333 {
1334 fs_fs_data_t *ffd = context->fs->fsap_data;
1335 apr_pool_t *revpool = svn_pool_create(pool);
1336 apr_pool_t *iterpool = svn_pool_create(pool);
1337 apr_pool_t *iterpool2 = svn_pool_create(pool);
1338
1339 /* Phase 2: Copy items into various buckets and build tracking info */
1340 svn_revnum_t revision;
1341 for (revision = context->start_rev; revision < context->end_rev; ++revision)
1342 {
1343 apr_off_t offset = 0;
1344 svn_fs_fs__revision_file_t *rev_file;
1345
1346 svn_pool_clear(revpool);
1347
1348 /* Get the rev file dimensions (mainly index locations). */
1349 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1350 revision, revpool, iterpool));
1351 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1352
1353 /* store the indirect array index */
1354 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1355
1356 /* read the phys-to-log index file until we covered the whole rev file.
1357 * That index contains enough info to build both target indexes from it. */
1358 while (offset < rev_file->l2p_offset)
1359 {
1360 /* read one cluster */
1361 int i;
1362 apr_array_header_t *entries;
1363
1364 svn_pool_clear(iterpool);
1365
1366 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
1367 rev_file, revision, offset,
1368 ffd->p2l_page_size, iterpool,
1369 iterpool));
1370
1371 for (i = 0; i < entries->nelts; ++i)
1372 {
1373 svn_fs_fs__p2l_entry_t *entry
1374 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1375
1376 /* skip first entry if that was duplicated due crossing a
1377 cluster boundary */
1378 if (offset > entry->offset)
1379 continue;
1380
1381 svn_pool_clear(iterpool2);
1382
1383 /* process entry while inside the rev file */
1384 offset = entry->offset;
1385 if (offset < rev_file->l2p_offset)
1386 {
1387 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
1388 iterpool2));
1389
1390 if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
1391 SVN_ERR(copy_item_to_temp(context,
1392 context->changes,
1393 context->changes_file,
1394 rev_file->file, entry,
1395 iterpool2));
1396 else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
1397 SVN_ERR(copy_item_to_temp(context,
1398 context->file_props,
1399 context->file_props_file,
1400 rev_file->file, entry,
1401 iterpool2));
1402 else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1403 SVN_ERR(copy_item_to_temp(context,
1404 context->dir_props,
1405 context->dir_props_file,
1406 rev_file->file, entry,
1407 iterpool2));
1408 else if ( entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
1409 || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
1410 SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
1411 iterpool2));
1412 else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
1413 SVN_ERR(copy_node_to_temp(context, rev_file, entry,
1414 iterpool2));
1415 else
1416 SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);
1417
1418 offset += entry->size;
1419 }
1420 }
1421
1422 if (context->cancel_func)
1423 SVN_ERR(context->cancel_func(context->cancel_baton));
1424 }
1425 }
1426
1427 svn_pool_destroy(iterpool2);
1428 svn_pool_destroy(iterpool);
1429
1430 /* phase 3: placement.
1431 * Use "newest first" placement for simple items. */
1432 sort_items(context->changes);
1433 sort_items(context->file_props);
1434 sort_items(context->dir_props);
1435
1436 /* follow dependencies recursively for noderevs and data representations */
1437 sort_reps(context);
1438
1439 /* phase 4: copy bucket data to pack file. Write P2L index. */
1440 SVN_ERR(store_items(context, context->changes_file, context->changes,
1441 revpool));
1442 svn_pool_clear(revpool);
1443 SVN_ERR(store_items(context, context->file_props_file, context->file_props,
1444 revpool));
1445 svn_pool_clear(revpool);
1446 SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
1447 revpool));
1448 svn_pool_clear(revpool);
1449 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1450 svn_pool_clear(revpool);
1451
1452 /* write L2P index as well (now that we know all target offsets) */
1453 SVN_ERR(write_l2p_index(context, revpool));
1454
1455 svn_pool_destroy(revpool);
1456
1457 return SVN_NO_ERROR;
1458 }
1459
1460 /* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1461 * This function will only be used for very large revisions (>>100k changes).
1462 * Use POOL for temporary allocations.
1463 */
1464 static svn_error_t *
append_revision(pack_context_t * context,apr_pool_t * pool)1465 append_revision(pack_context_t *context,
1466 apr_pool_t *pool)
1467 {
1468 fs_fs_data_t *ffd = context->fs->fsap_data;
1469 apr_off_t offset = 0;
1470 apr_pool_t *iterpool = svn_pool_create(pool);
1471 svn_fs_fs__revision_file_t *rev_file;
1472 svn_filesize_t revdata_size;
1473
1474 /* Copy all non-index contents the rev file to the end of the pack file. */
1475 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1476 context->start_rev, pool,
1477 iterpool));
1478 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1479 revdata_size = rev_file->l2p_offset;
1480
1481 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0,
1482 iterpool));
1483 SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1484 revdata_size, iterpool));
1485
1486 /* mark the start of a new revision */
1487 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
1488 pool));
1489
1490 /* read the phys-to-log index file until we covered the whole rev file.
1491 * That index contains enough info to build both target indexes from it. */
1492 while (offset < revdata_size)
1493 {
1494 /* read one cluster */
1495 int i;
1496 apr_array_header_t *entries;
1497
1498 svn_pool_clear(iterpool);
1499 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
1500 context->start_rev, offset,
1501 ffd->p2l_page_size, iterpool,
1502 iterpool));
1503
1504 for (i = 0; i < entries->nelts; ++i)
1505 {
1506 svn_fs_fs__p2l_entry_t *entry
1507 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1508
1509 /* skip first entry if that was duplicated due crossing a
1510 cluster boundary */
1511 if (offset > entry->offset)
1512 continue;
1513
1514 /* process entry while inside the rev file */
1515 offset = entry->offset;
1516 if (offset < revdata_size)
1517 {
1518 entry->offset += context->pack_offset;
1519 offset += entry->size;
1520 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
1521 context->proto_l2p_index, entry->offset,
1522 entry->item.number, iterpool));
1523 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1524 context->proto_p2l_index, entry, iterpool));
1525 }
1526 }
1527 }
1528
1529 svn_pool_destroy(iterpool);
1530 context->pack_offset += revdata_size;
1531
1532 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1533
1534 return SVN_NO_ERROR;
1535 }
1536
1537 /* Logical addressing mode packing logic.
1538 *
1539 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1540 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations. Limit
1541 * the extra memory consumption to MAX_MEM bytes. If FLUSH_TO_DISK is
1542 * non-zero, do not return until the data has actually been written on
1543 * the disk. CANCEL_FUNC and CANCEL_BATON are what you think they are.
1544 */
1545 static svn_error_t *
pack_log_addressed(svn_fs_t * fs,const char * pack_file_dir,const char * shard_dir,svn_revnum_t shard_rev,apr_size_t max_mem,svn_boolean_t flush_to_disk,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)1546 pack_log_addressed(svn_fs_t *fs,
1547 const char *pack_file_dir,
1548 const char *shard_dir,
1549 svn_revnum_t shard_rev,
1550 apr_size_t max_mem,
1551 svn_boolean_t flush_to_disk,
1552 svn_cancel_func_t cancel_func,
1553 void *cancel_baton,
1554 apr_pool_t *pool)
1555 {
1556 enum
1557 {
1558 /* estimated amount of memory used to represent one item in memory
1559 * during rev file packing */
1560 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1561 + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1562 + APR_ALIGN_DEFAULT(sizeof(reference_t))
1563 + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
1564 + 6 * sizeof(void*)
1565 };
1566
1567 int max_items;
1568 apr_array_header_t *max_ids;
1569 pack_context_t context = { 0 };
1570 int i;
1571 apr_size_t item_count = 0;
1572 apr_pool_t *iterpool = svn_pool_create(pool);
1573
1574 /* Prevent integer overflow. We use apr arrays to process the items so
1575 * the maximum number of items is INT_MAX. */
1576 {
1577 apr_size_t temp = max_mem / PER_ITEM_MEM;
1578 SVN_ERR_ASSERT(temp <= INT_MAX);
1579 max_items = (int)temp;
1580 }
1581
1582 /* set up a pack context */
1583 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1584 shard_rev, max_items, flush_to_disk,
1585 cancel_func, cancel_baton, pool));
1586
1587 /* phase 1: determine the size of the revisions to pack */
1588 SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
1589 context.shard_end_rev - shard_rev,
1590 pool, pool));
1591
1592 /* pack revisions in ranges that don't exceed MAX_MEM */
1593 for (i = 0; i < max_ids->nelts; ++i)
1594 if ( APR_ARRAY_IDX(max_ids, i, apr_uint64_t)
1595 <= (apr_uint64_t)max_items - item_count)
1596 {
1597 item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1598 context.end_rev++;
1599 }
1600 else
1601 {
1602 svn_pool_clear(iterpool);
1603
1604 /* some unpacked revisions before this one? */
1605 if (context.start_rev < context.end_rev)
1606 {
1607 /* pack them intelligently (might be just 1 rev but still ...) */
1608 SVN_ERR(pack_range(&context, iterpool));
1609 SVN_ERR(reset_pack_context(&context, iterpool));
1610 item_count = 0;
1611 }
1612
1613 /* next revision range is to start with the current revision */
1614 context.start_rev = i + context.shard_rev;
1615 context.end_rev = context.start_rev + 1;
1616
1617 /* if this is a very large revision, we must place it as is */
1618 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1619 {
1620 SVN_ERR(append_revision(&context, iterpool));
1621 context.start_rev++;
1622 }
1623 else
1624 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1625 }
1626
1627 /* non-empty revision range at the end? */
1628 if (context.start_rev < context.end_rev)
1629 SVN_ERR(pack_range(&context, iterpool));
1630
1631 /* last phase: finalize indexes and clean up */
1632 SVN_ERR(reset_pack_context(&context, iterpool));
1633 SVN_ERR(close_pack_context(&context, iterpool));
1634 svn_pool_destroy(iterpool);
1635
1636 return SVN_NO_ERROR;
1637 }
1638
1639 /* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
1640 Use POOL for temporary allocations. */
1641 svn_error_t *
svn_fs_fs__get_packed_offset(apr_off_t * rev_offset,svn_fs_t * fs,svn_revnum_t rev,apr_pool_t * pool)1642 svn_fs_fs__get_packed_offset(apr_off_t *rev_offset,
1643 svn_fs_t *fs,
1644 svn_revnum_t rev,
1645 apr_pool_t *pool)
1646 {
1647 fs_fs_data_t *ffd = fs->fsap_data;
1648 svn_stream_t *manifest_stream;
1649 svn_boolean_t is_cached;
1650 svn_revnum_t shard;
1651 apr_int64_t shard_pos;
1652 apr_array_header_t *manifest;
1653 apr_pool_t *iterpool;
1654
1655 shard = rev / ffd->max_files_per_dir;
1656
1657 /* position of the shard within the manifest */
1658 shard_pos = rev % ffd->max_files_per_dir;
1659
1660 /* fetch exactly that element into *rev_offset, if the manifest is found
1661 in the cache */
1662 SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
1663 ffd->packed_offset_cache, &shard,
1664 svn_fs_fs__get_sharded_offset, &shard_pos,
1665 pool));
1666
1667 if (is_cached)
1668 return SVN_NO_ERROR;
1669
1670 /* Open the manifest file. */
1671 SVN_ERR(svn_stream_open_readonly(&manifest_stream,
1672 svn_fs_fs__path_rev_packed(fs, rev,
1673 PATH_MANIFEST,
1674 pool),
1675 pool, pool));
1676
1677 /* While we're here, let's just read the entire manifest file into an array,
1678 so we can cache the entire thing. */
1679 iterpool = svn_pool_create(pool);
1680 manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t));
1681 while (1)
1682 {
1683 svn_boolean_t eof;
1684 apr_int64_t val;
1685
1686 svn_pool_clear(iterpool);
1687 SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream,
1688 iterpool));
1689 if (eof)
1690 break;
1691
1692 APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
1693 }
1694 svn_pool_destroy(iterpool);
1695
1696 *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
1697 apr_off_t);
1698
1699 /* Close up shop and cache the array. */
1700 SVN_ERR(svn_stream_close(manifest_stream));
1701 return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
1702 }
1703
1704 /* Packing logic for physical addresssing mode:
1705 * Simply concatenate all revision contents.
1706 *
1707 * Pack the revision shard starting at SHARD_REV containing exactly
1708 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1709 * using POOL for allocations. If FLUSH_TO_DISK is non-zero, do not
1710 * return until the data has actually been written on the disk.
1711 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
1712 */
1713 static svn_error_t *
pack_phys_addressed(const char * pack_file_dir,const char * shard_path,svn_revnum_t start_rev,int max_files_per_dir,svn_boolean_t flush_to_disk,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)1714 pack_phys_addressed(const char *pack_file_dir,
1715 const char *shard_path,
1716 svn_revnum_t start_rev,
1717 int max_files_per_dir,
1718 svn_boolean_t flush_to_disk,
1719 svn_cancel_func_t cancel_func,
1720 void *cancel_baton,
1721 apr_pool_t *pool)
1722 {
1723 const char *pack_file_path, *manifest_file_path;
1724 apr_file_t *pack_file;
1725 apr_file_t *manifest_file;
1726 svn_stream_t *manifest_stream;
1727 svn_revnum_t end_rev, rev;
1728 apr_pool_t *iterpool;
1729
1730 /* Some useful paths. */
1731 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1732 manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);
1733
1734 /* Create the new directory and pack file.
1735 * Use unbuffered apr_file_t since we're going to write using 16kb
1736 * chunks. */
1737 SVN_ERR(svn_io_file_open(&pack_file, pack_file_path,
1738 APR_WRITE | APR_CREATE | APR_EXCL,
1739 APR_OS_DEFAULT, pool));
1740
1741 /* Create the manifest file. */
1742 SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path,
1743 APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL,
1744 APR_OS_DEFAULT, pool));
1745 manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool);
1746
1747 end_rev = start_rev + max_files_per_dir - 1;
1748 iterpool = svn_pool_create(pool);
1749
1750 /* Iterate over the revisions in this shard, squashing them together. */
1751 for (rev = start_rev; rev <= end_rev; rev++)
1752 {
1753 svn_stream_t *rev_stream;
1754 const char *path;
1755 apr_off_t offset;
1756 apr_file_t *rev_file;
1757
1758 svn_pool_clear(iterpool);
1759
1760 path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
1761 iterpool);
1762
1763 /* Obtain current offset in pack file. */
1764 SVN_ERR(svn_io_file_get_offset(&offset, pack_file, iterpool));
1765
1766 /* build manifest */
1767 SVN_ERR(svn_stream_printf(manifest_stream, iterpool,
1768 "%" APR_OFF_T_FMT "\n", offset));
1769
1770 /* Copy all the bits from the rev file to the end of the pack file.
1771 * Use unbuffered apr_file_t since we're going to write using 16kb
1772 * chunks. */
1773 SVN_ERR(svn_io_file_open(&rev_file, path, APR_READ, APR_OS_DEFAULT,
1774 iterpool));
1775 rev_stream = svn_stream_from_aprfile2(rev_file, FALSE, iterpool);
1776 SVN_ERR(svn_stream_copy3(rev_stream,
1777 svn_stream_from_aprfile2(pack_file, TRUE,
1778 iterpool),
1779 cancel_func, cancel_baton, iterpool));
1780 }
1781
1782 /* Close stream over APR file. */
1783 SVN_ERR(svn_stream_close(manifest_stream));
1784
1785 /* Ensure that pack file is written to disk. */
1786 if (flush_to_disk)
1787 SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool));
1788 SVN_ERR(svn_io_file_close(manifest_file, pool));
1789
1790 /* disallow write access to the manifest file */
1791 SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));
1792
1793 /* Ensure that pack file is written to disk. */
1794 if (flush_to_disk)
1795 SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool));
1796 SVN_ERR(svn_io_file_close(pack_file, pool));
1797
1798 svn_pool_destroy(iterpool);
1799
1800 return SVN_NO_ERROR;
1801 }
1802
1803 /* In filesystem FS, pack the revision SHARD containing exactly
1804 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1805 * using POOL for allocations. Try to limit the amount of temporary
1806 * memory needed to MAX_MEM bytes. If FLUSH_TO_DISK is non-zero, do
1807 * not return until the data has actually been written on the disk.
1808 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
1809 *
1810 * If for some reason we detect a partial packing already performed, we
1811 * remove the pack file and start again.
1812 *
1813 * The actual packing will be done in a format-specific sub-function.
1814 */
1815 static svn_error_t *
pack_rev_shard(svn_fs_t * fs,const char * pack_file_dir,const char * shard_path,apr_int64_t shard,int max_files_per_dir,apr_size_t max_mem,svn_boolean_t flush_to_disk,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)1816 pack_rev_shard(svn_fs_t *fs,
1817 const char *pack_file_dir,
1818 const char *shard_path,
1819 apr_int64_t shard,
1820 int max_files_per_dir,
1821 apr_size_t max_mem,
1822 svn_boolean_t flush_to_disk,
1823 svn_cancel_func_t cancel_func,
1824 void *cancel_baton,
1825 apr_pool_t *pool)
1826 {
1827 const char *pack_file_path;
1828 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
1829
1830 /* Some useful paths. */
1831 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1832
1833 /* Remove any existing pack file for this shard, since it is incomplete. */
1834 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
1835 pool));
1836
1837 /* Create the new directory and pack file. */
1838 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
1839
1840 /* Index information files */
1841 if (svn_fs_fs__use_log_addressing(fs))
1842 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path,
1843 shard_rev, max_mem, flush_to_disk,
1844 cancel_func, cancel_baton, pool));
1845 else
1846 SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
1847 max_files_per_dir, flush_to_disk,
1848 cancel_func, cancel_baton, pool));
1849
1850 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
1851 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));
1852
1853 return SVN_NO_ERROR;
1854 }
1855
1856 /* Baton struct used by pack_body(), pack_shard() and synced_pack_shard().
1857 These calls are nested and for every level additional fields will be
1858 available. */
1859 struct pack_baton
1860 {
1861 /* Valid when entering pack_body(). */
1862 svn_fs_t *fs;
1863 svn_fs_pack_notify_t notify_func;
1864 void *notify_baton;
1865 svn_cancel_func_t cancel_func;
1866 void *cancel_baton;
1867 size_t max_mem;
1868
1869 /* Additional entries valid when entering pack_shard(). */
1870 const char *revs_dir;
1871 const char *revsprops_dir;
1872 apr_int64_t shard;
1873
1874 /* Additional entries valid when entering synced_pack_shard(). */
1875 const char *rev_shard_path;
1876 };
1877
1878
1879 /* Part of the pack process that requires global (write) synchronization.
1880 * We pack the revision properties of the shard described by BATON and
1881 * In the file system at FS_PATH, pack the SHARD in REVS_DIR and replace
1882 * the non-packed revprop & rev shard folder(s) with the packed ones.
1883 * The packed rev folder has been created prior to calling this function.
1884 */
1885 static svn_error_t *
synced_pack_shard(void * baton,apr_pool_t * pool)1886 synced_pack_shard(void *baton,
1887 apr_pool_t *pool)
1888 {
1889 struct pack_baton *pb = baton;
1890 fs_fs_data_t *ffd = pb->fs->fsap_data;
1891 const char *revprops_shard_path, *revprops_pack_file_dir;
1892
1893 /* if enabled, pack the revprops in an equivalent way */
1894 if (pb->revsprops_dir)
1895 {
1896 apr_int64_t pack_size_limit = 0.9 * ffd->revprop_pack_size;
1897
1898 revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir,
1899 apr_psprintf(pool,
1900 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1901 pb->shard),
1902 pool);
1903 revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1904 apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard),
1905 pool);
1906
1907 SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir,
1908 revprops_shard_path,
1909 pb->shard,
1910 ffd->max_files_per_dir,
1911 pack_size_limit,
1912 ffd->compress_packed_revprops
1913 ? SVN__COMPRESSION_ZLIB_DEFAULT
1914 : SVN__COMPRESSION_NONE,
1915 ffd->flush_to_disk,
1916 pb->cancel_func,
1917 pb->cancel_baton,
1918 pool));
1919 }
1920
1921 /* Update the min-unpacked-rev file to reflect our newly packed shard. */
1922 SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs,
1923 (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir),
1924 pool));
1925 ffd->min_unpacked_rev
1926 = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir);
1927
1928 /* Finally, remove the existing shard directories.
1929 * For revprops, clean up older obsolete shards as well as they might
1930 * have been left over from an interrupted FS upgrade. */
1931 SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE,
1932 pb->cancel_func, pb->cancel_baton, pool));
1933 if (pb->revsprops_dir)
1934 {
1935 svn_node_kind_t kind = svn_node_dir;
1936 apr_int64_t to_cleanup = pb->shard;
1937 do
1938 {
1939 SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path,
1940 to_cleanup,
1941 ffd->max_files_per_dir,
1942 pb->cancel_func,
1943 pb->cancel_baton,
1944 pool));
1945
1946 /* If the previous shard exists, clean it up as well.
1947 Don't try to clean up shard 0 as it we can't tell quickly
1948 whether it actually needs cleaning up. */
1949 revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1950 apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup),
1951 pool);
1952 SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool));
1953 }
1954 while (kind == svn_node_dir && to_cleanup > 0);
1955 }
1956
1957 return SVN_NO_ERROR;
1958 }
1959
1960 /* Pack the shard described by BATON.
1961 *
1962 * If for some reason we detect a partial packing already performed,
1963 * we remove the pack file and start again.
1964 */
1965 static svn_error_t *
pack_shard(struct pack_baton * baton,apr_pool_t * pool)1966 pack_shard(struct pack_baton *baton,
1967 apr_pool_t *pool)
1968 {
1969 fs_fs_data_t *ffd = baton->fs->fsap_data;
1970 const char *rev_pack_file_dir;
1971
1972 /* Notify caller we're starting to pack this shard. */
1973 if (baton->notify_func)
1974 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1975 svn_fs_pack_notify_start, pool));
1976
1977 /* Some useful paths. */
1978 rev_pack_file_dir = svn_dirent_join(baton->revs_dir,
1979 apr_psprintf(pool,
1980 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1981 baton->shard),
1982 pool);
1983 baton->rev_shard_path = svn_dirent_join(baton->revs_dir,
1984 apr_psprintf(pool,
1985 "%" APR_INT64_T_FMT,
1986 baton->shard),
1987 pool);
1988
1989 /* pack the revision content */
1990 SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path,
1991 baton->shard, ffd->max_files_per_dir,
1992 baton->max_mem, ffd->flush_to_disk,
1993 baton->cancel_func, baton->cancel_baton, pool));
1994
1995 /* For newer repo formats, we only acquired the pack lock so far.
1996 Before modifying the repo state by switching over to the packed
1997 data, we need to acquire the global (write) lock. */
1998 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
1999 SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton,
2000 pool));
2001 else
2002 SVN_ERR(synced_pack_shard(baton, pool));
2003
2004 /* Notify caller we're starting to pack this shard. */
2005 if (baton->notify_func)
2006 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
2007 svn_fs_pack_notify_end, pool));
2008
2009 return SVN_NO_ERROR;
2010 }
2011
2012 /* Read the youngest rev and the first non-packed rev info for FS from disk.
2013 Set *FULLY_PACKED when there is no completed unpacked shard.
2014 Use SCRATCH_POOL for temporary allocations.
2015 */
2016 static svn_error_t *
get_pack_status(svn_boolean_t * fully_packed,svn_fs_t * fs,apr_pool_t * scratch_pool)2017 get_pack_status(svn_boolean_t *fully_packed,
2018 svn_fs_t *fs,
2019 apr_pool_t *scratch_pool)
2020 {
2021 fs_fs_data_t *ffd = fs->fsap_data;
2022 apr_int64_t completed_shards;
2023 svn_revnum_t youngest;
2024
2025 SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, fs,
2026 scratch_pool));
2027
2028 SVN_ERR(svn_fs_fs__youngest_rev(&youngest, fs, scratch_pool));
2029 completed_shards = (youngest + 1) / ffd->max_files_per_dir;
2030
2031 /* See if we've already completed all possible shards thus far. */
2032 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
2033 *fully_packed = TRUE;
2034 else
2035 *fully_packed = FALSE;
2036
2037 return SVN_NO_ERROR;
2038 }
2039
2040 /* The work-horse for svn_fs_fs__pack, called with the FS write lock.
2041 This implements the svn_fs_fs__with_write_lock() 'body' callback
2042 type. BATON is a 'struct pack_baton *'.
2043
2044 WARNING: if you add a call to this function, please note:
2045 The code currently assumes that any piece of code running with
2046 the write-lock set can rely on the ffd->min_unpacked_rev and
2047 ffd->min_unpacked_revprop caches to be up-to-date (and, by
2048 extension, on not having to use a retry when calling
2049 svn_fs_fs__path_rev_absolute() and friends). If you add a call
2050 to this function, consider whether you have to call
2051 svn_fs_fs__update_min_unpacked_rev().
2052 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
2053 */
2054 static svn_error_t *
pack_body(void * baton,apr_pool_t * pool)2055 pack_body(void *baton,
2056 apr_pool_t *pool)
2057 {
2058 struct pack_baton *pb = baton;
2059 fs_fs_data_t *ffd = pb->fs->fsap_data;
2060 apr_int64_t completed_shards;
2061 apr_pool_t *iterpool;
2062 svn_boolean_t fully_packed;
2063
2064 /* Since another process might have already packed the repo,
2065 we need to re-read the pack status. */
2066 SVN_ERR(get_pack_status(&fully_packed, pb->fs, pool));
2067 if (fully_packed)
2068 {
2069 if (pb->notify_func)
2070 SVN_ERR(pb->notify_func(pb->notify_baton,
2071 ffd->min_unpacked_rev / ffd->max_files_per_dir,
2072 svn_fs_pack_notify_noop, pool));
2073
2074 return SVN_NO_ERROR;
2075 }
2076
2077 completed_shards = (ffd->youngest_rev_cache + 1) / ffd->max_files_per_dir;
2078 pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
2079 if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
2080 pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
2081 pool);
2082
2083 iterpool = svn_pool_create(pool);
2084 for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2085 pb->shard < completed_shards;
2086 pb->shard++)
2087 {
2088 svn_pool_clear(iterpool);
2089
2090 if (pb->cancel_func)
2091 SVN_ERR(pb->cancel_func(pb->cancel_baton));
2092
2093 SVN_ERR(pack_shard(pb, iterpool));
2094 }
2095
2096 svn_pool_destroy(iterpool);
2097 return SVN_NO_ERROR;
2098 }
2099
2100 svn_error_t *
svn_fs_fs__pack(svn_fs_t * fs,apr_size_t max_mem,svn_fs_pack_notify_t notify_func,void * notify_baton,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)2101 svn_fs_fs__pack(svn_fs_t *fs,
2102 apr_size_t max_mem,
2103 svn_fs_pack_notify_t notify_func,
2104 void *notify_baton,
2105 svn_cancel_func_t cancel_func,
2106 void *cancel_baton,
2107 apr_pool_t *pool)
2108 {
2109 struct pack_baton pb = { 0 };
2110 fs_fs_data_t *ffd = fs->fsap_data;
2111 svn_error_t *err;
2112 svn_boolean_t fully_packed;
2113
2114 /* If the repository isn't a new enough format, we don't support packing.
2115 Return a friendly error to that effect. */
2116 if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT)
2117 return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2118 _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
2119 ffd->format);
2120
2121 /* If we aren't using sharding, we can't do any packing, so quit. */
2122 if (!ffd->max_files_per_dir)
2123 {
2124 if (notify_func)
2125 SVN_ERR(notify_func(notify_baton, -1, svn_fs_pack_notify_noop, pool));
2126
2127 return SVN_NO_ERROR;
2128 }
2129
2130 /* Is there we even anything to do?. */
2131 SVN_ERR(get_pack_status(&fully_packed, fs, pool));
2132 if (fully_packed)
2133 {
2134 if (notify_func)
2135 SVN_ERR(notify_func(notify_baton,
2136 ffd->min_unpacked_rev / ffd->max_files_per_dir,
2137 svn_fs_pack_notify_noop, pool));
2138
2139 return SVN_NO_ERROR;
2140 }
2141
2142 /* Lock the repo and start the pack process. */
2143 pb.fs = fs;
2144 pb.notify_func = notify_func;
2145 pb.notify_baton = notify_baton;
2146 pb.cancel_func = cancel_func;
2147 pb.cancel_baton = cancel_baton;
2148 pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM;
2149
2150 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
2151 {
2152 /* Newer repositories provide a pack operation specific lock.
2153 Acquire it to prevent concurrent packs.
2154
2155 Since the file lock's lifetime is bound to a pool, we create a
2156 separate subpool here to release the lock immediately after the
2157 operation finished.
2158 */
2159 err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool);
2160 }
2161 else
2162 {
2163 /* Use the global write lock for older repos. */
2164 err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
2165 }
2166
2167 return svn_error_trace(err);
2168 }
2169