1 /* revprops.c --- everything needed to handle revprops in FSX
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22
23 #include <assert.h>
24 #include <apr_md5.h>
25
26 #include "svn_pools.h"
27 #include "svn_hash.h"
28 #include "svn_dirent_uri.h"
29 #include "svn_sorts.h"
30
31 #include "fs_x.h"
32 #include "low_level.h"
33 #include "revprops.h"
34 #include "util.h"
35 #include "transaction.h"
36
37 #include "private/svn_packed_data.h"
38 #include "private/svn_sorts_private.h"
39 #include "private/svn_subr_private.h"
40 #include "private/svn_string_private.h"
41 #include "../libsvn_fs/fs-loader.h"
42
43 #include "svn_private_config.h"
44
/* Grace period (10 seconds, expressed in microseconds) that a writing
   process gets for replacing an existing revprop file with a new one.
   Once that time has elapsed, we assume that the writing process got
   aborted and that we have to re-read revprops. */
#define REVPROP_CHANGE_TIMEOUT (10 * 1000000)

/* Maximum number of attempts at reading the generation file.  After a
   failed read we yield and read again; once this many attempts have
   failed, we give up. */
#define GENERATION_READ_RETRY_COUNT 100
54
55
56 /* Revprop caching management.
57 *
58 * Mechanism:
59 * ----------
60 *
61 * Revprop caching needs to be activated and will be deactivated for the
62 * respective FS instance if the necessary infrastructure could not be
63 * initialized. As long as no revprops are being read or changed, revprop
64 * caching imposes no overhead.
65 *
66 * When activated, we cache revprops using (revision, generation) pairs
67 * as keys with the generation being incremented upon every revprop change.
68 * Since the cache is process-local, the generation needs to be tracked
69 * for at least as long as the process lives but may be reset afterwards.
 * We track the revprop generation in a file that gets replaced atomically
 * upon every generation change.
71 *
72 * A race condition exists between switching to the modified revprop data
73 * and bumping the generation number. In particular, the process may crash
74 * just after switching to the new revprop data and before bumping the
75 * generation. To be able to detect this scenario, we bump the generation
76 * twice per revprop change: once immediately before (creating an odd number)
77 * and once after the atomic switch (even generation).
78 *
79 * A writer holding the write lock can immediately assume a crashed writer
80 * in case of an odd generation or they would not have been able to acquire
81 * the lock. A reader detecting an odd generation will use that number and
82 * be forced to re-read any revprop data - usually getting the new revprops
83 * already. If the generation file modification timestamp is too old, the
84 * reader will assume a crashed writer, acquire the write lock and bump
85 * the generation if it is still odd. So, for about REVPROP_CHANGE_TIMEOUT
86 * after the crash, reader caches may be stale.
87 */
88
89 /* Read revprop generation as stored on disk for repository FS. The result is
90 * returned in *CURRENT. Call only for repos that support revprop caching.
91 */
92 static svn_error_t *
read_revprop_generation_file(apr_int64_t * current,svn_fs_t * fs,apr_pool_t * scratch_pool)93 read_revprop_generation_file(apr_int64_t *current,
94 svn_fs_t *fs,
95 apr_pool_t *scratch_pool)
96 {
97 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
98 int i;
99 svn_error_t *err = SVN_NO_ERROR;
100 const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool);
101
102 /* Retry in case of incomplete file buffer updates. */
103 for (i = 0; i < GENERATION_READ_RETRY_COUNT; ++i)
104 {
105 svn_stringbuf_t *buf;
106
107 svn_error_clear(err);
108 svn_pool_clear(iterpool);
109
110 /* Read the generation file. */
111 err = svn_stringbuf_from_file2(&buf, path, iterpool);
112
113 /* If we could read the file, it should be complete due to our atomic
114 * file replacement scheme. */
115 if (!err)
116 {
117 svn_stringbuf_strip_whitespace(buf);
118 SVN_ERR(svn_cstring_atoi64(current, buf->data));
119 break;
120 }
121
122 /* Got unlucky the file was not available. Retry. */
123 #if APR_HAS_THREADS
124 apr_thread_yield();
125 #else
126 apr_sleep(0);
127 #endif
128 }
129
130 svn_pool_destroy(iterpool);
131
132 /* If we had to give up, propagate the error. */
133 return svn_error_trace(err);
134 }
135
136 /* Write the CURRENT revprop generation to disk for repository FS.
137 * Call only for repos that support revprop caching.
138 */
139 static svn_error_t *
write_revprop_generation_file(svn_fs_t * fs,apr_int64_t current,apr_pool_t * scratch_pool)140 write_revprop_generation_file(svn_fs_t *fs,
141 apr_int64_t current,
142 apr_pool_t *scratch_pool)
143 {
144 svn_fs_x__data_t *ffd = fs->fsap_data;
145 svn_stringbuf_t *buffer;
146 const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool);
147
148 /* Invalidate our cached revprop generation in case the file operations
149 * below fail. */
150 ffd->revprop_generation = -1;
151
152 /* Write the new number. */
153 buffer = svn_stringbuf_createf(scratch_pool, "%" APR_INT64_T_FMT "\n",
154 current);
155 SVN_ERR(svn_io_write_atomic2(path, buffer->data, buffer->len,
156 path /* copy_perms */, FALSE,
157 scratch_pool));
158
159 /* Remember it to spare us the re-read. */
160 ffd->revprop_generation = current;
161
162 return SVN_NO_ERROR;
163 }
164
165 svn_error_t *
svn_fs_x__reset_revprop_generation_file(svn_fs_t * fs,apr_pool_t * scratch_pool)166 svn_fs_x__reset_revprop_generation_file(svn_fs_t *fs,
167 apr_pool_t *scratch_pool)
168 {
169 /* Write the initial revprop generation file contents. */
170 SVN_ERR(write_revprop_generation_file(fs, 0, scratch_pool));
171
172 return SVN_NO_ERROR;
173 }
174
175 /* Test whether revprop cache and necessary infrastructure are
176 available in FS. */
177 static svn_boolean_t
has_revprop_cache(svn_fs_t * fs,apr_pool_t * scratch_pool)178 has_revprop_cache(svn_fs_t *fs,
179 apr_pool_t *scratch_pool)
180 {
181 svn_fs_x__data_t *ffd = fs->fsap_data;
182
183 /* is the cache enabled? */
184 return ffd->revprop_cache != NULL;
185 }
186
187 /* Baton structure for revprop_generation_fixup. */
188 typedef struct revprop_generation_fixup_t
189 {
190 /* revprop generation to read */
191 apr_int64_t *generation;
192
193 /* file system context */
194 svn_fs_t *fs;
195 } revprop_generation_upgrade_t;
196
197 /* If the revprop generation has an odd value, it means the original writer
198 of the revprop got killed. We don't know whether that process as able
199 to change the revprop data but we assume that it was. Therefore, we
200 increase the generation in that case to basically invalidate everyone's
201 cache content.
202 Execute this only while holding the write lock to the repo in baton->FFD.
203 */
204 static svn_error_t *
revprop_generation_fixup(void * void_baton,apr_pool_t * scratch_pool)205 revprop_generation_fixup(void *void_baton,
206 apr_pool_t *scratch_pool)
207 {
208 revprop_generation_upgrade_t *baton = void_baton;
209 svn_fs_x__data_t *ffd = baton->fs->fsap_data;
210 assert(ffd->has_write_lock);
211
212 /* Maybe, either the original revprop writer or some other reader has
213 already corrected / bumped the revprop generation. Thus, we need
214 to read it again. However, we will now be the only ones changing
215 the file contents due to us holding the write lock. */
216 SVN_ERR(read_revprop_generation_file(baton->generation, baton->fs,
217 scratch_pool));
218
219 /* Cause everyone to re-read revprops upon their next access, if the
220 last revprop write did not complete properly. */
221 if (*baton->generation % 2)
222 {
223 ++*baton->generation;
224 SVN_ERR(write_revprop_generation_file(baton->fs,
225 *baton->generation,
226 scratch_pool));
227 }
228
229 return SVN_NO_ERROR;
230 }
231
232 /* Read the current revprop generation of FS and its value in FS->FSAP_DATA.
233 Also, detect aborted / crashed writers and recover from that. */
234 static svn_error_t *
read_revprop_generation(svn_fs_t * fs,apr_pool_t * scratch_pool)235 read_revprop_generation(svn_fs_t *fs,
236 apr_pool_t *scratch_pool)
237 {
238 apr_int64_t current = 0;
239 svn_fs_x__data_t *ffd = fs->fsap_data;
240
241 /* read the current revprop generation number */
242 SVN_ERR(read_revprop_generation_file(¤t, fs, scratch_pool));
243
244 /* is an unfinished revprop write under the way? */
245 if (current % 2)
246 {
247 svn_boolean_t timeout = FALSE;
248
249 /* Has the writer process been aborted?
250 * Either by timeout or by us being the writer now.
251 */
252 if (!ffd->has_write_lock)
253 {
254 apr_time_t mtime;
255 SVN_ERR(svn_io_file_affected_time(&mtime,
256 svn_fs_x__path_revprop_generation(fs, scratch_pool),
257 scratch_pool));
258 timeout = apr_time_now() > mtime + REVPROP_CHANGE_TIMEOUT;
259 }
260
261 if (ffd->has_write_lock || timeout)
262 {
263 revprop_generation_upgrade_t baton;
264 baton.generation = ¤t;
265 baton.fs = fs;
266
267 /* Ensure that the original writer process no longer exists by
268 * acquiring the write lock to this repository. Then, fix up
269 * the revprop generation.
270 */
271 if (ffd->has_write_lock)
272 SVN_ERR(revprop_generation_fixup(&baton, scratch_pool));
273 else
274 SVN_ERR(svn_fs_x__with_write_lock(fs, revprop_generation_fixup,
275 &baton, scratch_pool));
276 }
277 }
278
279 /* return the value we just got */
280 ffd->revprop_generation = current;
281 return SVN_NO_ERROR;
282 }
283
284 void
svn_fs_x__invalidate_revprop_generation(svn_fs_t * fs)285 svn_fs_x__invalidate_revprop_generation(svn_fs_t *fs)
286 {
287 svn_fs_x__data_t *ffd = fs->fsap_data;
288 ffd->revprop_generation = -1;
289 }
290
291 /* Return TRUE if the revprop generation value in FS->FSAP_DATA is valid. */
292 static svn_boolean_t
is_generation_valid(svn_fs_t * fs)293 is_generation_valid(svn_fs_t *fs)
294 {
295 svn_fs_x__data_t *ffd = fs->fsap_data;
296 return ffd->revprop_generation >= 0;
297 }
298
299 /* Set the revprop generation in FS to the next odd number to indicate
300 that there is a revprop write process under way. Update the value
301 in FS->FSAP_DATA accordingly. If the change times out, readers shall
302 recover from that state & re-read revprops.
303 This is a no-op for repo formats that don't support revprop caching. */
304 static svn_error_t *
begin_revprop_change(svn_fs_t * fs,apr_pool_t * scratch_pool)305 begin_revprop_change(svn_fs_t *fs,
306 apr_pool_t *scratch_pool)
307 {
308 svn_fs_x__data_t *ffd = fs->fsap_data;
309 SVN_ERR_ASSERT(ffd->has_write_lock);
310
311 /* Set the revprop generation to an odd value to indicate
312 * that a write is in progress.
313 */
314 SVN_ERR(read_revprop_generation(fs, scratch_pool));
315 ++ffd->revprop_generation;
316 SVN_ERR_ASSERT(ffd->revprop_generation % 2);
317 SVN_ERR(write_revprop_generation_file(fs, ffd->revprop_generation,
318 scratch_pool));
319
320 return SVN_NO_ERROR;
321 }
322
323 /* Set the revprop generation in FS to the next even generation after
324 the odd value in FS->FSAP_DATA to indicate that
325 a) readers shall re-read revprops, and
326 b) the write process has been completed (no recovery required).
327 This is a no-op for repo formats that don't support revprop caching. */
328 static svn_error_t *
end_revprop_change(svn_fs_t * fs,apr_pool_t * scratch_pool)329 end_revprop_change(svn_fs_t *fs,
330 apr_pool_t *scratch_pool)
331 {
332 svn_fs_x__data_t *ffd = fs->fsap_data;
333 SVN_ERR_ASSERT(ffd->has_write_lock);
334 SVN_ERR_ASSERT(ffd->revprop_generation % 2);
335
336 /* Set the revprop generation to an even value to indicate
337 * that a write has been completed. Since we held the write
338 * lock, nobody else could have updated the file contents.
339 */
340 SVN_ERR(write_revprop_generation_file(fs, ffd->revprop_generation + 1,
341 scratch_pool));
342
343 return SVN_NO_ERROR;
344 }
345
346 /* Represents an entry in the packed revprop manifest.
347 * There is one such entry per pack file. */
348 typedef struct manifest_entry_t
349 {
350 /* First revision in the pack file. */
351 svn_revnum_t start_rev;
352
353 /* Tag (a counter) appended to the file name to distinguish it from
354 outdated ones. */
355 apr_uint64_t tag;
356 } manifest_entry_t;
357
358 /* Container for all data required to access the packed revprop file
359 * for a given REVISION. This structure will be filled incrementally
360 * by read_pack_revprops() its sub-routines.
361 */
362 typedef struct packed_revprops_t
363 {
364 /* revision number to read (not necessarily the first in the pack) */
365 svn_revnum_t revision;
366
367 /* the actual revision properties */
368 apr_hash_t *properties;
369
370 /* their size when serialized to a single string
371 * (as found in PACKED_REVPROPS) */
372 apr_size_t serialized_size;
373
374
375 /* manifest entry describing the pack file */
376 manifest_entry_t entry;
377
378 /* packed shard folder path */
379 const char *folder;
380
381 /* sum of values in SIZES */
382 apr_size_t total_size;
383
384 /* Array of svn_string_t, containing the serialized revprops for
385 * REVISION * I. */
386 apr_array_header_t *revprops;
387
388 /* content of the manifest.
389 * Sorted list of manifest_entry_t. */
390 apr_array_header_t *manifest;
391 } packed_revprops_t;
392
393 /* Parse the serialized revprops in CONTENT and return them in *PROPERTIES.
394 * Also, put them into the revprop cache, if activated, for future use.
395 * Three more parameters are being used to update the revprop cache: FS is
396 * our file system, the revprops belong to REVISION.
397 *
398 * The returned hash will be allocated in RESULT_POOL, SCRATCH_POOL is
399 * being used for temporary allocations.
400 */
401 static svn_error_t *
parse_revprop(apr_hash_t ** properties,svn_fs_t * fs,svn_revnum_t revision,const svn_string_t * content,apr_pool_t * result_pool,apr_pool_t * scratch_pool)402 parse_revprop(apr_hash_t **properties,
403 svn_fs_t *fs,
404 svn_revnum_t revision,
405 const svn_string_t *content,
406 apr_pool_t *result_pool,
407 apr_pool_t *scratch_pool)
408 {
409 SVN_ERR_W(svn_fs_x__parse_properties(properties, content, result_pool),
410 apr_psprintf(scratch_pool, "Failed to parse revprops for r%ld.",
411 revision));
412
413 if (has_revprop_cache(fs, scratch_pool))
414 {
415 svn_fs_x__data_t *ffd = fs->fsap_data;
416 svn_fs_x__pair_cache_key_t key = { 0 };
417
418 SVN_ERR_ASSERT(is_generation_valid(fs));
419
420 key.revision = revision;
421 key.second = ffd->revprop_generation;
422 SVN_ERR(svn_cache__set(ffd->revprop_cache, &key, *properties,
423 scratch_pool));
424 }
425
426 return SVN_NO_ERROR;
427 }
428
429 /* Verify the checksum attached to CONTENT and remove it.
430 * Use SCRATCH_POOL for temporary allocations.
431 */
432 static svn_error_t *
verify_checksum(svn_stringbuf_t * content,apr_pool_t * scratch_pool)433 verify_checksum(svn_stringbuf_t *content,
434 apr_pool_t *scratch_pool)
435 {
436 const apr_byte_t *digest;
437 svn_checksum_t *actual, *expected;
438
439 /* Verify the checksum. */
440 if (content->len < sizeof(apr_uint32_t))
441 return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
442 "File too short");
443
444 content->len -= sizeof(apr_uint32_t);
445 digest = (apr_byte_t *)content->data + content->len;
446
447 expected = svn_checksum__from_digest_fnv1a_32x4(digest, scratch_pool);
448 SVN_ERR(svn_checksum(&actual, svn_checksum_fnv1a_32x4, content->data,
449 content->len, scratch_pool));
450
451 if (!svn_checksum_match(actual, expected))
452 SVN_ERR(svn_checksum_mismatch_err(expected, actual, scratch_pool,
453 "checksum mismatch"));
454
455 return SVN_NO_ERROR;
456 }
457
458 /* Read the non-packed revprops for revision REV in FS, put them into the
459 * revprop cache if activated and return them in *PROPERTIES.
460 *
461 * If the data could not be read due to an otherwise recoverable error,
462 * leave *PROPERTIES unchanged. No error will be returned in that case.
463 *
464 * Allocate *PROPERTIES in RESULT_POOL and temporaries in SCRATCH_POOL.
465 */
466 static svn_error_t *
read_non_packed_revprop(apr_hash_t ** properties,svn_fs_t * fs,svn_revnum_t rev,apr_pool_t * result_pool,apr_pool_t * scratch_pool)467 read_non_packed_revprop(apr_hash_t **properties,
468 svn_fs_t *fs,
469 svn_revnum_t rev,
470 apr_pool_t *result_pool,
471 apr_pool_t *scratch_pool)
472 {
473 svn_stringbuf_t *content = NULL;
474 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
475 svn_boolean_t missing = FALSE;
476 int i;
477
478 for (i = 0;
479 i < SVN_FS_X__RECOVERABLE_RETRY_COUNT && !missing && !content;
480 ++i)
481 {
482 svn_pool_clear(iterpool);
483 SVN_ERR(svn_fs_x__try_stringbuf_from_file(&content,
484 &missing,
485 svn_fs_x__path_revprops(fs, rev, iterpool),
486 i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT,
487 iterpool));
488 }
489
490 if (content)
491 {
492 svn_string_t *as_string;
493
494 /* Consistency check. */
495 SVN_ERR_W(verify_checksum(content, scratch_pool),
496 apr_psprintf(scratch_pool,
497 "Revprop file for r%ld is corrupt",
498 rev));
499
500 /* The contents string becomes part of the *PROPERTIES structure, i.e.
501 * we must make sure it lives at least as long as the latter. */
502 as_string = svn_string_create_from_buf(content, result_pool);
503 SVN_ERR(parse_revprop(properties, fs, rev, as_string,
504 result_pool, iterpool));
505 }
506
507 svn_pool_clear(iterpool);
508
509 return SVN_NO_ERROR;
510 }
511
512 /* Serialize ROOT into FILE and append a checksum to it.
513 * Use SCRATCH_POOL for temporary allocations.
514 */
515 static svn_error_t *
write_packed_data_checksummed(svn_packed__data_root_t * root,apr_file_t * file,apr_pool_t * scratch_pool)516 write_packed_data_checksummed(svn_packed__data_root_t *root,
517 apr_file_t *file,
518 apr_pool_t *scratch_pool)
519 {
520 svn_checksum_t *checksum;
521 svn_stream_t *stream;
522
523 stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool);
524 stream = svn_checksum__wrap_write_stream(&checksum, stream,
525 svn_checksum_fnv1a_32x4,
526 scratch_pool);
527 SVN_ERR(svn_packed__data_write(stream, root, scratch_pool));
528 SVN_ERR(svn_stream_close(stream));
529
530 /* Append the checksum */
531 SVN_ERR(svn_io_file_write_full(file, checksum->digest,
532 svn_checksum_size(checksum), NULL,
533 scratch_pool));
534
535 return SVN_NO_ERROR;
536 }
537
538 /* Serialize the packed revprops MANIFEST into FILE.
539 * Use SCRATCH_POOL for temporary allocations.
540 */
541 static svn_error_t *
write_manifest(apr_file_t * file,const apr_array_header_t * manifest,apr_pool_t * scratch_pool)542 write_manifest(apr_file_t *file,
543 const apr_array_header_t *manifest,
544 apr_pool_t *scratch_pool)
545 {
546 int i;
547 svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
548
549 /* one top-level stream per struct element */
550 svn_packed__int_stream_t *start_rev_stream
551 = svn_packed__create_int_stream(root, TRUE, FALSE);
552 svn_packed__int_stream_t *tag_stream
553 = svn_packed__create_int_stream(root, FALSE, FALSE);
554
555 /* serialize ENTRIES */
556 for (i = 0; i < manifest->nelts; ++i)
557 {
558 manifest_entry_t *entry = &APR_ARRAY_IDX(manifest, i, manifest_entry_t);
559 svn_packed__add_uint(start_rev_stream, entry->start_rev);
560 svn_packed__add_uint(tag_stream, entry->tag);
561 }
562
563 /* Write to file and calculate the checksum. */
564 SVN_ERR(write_packed_data_checksummed(root, file, scratch_pool));
565
566 return SVN_NO_ERROR;
567 }
568
569 /* Read *ROOT from CONTENT and verify its checksum. Allocate *ROOT in
570 * RESULT_POOL and use SCRATCH_POOL for temporary allocations.
571 */
572 static svn_error_t *
read_packed_data_checksummed(svn_packed__data_root_t ** root,svn_stringbuf_t * content,apr_pool_t * result_pool,apr_pool_t * scratch_pool)573 read_packed_data_checksummed(svn_packed__data_root_t **root,
574 svn_stringbuf_t *content,
575 apr_pool_t *result_pool,
576 apr_pool_t *scratch_pool)
577 {
578 svn_stream_t *stream;
579
580 SVN_ERR(verify_checksum(content, scratch_pool));
581
582 stream = svn_stream_from_stringbuf(content, scratch_pool);
583 SVN_ERR(svn_packed__data_read(root, stream, result_pool, scratch_pool));
584
585 return SVN_NO_ERROR;
586 }
587
588 /* Read the packed revprops manifest from the CONTENT buffer and return it
589 * in *MANIFEST, allocated in RESULT_POOL. REVISION is the revision number
590 * to put into error messages. Use SCRATCH_POOL for temporary allocations.
591 */
592 static svn_error_t *
read_manifest(apr_array_header_t ** manifest,svn_stringbuf_t * content,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)593 read_manifest(apr_array_header_t **manifest,
594 svn_stringbuf_t *content,
595 svn_revnum_t revision,
596 apr_pool_t *result_pool,
597 apr_pool_t *scratch_pool)
598 {
599 apr_size_t i;
600 apr_size_t count;
601
602 svn_packed__data_root_t *root;
603 svn_packed__int_stream_t *start_rev_stream;
604 svn_packed__int_stream_t *tag_stream;
605
606 /* Verify the checksum and decode packed data. */
607 SVN_ERR_W(read_packed_data_checksummed(&root, content, result_pool,
608 scratch_pool),
609 apr_psprintf(scratch_pool,
610 "Revprop manifest file for r%ld is corrupt",
611 revision));
612
613 /* get streams */
614 start_rev_stream = svn_packed__first_int_stream(root);
615 tag_stream = svn_packed__next_int_stream(start_rev_stream);
616
617 /* read ids array */
618 count = svn_packed__int_count(start_rev_stream);
619 *manifest = apr_array_make(result_pool, (int)count,
620 sizeof(manifest_entry_t));
621
622 for (i = 0; i < count; ++i)
623 {
624 manifest_entry_t *entry = apr_array_push(*manifest);
625 entry->start_rev = (svn_revnum_t)svn_packed__get_int(start_rev_stream);
626 entry->tag = svn_packed__get_uint(tag_stream);
627 }
628
629 return SVN_NO_ERROR;
630 }
631
632 /* Implements the standard comparison function signature comparing the
633 * manifest_entry_t(lhs).start_rev to svn_revnum_t(rhs). */
634 static int
compare_entry_revision(const void * lhs,const void * rhs)635 compare_entry_revision(const void *lhs,
636 const void *rhs)
637 {
638 const manifest_entry_t *entry = lhs;
639 const svn_revnum_t *revision = rhs;
640
641 if (entry->start_rev < *revision)
642 return -1;
643
644 return entry->start_rev == *revision ? 0 : 1;
645 }
646
647 /* Return the index in MANIFEST that has the info for the pack file
648 * containing REVISION. */
649 static int
get_entry(apr_array_header_t * manifest,svn_revnum_t revision)650 get_entry(apr_array_header_t *manifest,
651 svn_revnum_t revision)
652 {
653 manifest_entry_t *entry;
654 int idx = svn_sort__bsearch_lower_bound(manifest, &revision,
655 compare_entry_revision);
656
657 assert(manifest->nelts > 0);
658 if (idx >= manifest->nelts)
659 return idx - 1;
660
661 entry = &APR_ARRAY_IDX(manifest, idx, manifest_entry_t);
662 if (entry->start_rev > revision && idx > 0)
663 return idx - 1;
664
665 return idx;
666 }
667
668 /* Return the full path of the revprop pack file given by ENTRY within
669 * REVPROPS. Allocate the result in RESULT_POOL. */
670 static const char *
get_revprop_pack_filepath(packed_revprops_t * revprops,manifest_entry_t * entry,apr_pool_t * result_pool)671 get_revprop_pack_filepath(packed_revprops_t *revprops,
672 manifest_entry_t *entry,
673 apr_pool_t *result_pool)
674 {
675 const char *filename = apr_psprintf(result_pool, "%ld.%" APR_UINT64_T_FMT,
676 entry->start_rev, entry->tag);
677 return svn_dirent_join(revprops->folder, filename, result_pool);
678 }
679
680 /* Given FS and REVPROPS->REVISION, fill the FILENAME, FOLDER and MANIFEST
681 * members. Use RESULT_POOL for allocating results and SCRATCH_POOL for
682 * temporaries.
683 */
684 static svn_error_t *
get_revprop_packname(svn_fs_t * fs,packed_revprops_t * revprops,apr_pool_t * result_pool,apr_pool_t * scratch_pool)685 get_revprop_packname(svn_fs_t *fs,
686 packed_revprops_t *revprops,
687 apr_pool_t *result_pool,
688 apr_pool_t *scratch_pool)
689 {
690 svn_fs_x__data_t *ffd = fs->fsap_data;
691 svn_stringbuf_t *content = NULL;
692 const char *manifest_file_path;
693 int idx;
694 svn_revnum_t previous_start_rev;
695 int i;
696
697 /* Determine the dimensions. Rev 0 is excluded from the first shard. */
698 int rev_count = ffd->max_files_per_dir;
699 svn_revnum_t manifest_start
700 = revprops->revision - (revprops->revision % rev_count);
701 if (manifest_start == 0)
702 {
703 ++manifest_start;
704 --rev_count;
705 }
706
707 /* Read the content of the manifest file */
708 revprops->folder = svn_fs_x__path_pack_shard(fs, revprops->revision,
709 result_pool);
710 manifest_file_path = svn_dirent_join(revprops->folder, PATH_MANIFEST,
711 result_pool);
712 SVN_ERR(svn_fs_x__read_content(&content, manifest_file_path, result_pool));
713 SVN_ERR(read_manifest(&revprops->manifest, content, revprops->revision,
714 result_pool, scratch_pool));
715
716 /* Verify the manifest data. */
717 if (revprops->manifest->nelts == 0)
718 return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL,
719 "Revprop manifest for r%ld is empty",
720 revprops->revision);
721
722 previous_start_rev = 0;
723 for (i = 0; i < revprops->manifest->nelts; ++i)
724 {
725 svn_revnum_t start_rev = APR_ARRAY_IDX(revprops->manifest, i,
726 manifest_entry_t).start_rev;
727 if ( start_rev < manifest_start
728 || start_rev >= manifest_start + rev_count)
729 return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL,
730 "Revprop manifest for r%ld contains "
731 "out-of-range revision r%ld",
732 revprops->revision, start_rev);
733
734 if (start_rev < previous_start_rev)
735 return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL,
736 "Entries in revprop manifest for r%ld "
737 "are not ordered", revprops->revision);
738
739 previous_start_rev = start_rev;
740 }
741
742 /* Now get the pack file description */
743 idx = get_entry(revprops->manifest, revprops->revision);
744 revprops->entry = APR_ARRAY_IDX(revprops->manifest, idx,
745 manifest_entry_t);
746
747 return SVN_NO_ERROR;
748 }
749
750 /* Return TRUE, if revision R1 and R2 refer to the same shard in FS.
751 */
752 static svn_boolean_t
same_shard(svn_fs_t * fs,svn_revnum_t r1,svn_revnum_t r2)753 same_shard(svn_fs_t *fs,
754 svn_revnum_t r1,
755 svn_revnum_t r2)
756 {
757 svn_fs_x__data_t *ffd = fs->fsap_data;
758 return (r1 / ffd->max_files_per_dir) == (r2 / ffd->max_files_per_dir);
759 }
760
761 /* Given FS and the full packed file content in CONTENT and make
762 * PACKED_REVPROPS point to the first serialized revprop. If READ_ALL
763 * is set, initialize the SIZES and OFFSETS members as well.
764 *
765 * Parse the revprops for REVPROPS->REVISION and set the PROPERTIES as
766 * well as the SERIALIZED_SIZE member. If revprop caching has been
767 * enabled, parse all revprops in the pack and cache them.
768 */
769 static svn_error_t *
parse_packed_revprops(svn_fs_t * fs,packed_revprops_t * revprops,svn_stringbuf_t * content,svn_boolean_t read_all,apr_pool_t * result_pool,apr_pool_t * scratch_pool)770 parse_packed_revprops(svn_fs_t *fs,
771 packed_revprops_t *revprops,
772 svn_stringbuf_t *content,
773 svn_boolean_t read_all,
774 apr_pool_t *result_pool,
775 apr_pool_t *scratch_pool)
776 {
777 apr_size_t count, i;
778 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
779 svn_boolean_t cache_all = has_revprop_cache(fs, scratch_pool);
780 svn_packed__data_root_t *root;
781 svn_packed__byte_stream_t *revprops_stream;
782 svn_revnum_t first_rev = revprops->entry.start_rev;
783
784 /* Verify the checksum and decode packed data. */
785 SVN_ERR_W(read_packed_data_checksummed(&root, content, result_pool,
786 scratch_pool),
787 apr_psprintf(scratch_pool,
788 "Revprop pack file for r%ld is corrupt",
789 first_rev));
790
791 /* get streams */
792 revprops_stream = svn_packed__first_byte_stream(root);
793 count = svn_packed__byte_block_count(revprops_stream);
794
795 /* Check revision range for validity. */
796 if (!same_shard(fs, first_rev, first_rev + count - 1) || count < 1)
797 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
798 _("Revprop pack for revision r%ld"
799 " contains revprops for r%ld .. r%ld"),
800 revprops->revision,
801 (svn_revnum_t)first_rev,
802 (svn_revnum_t)(first_rev + count -1));
803
804 /* Since start & end are in the same shard, it is enough to just test
805 * the FIRST_REV for being actually packed. That will also cover the
806 * special case of rev 0 never being packed. */
807 if (!svn_fs_x__is_packed_revprop(fs, first_rev))
808 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
809 _("Revprop pack for revision r%ld"
810 " starts at non-packed revisions r%ld"),
811 revprops->revision, (svn_revnum_t)first_rev);
812
813 /* Request all data (just references to data already expanded in ROOT) */
814 revprops->revprops = apr_array_make(result_pool, (int)count,
815 sizeof(svn_string_t));
816 for (i = 0, revprops->total_size = 0; i < count; ++i)
817 {
818 svn_string_t *props = apr_array_push(revprops->revprops);
819 props->data = svn_packed__get_bytes(revprops_stream, &props->len);
820
821 revprops->total_size += props->len;
822 }
823
824 /* Now parse the serialized revprops. */
825 for (i = 0; i < count; ++i)
826 {
827 const svn_string_t *serialized;
828 svn_revnum_t revision;
829
830 svn_pool_clear(iterpool);
831
832 serialized = &APR_ARRAY_IDX(revprops->revprops, (int)i, svn_string_t);
833 revision = first_rev + (long)i;
834
835 /* Parse this revprops list, if necessary */
836 if (revision == revprops->revision)
837 {
838 /* Parse (and possibly cache) the one revprop list we care about. */
839 SVN_ERR(parse_revprop(&revprops->properties, fs, revision,
840 serialized, result_pool, iterpool));
841 revprops->serialized_size = serialized->len;
842
843 /* If we only wanted the revprops for REVISION then we are done. */
844 if (!read_all && !cache_all)
845 break;
846 }
847 else if (cache_all)
848 {
849 /* Parse and cache all other revprop lists. */
850 apr_hash_t *properties;
851 SVN_ERR(parse_revprop(&properties, fs, revision, serialized,
852 iterpool, iterpool));
853 }
854 }
855
856 svn_pool_destroy(iterpool);
857
858 return SVN_NO_ERROR;
859 }
860
861 /* In filesystem FS, read the packed revprops for revision REV into
862 * *REVPROPS. Populate the revprop cache, if enabled. If you want to
863 * modify revprop contents / update REVPROPS, READ_ALL must be set.
864 * Otherwise, only the properties of REV are being provided.
865 *
866 * Allocate *PROPERTIES in RESULT_POOL and temporaries in SCRATCH_POOL.
867 */
868 static svn_error_t *
read_pack_revprop(packed_revprops_t ** revprops,svn_fs_t * fs,svn_revnum_t rev,svn_boolean_t read_all,apr_pool_t * result_pool,apr_pool_t * scratch_pool)869 read_pack_revprop(packed_revprops_t **revprops,
870 svn_fs_t *fs,
871 svn_revnum_t rev,
872 svn_boolean_t read_all,
873 apr_pool_t *result_pool,
874 apr_pool_t *scratch_pool)
875 {
876 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
877 svn_boolean_t missing = FALSE;
878 packed_revprops_t *result;
879 int i;
880
881 /* someone insisted that REV is packed. Double-check if necessary */
882 if (!svn_fs_x__is_packed_revprop(fs, rev))
883 SVN_ERR(svn_fs_x__update_min_unpacked_rev(fs, iterpool));
884
885 if (!svn_fs_x__is_packed_revprop(fs, rev))
886 return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL,
887 _("No such packed revision %ld"), rev);
888
889 /* initialize the result data structure */
890 result = apr_pcalloc(result_pool, sizeof(*result));
891 result->revision = rev;
892
893 /* try to read the packed revprops. This may require retries if we have
894 * concurrent writers. */
895 for (i = 0; i < SVN_FS_X__RECOVERABLE_RETRY_COUNT; ++i)
896 {
897 const char *file_path;
898 svn_stringbuf_t *contents = NULL;
899
900 svn_pool_clear(iterpool);
901
902 /* there might have been concurrent writes.
903 * Re-read the manifest and the pack file.
904 */
905 SVN_ERR(get_revprop_packname(fs, result, result_pool, iterpool));
906 file_path = get_revprop_pack_filepath(result, &result->entry,
907 iterpool);
908 SVN_ERR(svn_fs_x__try_stringbuf_from_file(&contents,
909 &missing,
910 file_path,
911 i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT,
912 iterpool));
913
914 if (contents)
915 {
916 SVN_ERR_W(parse_packed_revprops(fs, result, contents, read_all,
917 result_pool, iterpool),
918 apr_psprintf(iterpool,
919 "Revprop pack file for r%ld is corrupt",
920 rev));
921 break;
922 }
923
924 /* If we could not find the file, there was a write.
925 * So, we should refresh our revprop generation info as well such
926 * that others may find data we will put into the cache. They would
927 * consider it outdated, otherwise.
928 */
929 if (missing && has_revprop_cache(fs, iterpool))
930 SVN_ERR(read_revprop_generation(fs, iterpool));
931 }
932
933 /* the file content should be available now */
934 if (!result->revprops)
935 return svn_error_createf(SVN_ERR_FS_PACKED_REVPROP_READ_FAILURE, NULL,
936 _("Failed to read revprop pack file for r%ld"), rev);
937
938 *revprops = result;
939
940 return SVN_NO_ERROR;
941 }
942
943 svn_error_t *
svn_fs_x__get_revision_proplist(apr_hash_t ** proplist_p,svn_fs_t * fs,svn_revnum_t rev,svn_boolean_t bypass_cache,svn_boolean_t refresh,apr_pool_t * result_pool,apr_pool_t * scratch_pool)944 svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p,
945 svn_fs_t *fs,
946 svn_revnum_t rev,
947 svn_boolean_t bypass_cache,
948 svn_boolean_t refresh,
949 apr_pool_t *result_pool,
950 apr_pool_t *scratch_pool)
951 {
952 svn_fs_x__data_t *ffd = fs->fsap_data;
953
954 /* not found, yet */
955 *proplist_p = NULL;
956
957 /* should they be available at all? */
958 SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
959
960 /* Ensure that the revprop generation info is valid. */
961 if (refresh || !is_generation_valid(fs))
962 SVN_ERR(read_revprop_generation(fs, scratch_pool));
963
964 /* Try cache lookup first. */
965 if (!bypass_cache && has_revprop_cache(fs, scratch_pool))
966 {
967 svn_boolean_t is_cached;
968 svn_fs_x__pair_cache_key_t key = { 0 };
969
970 key.revision = rev;
971 key.second = ffd->revprop_generation;
972 SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
973 ffd->revprop_cache, &key, result_pool));
974 if (is_cached)
975 return SVN_NO_ERROR;
976 }
977
978 /* if REV had not been packed when we began, try reading it from the
979 * non-packed shard. If that fails, we will fall through to packed
980 * shard reads. */
981 if (!svn_fs_x__is_packed_revprop(fs, rev))
982 {
983 svn_error_t *err = read_non_packed_revprop(proplist_p, fs, rev,
984 result_pool, scratch_pool);
985 if (err)
986 {
987 if (!APR_STATUS_IS_ENOENT(err->apr_err))
988 return svn_error_trace(err);
989
990 svn_error_clear(err);
991 *proplist_p = NULL; /* in case read_non_packed_revprop changed it */
992 }
993 }
994
995 /* if revprop packing is available and we have not read the revprops, yet,
996 * try reading them from a packed shard. If that fails, REV is most
997 * likely invalid (or its revprops highly contested). */
998 if (!*proplist_p)
999 {
1000 packed_revprops_t *revprops;
1001 SVN_ERR(read_pack_revprop(&revprops, fs, rev, FALSE,
1002 result_pool, scratch_pool));
1003 *proplist_p = revprops->properties;
1004 }
1005
1006 /* The revprops should have been there. Did we get them? */
1007 if (!*proplist_p)
1008 return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL,
1009 _("Could not read revprops for revision %ld"),
1010 rev);
1011
1012 return SVN_NO_ERROR;
1013 }
1014
1015 svn_error_t *
svn_fs_x__write_non_packed_revprops(apr_file_t * file,apr_hash_t * proplist,apr_pool_t * scratch_pool)1016 svn_fs_x__write_non_packed_revprops(apr_file_t *file,
1017 apr_hash_t *proplist,
1018 apr_pool_t *scratch_pool)
1019 {
1020 svn_stream_t *stream;
1021 svn_checksum_t *checksum;
1022
1023 stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool);
1024 stream = svn_checksum__wrap_write_stream(&checksum, stream,
1025 svn_checksum_fnv1a_32x4,
1026 scratch_pool);
1027 SVN_ERR(svn_fs_x__write_properties(stream, proplist, scratch_pool));
1028 SVN_ERR(svn_stream_close(stream));
1029
1030 /* Append the checksum */
1031 SVN_ERR(svn_io_file_write_full(file, checksum->digest,
1032 svn_checksum_size(checksum), NULL,
1033 scratch_pool));
1034
1035 return SVN_NO_ERROR;
1036 }
1037
1038 /* Serialize the revision property list PROPLIST of revision REV in
1039 * filesystem FS to a non-packed file. Return the name of that temporary
1040 * file in *TMP_PATH and the file path that it must be moved to in
1041 * *FINAL_PATH. Schedule necessary fsync calls in BATCH.
1042 *
1043 * Allocate *FINAL_PATH and *TMP_PATH in RESULT_POOL. Use SCRATCH_POOL
1044 * for temporary allocations.
1045 */
1046 static svn_error_t *
write_non_packed_revprop(const char ** final_path,const char ** tmp_path,svn_fs_t * fs,svn_revnum_t rev,apr_hash_t * proplist,svn_fs_x__batch_fsync_t * batch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1047 write_non_packed_revprop(const char **final_path,
1048 const char **tmp_path,
1049 svn_fs_t *fs,
1050 svn_revnum_t rev,
1051 apr_hash_t *proplist,
1052 svn_fs_x__batch_fsync_t *batch,
1053 apr_pool_t *result_pool,
1054 apr_pool_t *scratch_pool)
1055 {
1056 apr_file_t *file;
1057 *final_path = svn_fs_x__path_revprops(fs, rev, result_pool);
1058
1059 *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL);
1060 SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path,
1061 scratch_pool));
1062
1063 SVN_ERR(svn_fs_x__write_non_packed_revprops(file, proplist, scratch_pool));
1064
1065 return SVN_NO_ERROR;
1066 }
1067
1068 /* After writing the new revprop file(s), call this function to move the
1069 * file at TMP_PATH to FINAL_PATH and give it the permissions from
1070 * PERMS_REFERENCE. Schedule necessary fsync calls in BATCH.
1071 *
1072 * If indicated in BUMP_GENERATION, increase FS' revprop generation.
1073 * Finally, delete all the temporary files given in FILES_TO_DELETE.
1074 * The latter may be NULL.
1075 *
1076 * Use SCRATCH_POOL for temporary allocations.
1077 */
1078 static svn_error_t *
switch_to_new_revprop(svn_fs_t * fs,const char * final_path,const char * tmp_path,const char * perms_reference,apr_array_header_t * files_to_delete,svn_boolean_t bump_generation,svn_fs_x__batch_fsync_t * batch,apr_pool_t * scratch_pool)1079 switch_to_new_revprop(svn_fs_t *fs,
1080 const char *final_path,
1081 const char *tmp_path,
1082 const char *perms_reference,
1083 apr_array_header_t *files_to_delete,
1084 svn_boolean_t bump_generation,
1085 svn_fs_x__batch_fsync_t *batch,
1086 apr_pool_t *scratch_pool)
1087 {
1088 /* Now, we may actually be replacing revprops. Make sure that all other
1089 threads and processes will know about this. */
1090 if (bump_generation)
1091 SVN_ERR(begin_revprop_change(fs, scratch_pool));
1092
1093 /* Ensure the new file contents makes it to disk before switching over to
1094 * it. */
1095 SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool));
1096
1097 /* Make the revision visible to all processes and threads. */
1098 SVN_ERR(svn_fs_x__move_into_place(tmp_path, final_path, perms_reference,
1099 batch, scratch_pool));
1100 SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool));
1101
1102 /* Indicate that the update (if relevant) has been completed. */
1103 if (bump_generation)
1104 SVN_ERR(end_revprop_change(fs, scratch_pool));
1105
1106 /* Clean up temporary files, if necessary. */
1107 if (files_to_delete)
1108 {
1109 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1110 int i;
1111
1112 for (i = 0; i < files_to_delete->nelts; ++i)
1113 {
1114 const char *path = APR_ARRAY_IDX(files_to_delete, i, const char*);
1115
1116 svn_pool_clear(iterpool);
1117 SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool));
1118 }
1119
1120 svn_pool_destroy(iterpool);
1121 }
1122 return SVN_NO_ERROR;
1123 }
1124
1125 /* Writes the a pack file to FILE. It copies the serialized data
1126 * from REVPROPS for the indexes [START,END).
1127 *
1128 * NEW_TOTAL_SIZE is a hint for pre-allocating buffers of appropriate size.
1129 * SCRATCH_POOL is used for temporary allocations.
1130 */
1131 static svn_error_t *
repack_revprops(svn_fs_t * fs,packed_revprops_t * revprops,int start,int end,apr_size_t new_total_size,apr_file_t * file,apr_pool_t * scratch_pool)1132 repack_revprops(svn_fs_t *fs,
1133 packed_revprops_t *revprops,
1134 int start,
1135 int end,
1136 apr_size_t new_total_size,
1137 apr_file_t *file,
1138 apr_pool_t *scratch_pool)
1139 {
1140 int i;
1141
1142 svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
1143 svn_packed__byte_stream_t *revprops_stream
1144 = svn_packed__create_bytes_stream(root);
1145
1146 /* append the serialized revprops */
1147 for (i = start; i < end; ++i)
1148 {
1149 const svn_string_t *props
1150 = &APR_ARRAY_IDX(revprops->revprops, i, svn_string_t);
1151
1152 svn_packed__add_bytes(revprops_stream, props->data, props->len);
1153 }
1154
1155 /* Write to file. */
1156 SVN_ERR(write_packed_data_checksummed(root, file, scratch_pool));
1157
1158 return SVN_NO_ERROR;
1159 }
1160
1161 /* Allocate a new pack file name for revisions starting at START_REV in
1162 * REVPROPS->MANIFEST. Add the name of old file to FILES_TO_DELETE,
1163 * auto-create that array if necessary. Return an open file *FILE that is
1164 * allocated in RESULT_POOL. Allocate the paths in *FILES_TO_DELETE from
1165 * the same pool that contains the array itself. Schedule necessary fsync
1166 * calls in BATCH.
1167 *
1168 * Use SCRATCH_POOL for temporary allocations.
1169 */
1170 static svn_error_t *
repack_file_open(apr_file_t ** file,svn_fs_t * fs,packed_revprops_t * revprops,svn_revnum_t start_rev,apr_array_header_t ** files_to_delete,svn_fs_x__batch_fsync_t * batch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1171 repack_file_open(apr_file_t **file,
1172 svn_fs_t *fs,
1173 packed_revprops_t *revprops,
1174 svn_revnum_t start_rev,
1175 apr_array_header_t **files_to_delete,
1176 svn_fs_x__batch_fsync_t *batch,
1177 apr_pool_t *result_pool,
1178 apr_pool_t *scratch_pool)
1179 {
1180 manifest_entry_t new_entry;
1181 const char *new_path;
1182 int idx;
1183
1184 /* We always replace whole pack files - possibly by more than one new file.
1185 * When we create the file for the first part of the pack, enlist the old
1186 * one for later deletion */
1187 SVN_ERR_ASSERT(start_rev >= revprops->entry.start_rev);
1188
1189 if (*files_to_delete == NULL)
1190 *files_to_delete = apr_array_make(result_pool, 3, sizeof(const char*));
1191
1192 if (revprops->entry.start_rev == start_rev)
1193 APR_ARRAY_PUSH(*files_to_delete, const char*)
1194 = get_revprop_pack_filepath(revprops, &revprops->entry,
1195 (*files_to_delete)->pool);
1196
1197 /* Initialize the new manifest entry. Bump the tag part. */
1198 new_entry.start_rev = start_rev;
1199 new_entry.tag = revprops->entry.tag + 1;
1200
1201 /* update the manifest to point to the new file */
1202 idx = get_entry(revprops->manifest, start_rev);
1203 if (revprops->entry.start_rev == start_rev)
1204 APR_ARRAY_IDX(revprops->manifest, idx, manifest_entry_t) = new_entry;
1205 else
1206 SVN_ERR(svn_sort__array_insert2(revprops->manifest, &new_path, idx + 1));
1207
1208 /* open the file */
1209 new_path = get_revprop_pack_filepath(revprops, &new_entry, scratch_pool);
1210 SVN_ERR(svn_fs_x__batch_fsync_open_file(file, batch, new_path,
1211 scratch_pool));
1212
1213 return SVN_NO_ERROR;
1214 }
1215
1216 /* Return the length of the serialized reprop list of index I in REVPROPS. */
1217 static apr_size_t
props_len(packed_revprops_t * revprops,int i)1218 props_len(packed_revprops_t *revprops,
1219 int i)
1220 {
1221 return APR_ARRAY_IDX(revprops->revprops, i, svn_string_t).len;
1222 }
1223
1224 /* For revision REV in filesystem FS, set the revision properties to
1225 * PROPLIST. Return a new file in *TMP_PATH that the caller shall move
1226 * to *FINAL_PATH to make the change visible. Files to be deleted will
1227 * be listed in *FILES_TO_DELETE which may remain unchanged / unallocated.
1228 * Schedule necessary fsync calls in BATCH.
1229 *
1230 * Allocate output values in RESULT_POOL and temporaries from SCRATCH_POOL.
1231 */
1232 static svn_error_t *
write_packed_revprop(const char ** final_path,const char ** tmp_path,apr_array_header_t ** files_to_delete,svn_fs_t * fs,svn_revnum_t rev,apr_hash_t * proplist,svn_fs_x__batch_fsync_t * batch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1233 write_packed_revprop(const char **final_path,
1234 const char **tmp_path,
1235 apr_array_header_t **files_to_delete,
1236 svn_fs_t *fs,
1237 svn_revnum_t rev,
1238 apr_hash_t *proplist,
1239 svn_fs_x__batch_fsync_t *batch,
1240 apr_pool_t *result_pool,
1241 apr_pool_t *scratch_pool)
1242 {
1243 svn_fs_x__data_t *ffd = fs->fsap_data;
1244 packed_revprops_t *revprops;
1245 svn_stream_t *stream;
1246 apr_file_t *file;
1247 svn_stringbuf_t *serialized;
1248 apr_size_t new_total_size;
1249 int changed_index;
1250 int count;
1251
1252 /* read the current revprop generation. This value will not change
1253 * while we hold the global write lock to this FS. */
1254 if (has_revprop_cache(fs, scratch_pool))
1255 SVN_ERR(read_revprop_generation(fs, scratch_pool));
1256
1257 /* read contents of the current pack file */
1258 SVN_ERR(read_pack_revprop(&revprops, fs, rev, TRUE,
1259 scratch_pool, scratch_pool));
1260
1261 /* serialize the new revprops */
1262 serialized = svn_stringbuf_create_empty(scratch_pool);
1263 stream = svn_stream_from_stringbuf(serialized, scratch_pool);
1264 SVN_ERR(svn_fs_x__write_properties(stream, proplist, scratch_pool));
1265 SVN_ERR(svn_stream_close(stream));
1266
1267 /* estimate the size of the new data */
1268 count = revprops->revprops->nelts;
1269 changed_index = (int)(rev - revprops->entry.start_rev);
1270 new_total_size = revprops->total_size - revprops->serialized_size
1271 + serialized->len
1272 + (count + 2) * SVN_INT64_BUFFER_SIZE;
1273
1274 APR_ARRAY_IDX(revprops->revprops, changed_index, svn_string_t)
1275 = *svn_stringbuf__morph_into_string(serialized);
1276
1277 /* can we put the new data into the same pack as the before? */
1278 if (new_total_size < ffd->revprop_pack_size || count == 1)
1279 {
1280 /* simply replace the old pack file with new content as we do it
1281 * in the non-packed case */
1282
1283 *final_path = get_revprop_pack_filepath(revprops, &revprops->entry,
1284 result_pool);
1285 *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL);
1286 SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path,
1287 scratch_pool));
1288 SVN_ERR(repack_revprops(fs, revprops, 0, count,
1289 new_total_size, file, scratch_pool));
1290 }
1291 else
1292 {
1293 /* split the pack file into two of roughly equal size */
1294 int right_count, left_count;
1295
1296 int left = 0;
1297 int right = count - 1;
1298 apr_size_t left_size = 2 * SVN_INT64_BUFFER_SIZE;
1299 apr_size_t right_size = 2 * SVN_INT64_BUFFER_SIZE;
1300
1301 /* let left and right side grow such that their size difference
1302 * is minimal after each step. */
1303 while (left <= right)
1304 if ( left_size + props_len(revprops, left)
1305 < right_size + props_len(revprops, right))
1306 {
1307 left_size += props_len(revprops, left) + SVN_INT64_BUFFER_SIZE;
1308 ++left;
1309 }
1310 else
1311 {
1312 right_size += props_len(revprops, right) + SVN_INT64_BUFFER_SIZE;
1313 --right;
1314 }
1315
1316 /* since the items need much less than SVN_INT64_BUFFER_SIZE
1317 * bytes to represent their length, the split may not be optimal */
1318 left_count = left;
1319 right_count = count - left;
1320
1321 /* if new_size is large, one side may exceed the pack size limit.
1322 * In that case, split before and after the modified revprop.*/
1323 if ( left_size > ffd->revprop_pack_size
1324 || right_size > ffd->revprop_pack_size)
1325 {
1326 left_count = changed_index;
1327 right_count = count - left_count - 1;
1328 }
1329
1330 /* Allocate this here such that we can call the repack functions with
1331 * the scratch pool alone. */
1332 if (*files_to_delete == NULL)
1333 *files_to_delete = apr_array_make(result_pool, 3,
1334 sizeof(const char*));
1335
1336 /* write the new, split files */
1337 if (left_count)
1338 {
1339 SVN_ERR(repack_file_open(&file, fs, revprops,
1340 revprops->entry.start_rev,
1341 files_to_delete, batch,
1342 scratch_pool, scratch_pool));
1343 SVN_ERR(repack_revprops(fs, revprops, 0, left_count,
1344 new_total_size, file, scratch_pool));
1345 }
1346
1347 if (left_count + right_count < count)
1348 {
1349 SVN_ERR(repack_file_open(&file, fs, revprops, rev,
1350 files_to_delete, batch,
1351 scratch_pool, scratch_pool));
1352 SVN_ERR(repack_revprops(fs, revprops, changed_index,
1353 changed_index + 1,
1354 new_total_size, file, scratch_pool));
1355 }
1356
1357 if (right_count)
1358 {
1359 SVN_ERR(repack_file_open(&file, fs, revprops, rev + 1,
1360 files_to_delete, batch,
1361 scratch_pool, scratch_pool));
1362 SVN_ERR(repack_revprops(fs, revprops, count - right_count, count,
1363 new_total_size, file, scratch_pool));
1364 }
1365
1366 /* write the new manifest */
1367 *final_path = svn_dirent_join(revprops->folder, PATH_MANIFEST,
1368 result_pool);
1369 *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL);
1370 SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path,
1371 scratch_pool));
1372 SVN_ERR(write_manifest(file, revprops->manifest, scratch_pool));
1373 }
1374
1375 return SVN_NO_ERROR;
1376 }
1377
1378 /* Set the revision property list of revision REV in filesystem FS to
1379 PROPLIST. Use SCRATCH_POOL for temporary allocations. */
1380 svn_error_t *
svn_fs_x__set_revision_proplist(svn_fs_t * fs,svn_revnum_t rev,apr_hash_t * proplist,apr_pool_t * scratch_pool)1381 svn_fs_x__set_revision_proplist(svn_fs_t *fs,
1382 svn_revnum_t rev,
1383 apr_hash_t *proplist,
1384 apr_pool_t *scratch_pool)
1385 {
1386 svn_boolean_t is_packed;
1387 svn_boolean_t bump_generation = FALSE;
1388 const char *final_path;
1389 const char *tmp_path;
1390 const char *perms_reference;
1391 apr_array_header_t *files_to_delete = NULL;
1392 svn_fs_x__batch_fsync_t *batch;
1393 svn_fs_x__data_t *ffd = fs->fsap_data;
1394
1395 SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
1396
1397 /* Perform all fsyncs through this instance. */
1398 SVN_ERR(svn_fs_x__batch_fsync_create(&batch, ffd->flush_to_disk,
1399 scratch_pool));
1400
1401 /* this info will not change while we hold the global FS write lock */
1402 is_packed = svn_fs_x__is_packed_revprop(fs, rev);
1403
1404 /* Test whether revprops already exist for this revision.
1405 * Only then will we need to bump the revprop generation.
1406 * The fact that they did not yet exist is never cached. */
1407 if (is_packed)
1408 {
1409 bump_generation = TRUE;
1410 }
1411 else
1412 {
1413 svn_node_kind_t kind;
1414 SVN_ERR(svn_io_check_path(svn_fs_x__path_revprops(fs, rev,
1415 scratch_pool),
1416 &kind, scratch_pool));
1417 bump_generation = kind != svn_node_none;
1418 }
1419
1420 /* Serialize the new revprop data */
1421 if (is_packed)
1422 SVN_ERR(write_packed_revprop(&final_path, &tmp_path, &files_to_delete,
1423 fs, rev, proplist, batch, scratch_pool,
1424 scratch_pool));
1425 else
1426 SVN_ERR(write_non_packed_revprop(&final_path, &tmp_path,
1427 fs, rev, proplist, batch,
1428 scratch_pool, scratch_pool));
1429
1430 /* We use the rev file of this revision as the perms reference,
1431 * because when setting revprops for the first time, the revprop
1432 * file won't exist and therefore can't serve as its own reference.
1433 * (Whereas the rev file should already exist at this point.)
1434 */
1435 perms_reference = svn_fs_x__path_rev_absolute(fs, rev, scratch_pool);
1436
1437 /* Now, switch to the new revprop data. */
1438 SVN_ERR(switch_to_new_revprop(fs, final_path, tmp_path, perms_reference,
1439 files_to_delete, bump_generation, batch,
1440 scratch_pool));
1441
1442 return SVN_NO_ERROR;
1443 }
1444
1445 /* Return TRUE, if for REVISION in FS, we can find the revprop pack file.
1446 * Use SCRATCH_POOL for temporary allocations.
1447 * Set *MISSING, if the reason is a missing manifest or pack file.
1448 */
1449 svn_boolean_t
svn_fs_x__packed_revprop_available(svn_boolean_t * missing,svn_fs_t * fs,svn_revnum_t revision,apr_pool_t * scratch_pool)1450 svn_fs_x__packed_revprop_available(svn_boolean_t *missing,
1451 svn_fs_t *fs,
1452 svn_revnum_t revision,
1453 apr_pool_t *scratch_pool)
1454 {
1455 svn_node_kind_t kind;
1456 packed_revprops_t *revprops;
1457 svn_error_t *err;
1458
1459 /* try to read the manifest file */
1460 revprops = apr_pcalloc(scratch_pool, sizeof(*revprops));
1461 revprops->revision = revision;
1462 err = get_revprop_packname(fs, revprops, scratch_pool, scratch_pool);
1463
1464 /* if the manifest cannot be read, consider the pack files inaccessible
1465 * even if the file itself exists. */
1466 if (err)
1467 {
1468 svn_error_clear(err);
1469 return FALSE;
1470 }
1471
1472 /* the respective pack file must exist (and be a file) */
1473 err = svn_io_check_path(get_revprop_pack_filepath(revprops,
1474 &revprops->entry,
1475 scratch_pool),
1476 &kind, scratch_pool);
1477 if (err)
1478 {
1479 svn_error_clear(err);
1480 return FALSE;
1481 }
1482
1483 *missing = kind == svn_node_none;
1484 return kind == svn_node_file;
1485 }
1486
1487
1488 /****** Packing FSX shards *********/
1489
1490 /* Copy revprop files for revisions [START_REV, END_REV) from SHARD_PATH
1491 * in filesystem FS to the pack file at PACK_FILE_NAME in PACK_FILE_DIR.
1492 *
1493 * The file sizes have already been determined and written to SIZES.
1494 * Please note that this function will be executed while the filesystem
1495 * has been locked and that revprops files will therefore not be modified
1496 * while the pack is in progress.
1497 *
1498 * COMPRESSION_LEVEL defines how well the resulting pack file shall be
1499 * compressed or whether is shall be compressed at all. TOTAL_SIZE is
1500 * a hint on which initial buffer size we should use to hold the pack file
1501 * content. Schedule necessary fsync calls in BATCH.
1502 *
1503 * CANCEL_FUNC and CANCEL_BATON are used as usual. Temporary allocations
1504 * are done in SCRATCH_POOL.
1505 */
1506 static svn_error_t *
copy_revprops(svn_fs_t * fs,const char * pack_file_dir,const char * pack_filename,const char * shard_path,svn_revnum_t start_rev,svn_revnum_t end_rev,apr_array_header_t * sizes,apr_size_t total_size,int compression_level,svn_fs_x__batch_fsync_t * batch,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * scratch_pool)1507 copy_revprops(svn_fs_t *fs,
1508 const char *pack_file_dir,
1509 const char *pack_filename,
1510 const char *shard_path,
1511 svn_revnum_t start_rev,
1512 svn_revnum_t end_rev,
1513 apr_array_header_t *sizes,
1514 apr_size_t total_size,
1515 int compression_level,
1516 svn_fs_x__batch_fsync_t *batch,
1517 svn_cancel_func_t cancel_func,
1518 void *cancel_baton,
1519 apr_pool_t *scratch_pool)
1520 {
1521 apr_file_t *pack_file;
1522 svn_revnum_t rev;
1523 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1524
1525 svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
1526 svn_packed__byte_stream_t *stream
1527 = svn_packed__create_bytes_stream(root);
1528
1529 /* Iterate over the revisions in this shard, squashing them together. */
1530 for (rev = start_rev; rev <= end_rev; rev++)
1531 {
1532 const char *path;
1533 svn_stringbuf_t *props;
1534
1535 svn_pool_clear(iterpool);
1536
1537 /* Construct the file name. */
1538 path = svn_fs_x__path_revprops(fs, rev, iterpool);
1539
1540 /* Copy all the bits from the non-packed revprop file to the end of
1541 * the pack file. */
1542 SVN_ERR(svn_stringbuf_from_file2(&props, path, iterpool));
1543 SVN_ERR_W(verify_checksum(props, iterpool),
1544 apr_psprintf(iterpool, "Failed to read revprops for r%ld.",
1545 rev));
1546
1547 svn_packed__add_bytes(stream, props->data, props->len);
1548 }
1549
1550 /* Create the auto-fsync'ing pack file. */
1551 SVN_ERR(svn_fs_x__batch_fsync_open_file(&pack_file, batch,
1552 svn_dirent_join(pack_file_dir,
1553 pack_filename,
1554 scratch_pool),
1555 scratch_pool));
1556
1557 /* write all to disk */
1558 SVN_ERR(write_packed_data_checksummed(root, pack_file, scratch_pool));
1559
1560 svn_pool_destroy(iterpool);
1561
1562 return SVN_NO_ERROR;
1563 }
1564
1565 svn_error_t *
svn_fs_x__pack_revprops_shard(svn_fs_t * fs,const char * pack_file_dir,const char * shard_path,apr_int64_t shard,int max_files_per_dir,apr_int64_t max_pack_size,int compression_level,svn_fs_x__batch_fsync_t * batch,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * scratch_pool)1566 svn_fs_x__pack_revprops_shard(svn_fs_t *fs,
1567 const char *pack_file_dir,
1568 const char *shard_path,
1569 apr_int64_t shard,
1570 int max_files_per_dir,
1571 apr_int64_t max_pack_size,
1572 int compression_level,
1573 svn_fs_x__batch_fsync_t *batch,
1574 svn_cancel_func_t cancel_func,
1575 void *cancel_baton,
1576 apr_pool_t *scratch_pool)
1577 {
1578 const char *manifest_file_path, *pack_filename = NULL;
1579 apr_file_t *manifest_file;
1580 svn_revnum_t start_rev, end_rev, rev;
1581 apr_size_t total_size;
1582 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1583 apr_array_header_t *sizes;
1584 apr_array_header_t *manifest;
1585
1586 /* Sanitize config file values. */
1587 apr_size_t max_size = (apr_size_t)MIN(MAX(max_pack_size, 1),
1588 SVN_MAX_OBJECT_SIZE);
1589
1590 /* Some useful paths. */
1591 manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST,
1592 scratch_pool);
1593
1594 /* Create the manifest file. */
1595 SVN_ERR(svn_fs_x__batch_fsync_open_file(&manifest_file, batch,
1596 manifest_file_path, scratch_pool));
1597
1598 /* revisions to handle. Special case: revision 0 */
1599 start_rev = (svn_revnum_t) (shard * max_files_per_dir);
1600 end_rev = (svn_revnum_t) ((shard + 1) * (max_files_per_dir) - 1);
1601 if (start_rev == 0)
1602 {
1603 /* Never pack revprops for r0, just copy it. */
1604 SVN_ERR(svn_io_copy_file(svn_fs_x__path_revprops(fs, 0, iterpool),
1605 svn_dirent_join(pack_file_dir, "p0",
1606 scratch_pool),
1607 TRUE,
1608 iterpool));
1609
1610 ++start_rev;
1611 /* Special special case: if max_files_per_dir is 1, then at this point
1612 start_rev == 1 and end_rev == 0 (!). Fortunately, everything just
1613 works. */
1614 }
1615
1616 /* initialize the revprop size info */
1617 sizes = apr_array_make(scratch_pool, max_files_per_dir, sizeof(apr_size_t));
1618 total_size = 2 * SVN_INT64_BUFFER_SIZE;
1619
1620 manifest = apr_array_make(scratch_pool, 4, sizeof(manifest_entry_t));
1621
1622 /* Iterate over the revisions in this shard, determine their size and
1623 * squashing them together into pack files. */
1624 for (rev = start_rev; rev <= end_rev; rev++)
1625 {
1626 apr_finfo_t finfo;
1627 const char *path;
1628
1629 svn_pool_clear(iterpool);
1630
1631 /* Get the size of the file. */
1632 path = svn_fs_x__path_revprops(fs, rev, iterpool);
1633 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));
1634
1635 /* If we already have started a pack file and this revprop cannot be
1636 * appended to it, write the previous pack file. Note this overflow
1637 * check works because we enforced MAX_SIZE <= SVN_MAX_OBJECT_SIZE. */
1638 if (sizes->nelts != 0
1639 && ( finfo.size > max_size
1640 || total_size > max_size
1641 || SVN_INT64_BUFFER_SIZE + finfo.size > max_size - total_size))
1642 {
1643 SVN_ERR(copy_revprops(fs, pack_file_dir, pack_filename,
1644 shard_path, start_rev, rev-1,
1645 sizes, (apr_size_t)total_size,
1646 compression_level, batch, cancel_func,
1647 cancel_baton, iterpool));
1648
1649 /* next pack file starts empty again */
1650 apr_array_clear(sizes);
1651 total_size = 2 * SVN_INT64_BUFFER_SIZE;
1652 start_rev = rev;
1653 }
1654
1655 /* Update the manifest. Allocate a file name for the current pack
1656 * file if it is a new one */
1657 if (sizes->nelts == 0)
1658 {
1659 manifest_entry_t *entry = apr_array_push(manifest);
1660 entry->start_rev = rev;
1661 entry->tag = 0;
1662
1663 pack_filename = apr_psprintf(scratch_pool, "%ld.0", rev);
1664 }
1665
1666 /* add to list of files to put into the current pack file */
1667 APR_ARRAY_PUSH(sizes, apr_size_t) = finfo.size;
1668 total_size += SVN_INT64_BUFFER_SIZE + finfo.size;
1669 }
1670
1671 /* write the last pack file */
1672 if (sizes->nelts != 0)
1673 SVN_ERR(copy_revprops(fs, pack_file_dir, pack_filename, shard_path,
1674 start_rev, rev-1, sizes,
1675 (apr_size_t)total_size, compression_level,
1676 batch, cancel_func, cancel_baton, iterpool));
1677
1678 SVN_ERR(write_manifest(manifest_file, manifest, iterpool));
1679
1680 /* flush all data to disk and update permissions */
1681 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool));
1682 svn_pool_destroy(iterpool);
1683
1684 return SVN_NO_ERROR;
1685 }
1686