1 /* revprops.c --- everything needed to handle revprops in FSX
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 #include <assert.h>
24 #include <apr_md5.h>
25 
26 #include "svn_pools.h"
27 #include "svn_hash.h"
28 #include "svn_dirent_uri.h"
29 #include "svn_sorts.h"
30 
31 #include "fs_x.h"
32 #include "low_level.h"
33 #include "revprops.h"
34 #include "util.h"
35 #include "transaction.h"
36 
37 #include "private/svn_packed_data.h"
38 #include "private/svn_sorts_private.h"
39 #include "private/svn_subr_private.h"
40 #include "private/svn_string_private.h"
41 #include "../libsvn_fs/fs-loader.h"
42 
43 #include "svn_private_config.h"
44 
/* Give writing processes 10 seconds to replace an existing revprop
   file with a new one.  After that time, we assume that the writing
   process got aborted and that we have to re-read revprops. */
48 #define REVPROP_CHANGE_TIMEOUT (10 * 1000000)
49 
50 /* In case of an inconsistent read, close the generation file, yield,
51    re-open and re-read.  This is the number of times we try this before
52    giving up. */
53 #define GENERATION_READ_RETRY_COUNT 100
54 
55 
56 /* Revprop caching management.
57  *
58  * Mechanism:
59  * ----------
60  *
61  * Revprop caching needs to be activated and will be deactivated for the
62  * respective FS instance if the necessary infrastructure could not be
63  * initialized.  As long as no revprops are being read or changed, revprop
64  * caching imposes no overhead.
65  *
66  * When activated, we cache revprops using (revision, generation) pairs
67  * as keys with the generation being incremented upon every revprop change.
68  * Since the cache is process-local, the generation needs to be tracked
69  * for at least as long as the process lives but may be reset afterwards.
 * We track the revprop generation in a file that gets replaced atomically
 * upon every change.
71  *
72  * A race condition exists between switching to the modified revprop data
73  * and bumping the generation number.  In particular, the process may crash
74  * just after switching to the new revprop data and before bumping the
75  * generation.  To be able to detect this scenario, we bump the generation
76  * twice per revprop change: once immediately before (creating an odd number)
77  * and once after the atomic switch (even generation).
78  *
79  * A writer holding the write lock can immediately assume a crashed writer
80  * in case of an odd generation or they would not have been able to acquire
81  * the lock.  A reader detecting an odd generation will use that number and
82  * be forced to re-read any revprop data - usually getting the new revprops
83  * already.  If the generation file modification timestamp is too old, the
84  * reader will assume a crashed writer, acquire the write lock and bump
85  * the generation if it is still odd.  So, for about REVPROP_CHANGE_TIMEOUT
86  * after the crash, reader caches may be stale.
87  */
88 
89 /* Read revprop generation as stored on disk for repository FS. The result is
90  * returned in *CURRENT.  Call only for repos that support revprop caching.
91  */
92 static svn_error_t *
read_revprop_generation_file(apr_int64_t * current,svn_fs_t * fs,apr_pool_t * scratch_pool)93 read_revprop_generation_file(apr_int64_t *current,
94                              svn_fs_t *fs,
95                              apr_pool_t *scratch_pool)
96 {
97   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
98   int i;
99   svn_error_t *err = SVN_NO_ERROR;
100   const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool);
101 
102   /* Retry in case of incomplete file buffer updates. */
103   for (i = 0; i < GENERATION_READ_RETRY_COUNT; ++i)
104     {
105       svn_stringbuf_t *buf;
106 
107       svn_error_clear(err);
108       svn_pool_clear(iterpool);
109 
110       /* Read the generation file. */
111       err = svn_stringbuf_from_file2(&buf, path, iterpool);
112 
113       /* If we could read the file, it should be complete due to our atomic
114        * file replacement scheme. */
115       if (!err)
116         {
117           svn_stringbuf_strip_whitespace(buf);
118           SVN_ERR(svn_cstring_atoi64(current, buf->data));
119           break;
120         }
121 
122       /* Got unlucky the file was not available.  Retry. */
123 #if APR_HAS_THREADS
124       apr_thread_yield();
125 #else
126       apr_sleep(0);
127 #endif
128     }
129 
130   svn_pool_destroy(iterpool);
131 
132   /* If we had to give up, propagate the error. */
133   return svn_error_trace(err);
134 }
135 
136 /* Write the CURRENT revprop generation to disk for repository FS.
137  * Call only for repos that support revprop caching.
138  */
139 static svn_error_t *
write_revprop_generation_file(svn_fs_t * fs,apr_int64_t current,apr_pool_t * scratch_pool)140 write_revprop_generation_file(svn_fs_t *fs,
141                               apr_int64_t current,
142                               apr_pool_t *scratch_pool)
143 {
144   svn_fs_x__data_t *ffd = fs->fsap_data;
145   svn_stringbuf_t *buffer;
146   const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool);
147 
148   /* Invalidate our cached revprop generation in case the file operations
149    * below fail. */
150   ffd->revprop_generation = -1;
151 
152   /* Write the new number. */
153   buffer = svn_stringbuf_createf(scratch_pool, "%" APR_INT64_T_FMT "\n",
154                                  current);
155   SVN_ERR(svn_io_write_atomic2(path, buffer->data, buffer->len,
156                                path /* copy_perms */, FALSE,
157                                scratch_pool));
158 
159   /* Remember it to spare us the re-read. */
160   ffd->revprop_generation = current;
161 
162   return SVN_NO_ERROR;
163 }
164 
165 svn_error_t *
svn_fs_x__reset_revprop_generation_file(svn_fs_t * fs,apr_pool_t * scratch_pool)166 svn_fs_x__reset_revprop_generation_file(svn_fs_t *fs,
167                                         apr_pool_t *scratch_pool)
168 {
169   /* Write the initial revprop generation file contents. */
170   SVN_ERR(write_revprop_generation_file(fs, 0, scratch_pool));
171 
172   return SVN_NO_ERROR;
173 }
174 
175 /* Test whether revprop cache and necessary infrastructure are
176    available in FS. */
177 static svn_boolean_t
has_revprop_cache(svn_fs_t * fs,apr_pool_t * scratch_pool)178 has_revprop_cache(svn_fs_t *fs,
179                   apr_pool_t *scratch_pool)
180 {
181   svn_fs_x__data_t *ffd = fs->fsap_data;
182 
183   /* is the cache enabled? */
184   return ffd->revprop_cache != NULL;
185 }
186 
187 /* Baton structure for revprop_generation_fixup. */
188 typedef struct revprop_generation_fixup_t
189 {
190   /* revprop generation to read */
191   apr_int64_t *generation;
192 
193   /* file system context */
194   svn_fs_t *fs;
195 } revprop_generation_upgrade_t;
196 
197 /* If the revprop generation has an odd value, it means the original writer
198    of the revprop got killed. We don't know whether that process as able
199    to change the revprop data but we assume that it was. Therefore, we
200    increase the generation in that case to basically invalidate everyone's
201    cache content.
202    Execute this only while holding the write lock to the repo in baton->FFD.
203  */
204 static svn_error_t *
revprop_generation_fixup(void * void_baton,apr_pool_t * scratch_pool)205 revprop_generation_fixup(void *void_baton,
206                          apr_pool_t *scratch_pool)
207 {
208   revprop_generation_upgrade_t *baton = void_baton;
209   svn_fs_x__data_t *ffd = baton->fs->fsap_data;
210   assert(ffd->has_write_lock);
211 
212   /* Maybe, either the original revprop writer or some other reader has
213      already corrected / bumped the revprop generation.  Thus, we need
214      to read it again.  However, we will now be the only ones changing
215      the file contents due to us holding the write lock. */
216   SVN_ERR(read_revprop_generation_file(baton->generation, baton->fs,
217                                        scratch_pool));
218 
219   /* Cause everyone to re-read revprops upon their next access, if the
220      last revprop write did not complete properly. */
221   if (*baton->generation % 2)
222     {
223       ++*baton->generation;
224       SVN_ERR(write_revprop_generation_file(baton->fs,
225                                             *baton->generation,
226                                             scratch_pool));
227     }
228 
229   return SVN_NO_ERROR;
230 }
231 
232 /* Read the current revprop generation of FS and its value in FS->FSAP_DATA.
233    Also, detect aborted / crashed writers and recover from that. */
234 static svn_error_t *
read_revprop_generation(svn_fs_t * fs,apr_pool_t * scratch_pool)235 read_revprop_generation(svn_fs_t *fs,
236                         apr_pool_t *scratch_pool)
237 {
238   apr_int64_t current = 0;
239   svn_fs_x__data_t *ffd = fs->fsap_data;
240 
241   /* read the current revprop generation number */
242   SVN_ERR(read_revprop_generation_file(&current, fs, scratch_pool));
243 
244   /* is an unfinished revprop write under the way? */
245   if (current % 2)
246     {
247       svn_boolean_t timeout = FALSE;
248 
249       /* Has the writer process been aborted?
250        * Either by timeout or by us being the writer now.
251        */
252       if (!ffd->has_write_lock)
253         {
254           apr_time_t mtime;
255           SVN_ERR(svn_io_file_affected_time(&mtime,
256                         svn_fs_x__path_revprop_generation(fs, scratch_pool),
257                         scratch_pool));
258           timeout = apr_time_now() > mtime + REVPROP_CHANGE_TIMEOUT;
259         }
260 
261       if (ffd->has_write_lock || timeout)
262         {
263           revprop_generation_upgrade_t baton;
264           baton.generation = &current;
265           baton.fs = fs;
266 
267           /* Ensure that the original writer process no longer exists by
268            * acquiring the write lock to this repository.  Then, fix up
269            * the revprop generation.
270            */
271           if (ffd->has_write_lock)
272             SVN_ERR(revprop_generation_fixup(&baton, scratch_pool));
273           else
274             SVN_ERR(svn_fs_x__with_write_lock(fs, revprop_generation_fixup,
275                                               &baton, scratch_pool));
276         }
277     }
278 
279   /* return the value we just got */
280   ffd->revprop_generation = current;
281   return SVN_NO_ERROR;
282 }
283 
284 void
svn_fs_x__invalidate_revprop_generation(svn_fs_t * fs)285 svn_fs_x__invalidate_revprop_generation(svn_fs_t *fs)
286 {
287   svn_fs_x__data_t *ffd = fs->fsap_data;
288   ffd->revprop_generation = -1;
289 }
290 
291 /* Return TRUE if the revprop generation value in FS->FSAP_DATA is valid. */
292 static svn_boolean_t
is_generation_valid(svn_fs_t * fs)293 is_generation_valid(svn_fs_t *fs)
294 {
295   svn_fs_x__data_t *ffd = fs->fsap_data;
296   return ffd->revprop_generation >= 0;
297 }
298 
299 /* Set the revprop generation in FS to the next odd number to indicate
300    that there is a revprop write process under way.  Update the value
301    in FS->FSAP_DATA accordingly.  If the change times out, readers shall
302    recover from that state & re-read revprops.
303    This is a no-op for repo formats that don't support revprop caching. */
304 static svn_error_t *
begin_revprop_change(svn_fs_t * fs,apr_pool_t * scratch_pool)305 begin_revprop_change(svn_fs_t *fs,
306                      apr_pool_t *scratch_pool)
307 {
308   svn_fs_x__data_t *ffd = fs->fsap_data;
309   SVN_ERR_ASSERT(ffd->has_write_lock);
310 
311   /* Set the revprop generation to an odd value to indicate
312    * that a write is in progress.
313    */
314   SVN_ERR(read_revprop_generation(fs, scratch_pool));
315   ++ffd->revprop_generation;
316   SVN_ERR_ASSERT(ffd->revprop_generation % 2);
317   SVN_ERR(write_revprop_generation_file(fs, ffd->revprop_generation,
318                                         scratch_pool));
319 
320   return SVN_NO_ERROR;
321 }
322 
323 /* Set the revprop generation in FS to the next even generation after
324    the odd value in FS->FSAP_DATA to indicate that
325    a) readers shall re-read revprops, and
326    b) the write process has been completed (no recovery required).
327    This is a no-op for repo formats that don't support revprop caching. */
328 static svn_error_t *
end_revprop_change(svn_fs_t * fs,apr_pool_t * scratch_pool)329 end_revprop_change(svn_fs_t *fs,
330                    apr_pool_t *scratch_pool)
331 {
332   svn_fs_x__data_t *ffd = fs->fsap_data;
333   SVN_ERR_ASSERT(ffd->has_write_lock);
334   SVN_ERR_ASSERT(ffd->revprop_generation % 2);
335 
336   /* Set the revprop generation to an even value to indicate
337    * that a write has been completed.  Since we held the write
338    * lock, nobody else could have updated the file contents.
339    */
340   SVN_ERR(write_revprop_generation_file(fs, ffd->revprop_generation + 1,
341                                         scratch_pool));
342 
343   return SVN_NO_ERROR;
344 }
345 
346 /* Represents an entry in the packed revprop manifest.
347  * There is one such entry per pack file. */
348 typedef struct manifest_entry_t
349 {
350   /* First revision in the pack file. */
351   svn_revnum_t start_rev;
352 
353   /* Tag (a counter) appended to the file name to distinguish it from
354      outdated ones. */
355   apr_uint64_t tag;
356 } manifest_entry_t;
357 
358 /* Container for all data required to access the packed revprop file
359  * for a given REVISION.  This structure will be filled incrementally
360  * by read_pack_revprops() its sub-routines.
361  */
362 typedef struct packed_revprops_t
363 {
364   /* revision number to read (not necessarily the first in the pack) */
365   svn_revnum_t revision;
366 
367   /* the actual revision properties */
368   apr_hash_t *properties;
369 
370   /* their size when serialized to a single string
371    * (as found in PACKED_REVPROPS) */
372   apr_size_t serialized_size;
373 
374 
375   /* manifest entry describing the pack file */
376   manifest_entry_t entry;
377 
378   /* packed shard folder path */
379   const char *folder;
380 
381   /* sum of values in SIZES */
382   apr_size_t total_size;
383 
384   /* Array of svn_string_t, containing the serialized revprops for
385    * REVISION * I. */
386   apr_array_header_t *revprops;
387 
388   /* content of the manifest.
389    * Sorted list of manifest_entry_t. */
390   apr_array_header_t *manifest;
391 } packed_revprops_t;
392 
393 /* Parse the serialized revprops in CONTENT and return them in *PROPERTIES.
394  * Also, put them into the revprop cache, if activated, for future use.
395  * Three more parameters are being used to update the revprop cache: FS is
396  * our file system, the revprops belong to REVISION.
397  *
398  * The returned hash will be allocated in RESULT_POOL, SCRATCH_POOL is
399  * being used for temporary allocations.
400  */
401 static svn_error_t *
parse_revprop(apr_hash_t ** properties,svn_fs_t * fs,svn_revnum_t revision,const svn_string_t * content,apr_pool_t * result_pool,apr_pool_t * scratch_pool)402 parse_revprop(apr_hash_t **properties,
403               svn_fs_t *fs,
404               svn_revnum_t revision,
405               const svn_string_t *content,
406               apr_pool_t *result_pool,
407               apr_pool_t *scratch_pool)
408 {
409   SVN_ERR_W(svn_fs_x__parse_properties(properties, content, result_pool),
410             apr_psprintf(scratch_pool, "Failed to parse revprops for r%ld.",
411                          revision));
412 
413   if (has_revprop_cache(fs, scratch_pool))
414     {
415       svn_fs_x__data_t *ffd = fs->fsap_data;
416       svn_fs_x__pair_cache_key_t key = { 0 };
417 
418       SVN_ERR_ASSERT(is_generation_valid(fs));
419 
420       key.revision = revision;
421       key.second = ffd->revprop_generation;
422       SVN_ERR(svn_cache__set(ffd->revprop_cache, &key, *properties,
423                              scratch_pool));
424     }
425 
426   return SVN_NO_ERROR;
427 }
428 
429 /* Verify the checksum attached to CONTENT and remove it.
430  * Use SCRATCH_POOL for temporary allocations.
431  */
432 static svn_error_t *
verify_checksum(svn_stringbuf_t * content,apr_pool_t * scratch_pool)433 verify_checksum(svn_stringbuf_t *content,
434                 apr_pool_t *scratch_pool)
435 {
436   const apr_byte_t *digest;
437   svn_checksum_t *actual, *expected;
438 
439   /* Verify the checksum. */
440   if (content->len < sizeof(apr_uint32_t))
441     return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
442                             "File too short");
443 
444   content->len -= sizeof(apr_uint32_t);
445   digest = (apr_byte_t *)content->data + content->len;
446 
447   expected = svn_checksum__from_digest_fnv1a_32x4(digest, scratch_pool);
448   SVN_ERR(svn_checksum(&actual, svn_checksum_fnv1a_32x4, content->data,
449                        content->len, scratch_pool));
450 
451   if (!svn_checksum_match(actual, expected))
452     SVN_ERR(svn_checksum_mismatch_err(expected, actual, scratch_pool,
453                                       "checksum mismatch"));
454 
455   return SVN_NO_ERROR;
456 }
457 
458 /* Read the non-packed revprops for revision REV in FS, put them into the
459  * revprop cache if activated and return them in *PROPERTIES.
460  *
461  * If the data could not be read due to an otherwise recoverable error,
462  * leave *PROPERTIES unchanged. No error will be returned in that case.
463  *
464  * Allocate *PROPERTIES in RESULT_POOL and temporaries in SCRATCH_POOL.
465  */
466 static svn_error_t *
read_non_packed_revprop(apr_hash_t ** properties,svn_fs_t * fs,svn_revnum_t rev,apr_pool_t * result_pool,apr_pool_t * scratch_pool)467 read_non_packed_revprop(apr_hash_t **properties,
468                         svn_fs_t *fs,
469                         svn_revnum_t rev,
470                         apr_pool_t *result_pool,
471                         apr_pool_t *scratch_pool)
472 {
473   svn_stringbuf_t *content = NULL;
474   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
475   svn_boolean_t missing = FALSE;
476   int i;
477 
478   for (i = 0;
479        i < SVN_FS_X__RECOVERABLE_RETRY_COUNT && !missing && !content;
480        ++i)
481     {
482       svn_pool_clear(iterpool);
483       SVN_ERR(svn_fs_x__try_stringbuf_from_file(&content,
484                                   &missing,
485                                   svn_fs_x__path_revprops(fs, rev, iterpool),
486                                   i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT,
487                                   iterpool));
488     }
489 
490   if (content)
491     {
492       svn_string_t *as_string;
493 
494       /* Consistency check. */
495       SVN_ERR_W(verify_checksum(content, scratch_pool),
496                 apr_psprintf(scratch_pool,
497                              "Revprop file for r%ld is corrupt",
498                              rev));
499 
500       /* The contents string becomes part of the *PROPERTIES structure, i.e.
501        * we must make sure it lives at least as long as the latter. */
502       as_string = svn_string_create_from_buf(content, result_pool);
503       SVN_ERR(parse_revprop(properties, fs, rev, as_string,
504                             result_pool, iterpool));
505     }
506 
507   svn_pool_clear(iterpool);
508 
509   return SVN_NO_ERROR;
510 }
511 
512 /* Serialize ROOT into FILE and append a checksum to it.
513  * Use SCRATCH_POOL for temporary allocations.
514  */
515 static svn_error_t *
write_packed_data_checksummed(svn_packed__data_root_t * root,apr_file_t * file,apr_pool_t * scratch_pool)516 write_packed_data_checksummed(svn_packed__data_root_t *root,
517                               apr_file_t *file,
518                               apr_pool_t *scratch_pool)
519 {
520   svn_checksum_t *checksum;
521   svn_stream_t *stream;
522 
523   stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool);
524   stream = svn_checksum__wrap_write_stream(&checksum, stream,
525                                            svn_checksum_fnv1a_32x4,
526                                            scratch_pool);
527   SVN_ERR(svn_packed__data_write(stream, root, scratch_pool));
528   SVN_ERR(svn_stream_close(stream));
529 
530   /* Append the checksum */
531   SVN_ERR(svn_io_file_write_full(file, checksum->digest,
532                                  svn_checksum_size(checksum), NULL,
533                                  scratch_pool));
534 
535   return SVN_NO_ERROR;
536 }
537 
538 /* Serialize the packed revprops MANIFEST into FILE.
539  * Use SCRATCH_POOL for temporary allocations.
540  */
541 static svn_error_t *
write_manifest(apr_file_t * file,const apr_array_header_t * manifest,apr_pool_t * scratch_pool)542 write_manifest(apr_file_t *file,
543                const apr_array_header_t *manifest,
544                apr_pool_t *scratch_pool)
545 {
546   int i;
547   svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
548 
549   /* one top-level stream per struct element */
550   svn_packed__int_stream_t *start_rev_stream
551     = svn_packed__create_int_stream(root, TRUE, FALSE);
552   svn_packed__int_stream_t *tag_stream
553     = svn_packed__create_int_stream(root, FALSE, FALSE);
554 
555   /* serialize ENTRIES */
556   for (i = 0; i < manifest->nelts; ++i)
557     {
558       manifest_entry_t *entry = &APR_ARRAY_IDX(manifest, i, manifest_entry_t);
559       svn_packed__add_uint(start_rev_stream, entry->start_rev);
560       svn_packed__add_uint(tag_stream, entry->tag);
561     }
562 
563   /* Write to file and calculate the checksum. */
564   SVN_ERR(write_packed_data_checksummed(root, file, scratch_pool));
565 
566   return SVN_NO_ERROR;
567 }
568 
569 /* Read *ROOT from CONTENT and verify its checksum.  Allocate *ROOT in
570  * RESULT_POOL and use SCRATCH_POOL for temporary allocations.
571  */
572 static svn_error_t *
read_packed_data_checksummed(svn_packed__data_root_t ** root,svn_stringbuf_t * content,apr_pool_t * result_pool,apr_pool_t * scratch_pool)573 read_packed_data_checksummed(svn_packed__data_root_t **root,
574                              svn_stringbuf_t *content,
575                              apr_pool_t *result_pool,
576                              apr_pool_t *scratch_pool)
577 {
578   svn_stream_t *stream;
579 
580   SVN_ERR(verify_checksum(content, scratch_pool));
581 
582   stream = svn_stream_from_stringbuf(content, scratch_pool);
583   SVN_ERR(svn_packed__data_read(root, stream, result_pool, scratch_pool));
584 
585   return SVN_NO_ERROR;
586 }
587 
588 /* Read the packed revprops manifest from the CONTENT buffer and return it
589  * in *MANIFEST, allocated in RESULT_POOL.  REVISION is the revision number
590  * to put into error messages.  Use SCRATCH_POOL for temporary allocations.
591  */
592 static svn_error_t *
read_manifest(apr_array_header_t ** manifest,svn_stringbuf_t * content,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)593 read_manifest(apr_array_header_t **manifest,
594               svn_stringbuf_t *content,
595               svn_revnum_t revision,
596               apr_pool_t *result_pool,
597               apr_pool_t *scratch_pool)
598 {
599   apr_size_t i;
600   apr_size_t count;
601 
602   svn_packed__data_root_t *root;
603   svn_packed__int_stream_t *start_rev_stream;
604   svn_packed__int_stream_t *tag_stream;
605 
606   /* Verify the checksum and decode packed data. */
607   SVN_ERR_W(read_packed_data_checksummed(&root, content, result_pool,
608                                          scratch_pool),
609             apr_psprintf(scratch_pool,
610                          "Revprop manifest file for r%ld is corrupt",
611                          revision));
612 
613   /* get streams */
614   start_rev_stream = svn_packed__first_int_stream(root);
615   tag_stream = svn_packed__next_int_stream(start_rev_stream);
616 
617   /* read ids array */
618   count = svn_packed__int_count(start_rev_stream);
619   *manifest = apr_array_make(result_pool, (int)count,
620                             sizeof(manifest_entry_t));
621 
622   for (i = 0; i < count; ++i)
623     {
624       manifest_entry_t *entry = apr_array_push(*manifest);
625       entry->start_rev = (svn_revnum_t)svn_packed__get_int(start_rev_stream);
626       entry->tag = svn_packed__get_uint(tag_stream);
627     }
628 
629   return SVN_NO_ERROR;
630 }
631 
632 /* Implements the standard comparison function signature comparing the
633  * manifest_entry_t(lhs).start_rev to svn_revnum_t(rhs). */
634 static int
compare_entry_revision(const void * lhs,const void * rhs)635 compare_entry_revision(const void *lhs,
636                        const void *rhs)
637 {
638   const manifest_entry_t *entry = lhs;
639   const svn_revnum_t *revision = rhs;
640 
641   if (entry->start_rev < *revision)
642     return -1;
643 
644   return entry->start_rev == *revision ? 0 : 1;
645 }
646 
647 /* Return the index in MANIFEST that has the info for the pack file
648  * containing REVISION. */
649 static int
get_entry(apr_array_header_t * manifest,svn_revnum_t revision)650 get_entry(apr_array_header_t *manifest,
651           svn_revnum_t revision)
652 {
653   manifest_entry_t *entry;
654   int idx = svn_sort__bsearch_lower_bound(manifest, &revision,
655                                           compare_entry_revision);
656 
657   assert(manifest->nelts > 0);
658   if (idx >= manifest->nelts)
659     return idx - 1;
660 
661   entry = &APR_ARRAY_IDX(manifest, idx, manifest_entry_t);
662   if (entry->start_rev > revision && idx > 0)
663     return idx - 1;
664 
665   return idx;
666 }
667 
668 /* Return the full path of the revprop pack file given by ENTRY within
669  * REVPROPS.  Allocate the result in RESULT_POOL. */
670 static const char *
get_revprop_pack_filepath(packed_revprops_t * revprops,manifest_entry_t * entry,apr_pool_t * result_pool)671 get_revprop_pack_filepath(packed_revprops_t *revprops,
672                           manifest_entry_t *entry,
673                           apr_pool_t *result_pool)
674 {
675   const char *filename = apr_psprintf(result_pool, "%ld.%" APR_UINT64_T_FMT,
676                                       entry->start_rev, entry->tag);
677   return svn_dirent_join(revprops->folder, filename, result_pool);
678 }
679 
680 /* Given FS and REVPROPS->REVISION, fill the FILENAME, FOLDER and MANIFEST
681  * members. Use RESULT_POOL for allocating results and SCRATCH_POOL for
682  * temporaries.
683  */
684 static svn_error_t *
get_revprop_packname(svn_fs_t * fs,packed_revprops_t * revprops,apr_pool_t * result_pool,apr_pool_t * scratch_pool)685 get_revprop_packname(svn_fs_t *fs,
686                      packed_revprops_t *revprops,
687                      apr_pool_t *result_pool,
688                      apr_pool_t *scratch_pool)
689 {
690   svn_fs_x__data_t *ffd = fs->fsap_data;
691   svn_stringbuf_t *content = NULL;
692   const char *manifest_file_path;
693   int idx;
694   svn_revnum_t previous_start_rev;
695   int i;
696 
697   /* Determine the dimensions. Rev 0 is excluded from the first shard. */
698   int rev_count = ffd->max_files_per_dir;
699   svn_revnum_t manifest_start
700     = revprops->revision - (revprops->revision % rev_count);
701   if (manifest_start == 0)
702     {
703       ++manifest_start;
704       --rev_count;
705     }
706 
707   /* Read the content of the manifest file */
708   revprops->folder = svn_fs_x__path_pack_shard(fs, revprops->revision,
709                                                result_pool);
710   manifest_file_path = svn_dirent_join(revprops->folder, PATH_MANIFEST,
711                                        result_pool);
712   SVN_ERR(svn_fs_x__read_content(&content, manifest_file_path, result_pool));
713   SVN_ERR(read_manifest(&revprops->manifest, content, revprops->revision,
714                         result_pool, scratch_pool));
715 
716   /* Verify the manifest data. */
717   if (revprops->manifest->nelts == 0)
718     return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL,
719                              "Revprop manifest for r%ld is empty",
720                              revprops->revision);
721 
722   previous_start_rev = 0;
723   for (i = 0; i < revprops->manifest->nelts; ++i)
724     {
725       svn_revnum_t start_rev = APR_ARRAY_IDX(revprops->manifest, i,
726                                              manifest_entry_t).start_rev;
727       if (   start_rev < manifest_start
728           || start_rev >= manifest_start + rev_count)
729         return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL,
730                                  "Revprop manifest for r%ld contains "
731                                  "out-of-range revision r%ld",
732                                  revprops->revision, start_rev);
733 
734       if (start_rev < previous_start_rev)
735         return svn_error_createf(SVN_ERR_FS_CORRUPT_REVPROP_MANIFEST, NULL,
736                                  "Entries in revprop manifest for r%ld "
737                                  "are not ordered", revprops->revision);
738 
739       previous_start_rev = start_rev;
740     }
741 
742   /* Now get the pack file description */
743   idx = get_entry(revprops->manifest, revprops->revision);
744   revprops->entry = APR_ARRAY_IDX(revprops->manifest, idx,
745                                   manifest_entry_t);
746 
747   return SVN_NO_ERROR;
748 }
749 
750 /* Return TRUE, if revision R1 and R2 refer to the same shard in FS.
751  */
752 static svn_boolean_t
same_shard(svn_fs_t * fs,svn_revnum_t r1,svn_revnum_t r2)753 same_shard(svn_fs_t *fs,
754            svn_revnum_t r1,
755            svn_revnum_t r2)
756 {
757   svn_fs_x__data_t *ffd = fs->fsap_data;
758   return (r1 / ffd->max_files_per_dir) == (r2 / ffd->max_files_per_dir);
759 }
760 
761 /* Given FS and the full packed file content in CONTENT and make
762  * PACKED_REVPROPS point to the first serialized revprop.  If READ_ALL
763  * is set, initialize the SIZES and OFFSETS members as well.
764  *
765  * Parse the revprops for REVPROPS->REVISION and set the PROPERTIES as
766  * well as the SERIALIZED_SIZE member.  If revprop caching has been
767  * enabled, parse all revprops in the pack and cache them.
768  */
769 static svn_error_t *
parse_packed_revprops(svn_fs_t * fs,packed_revprops_t * revprops,svn_stringbuf_t * content,svn_boolean_t read_all,apr_pool_t * result_pool,apr_pool_t * scratch_pool)770 parse_packed_revprops(svn_fs_t *fs,
771                       packed_revprops_t *revprops,
772                       svn_stringbuf_t *content,
773                       svn_boolean_t read_all,
774                       apr_pool_t *result_pool,
775                       apr_pool_t *scratch_pool)
776 {
777   apr_size_t count, i;
778   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
779   svn_boolean_t cache_all = has_revprop_cache(fs, scratch_pool);
780   svn_packed__data_root_t *root;
781   svn_packed__byte_stream_t *revprops_stream;
782   svn_revnum_t first_rev = revprops->entry.start_rev;
783 
784   /* Verify the checksum and decode packed data. */
785   SVN_ERR_W(read_packed_data_checksummed(&root, content, result_pool,
786                                          scratch_pool),
787             apr_psprintf(scratch_pool,
788                          "Revprop pack file for r%ld is corrupt",
789                          first_rev));
790 
791   /* get streams */
792   revprops_stream = svn_packed__first_byte_stream(root);
793   count = svn_packed__byte_block_count(revprops_stream);
794 
795   /* Check revision range for validity. */
796   if (!same_shard(fs, first_rev, first_rev + count - 1) || count < 1)
797     return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
798                              _("Revprop pack for revision r%ld"
799                                " contains revprops for r%ld .. r%ld"),
800                              revprops->revision,
801                              (svn_revnum_t)first_rev,
802                              (svn_revnum_t)(first_rev + count -1));
803 
804   /* Since start & end are in the same shard, it is enough to just test
805    * the FIRST_REV for being actually packed.  That will also cover the
806    * special case of rev 0 never being packed. */
807   if (!svn_fs_x__is_packed_revprop(fs, first_rev))
808     return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
809                              _("Revprop pack for revision r%ld"
810                                " starts at non-packed revisions r%ld"),
811                              revprops->revision, (svn_revnum_t)first_rev);
812 
813   /* Request all data (just references to data already expanded in ROOT) */
814   revprops->revprops = apr_array_make(result_pool, (int)count,
815                                       sizeof(svn_string_t));
816   for (i = 0, revprops->total_size = 0; i < count; ++i)
817     {
818       svn_string_t *props = apr_array_push(revprops->revprops);
819       props->data = svn_packed__get_bytes(revprops_stream, &props->len);
820 
821       revprops->total_size += props->len;
822     }
823 
824   /* Now parse the serialized revprops. */
825   for (i = 0; i < count; ++i)
826     {
827       const svn_string_t *serialized;
828       svn_revnum_t revision;
829 
830       svn_pool_clear(iterpool);
831 
832       serialized = &APR_ARRAY_IDX(revprops->revprops, (int)i, svn_string_t);
833       revision = first_rev + (long)i;
834 
835       /* Parse this revprops list, if necessary */
836       if (revision == revprops->revision)
837         {
838           /* Parse (and possibly cache) the one revprop list we care about. */
839           SVN_ERR(parse_revprop(&revprops->properties, fs, revision,
840                                 serialized, result_pool, iterpool));
841           revprops->serialized_size = serialized->len;
842 
843           /* If we only wanted the revprops for REVISION then we are done. */
844           if (!read_all && !cache_all)
845             break;
846         }
847       else if (cache_all)
848         {
849           /* Parse and cache all other revprop lists. */
850           apr_hash_t *properties;
851           SVN_ERR(parse_revprop(&properties, fs, revision, serialized,
852                                 iterpool, iterpool));
853         }
854     }
855 
856   svn_pool_destroy(iterpool);
857 
858   return SVN_NO_ERROR;
859 }
860 
861 /* In filesystem FS, read the packed revprops for revision REV into
862  * *REVPROPS.  Populate the revprop cache, if enabled.  If you want to
863  * modify revprop contents / update REVPROPS, READ_ALL must be set.
864  * Otherwise, only the properties of REV are being provided.
865  *
866  * Allocate *PROPERTIES in RESULT_POOL and temporaries in SCRATCH_POOL.
867  */
868 static svn_error_t *
read_pack_revprop(packed_revprops_t ** revprops,svn_fs_t * fs,svn_revnum_t rev,svn_boolean_t read_all,apr_pool_t * result_pool,apr_pool_t * scratch_pool)869 read_pack_revprop(packed_revprops_t **revprops,
870                   svn_fs_t *fs,
871                   svn_revnum_t rev,
872                   svn_boolean_t read_all,
873                   apr_pool_t *result_pool,
874                   apr_pool_t *scratch_pool)
875 {
876   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
877   svn_boolean_t missing = FALSE;
878   packed_revprops_t *result;
879   int i;
880 
881   /* someone insisted that REV is packed. Double-check if necessary */
882   if (!svn_fs_x__is_packed_revprop(fs, rev))
883      SVN_ERR(svn_fs_x__update_min_unpacked_rev(fs, iterpool));
884 
885   if (!svn_fs_x__is_packed_revprop(fs, rev))
886     return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL,
887                               _("No such packed revision %ld"), rev);
888 
889   /* initialize the result data structure */
890   result = apr_pcalloc(result_pool, sizeof(*result));
891   result->revision = rev;
892 
893   /* try to read the packed revprops. This may require retries if we have
894    * concurrent writers. */
895   for (i = 0; i < SVN_FS_X__RECOVERABLE_RETRY_COUNT; ++i)
896     {
897       const char *file_path;
898       svn_stringbuf_t *contents = NULL;
899 
900       svn_pool_clear(iterpool);
901 
902       /* there might have been concurrent writes.
903        * Re-read the manifest and the pack file.
904        */
905       SVN_ERR(get_revprop_packname(fs, result, result_pool, iterpool));
906       file_path = get_revprop_pack_filepath(result, &result->entry,
907                                             iterpool);
908       SVN_ERR(svn_fs_x__try_stringbuf_from_file(&contents,
909                                 &missing,
910                                 file_path,
911                                 i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT,
912                                 iterpool));
913 
914       if (contents)
915         {
916           SVN_ERR_W(parse_packed_revprops(fs, result, contents, read_all,
917                                           result_pool, iterpool),
918                     apr_psprintf(iterpool,
919                                  "Revprop pack file for r%ld is corrupt",
920                                  rev));
921           break;
922         }
923 
924       /* If we could not find the file, there was a write.
925        * So, we should refresh our revprop generation info as well such
926        * that others may find data we will put into the cache.  They would
927        * consider it outdated, otherwise.
928        */
929       if (missing && has_revprop_cache(fs, iterpool))
930         SVN_ERR(read_revprop_generation(fs, iterpool));
931     }
932 
933   /* the file content should be available now */
934   if (!result->revprops)
935     return svn_error_createf(SVN_ERR_FS_PACKED_REVPROP_READ_FAILURE, NULL,
936                   _("Failed to read revprop pack file for r%ld"), rev);
937 
938   *revprops = result;
939 
940   return SVN_NO_ERROR;
941 }
942 
943 svn_error_t *
svn_fs_x__get_revision_proplist(apr_hash_t ** proplist_p,svn_fs_t * fs,svn_revnum_t rev,svn_boolean_t bypass_cache,svn_boolean_t refresh,apr_pool_t * result_pool,apr_pool_t * scratch_pool)944 svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p,
945                                 svn_fs_t *fs,
946                                 svn_revnum_t rev,
947                                 svn_boolean_t bypass_cache,
948                                 svn_boolean_t refresh,
949                                 apr_pool_t *result_pool,
950                                 apr_pool_t *scratch_pool)
951 {
952   svn_fs_x__data_t *ffd = fs->fsap_data;
953 
954   /* not found, yet */
955   *proplist_p = NULL;
956 
957   /* should they be available at all? */
958   SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
959 
960   /* Ensure that the revprop generation info is valid. */
961   if (refresh || !is_generation_valid(fs))
962     SVN_ERR(read_revprop_generation(fs, scratch_pool));
963 
964   /* Try cache lookup first. */
965   if (!bypass_cache && has_revprop_cache(fs, scratch_pool))
966     {
967       svn_boolean_t is_cached;
968       svn_fs_x__pair_cache_key_t key = { 0 };
969 
970       key.revision = rev;
971       key.second = ffd->revprop_generation;
972       SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
973                              ffd->revprop_cache, &key, result_pool));
974       if (is_cached)
975         return SVN_NO_ERROR;
976     }
977 
978   /* if REV had not been packed when we began, try reading it from the
979    * non-packed shard.  If that fails, we will fall through to packed
980    * shard reads. */
981   if (!svn_fs_x__is_packed_revprop(fs, rev))
982     {
983       svn_error_t *err = read_non_packed_revprop(proplist_p, fs, rev,
984                                                  result_pool, scratch_pool);
985       if (err)
986         {
987           if (!APR_STATUS_IS_ENOENT(err->apr_err))
988             return svn_error_trace(err);
989 
990           svn_error_clear(err);
991           *proplist_p = NULL; /* in case read_non_packed_revprop changed it */
992         }
993     }
994 
995   /* if revprop packing is available and we have not read the revprops, yet,
996    * try reading them from a packed shard.  If that fails, REV is most
997    * likely invalid (or its revprops highly contested). */
998   if (!*proplist_p)
999     {
1000       packed_revprops_t *revprops;
1001       SVN_ERR(read_pack_revprop(&revprops, fs, rev, FALSE,
1002                                 result_pool, scratch_pool));
1003       *proplist_p = revprops->properties;
1004     }
1005 
1006   /* The revprops should have been there. Did we get them? */
1007   if (!*proplist_p)
1008     return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL,
1009                              _("Could not read revprops for revision %ld"),
1010                              rev);
1011 
1012   return SVN_NO_ERROR;
1013 }
1014 
1015 svn_error_t *
svn_fs_x__write_non_packed_revprops(apr_file_t * file,apr_hash_t * proplist,apr_pool_t * scratch_pool)1016 svn_fs_x__write_non_packed_revprops(apr_file_t *file,
1017                                     apr_hash_t *proplist,
1018                                     apr_pool_t *scratch_pool)
1019 {
1020   svn_stream_t *stream;
1021   svn_checksum_t *checksum;
1022 
1023   stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool);
1024   stream = svn_checksum__wrap_write_stream(&checksum, stream,
1025                                            svn_checksum_fnv1a_32x4,
1026                                            scratch_pool);
1027   SVN_ERR(svn_fs_x__write_properties(stream, proplist, scratch_pool));
1028   SVN_ERR(svn_stream_close(stream));
1029 
1030   /* Append the checksum */
1031   SVN_ERR(svn_io_file_write_full(file, checksum->digest,
1032                                  svn_checksum_size(checksum), NULL,
1033                                  scratch_pool));
1034 
1035   return SVN_NO_ERROR;
1036 }
1037 
1038 /* Serialize the revision property list PROPLIST of revision REV in
1039  * filesystem FS to a non-packed file.  Return the name of that temporary
1040  * file in *TMP_PATH and the file path that it must be moved to in
1041  * *FINAL_PATH.  Schedule necessary fsync calls in BATCH.
1042  *
1043  * Allocate *FINAL_PATH and *TMP_PATH in RESULT_POOL.  Use SCRATCH_POOL
1044  * for temporary allocations.
1045  */
1046 static svn_error_t *
write_non_packed_revprop(const char ** final_path,const char ** tmp_path,svn_fs_t * fs,svn_revnum_t rev,apr_hash_t * proplist,svn_fs_x__batch_fsync_t * batch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1047 write_non_packed_revprop(const char **final_path,
1048                          const char **tmp_path,
1049                          svn_fs_t *fs,
1050                          svn_revnum_t rev,
1051                          apr_hash_t *proplist,
1052                          svn_fs_x__batch_fsync_t *batch,
1053                          apr_pool_t *result_pool,
1054                          apr_pool_t *scratch_pool)
1055 {
1056   apr_file_t *file;
1057   *final_path = svn_fs_x__path_revprops(fs, rev, result_pool);
1058 
1059   *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL);
1060   SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path,
1061                                           scratch_pool));
1062 
1063   SVN_ERR(svn_fs_x__write_non_packed_revprops(file, proplist, scratch_pool));
1064 
1065   return SVN_NO_ERROR;
1066 }
1067 
1068 /* After writing the new revprop file(s), call this function to move the
1069  * file at TMP_PATH to FINAL_PATH and give it the permissions from
1070  * PERMS_REFERENCE.  Schedule necessary fsync calls in BATCH.
1071  *
1072  * If indicated in BUMP_GENERATION, increase FS' revprop generation.
1073  * Finally, delete all the temporary files given in FILES_TO_DELETE.
1074  * The latter may be NULL.
1075  *
1076  * Use SCRATCH_POOL for temporary allocations.
1077  */
1078 static svn_error_t *
switch_to_new_revprop(svn_fs_t * fs,const char * final_path,const char * tmp_path,const char * perms_reference,apr_array_header_t * files_to_delete,svn_boolean_t bump_generation,svn_fs_x__batch_fsync_t * batch,apr_pool_t * scratch_pool)1079 switch_to_new_revprop(svn_fs_t *fs,
1080                       const char *final_path,
1081                       const char *tmp_path,
1082                       const char *perms_reference,
1083                       apr_array_header_t *files_to_delete,
1084                       svn_boolean_t bump_generation,
1085                       svn_fs_x__batch_fsync_t *batch,
1086                       apr_pool_t *scratch_pool)
1087 {
1088   /* Now, we may actually be replacing revprops. Make sure that all other
1089      threads and processes will know about this. */
1090   if (bump_generation)
1091     SVN_ERR(begin_revprop_change(fs, scratch_pool));
1092 
1093   /* Ensure the new file contents makes it to disk before switching over to
1094    * it. */
1095   SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool));
1096 
1097   /* Make the revision visible to all processes and threads. */
1098   SVN_ERR(svn_fs_x__move_into_place(tmp_path, final_path, perms_reference,
1099                                     batch, scratch_pool));
1100   SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool));
1101 
1102   /* Indicate that the update (if relevant) has been completed. */
1103   if (bump_generation)
1104     SVN_ERR(end_revprop_change(fs, scratch_pool));
1105 
1106   /* Clean up temporary files, if necessary. */
1107   if (files_to_delete)
1108     {
1109       apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1110       int i;
1111 
1112       for (i = 0; i < files_to_delete->nelts; ++i)
1113         {
1114           const char *path = APR_ARRAY_IDX(files_to_delete, i, const char*);
1115 
1116           svn_pool_clear(iterpool);
1117           SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool));
1118         }
1119 
1120       svn_pool_destroy(iterpool);
1121     }
1122   return SVN_NO_ERROR;
1123 }
1124 
1125 /* Writes the a pack file to FILE.  It copies the serialized data
1126  * from REVPROPS for the indexes [START,END).
1127  *
1128  * NEW_TOTAL_SIZE is a hint for pre-allocating buffers of appropriate size.
1129  * SCRATCH_POOL is used for temporary allocations.
1130  */
1131 static svn_error_t *
repack_revprops(svn_fs_t * fs,packed_revprops_t * revprops,int start,int end,apr_size_t new_total_size,apr_file_t * file,apr_pool_t * scratch_pool)1132 repack_revprops(svn_fs_t *fs,
1133                 packed_revprops_t *revprops,
1134                 int start,
1135                 int end,
1136                 apr_size_t new_total_size,
1137                 apr_file_t *file,
1138                 apr_pool_t *scratch_pool)
1139 {
1140   int i;
1141 
1142   svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
1143   svn_packed__byte_stream_t *revprops_stream
1144     = svn_packed__create_bytes_stream(root);
1145 
1146   /* append the serialized revprops */
1147   for (i = start; i < end; ++i)
1148     {
1149       const svn_string_t *props
1150         = &APR_ARRAY_IDX(revprops->revprops, i, svn_string_t);
1151 
1152       svn_packed__add_bytes(revprops_stream, props->data, props->len);
1153     }
1154 
1155   /* Write to file. */
1156   SVN_ERR(write_packed_data_checksummed(root, file, scratch_pool));
1157 
1158   return SVN_NO_ERROR;
1159 }
1160 
1161 /* Allocate a new pack file name for revisions starting at START_REV in
1162  * REVPROPS->MANIFEST.  Add the name of old file to FILES_TO_DELETE,
1163  * auto-create that array if necessary.  Return an open file *FILE that is
1164  * allocated in RESULT_POOL.  Allocate the paths in *FILES_TO_DELETE from
1165  * the same pool that contains the array itself.  Schedule necessary fsync
1166  * calls in BATCH.
1167  *
1168  * Use SCRATCH_POOL for temporary allocations.
1169  */
1170 static svn_error_t *
repack_file_open(apr_file_t ** file,svn_fs_t * fs,packed_revprops_t * revprops,svn_revnum_t start_rev,apr_array_header_t ** files_to_delete,svn_fs_x__batch_fsync_t * batch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1171 repack_file_open(apr_file_t **file,
1172                  svn_fs_t *fs,
1173                  packed_revprops_t *revprops,
1174                  svn_revnum_t start_rev,
1175                  apr_array_header_t **files_to_delete,
1176                  svn_fs_x__batch_fsync_t *batch,
1177                  apr_pool_t *result_pool,
1178                  apr_pool_t *scratch_pool)
1179 {
1180   manifest_entry_t new_entry;
1181   const char *new_path;
1182   int idx;
1183 
1184   /* We always replace whole pack files - possibly by more than one new file.
1185    * When we create the file for the first part of the pack, enlist the old
1186    * one for later deletion */
1187   SVN_ERR_ASSERT(start_rev >= revprops->entry.start_rev);
1188 
1189   if (*files_to_delete == NULL)
1190     *files_to_delete = apr_array_make(result_pool, 3, sizeof(const char*));
1191 
1192   if (revprops->entry.start_rev == start_rev)
1193     APR_ARRAY_PUSH(*files_to_delete, const char*)
1194       = get_revprop_pack_filepath(revprops, &revprops->entry,
1195                                   (*files_to_delete)->pool);
1196 
1197   /* Initialize the new manifest entry. Bump the tag part. */
1198   new_entry.start_rev = start_rev;
1199   new_entry.tag = revprops->entry.tag + 1;
1200 
1201   /* update the manifest to point to the new file */
1202   idx = get_entry(revprops->manifest, start_rev);
1203   if (revprops->entry.start_rev == start_rev)
1204     APR_ARRAY_IDX(revprops->manifest, idx, manifest_entry_t) = new_entry;
1205   else
1206     SVN_ERR(svn_sort__array_insert2(revprops->manifest, &new_path, idx + 1));
1207 
1208   /* open the file */
1209   new_path = get_revprop_pack_filepath(revprops, &new_entry, scratch_pool);
1210   SVN_ERR(svn_fs_x__batch_fsync_open_file(file, batch, new_path,
1211                                           scratch_pool));
1212 
1213   return SVN_NO_ERROR;
1214 }
1215 
1216 /* Return the length of the serialized reprop list of index I in REVPROPS. */
1217 static apr_size_t
props_len(packed_revprops_t * revprops,int i)1218 props_len(packed_revprops_t *revprops,
1219           int i)
1220 {
1221   return APR_ARRAY_IDX(revprops->revprops, i, svn_string_t).len;
1222 }
1223 
1224 /* For revision REV in filesystem FS, set the revision properties to
1225  * PROPLIST.  Return a new file in *TMP_PATH that the caller shall move
1226  * to *FINAL_PATH to make the change visible.  Files to be deleted will
1227  * be listed in *FILES_TO_DELETE which may remain unchanged / unallocated.
1228  * Schedule necessary fsync calls in BATCH.
1229  *
1230  * Allocate output values in RESULT_POOL and temporaries from SCRATCH_POOL.
1231  */
1232 static svn_error_t *
write_packed_revprop(const char ** final_path,const char ** tmp_path,apr_array_header_t ** files_to_delete,svn_fs_t * fs,svn_revnum_t rev,apr_hash_t * proplist,svn_fs_x__batch_fsync_t * batch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1233 write_packed_revprop(const char **final_path,
1234                      const char **tmp_path,
1235                      apr_array_header_t **files_to_delete,
1236                      svn_fs_t *fs,
1237                      svn_revnum_t rev,
1238                      apr_hash_t *proplist,
1239                      svn_fs_x__batch_fsync_t *batch,
1240                      apr_pool_t *result_pool,
1241                      apr_pool_t *scratch_pool)
1242 {
1243   svn_fs_x__data_t *ffd = fs->fsap_data;
1244   packed_revprops_t *revprops;
1245   svn_stream_t *stream;
1246   apr_file_t *file;
1247   svn_stringbuf_t *serialized;
1248   apr_size_t new_total_size;
1249   int changed_index;
1250   int count;
1251 
1252   /* read the current revprop generation. This value will not change
1253    * while we hold the global write lock to this FS. */
1254   if (has_revprop_cache(fs, scratch_pool))
1255     SVN_ERR(read_revprop_generation(fs, scratch_pool));
1256 
1257   /* read contents of the current pack file */
1258   SVN_ERR(read_pack_revprop(&revprops, fs, rev, TRUE,
1259                             scratch_pool, scratch_pool));
1260 
1261   /* serialize the new revprops */
1262   serialized = svn_stringbuf_create_empty(scratch_pool);
1263   stream = svn_stream_from_stringbuf(serialized, scratch_pool);
1264   SVN_ERR(svn_fs_x__write_properties(stream, proplist, scratch_pool));
1265   SVN_ERR(svn_stream_close(stream));
1266 
1267   /* estimate the size of the new data */
1268   count = revprops->revprops->nelts;
1269   changed_index = (int)(rev - revprops->entry.start_rev);
1270   new_total_size = revprops->total_size - revprops->serialized_size
1271                  + serialized->len
1272                  + (count + 2) * SVN_INT64_BUFFER_SIZE;
1273 
1274   APR_ARRAY_IDX(revprops->revprops, changed_index, svn_string_t)
1275     = *svn_stringbuf__morph_into_string(serialized);
1276 
1277   /* can we put the new data into the same pack as the before? */
1278   if (new_total_size < ffd->revprop_pack_size || count == 1)
1279     {
1280       /* simply replace the old pack file with new content as we do it
1281        * in the non-packed case */
1282 
1283       *final_path = get_revprop_pack_filepath(revprops, &revprops->entry,
1284                                               result_pool);
1285       *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL);
1286       SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path,
1287                                               scratch_pool));
1288       SVN_ERR(repack_revprops(fs, revprops, 0, count,
1289                               new_total_size, file, scratch_pool));
1290     }
1291   else
1292     {
1293       /* split the pack file into two of roughly equal size */
1294       int right_count, left_count;
1295 
1296       int left = 0;
1297       int right = count - 1;
1298       apr_size_t left_size = 2 * SVN_INT64_BUFFER_SIZE;
1299       apr_size_t right_size = 2 * SVN_INT64_BUFFER_SIZE;
1300 
1301       /* let left and right side grow such that their size difference
1302        * is minimal after each step. */
1303       while (left <= right)
1304         if (  left_size + props_len(revprops, left)
1305             < right_size + props_len(revprops, right))
1306           {
1307             left_size += props_len(revprops, left) + SVN_INT64_BUFFER_SIZE;
1308             ++left;
1309           }
1310         else
1311           {
1312             right_size += props_len(revprops, right) + SVN_INT64_BUFFER_SIZE;
1313             --right;
1314           }
1315 
1316        /* since the items need much less than SVN_INT64_BUFFER_SIZE
1317         * bytes to represent their length, the split may not be optimal */
1318       left_count = left;
1319       right_count = count - left;
1320 
1321       /* if new_size is large, one side may exceed the pack size limit.
1322        * In that case, split before and after the modified revprop.*/
1323       if (   left_size > ffd->revprop_pack_size
1324           || right_size > ffd->revprop_pack_size)
1325         {
1326           left_count = changed_index;
1327           right_count = count - left_count - 1;
1328         }
1329 
1330       /* Allocate this here such that we can call the repack functions with
1331        * the scratch pool alone. */
1332       if (*files_to_delete == NULL)
1333         *files_to_delete = apr_array_make(result_pool, 3,
1334                                           sizeof(const char*));
1335 
1336       /* write the new, split files */
1337       if (left_count)
1338         {
1339           SVN_ERR(repack_file_open(&file, fs, revprops,
1340                                    revprops->entry.start_rev,
1341                                    files_to_delete, batch,
1342                                    scratch_pool, scratch_pool));
1343           SVN_ERR(repack_revprops(fs, revprops, 0, left_count,
1344                                   new_total_size, file, scratch_pool));
1345         }
1346 
1347       if (left_count + right_count < count)
1348         {
1349           SVN_ERR(repack_file_open(&file, fs, revprops, rev,
1350                                    files_to_delete, batch,
1351                                    scratch_pool, scratch_pool));
1352           SVN_ERR(repack_revprops(fs, revprops, changed_index,
1353                                   changed_index + 1,
1354                                   new_total_size, file, scratch_pool));
1355         }
1356 
1357       if (right_count)
1358         {
1359           SVN_ERR(repack_file_open(&file, fs, revprops, rev + 1,
1360                                    files_to_delete,  batch,
1361                                    scratch_pool, scratch_pool));
1362           SVN_ERR(repack_revprops(fs, revprops, count - right_count, count,
1363                                   new_total_size, file, scratch_pool));
1364         }
1365 
1366       /* write the new manifest */
1367       *final_path = svn_dirent_join(revprops->folder, PATH_MANIFEST,
1368                                     result_pool);
1369       *tmp_path = apr_pstrcat(result_pool, *final_path, ".tmp", SVN_VA_NULL);
1370       SVN_ERR(svn_fs_x__batch_fsync_open_file(&file, batch, *tmp_path,
1371                                               scratch_pool));
1372       SVN_ERR(write_manifest(file, revprops->manifest, scratch_pool));
1373     }
1374 
1375   return SVN_NO_ERROR;
1376 }
1377 
1378 /* Set the revision property list of revision REV in filesystem FS to
1379    PROPLIST.  Use SCRATCH_POOL for temporary allocations. */
1380 svn_error_t *
svn_fs_x__set_revision_proplist(svn_fs_t * fs,svn_revnum_t rev,apr_hash_t * proplist,apr_pool_t * scratch_pool)1381 svn_fs_x__set_revision_proplist(svn_fs_t *fs,
1382                                 svn_revnum_t rev,
1383                                 apr_hash_t *proplist,
1384                                 apr_pool_t *scratch_pool)
1385 {
1386   svn_boolean_t is_packed;
1387   svn_boolean_t bump_generation = FALSE;
1388   const char *final_path;
1389   const char *tmp_path;
1390   const char *perms_reference;
1391   apr_array_header_t *files_to_delete = NULL;
1392   svn_fs_x__batch_fsync_t *batch;
1393   svn_fs_x__data_t *ffd = fs->fsap_data;
1394 
1395   SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
1396 
1397   /* Perform all fsyncs through this instance. */
1398   SVN_ERR(svn_fs_x__batch_fsync_create(&batch, ffd->flush_to_disk,
1399                                        scratch_pool));
1400 
1401   /* this info will not change while we hold the global FS write lock */
1402   is_packed = svn_fs_x__is_packed_revprop(fs, rev);
1403 
1404   /* Test whether revprops already exist for this revision.
1405    * Only then will we need to bump the revprop generation.
1406    * The fact that they did not yet exist is never cached. */
1407   if (is_packed)
1408     {
1409       bump_generation = TRUE;
1410     }
1411   else
1412     {
1413       svn_node_kind_t kind;
1414       SVN_ERR(svn_io_check_path(svn_fs_x__path_revprops(fs, rev,
1415                                                         scratch_pool),
1416                                 &kind, scratch_pool));
1417       bump_generation = kind != svn_node_none;
1418     }
1419 
1420   /* Serialize the new revprop data */
1421   if (is_packed)
1422     SVN_ERR(write_packed_revprop(&final_path, &tmp_path, &files_to_delete,
1423                                  fs, rev, proplist, batch, scratch_pool,
1424                                  scratch_pool));
1425   else
1426     SVN_ERR(write_non_packed_revprop(&final_path, &tmp_path,
1427                                      fs, rev, proplist, batch,
1428                                      scratch_pool, scratch_pool));
1429 
1430   /* We use the rev file of this revision as the perms reference,
1431    * because when setting revprops for the first time, the revprop
1432    * file won't exist and therefore can't serve as its own reference.
1433    * (Whereas the rev file should already exist at this point.)
1434    */
1435   perms_reference = svn_fs_x__path_rev_absolute(fs, rev, scratch_pool);
1436 
1437   /* Now, switch to the new revprop data. */
1438   SVN_ERR(switch_to_new_revprop(fs, final_path, tmp_path, perms_reference,
1439                                 files_to_delete, bump_generation, batch,
1440                                 scratch_pool));
1441 
1442   return SVN_NO_ERROR;
1443 }
1444 
1445 /* Return TRUE, if for REVISION in FS, we can find the revprop pack file.
1446  * Use SCRATCH_POOL for temporary allocations.
1447  * Set *MISSING, if the reason is a missing manifest or pack file.
1448  */
1449 svn_boolean_t
svn_fs_x__packed_revprop_available(svn_boolean_t * missing,svn_fs_t * fs,svn_revnum_t revision,apr_pool_t * scratch_pool)1450 svn_fs_x__packed_revprop_available(svn_boolean_t *missing,
1451                                    svn_fs_t *fs,
1452                                    svn_revnum_t revision,
1453                                    apr_pool_t *scratch_pool)
1454 {
1455   svn_node_kind_t kind;
1456   packed_revprops_t *revprops;
1457   svn_error_t *err;
1458 
1459   /* try to read the manifest file */
1460   revprops = apr_pcalloc(scratch_pool, sizeof(*revprops));
1461   revprops->revision = revision;
1462   err = get_revprop_packname(fs, revprops, scratch_pool, scratch_pool);
1463 
1464   /* if the manifest cannot be read, consider the pack files inaccessible
1465    * even if the file itself exists. */
1466   if (err)
1467     {
1468       svn_error_clear(err);
1469       return FALSE;
1470     }
1471 
1472   /* the respective pack file must exist (and be a file) */
1473   err = svn_io_check_path(get_revprop_pack_filepath(revprops,
1474                                                     &revprops->entry,
1475                                                     scratch_pool),
1476                           &kind, scratch_pool);
1477   if (err)
1478     {
1479       svn_error_clear(err);
1480       return FALSE;
1481     }
1482 
1483   *missing = kind == svn_node_none;
1484   return kind == svn_node_file;
1485 }
1486 
1487 
1488 /****** Packing FSX shards *********/
1489 
1490 /* Copy revprop files for revisions [START_REV, END_REV) from SHARD_PATH
1491  * in filesystem FS to the pack file at PACK_FILE_NAME in PACK_FILE_DIR.
1492  *
1493  * The file sizes have already been determined and written to SIZES.
1494  * Please note that this function will be executed while the filesystem
1495  * has been locked and that revprops files will therefore not be modified
1496  * while the pack is in progress.
1497  *
1498  * COMPRESSION_LEVEL defines how well the resulting pack file shall be
1499  * compressed or whether is shall be compressed at all.  TOTAL_SIZE is
1500  * a hint on which initial buffer size we should use to hold the pack file
1501  * content.  Schedule necessary fsync calls in BATCH.
1502  *
1503  * CANCEL_FUNC and CANCEL_BATON are used as usual. Temporary allocations
1504  * are done in SCRATCH_POOL.
1505  */
1506 static svn_error_t *
copy_revprops(svn_fs_t * fs,const char * pack_file_dir,const char * pack_filename,const char * shard_path,svn_revnum_t start_rev,svn_revnum_t end_rev,apr_array_header_t * sizes,apr_size_t total_size,int compression_level,svn_fs_x__batch_fsync_t * batch,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * scratch_pool)1507 copy_revprops(svn_fs_t *fs,
1508               const char *pack_file_dir,
1509               const char *pack_filename,
1510               const char *shard_path,
1511               svn_revnum_t start_rev,
1512               svn_revnum_t end_rev,
1513               apr_array_header_t *sizes,
1514               apr_size_t total_size,
1515               int compression_level,
1516               svn_fs_x__batch_fsync_t *batch,
1517               svn_cancel_func_t cancel_func,
1518               void *cancel_baton,
1519               apr_pool_t *scratch_pool)
1520 {
1521   apr_file_t *pack_file;
1522   svn_revnum_t rev;
1523   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1524 
1525   svn_packed__data_root_t *root = svn_packed__data_create_root(scratch_pool);
1526   svn_packed__byte_stream_t *stream
1527     = svn_packed__create_bytes_stream(root);
1528 
1529   /* Iterate over the revisions in this shard, squashing them together. */
1530   for (rev = start_rev; rev <= end_rev; rev++)
1531     {
1532       const char *path;
1533       svn_stringbuf_t *props;
1534 
1535       svn_pool_clear(iterpool);
1536 
1537       /* Construct the file name. */
1538       path = svn_fs_x__path_revprops(fs, rev, iterpool);
1539 
1540       /* Copy all the bits from the non-packed revprop file to the end of
1541        * the pack file. */
1542       SVN_ERR(svn_stringbuf_from_file2(&props, path, iterpool));
1543       SVN_ERR_W(verify_checksum(props, iterpool),
1544                 apr_psprintf(iterpool, "Failed to read revprops for r%ld.",
1545                              rev));
1546 
1547       svn_packed__add_bytes(stream, props->data, props->len);
1548     }
1549 
1550   /* Create the auto-fsync'ing pack file. */
1551   SVN_ERR(svn_fs_x__batch_fsync_open_file(&pack_file, batch,
1552                                           svn_dirent_join(pack_file_dir,
1553                                                           pack_filename,
1554                                                           scratch_pool),
1555                                           scratch_pool));
1556 
1557   /* write all to disk */
1558   SVN_ERR(write_packed_data_checksummed(root, pack_file, scratch_pool));
1559 
1560   svn_pool_destroy(iterpool);
1561 
1562   return SVN_NO_ERROR;
1563 }
1564 
1565 svn_error_t *
svn_fs_x__pack_revprops_shard(svn_fs_t * fs,const char * pack_file_dir,const char * shard_path,apr_int64_t shard,int max_files_per_dir,apr_int64_t max_pack_size,int compression_level,svn_fs_x__batch_fsync_t * batch,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * scratch_pool)1566 svn_fs_x__pack_revprops_shard(svn_fs_t *fs,
1567                               const char *pack_file_dir,
1568                               const char *shard_path,
1569                               apr_int64_t shard,
1570                               int max_files_per_dir,
1571                               apr_int64_t max_pack_size,
1572                               int compression_level,
1573                               svn_fs_x__batch_fsync_t *batch,
1574                               svn_cancel_func_t cancel_func,
1575                               void *cancel_baton,
1576                               apr_pool_t *scratch_pool)
1577 {
1578   const char *manifest_file_path, *pack_filename = NULL;
1579   apr_file_t *manifest_file;
1580   svn_revnum_t start_rev, end_rev, rev;
1581   apr_size_t total_size;
1582   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1583   apr_array_header_t *sizes;
1584   apr_array_header_t *manifest;
1585 
1586   /* Sanitize config file values. */
1587   apr_size_t max_size = (apr_size_t)MIN(MAX(max_pack_size, 1),
1588                                         SVN_MAX_OBJECT_SIZE);
1589 
1590   /* Some useful paths. */
1591   manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST,
1592                                        scratch_pool);
1593 
1594   /* Create the manifest file. */
1595   SVN_ERR(svn_fs_x__batch_fsync_open_file(&manifest_file, batch,
1596                                           manifest_file_path, scratch_pool));
1597 
1598   /* revisions to handle. Special case: revision 0 */
1599   start_rev = (svn_revnum_t) (shard * max_files_per_dir);
1600   end_rev = (svn_revnum_t) ((shard + 1) * (max_files_per_dir) - 1);
1601   if (start_rev == 0)
1602     {
1603       /* Never pack revprops for r0, just copy it. */
1604       SVN_ERR(svn_io_copy_file(svn_fs_x__path_revprops(fs, 0, iterpool),
1605                                svn_dirent_join(pack_file_dir, "p0",
1606                                                scratch_pool),
1607                                TRUE,
1608                                iterpool));
1609 
1610       ++start_rev;
1611       /* Special special case: if max_files_per_dir is 1, then at this point
1612          start_rev == 1 and end_rev == 0 (!).  Fortunately, everything just
1613          works. */
1614     }
1615 
1616   /* initialize the revprop size info */
1617   sizes = apr_array_make(scratch_pool, max_files_per_dir, sizeof(apr_size_t));
1618   total_size = 2 * SVN_INT64_BUFFER_SIZE;
1619 
1620   manifest = apr_array_make(scratch_pool, 4, sizeof(manifest_entry_t));
1621 
1622   /* Iterate over the revisions in this shard, determine their size and
1623    * squashing them together into pack files. */
1624   for (rev = start_rev; rev <= end_rev; rev++)
1625     {
1626       apr_finfo_t finfo;
1627       const char *path;
1628 
1629       svn_pool_clear(iterpool);
1630 
1631       /* Get the size of the file. */
1632       path = svn_fs_x__path_revprops(fs, rev, iterpool);
1633       SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));
1634 
1635       /* If we already have started a pack file and this revprop cannot be
1636        * appended to it, write the previous pack file.  Note this overflow
1637        * check works because we enforced MAX_SIZE <= SVN_MAX_OBJECT_SIZE. */
1638       if (sizes->nelts != 0
1639           && (   finfo.size > max_size
1640               || total_size > max_size
1641               || SVN_INT64_BUFFER_SIZE + finfo.size > max_size - total_size))
1642         {
1643           SVN_ERR(copy_revprops(fs, pack_file_dir, pack_filename,
1644                                 shard_path, start_rev, rev-1,
1645                                 sizes, (apr_size_t)total_size,
1646                                 compression_level, batch, cancel_func,
1647                                 cancel_baton, iterpool));
1648 
1649           /* next pack file starts empty again */
1650           apr_array_clear(sizes);
1651           total_size = 2 * SVN_INT64_BUFFER_SIZE;
1652           start_rev = rev;
1653         }
1654 
1655       /* Update the manifest. Allocate a file name for the current pack
1656        * file if it is a new one */
1657       if (sizes->nelts == 0)
1658         {
1659           manifest_entry_t *entry = apr_array_push(manifest);
1660           entry->start_rev = rev;
1661           entry->tag = 0;
1662 
1663           pack_filename = apr_psprintf(scratch_pool, "%ld.0", rev);
1664         }
1665 
1666       /* add to list of files to put into the current pack file */
1667       APR_ARRAY_PUSH(sizes, apr_size_t) = finfo.size;
1668       total_size += SVN_INT64_BUFFER_SIZE + finfo.size;
1669     }
1670 
1671   /* write the last pack file */
1672   if (sizes->nelts != 0)
1673     SVN_ERR(copy_revprops(fs, pack_file_dir, pack_filename, shard_path,
1674                           start_rev, rev-1, sizes,
1675                           (apr_size_t)total_size, compression_level,
1676                           batch, cancel_func, cancel_baton, iterpool));
1677 
1678   SVN_ERR(write_manifest(manifest_file, manifest, iterpool));
1679 
1680   /* flush all data to disk and update permissions */
1681   SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool));
1682   svn_pool_destroy(iterpool);
1683 
1684   return SVN_NO_ERROR;
1685 }
1686