1 /* fs.h : interface to Subversion filesystem
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 #ifndef SVN_LIBSVN_FS_X_FS_H
24 #define SVN_LIBSVN_FS_X_FS_H
25 
26 #include <apr_pools.h>
27 #include <apr_hash.h>
28 #include <apr_network_io.h>
29 #include <apr_md5.h>
30 #include <apr_sha1.h>
31 
32 #include "svn_fs.h"
33 #include "svn_config.h"
34 #include "private/svn_atomic.h"
35 #include "private/svn_cache.h"
36 #include "private/svn_fs_private.h"
37 #include "private/svn_sqlite.h"
38 #include "private/svn_mutex.h"
39 
40 #include "rev_file.h"
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif /* __cplusplus */
45 
46 
47 /*** The filesystem structure.  ***/
48 
49 /* Following are defines that specify the textual elements of the
50    native filesystem directories and revision files. */
51 
52 /* Names of special files and directories in the fs_x filesystem. */
53 #define PATH_FORMAT           "format"           /* Contains format number */
54 #define PATH_UUID             "uuid"             /* Contains UUID */
55 #define PATH_CURRENT          "current"          /* Youngest revision */
56 #define PATH_NEXT             "next"             /* Revision being written. */
57 #define PATH_LOCK_FILE        "write-lock"       /* Revision lock file */
58 #define PATH_PACK_LOCK_FILE   "pack-lock"        /* Pack lock file */
59 #define PATH_REVS_DIR         "revs"             /* Directory of revisions */
60 #define PATH_TXNS_DIR         "transactions"     /* Directory of transactions */
61 #define PATH_TXN_PROTOS_DIR   "txn-protorevs"    /* Directory of proto-revs */
62 #define PATH_TXN_CURRENT      "txn-current"      /* File with next txn key */
63 #define PATH_TXN_CURRENT_LOCK "txn-current-lock" /* Lock for txn-current */
64 #define PATH_LOCKS_DIR        "locks"            /* Directory of locks */
65 #define PATH_MIN_UNPACKED_REV "min-unpacked-rev" /* Oldest revision which
66                                                     has not been packed. */
67 #define PATH_REVPROP_GENERATION "revprop-generation"
68                                                  /* Current revprop generation */
69 #define PATH_MANIFEST         "manifest"         /* Manifest file name */
70 #define PATH_PACKED           "pack"             /* Packed revision data file */
71 #define PATH_EXT_PACKED_SHARD ".pack"            /* Extension for packed
72                                                     shards */
73 #define PATH_EXT_L2P_INDEX    ".l2p"             /* Extension of the log-
74                                                     to-phys index */
75 #define PATH_EXT_P2L_INDEX    ".p2l"             /* Extension of the phys-
76                                                     to-log index */
77 /* If you change this, look at tests/svn_test_fs.c(maybe_install_fsx_conf) */
78 #define PATH_CONFIG           "fsx.conf"         /* Configuration */
79 
80 /* Names of special files and file extensions for transactions */
81 #define PATH_CHANGES       "changes"       /* Records changes made so far */
82 #define PATH_TXN_PROPS     "props"         /* Transaction properties */
83 #define PATH_NEXT_IDS      "next-ids"      /* Next temporary ID assignments */
84 #define PATH_PREFIX_NODE   "node."         /* Prefix for node filename */
85 #define PATH_EXT_TXN       ".txn"          /* Extension of txn dir */
86 #define PATH_EXT_CHILDREN  ".children"     /* Extension for dir contents */
87 #define PATH_EXT_PROPS     ".props"        /* Extension for node props */
88 #define PATH_EXT_REV       ".rev"          /* Extension of protorev file */
89 #define PATH_EXT_REV_LOCK  ".rev-lock"     /* Extension of protorev lock file */
90 #define PATH_TXN_ITEM_INDEX "itemidx"      /* File containing the current item
91                                              index number */
92 #define PATH_INDEX          "index"        /* Name of index files, without extension */
93 
94 /* Names of files in legacy FS formats */
95 #define PATH_REV           "rev"           /* Proto rev file */
96 #define PATH_REV_LOCK      "rev-lock"      /* Proto rev (write) lock file */
97 
98 /* Names of sections and options in the configuration file fsx.conf (see PATH_CONFIG). */
99 #define CONFIG_SECTION_CACHES            "caches"
100 #define CONFIG_OPTION_FAIL_STOP          "fail-stop"
101 #define CONFIG_SECTION_REP_SHARING       "rep-sharing"
102 #define CONFIG_OPTION_ENABLE_REP_SHARING "enable-rep-sharing"
103 #define CONFIG_SECTION_DELTIFICATION     "deltification"
104 #define CONFIG_OPTION_MAX_DELTIFICATION_WALK     "max-deltification-walk"
105 #define CONFIG_OPTION_MAX_LINEAR_DELTIFICATION   "max-linear-deltification"
106 #define CONFIG_OPTION_COMPRESSION_LEVEL  "compression-level"
107 #define CONFIG_SECTION_PACKED_REVPROPS   "packed-revprops"
108 #define CONFIG_OPTION_REVPROP_PACK_SIZE  "revprop-pack-size"
109 #define CONFIG_OPTION_COMPRESS_PACKED_REVPROPS  "compress-packed-revprops"
110 #define CONFIG_SECTION_IO                "io"
111 #define CONFIG_OPTION_BLOCK_SIZE         "block-size"
112 #define CONFIG_OPTION_L2P_PAGE_SIZE      "l2p-page-size"
113 #define CONFIG_OPTION_P2L_PAGE_SIZE      "p2l-page-size"
114 #define CONFIG_SECTION_DEBUG             "debug"
115 #define CONFIG_OPTION_PACK_AFTER_COMMIT  "pack-after-commit"
116 
117 /* The format number of this filesystem.
118    This is independent of the repository format number, and
119    independent of the format numbers of any other FS back ends.
120 
121    Note: If you bump this, please update the switch statement in
122          svn_fs_x__create() as well.
123  */
124 #define SVN_FS_X__FORMAT_NUMBER   2
125 
126 /* Latest experimental format number; currently the same value as
127    SVN_FS_X__FORMAT_NUMBER.  Experimental formats are only compatible with themselves. */
128 #define SVN_FS_X__EXPERIMENTAL_FORMAT_NUMBER   2
129 
130 /* On most operating systems APR implements file locks per process, not
131    per file.  On Windows APR implements the locking as per file handle
132    locks, so we don't have to add our own mutex for just in-process
133    synchronization there.  Hence this is 1 only for non-Windows builds with APR thread support. */
134 #if APR_HAS_THREADS && !defined(WIN32)
135 #define SVN_FS_X__USE_LOCK_MUTEX 1
136 #else /* no APR thread support, or WIN32 */
137 #define SVN_FS_X__USE_LOCK_MUTEX 0
138 #endif /* APR_HAS_THREADS && !defined(WIN32) */
139 
140 /* Maximum number of changes we deliver per request when listing the
141    changed paths for a given revision.  Any value > 0 will do.
142    At 100..300 bytes per entry, this limits the allocation to ~30kB. */
143 #define SVN_FS_X__CHANGES_BLOCK_SIZE 100
144 
145 /* Private FSX-specific data shared between all svn_txn_t objects that
146    relate to a particular transaction in a filesystem (as identified
147    by transaction id and filesystem UUID).  Objects of this type are
148    allocated in their own subpool of the common pool. */
149 typedef struct svn_fs_x__shared_txn_data_t
150 {
151   /* The next transaction in the list (the list head is the TXNS member of
152      svn_fs_x__shared_data_t), or NULL if there is no following transaction. */
153   struct svn_fs_x__shared_txn_data_t *next;
154 
155   /* ID of this transaction. */
156   svn_fs_x__txn_id_t txn_id;
157 
158   /* Whether the transaction's prototype revision file is locked for
159      writing by any thread in this process (including the current
160      thread; recursive locks are not permitted).  This is effectively
161      a non-recursive mutex. */
162   svn_boolean_t being_written;
163 
164   /* The pool in which this object has been allocated; a subpool of the
165      common pool (see COMMON_POOL in svn_fs_x__shared_data_t). */
166   apr_pool_t *pool;
167 } svn_fs_x__shared_txn_data_t;
168 
169 /* Private FSX-specific data shared between all svn_fs_t objects that
170    relate to a particular filesystem, as identified by filesystem UUID.
171    Objects of this type are allocated in the common pool. */
172 typedef struct svn_fs_x__shared_data_t
173 {
174   /* A list of shared transaction objects for each transaction that is
175      currently active, or NULL if none are.  All access to this list,
176      including the contents of the objects stored in it, is synchronised
177      under TXN_LIST_LOCK. */
178   svn_fs_x__shared_txn_data_t *txns;
179 
180   /* A free transaction object, or NULL if there is no free object.
181      Access to this object is synchronised under TXN_LIST_LOCK. */
182   svn_fs_x__shared_txn_data_t *free_txn;
183 
184   /* The following locks must be acquired in the reverse order of their
185      declaration here.  Any subset may be acquired and held at any given
186      time but their relative acquisition order must not change.
187 
188      (lock 'pack' before 'write' before 'txn-current' before 'txn-list') */
189 
190   /* A lock for intra-process synchronization when accessing the TXNS list. */
191   svn_mutex__t *txn_list_lock;
192 
193   /* A lock for intra-process synchronization when locking the
194      txn-current file. */
195   svn_mutex__t *txn_current_lock;
196 
197   /* A lock for intra-process synchronization when grabbing the
198      repository write lock. */
199   svn_mutex__t *fs_write_lock;
200 
201   /* A lock for intra-process synchronization when grabbing the
202      repository pack operation lock. */
203   svn_mutex__t *fs_pack_lock;
204 
205   /* The common pool, under which this object is allocated, subpools
206      of which are used to allocate the transaction objects. */
207   apr_pool_t *common_pool;
208 } svn_fs_x__shared_data_t;
209 
210 /* Data structure for the 1st level DAG node cache (forward declaration only). */
211 typedef struct svn_fs_x__dag_cache_t svn_fs_x__dag_cache_t;
212 
213 /* Key type for all caches that use revision + offset / counter as key.
214 
215    Note: Cache keys should be 16 bytes for best performance and there
216          should be no padding. */
217 typedef struct svn_fs_x__pair_cache_key_t
218 {
219   /* The object's revision.  Use the 64 bit data type to prevent padding. */
220   apr_int64_t revision;
221 
222   /* Sub-address: item index, revprop generation, packed flag, etc. */
223   apr_int64_t second;
224 } svn_fs_x__pair_cache_key_t;
225 
226 /* Key type that identifies a representation / rep header.
227 
228    Note: Cache keys should require no padding. */
229 typedef struct svn_fs_x__representation_cache_key_t
230 {
231   /* Revision that contains the representation */
232   apr_int64_t revision;
233 
234   /* Packed or non-packed representation?  (A boolean kept in a 64 bit field.) */
235   apr_int64_t is_packed;
236 
237   /* Item index of the representation */
238   apr_uint64_t item_index;
239 } svn_fs_x__representation_cache_key_t;
240 
241 /* Key type that identifies a txdelta window.
242 
243    Note: Cache keys should require no padding. */
244 typedef struct svn_fs_x__window_cache_key_t
245 {
246   /* The object's revision.  Use the 64 bit data type to prevent padding. */
247   apr_int64_t revision;
248 
249   /* Window number within that representation. */
250   apr_int64_t chunk_index;
251 
252   /* Item index of the representation */
253   apr_uint64_t item_index;
254 } svn_fs_x__window_cache_key_t;
255 
256 /* Private (non-shared) FSX-specific data for each svn_fs_t object.
257    Any caches in here may be NULL. */
258 typedef struct svn_fs_x__data_t
259 {
260   /* The format number of this FS. */
261   int format;
262 
263   /* The maximum number of files to store per directory. */
264   int max_files_per_dir;
265 
266   /* Rev / pack file read granularity in bytes. */
267   apr_int64_t block_size;
268 
269   /* Index page sizes.  Mind the different units: log-to-phys pages are
270    * sized in entries, phys-to-log pages in rev / pack file bytes covered. */
271   /* Capacity in entries of log-to-phys index pages */
272   apr_int64_t l2p_page_size;
273 
274   /* Rev / pack file granularity (in bytes) covered by a single phys-to-log index page */
275   apr_int64_t p2l_page_size;
276 
277   /* The revision that was youngest, last time we checked. */
278   svn_revnum_t youngest_rev_cache;
279 
280   /* Caches of immutable data.  (Note that these may be shared between
281      multiple svn_fs_t's for the same filesystem.) */
282 
283   /* Access to the configured memcached instances.  May be NULL. */
284   svn_memcache_t *memcache;
285 
286   /* If TRUE, don't ignore any cache-related errors.  If FALSE, errors from
287      e.g. memcached may be ignored as caching is an optional feature. */
288   svn_boolean_t fail_stop;
289 
290   /* Caches native dag_node_t* instances */
291   svn_fs_x__dag_cache_t *dag_node_cache;
292 
293   /* A cache of the contents of immutable directories; maps from
294      unparsed FS ID to an apr_hash_t * mapping (const char *) dirent
295      names to (svn_fs_x__dirent_t *). */
296   svn_cache__t *dir_cache;
297 
298   /* Fulltext cache; currently only used with memcached.  Maps from
299      rep key (revision/offset) to svn_stringbuf_t. */
300   svn_cache__t *fulltext_cache;
301 
302   /* Revprop generation number.  Will be -1 if it has to be reread from disk. */
303   apr_int64_t revprop_generation;
304 
305   /* Revision property cache.  Maps from (rev,generation) to apr_hash_t. */
306   svn_cache__t *revprop_cache;
307 
308   /* Node properties cache.  Maps from rep key to apr_hash_t. */
309   svn_cache__t *properties_cache;
310 
311   /* Cache for txdelta_window_t objects;
312    * the key is svn_fs_x__window_cache_key_t */
313   svn_cache__t *txdelta_window_cache;
314 
315   /* Cache for combined windows as svn_stringbuf_t objects;
316      the key is svn_fs_x__window_cache_key_t */
317   svn_cache__t *combined_window_cache;
318 
319   /* Cache for node revisions, presumably svn_fs_x__noderev_t objects
320    * (TODO confirm); the key is (revision, item index) */
321   svn_cache__t *node_revision_cache;
322 
323   /* Cache for noderevs_t containers;
324      the key is a (pack file revision, file offset) pair */
325   svn_cache__t *noderevs_container_cache;
326 
327   /* Cache for change lists in blocks as svn_fs_x__changes_list_t * objects;
328      the key is the (revision, first-element-in-block) pair. */
329   svn_cache__t *changes_cache;
330 
331   /* Cache for change_list_t containers;
332      the key is a (pack file revision, file offset) pair */
333   svn_cache__t *changes_container_cache;
334 
335   /* Cache for star-delta / representation containers;
336      the key is a (pack file revision, file offset) pair */
337   svn_cache__t *reps_container_cache;
338 
339   /* Cache for svn_fs_x__rep_header_t objects; the key is a
340      (revision, item index) pair */
341   svn_cache__t *rep_header_cache;
342 
343   /* Cache for l2p_header_t objects; the key is (revision, is-packed).
344      Will be NULL for pre-format7 repos.  NOTE(review): FSX is at format 2 -- is this remark (also below) stale? */
345   svn_cache__t *l2p_header_cache;
346 
347   /* Cache for l2p_page_t objects; the key is svn_fs_x__page_cache_key_t.
348      Will be NULL for pre-format7 repos */
349   svn_cache__t *l2p_page_cache;
350 
351   /* Cache for p2l_header_t objects; the key is (revision, is-packed).
352      Will be NULL for pre-format7 repos */
353   svn_cache__t *p2l_header_cache;
354 
355   /* Cache for apr_array_header_t objects containing svn_fs_x__p2l_entry_t
356      elements; the key is svn_fs_x__page_cache_key_t.
357      Will be NULL for pre-format7 repos */
358   svn_cache__t *p2l_page_cache;
359 
360   /* TRUE while we hold a lock on the write lock file. */
361   svn_boolean_t has_write_lock;
362 
363   /* Data shared between all svn_fs_t objects for a given filesystem. */
364   svn_fs_x__shared_data_t *shared;
365 
366   /* The sqlite database used for rep caching. */
367   svn_sqlite__db_t *rep_cache_db;
368 
369   /* Thread-safe boolean; presumably set once REP_CACHE_DB has been opened (TODO confirm) */
370   svn_atomic_t rep_cache_db_opened;
371 
372   /* The oldest revision not in a pack file.  It also applies to revprops
373    * if revprop packing has been enabled by the FSX format version. */
374   svn_revnum_t min_unpacked_rev;
375 
376   /* Whether rep-sharing is supported by the filesystem
377    * and allowed by the configuration. */
378   svn_boolean_t rep_sharing_allowed;
379 
380   /* File size limit in bytes up to which multiple revprops shall be packed
381    * into a single file. */
382   apr_int64_t revprop_pack_size;
383 
384   /* Whether packed revprop files shall be compressed. */
385   svn_boolean_t compress_packed_revprops;
386 
387   /* Restart deltification histories after each multiple of this value */
388   apr_int64_t max_deltification_walk;
389 
390   /* Maximum length of the linear part at the top of the
391    * deltification history after which skip deltas will be used. */
392   apr_int64_t max_linear_deltification;
393 
394   /* Compression level to use with txdelta storage format in new revs. */
395   int delta_compression_level;
396 
397   /* Pack after every commit. */
398   svn_boolean_t pack_after_commit;
399 
400   /* Per-instance filesystem ID, which provides an additional level of
401      uniqueness for filesystems that share the same UUID, but should
402      still be distinguishable (e.g. backups produced by svn_fs_hotcopy()
403      or dump / load cycles). */
404   const char *instance_id;
405 
406   /* Ensure that all filesystem changes are written to disk. */
407   svn_boolean_t flush_to_disk;
408 
409   /* Pointer to svn_fs_open. */
410   svn_error_t *(*svn_fs_open_)(svn_fs_t **, const char *, apr_hash_t *,
411                                apr_pool_t *, apr_pool_t *);
412 
413 } svn_fs_x__data_t;
414 
415 
416 /*** Filesystem Transaction ***/
417 typedef struct svn_fs_x__transaction_t
418 {
419   /* Revision upon which this txn is based.  (unfinished only) */
420   svn_revnum_t base_rev;
421 
422   /* Copies list (const char * copy_ids), or NULL if there have been
423      no copies in this transaction.  */
424   apr_array_header_t *copies;
425 
426 } svn_fs_x__transaction_t;
427 
428 
429 /*** Representation ***/
430 /* If you add fields to this, check to see if you need to change
431  * svn_fs_x__rep_copy. */
432 typedef struct svn_fs_x__representation_t
433 {
434   /* Checksum digests for the contents produced by this representation.
435      These checksums are for the contents the rep shows to consumers,
436      regardless of how the rep stores the data under the hood.  They are
437      independent of the storage (fulltext, delta, whatever).
438 
439      If has_sha1 is FALSE, then for compatibility behave as though this
440      checksum matches the expected checksum.
441 
442      The md5 checksum is always filled, unless this is a rep which was
443      retrieved from the rep-cache.  The sha1 checksum is only computed on
444      a write, for use with rep-sharing. */
445   svn_boolean_t has_sha1;
446   unsigned char sha1_digest[APR_SHA1_DIGESTSIZE];
447   unsigned char md5_digest[APR_MD5_DIGESTSIZE];
448 
449   /* Change set and item number where this representation is located. */
450   svn_fs_x__id_t id;
451 
452   /* The size of the representation in bytes as seen in the revision
453      file. */
454   svn_filesize_t size;
455 
456   /* The size of the fulltext of the representation. */
457   svn_filesize_t expanded_size;
458 
459 } svn_fs_x__representation_t;
460 
461 
462 /*** Node-Revision ***/
463 /* If you add fields to this, check to see if you need to change
464  * copy_node_revision in dag.c. */
465 typedef struct svn_fs_x__noderev_t
466 {
467   /* Predecessor node revision id.  Will be "unused" if there is no
468      predecessor for this node revision. */
469   svn_fs_x__id_t predecessor_id;
470 
471   /* The ID of this noderev */
472   svn_fs_x__id_t noderev_id;
473 
474   /* Identifier of the node that this noderev belongs to. */
475   svn_fs_x__id_t node_id;
476 
477   /* Copy identifier of this line of history. */
478   svn_fs_x__id_t copy_id;
479 
480   /* If this node-rev is a copy, where was it copied from? */
481   const char *copyfrom_path;
482   svn_revnum_t copyfrom_rev;
483 
484   /* Helper for history tracing, root of the parent tree from whence
485      this node-rev was copied. */
486   svn_revnum_t copyroot_rev;
487   const char *copyroot_path;
488 
489   /* Node kind */
490   svn_node_kind_t kind;
491 
492   /* Number of predecessors this node revision has (recursively).
493      A difference from the BDB backend is that it cannot be -1. */
494   int predecessor_count;
495 
496   /* Representation key for this node's properties.  May be NULL if
497      there are no properties.  */
498   svn_fs_x__representation_t *prop_rep;
499 
500   /* Representation for this node's data.  May be NULL if there is
501      no data. */
502   svn_fs_x__representation_t *data_rep;
503 
504   /* Path at which this node first came into existence.  */
505   const char *created_path;
506 
507   /* Does this node itself have svn:mergeinfo? */
508   svn_boolean_t has_mergeinfo;
509 
510   /* Number of nodes with svn:mergeinfo properties that are
511      descendants of this node (including the node itself) */
512   apr_int64_t mergeinfo_count;
513 
514 } svn_fs_x__noderev_t;
515 
516 
517 /** The type of a directory entry.  */
518 typedef struct svn_fs_x__dirent_t
519 {
520 
521   /** The name of this directory entry.  */
522   const char *name;
523 
524   /** The ID of the node revision that NAME refers to.  */
525   svn_fs_x__id_t id;
526 
527   /** The node kind. */
528   svn_node_kind_t kind;
529 } svn_fs_x__dirent_t;
530 
531 
532 /*** Change (an alias for svn_fs_path_change3_t) ***/
533 typedef svn_fs_path_change3_t svn_fs_x__change_t;
534 
535 /*** Context for reading changed path lists iteratively. ***/
536 typedef struct svn_fs_x__changes_context_t
537 {
538   /* Repository to fetch from. */
539   svn_fs_t *fs;
540 
541   /* Revision that we read from. */
542   svn_revnum_t revision;
543 
544   /* Revision file object to use when needed. */
545   svn_fs_x__revision_file_t *revision_file;
546 
547   /* Index of the next change to fetch. */
548   int next;
549 
550   /* Offset, within the changed paths list on disk, of the next change to
551      fetch. */
552   apr_off_t next_offset;
553 
554   /* Has the end of the changed paths list been reached? */
555   svn_boolean_t eol;
556 
557 } svn_fs_x__changes_context_t;
558 
559 /*** Directory (only used at the cache interface) ***/
560 typedef struct svn_fs_x__dir_data_t
561 {
562   /* Contents, i.e. all directory entries (presumably svn_fs_x__dirent_t *; TODO confirm), sorted by name. */
563   apr_array_header_t *entries;
564 
565   /* SVN_INVALID_FILESIZE for committed data, otherwise the length of the
566    * in-txn on-disk representation of that directory. */
567   svn_filesize_t txn_filesize;
568 } svn_fs_x__dir_data_t;
569 
570 
571 #ifdef __cplusplus
572 }
573 #endif /* __cplusplus */
574 
575 #endif /* SVN_LIBSVN_FS_X_FS_H */
576