1 /* dag_cache.c : DAG walker and node cache.
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 
24 /* The job of this layer is to take a filesystem with lots of node
25    sharing going on --- the real DAG filesystem as it appears in the
26    database --- and make it look and act like an ordinary tree
27    filesystem, with no sharing.
28 
29    We do just-in-time cloning: you can walk from some unfinished
30    transaction's root down into directories and files shared with
31    committed revisions; as soon as you try to change something, the
32    appropriate nodes get cloned (and parent directory entries updated)
33    invisibly, behind your back.  Any other references you have to
34    nodes that have been cloned by other changes, even made by other
35    processes, are automatically updated to point to the right clones.  */
36 
37 
38 #include <stdlib.h>
39 #include <string.h>
40 #include <assert.h>
41 #include <apr_pools.h>
42 #include <apr_hash.h>
43 
44 #include "svn_hash.h"
45 #include "svn_private_config.h"
46 #include "svn_pools.h"
47 #include "svn_error.h"
48 #include "svn_path.h"
49 #include "svn_mergeinfo.h"
50 #include "svn_fs.h"
51 #include "svn_props.h"
52 #include "svn_sorts.h"
53 
54 #include "fs.h"
55 #include "dag.h"
56 #include "dag_cache.h"
57 #include "lock.h"
58 #include "tree.h"
59 #include "fs_x.h"
60 #include "fs_id.h"
61 #include "temp_serializer.h"
62 #include "cached_data.h"
63 #include "transaction.h"
64 #include "pack.h"
65 #include "util.h"
66 
67 #include "private/svn_mergeinfo_private.h"
68 #include "private/svn_subr_private.h"
69 #include "private/svn_fs_util.h"
70 #include "private/svn_fspath.h"
71 #include "../libsvn_fs/fs-loader.h"
72 
73 
74 /*** Path handling ***/
75 
76 /* DAG caching uses "normalized" paths - which are a relaxed form of
77    canonical relpaths.  They omit the leading '/' of the abspath and trim
78    any trailing '/'.  Any sequences of '//' will be kept as the path walker
79    simply skips over them.
80 
81    Non-canonical sections of the path will therefore only impact efficiency
82    (extra walker iterations and possibly duplicated entries in the cache)
83    but not correctness.
84 
85    Another optimization is that we don't NUL-terminate the path but strictly
86    use its length info.  That way, it can be traversed easily without
87    chopping it up and patching it together again.  ultimately, however,
88    the path string is NUL-terminated because we wrapped a NUL-terminated
89    C string.
90  */
91 
92 /* Set *RESULT to a normalized version of PATH without actually copying any
93    string contents.
94 
95    For convenience, return the RESULT pointer as the function value too. */
96 static svn_string_t *
normalize_path(svn_string_t * result,const char * path)97 normalize_path(svn_string_t *result,
98                const char *path)
99 {
100   apr_size_t len;
101 
102   if (path[0] == '/')
103     ++path;
104 
105   len = strlen(path);
106   while (len && path[len-1] == '/')
107     --len;
108 
109   result->data = path;
110   result->len = len;
111 
112   return result;
113 }
114 
115 /* Extend PATH, i.e. increase its LEN, to cover the next segment.  Skip
116    sequences of '/'.  Store the segment in ENTRY and return a pointer to
117    it C string representation.  If no segment has been found (end of path),
118    return NULL. */
119 static const char *
next_entry_name(svn_string_t * path,svn_stringbuf_t * entry)120 next_entry_name(svn_string_t *path,
121                 svn_stringbuf_t *entry)
122 {
123   const char *segment_start;
124   const char *segment_end;
125 
126   /* Moving to the next segment, skip separators
127      (normalized does not imply canonical). */
128   segment_start = path->data + path->len;
129   while (*segment_start == '/')
130     ++segment_start;
131 
132   /* End of path? */
133   if (*segment_start == '\0')
134     return NULL;
135 
136   /* Find the end of this segment.  Note that strchr would not give us the
137      length of the last segment. */
138   segment_end = segment_start;
139   while (*segment_end != '/' && *segment_end != '\0')
140     ++segment_end;
141 
142   /* Copy the segment into the result buffer. */
143   svn_stringbuf_setempty(entry);
144   svn_stringbuf_appendbytes(entry, segment_start,
145                             segment_end - segment_start);
146 
147   /* Extend the "visible" part of the path to the end of that segment. */
148   path->len = segment_end - path->data;
149 
150   /* Indicate that we found something. */
151   return entry->data;
152 }
153 
154 /* Split the normalized PATH into its last segment the corresponding parent
155    directory.  Store them in ENTRY and DIRECTORY, respectively.
156 
157    If PATH is empty, return FALSE and produce no output.
158    Otherwise, return TRUE.
159  */
160 static svn_boolean_t
extract_last_segment(const svn_string_t * path,svn_string_t * directory,svn_stringbuf_t * entry)161 extract_last_segment(const svn_string_t *path,
162                      svn_string_t *directory,
163                      svn_stringbuf_t *entry)
164 {
165   const char *segment_start;
166   const char *parent_end;
167 
168   /* Edge case.  We can't navigate in empty paths. */
169   if (path->len == 0)
170     return FALSE;
171 
172   /* Find the start of the last segment.  Note that normalized paths never
173      start nor end with a '/'. */
174   segment_start = path->data + path->len - 1;
175   while (*segment_start != '/' && segment_start != path->data)
176     --segment_start;
177 
178   /* At root level already, i.e. no parent? */
179   if (segment_start == path->data)
180     {
181       /* Construct result. */
182       directory->data = "";
183       directory->len = 0;
184 
185       svn_stringbuf_setempty(entry);
186       svn_stringbuf_appendbytes(entry, path->data, path->len);
187 
188       return TRUE;
189     }
190 
191   /* Find the end of the parent directory. */
192   parent_end = segment_start;
193   while (parent_end[-1] == '/')
194     --parent_end;
195 
196   /* Construct result. */
197   directory->data = path->data;
198   directory->len = parent_end - path->data;
199 
200   ++segment_start; /* previously pointed to the last '/'. */
201   svn_stringbuf_setempty(entry);
202   svn_stringbuf_appendbytes(entry, segment_start,
203                             path->len - (segment_start - path->data));
204 
205   return TRUE;
206 }
207 
208 
209 /*** Node Caching ***/
210 
211 /* 1st level cache */
212 
213 /* An entry in the first-level cache.  REVISION and PATH form the key that
214    will ultimately be matched.
215  */
216 typedef struct cache_entry_t
217 {
218   /* hash value derived from PATH, REVISION.
219      Used to short-circuit failed lookups. */
220   apr_uint32_t hash_value;
221 
222   /* change set to which the NODE belongs */
223   svn_fs_x__change_set_t change_set;
224 
225   /* path of the NODE */
226   char *path;
227 
228   /* cached value of strlen(PATH). */
229   apr_size_t path_len;
230 
231   /* the node allocated in the cache's pool. NULL for empty entries. */
232   dag_node_t *node;
233 } cache_entry_t;
234 
235 /* Number of entries in the cache.  Keep this low to keep pressure on the
236    CPU caches low as well.  A binary value is most efficient.  If we walk
237    a directory tree, we want enough entries to store nodes for all files
238    without overwriting the nodes for the parent folder.  That way, there
239    will be no unnecessary misses (except for a few random ones caused by
240    hash collision).
241 
242    The actual number of instances may be higher but entries that got
243    overwritten are no longer visible.
244  */
245 enum { BUCKET_COUNT = 256 };
246 
247 /* The actual cache structure.  All nodes will be allocated in POOL.
248    When the number of INSERTIONS (i.e. objects created form that pool)
249    exceeds a certain threshold, the pool will be cleared and the cache
250    with it.
251  */
252 struct svn_fs_x__dag_cache_t
253 {
254   /* fixed number of (possibly empty) cache entries */
255   cache_entry_t buckets[BUCKET_COUNT];
256 
257   /* pool used for all node allocation */
258   apr_pool_t *pool;
259 
260   /* number of entries created from POOL since the last cleanup */
261   apr_size_t insertions;
262 
263   /* Property lookups etc. have a very high locality (75% re-hit).
264      Thus, remember the last hit location for optimistic lookup. */
265   apr_size_t last_hit;
266 
267   /* Position of the last bucket hit that actually had a DAG node in it.
268      LAST_HIT may refer to a bucket that matches path@rev but has not
269      its NODE element set, yet.
270      This value is a mere hint for optimistic lookup and any value is
271      valid (as long as it is < BUCKET_COUNT). */
272   apr_size_t last_non_empty;
273 };
274 
275 svn_fs_x__dag_cache_t*
svn_fs_x__create_dag_cache(apr_pool_t * result_pool)276 svn_fs_x__create_dag_cache(apr_pool_t *result_pool)
277 {
278   svn_fs_x__dag_cache_t *result = apr_pcalloc(result_pool, sizeof(*result));
279   result->pool = svn_pool_create(result_pool);
280 
281   return result;
282 }
283 
284 /* Clears the CACHE at regular intervals (destroying all cached nodes).
285  * Return TRUE if the cache got cleared and previously obtained references
286  * to cache contents have become invalid.
287  */
288 static svn_boolean_t
auto_clear_dag_cache(svn_fs_x__dag_cache_t * cache)289 auto_clear_dag_cache(svn_fs_x__dag_cache_t* cache)
290 {
291   if (cache->insertions <= BUCKET_COUNT)
292     return FALSE;
293 
294   svn_pool_clear(cache->pool);
295 
296   memset(cache->buckets, 0, sizeof(cache->buckets));
297   cache->insertions = 0;
298 
299   return TRUE;
300 }
301 
302 /* For the given CHANGE_SET and PATH, return the respective entry in CACHE.
303    If the entry is empty, its NODE member will be NULL and the caller
304    may then set it to the corresponding DAG node allocated in CACHE->POOL.
305  */
306 static cache_entry_t *
cache_lookup(svn_fs_x__dag_cache_t * cache,svn_fs_x__change_set_t change_set,const svn_string_t * path)307 cache_lookup(svn_fs_x__dag_cache_t *cache,
308              svn_fs_x__change_set_t change_set,
309              const svn_string_t *path)
310 {
311   apr_size_t i, bucket_index;
312   apr_size_t path_len = path->len;
313   apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set;
314 
315 #if SVN_UNALIGNED_ACCESS_IS_OK
316   /* "randomizing" / distributing factor used in our hash function */
317   const apr_uint32_t factor = 0xd1f3da69;
318 #endif
319 
320   /* optimistic lookup: hit the same bucket again? */
321   cache_entry_t *result = &cache->buckets[cache->last_hit];
322   if (   (result->change_set == change_set)
323       && (result->path_len == path_len)
324       && !memcmp(result->path, path->data, path_len))
325     {
326       /* Remember the position of the last node we found in this cache. */
327       if (result->node)
328         cache->last_non_empty = cache->last_hit;
329 
330       return result;
331     }
332 
333   /* need to do a full lookup.  Calculate the hash value
334      (HASH_VALUE has been initialized to REVISION). */
335   i = 0;
336 #if SVN_UNALIGNED_ACCESS_IS_OK
337   /* We relax the dependency chain between iterations by processing
338      two chunks from the input per hash_value self-multiplication.
339      The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
340      per 2 chunks instead of 1 chunk.
341    */
342   for (; i + 8 <= path_len; i += 8)
343     hash_value = hash_value * factor * factor
344                + (  *(const apr_uint32_t*)(path->data + i) * factor
345                   + *(const apr_uint32_t*)(path->data + i + 4));
346 #endif
347 
348   for (; i < path_len; ++i)
349     /* Help GCC to minimize the HASH_VALUE update latency by splitting the
350        MUL 33 of the naive implementation: h = h * 33 + path[i].  This
351        shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
352      */
353     hash_value = hash_value * 32 + (hash_value + (apr_byte_t)path->data[i]);
354 
355   bucket_index = hash_value + (hash_value >> 16);
356   bucket_index = (bucket_index + (bucket_index >> 8)) % BUCKET_COUNT;
357 
358   /* access the corresponding bucket and remember its location */
359   result = &cache->buckets[bucket_index];
360   cache->last_hit = bucket_index;
361 
362   /* if it is *NOT* a match,  clear the bucket, expect the caller to fill
363      in the node and count it as an insertion */
364   if (   (result->hash_value != hash_value)
365       || (result->change_set != change_set)
366       || (result->path_len != path_len)
367       || memcmp(result->path, path->data, path_len))
368     {
369       result->hash_value = hash_value;
370       result->change_set = change_set;
371 
372       if (result->path_len < path_len || result->path_len == 0)
373         result->path = apr_palloc(cache->pool, path_len + 1);
374       result->path_len = path_len;
375 
376       memcpy(result->path, path->data, path_len);
377       result->path[path_len] = 0;
378 
379       result->node = NULL;
380 
381       cache->insertions++;
382     }
383   else if (result->node)
384     {
385       /* This bucket is valid & has a suitable DAG node in it.
386          Remember its location. */
387       cache->last_non_empty = bucket_index;
388     }
389 
390   return result;
391 }
392 
393 /* Optimistic lookup using the last seen non-empty location in CACHE.
394    Return the node of that entry, if it is still in use and matches PATH.
395    Return NULL otherwise. */
396 static dag_node_t *
cache_lookup_last_path(svn_fs_x__dag_cache_t * cache,const svn_string_t * path)397 cache_lookup_last_path(svn_fs_x__dag_cache_t *cache,
398                        const svn_string_t *path)
399 {
400   cache_entry_t *result = &cache->buckets[cache->last_non_empty];
401 
402   if (   result->node
403       && (result->path_len == path->len)
404       && !memcmp(result->path, path->data, path->len))
405     {
406       return result->node;
407     }
408 
409   return NULL;
410 }
411 
412 /* Return the cached DAG node for PATH from ROOT's node cache, or NULL if
413    the node isn't cached.
414  */
415 static dag_node_t *
dag_node_cache_get(svn_fs_root_t * root,const svn_string_t * path)416 dag_node_cache_get(svn_fs_root_t *root,
417                    const svn_string_t *path)
418 {
419   svn_fs_x__data_t *ffd = root->fs->fsap_data;
420   svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
421 
422   auto_clear_dag_cache(ffd->dag_node_cache);
423   return cache_lookup(ffd->dag_node_cache, change_set, path)->node;
424 }
425 
426 
427 void
svn_fs_x__update_dag_cache(dag_node_t * node)428 svn_fs_x__update_dag_cache(dag_node_t *node)
429 {
430   svn_fs_x__data_t *ffd = svn_fs_x__dag_get_fs(node)->fsap_data;
431   const char *path = svn_fs_x__dag_get_created_path(node);
432   svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
433 
434   cache_entry_t *bucket;
435   svn_string_t normalized;
436 
437   auto_clear_dag_cache(cache);
438   bucket = cache_lookup(cache, svn_fs_x__dag_get_id(node)->change_set,
439                         normalize_path(&normalized, path));
440   bucket->node = svn_fs_x__dag_dup(node, cache->pool);
441 }
442 
443 void
svn_fs_x__invalidate_dag_cache(svn_fs_root_t * root,const char * path)444 svn_fs_x__invalidate_dag_cache(svn_fs_root_t *root,
445                                const char *path)
446 {
447   svn_fs_x__data_t *ffd = root->fs->fsap_data;
448   svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
449   svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
450 
451   apr_size_t i;
452   for (i = 0; i < BUCKET_COUNT; ++i)
453     {
454       cache_entry_t *bucket = &cache->buckets[i];
455       if (bucket->change_set == change_set && bucket->node)
456         {
457           /* The call to svn_relpath_skip_ancestor() will require both
458              parameters to be canonical.  Since we allow for non-canonical
459              paths in our cache (unlikely to actually happen), we drop all
460              such entries.
461            */
462           if (!svn_relpath_is_canonical(bucket->path)
463               || svn_relpath_skip_ancestor(path + 1, bucket->path))
464             bucket->node = NULL;
465         }
466     }
467 }
468 
469 
470 /* Traversing directory paths.  */
471 
472 /* Try a short-cut for the open_path() function using the last node accessed.
473  * If that ROOT is that nodes's "created rev" and PATH matches its "created-
474  * path", return the node in *NODE_P.  Set it to NULL otherwise.
475  *
476  * This function is used to support ra_serf-style access patterns where we
477  * are first asked for path@rev and then for path@c_rev of the same node.
478  * The shortcut works by ignoring the "rev" part of the cache key and then
479  * checking whether we got lucky.  Lookup and verification are both quick
480  * plus there are many early outs for common types of mismatch.
481  */
482 static svn_error_t *
try_match_last_node(dag_node_t ** node_p,svn_fs_root_t * root,const svn_string_t * path)483 try_match_last_node(dag_node_t **node_p,
484                     svn_fs_root_t *root,
485                     const svn_string_t *path)
486 {
487   svn_fs_x__data_t *ffd = root->fs->fsap_data;
488 
489   /* Optimistic lookup: if the last node returned from the cache applied to
490      the same PATH, return it in NODE. */
491   dag_node_t *node = cache_lookup_last_path(ffd->dag_node_cache, path);
492 
493   /* Did we get a bucket with a committed node? */
494   if (node && !svn_fs_x__dag_check_mutable(node))
495     {
496       /* Get the path&rev pair at which this node was created.
497          This is repository location for which this node is _known_ to be
498          the right lookup result irrespective of how we found it. */
499       const char *created_path
500         = svn_fs_x__dag_get_created_path(node) + 1;
501       svn_revnum_t revision = svn_fs_x__dag_get_revision(node);
502 
503       /* Is it an exact match? */
504       if (   revision == root->rev
505           && strlen(created_path) == path->len
506           && memcmp(created_path, path->data, path->len) == 0)
507         {
508           svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
509 
510           /* Insert NODE into the cache at a second location.
511              In a fraction of all calls, the auto-cleanup may
512              cause NODE to become invalid. */
513           if (!auto_clear_dag_cache(cache))
514             {
515               /* Cache it under its full path@rev access path. */
516               svn_fs_x__change_set_t change_set
517                 = svn_fs_x__change_set_by_rev(revision);
518               cache_entry_t *bucket = cache_lookup(cache, change_set, path);
519               bucket->node = node;
520 
521               *node_p = node;
522               return SVN_NO_ERROR;
523             }
524         }
525     }
526 
527   *node_p = NULL;
528   return SVN_NO_ERROR;
529 }
530 
531 
532 /* From directory node PARENT, under ROOT, go one step down to the entry
533    NAME and return a reference to it in *CHILD_P.
534 
535    PATH is the combination of PARENT's path and NAME and is provided by
536    the caller such that we don't have to construct it here ourselves.
537    Similarly, CHANGE_SET is redundant with ROOT.
538 
539    If the directory entry cannot be found, instead of returning an error,
540    *CHILD_P will be set to NULL if ALLOW_EMPTY is TRUE.
541 
542    NOTE: *NODE_P will live within the DAG cache and we merely return a
543    reference to it.  Hence, it will invalid upon the next cache insertion.
544    Callers must create a copy if they want a non-temporary object.
545 */
546 static svn_error_t *
dag_step(dag_node_t ** child_p,svn_fs_root_t * root,dag_node_t * parent,const char * name,const svn_string_t * path,svn_fs_x__change_set_t change_set,svn_boolean_t allow_empty,apr_pool_t * scratch_pool)547 dag_step(dag_node_t **child_p,
548          svn_fs_root_t *root,
549          dag_node_t *parent,
550          const char *name,
551          const svn_string_t *path,
552          svn_fs_x__change_set_t change_set,
553          svn_boolean_t allow_empty,
554          apr_pool_t *scratch_pool)
555 {
556   svn_fs_t *fs = svn_fs_x__dag_get_fs(parent);
557   svn_fs_x__data_t *ffd = fs->fsap_data;
558   cache_entry_t *bucket;
559   svn_fs_x__id_t node_id;
560 
561   /* Locate the corresponding cache entry.  We may need PARENT to remain
562      valid for later use, so don't call auto_clear_dag_cache() here. */
563   bucket = cache_lookup(ffd->dag_node_cache, change_set, path);
564   if (bucket->node)
565     {
566       /* Already cached. Return a reference to the cached object. */
567       *child_p = bucket->node;
568       return SVN_NO_ERROR;
569     }
570 
571   /* Get the ID of the node we are looking for.  The function call checks
572      for various error conditions such like PARENT not being a directory. */
573   SVN_ERR(svn_fs_x__dir_entry_id(&node_id, parent, name, scratch_pool));
574   if (! svn_fs_x__id_used(&node_id))
575     {
576       const char *dir;
577 
578       /* No such directory entry.  Is a simple NULL result o.k.? */
579       if (allow_empty)
580         {
581           *child_p = NULL;
582           return SVN_NO_ERROR;
583         }
584 
585       /* Produce an appropriate error message. */
586       dir = apr_pstrmemdup(scratch_pool, path->data, path->len);
587       dir = svn_fs__canonicalize_abspath(dir, scratch_pool);
588 
589       return SVN_FS__NOT_FOUND(root, dir);
590     }
591 
592   /* We are about to add a new entry to the cache.  Periodically clear it.
593      If we had to clear it just now (< 1% chance), re-add the entry for our
594      item. */
595   if (auto_clear_dag_cache(ffd->dag_node_cache))
596     bucket = cache_lookup(ffd->dag_node_cache, change_set, path);
597 
598   /* Construct the DAG node object for NODE_ID. Let it live in the cache. */
599   SVN_ERR(svn_fs_x__dag_get_node(&bucket->node, fs, &node_id,
600                                  ffd->dag_node_cache->pool,
601                                  scratch_pool));
602 
603   /* Return a reference to the cached object. */
604   *child_p = bucket->node;
605   return SVN_NO_ERROR;
606 }
607 
608 /* Return the CHANGE_SET's root node in *NODE_P.  ROOT is the FS API root
609    object for CHANGE_SET.  Use SCRATCH_POOL for temporary allocations.
610 
611    NOTE: *NODE_P will live within the DAG cache and we merely return a
612    reference to it.  Hence, it will invalid upon the next cache insertion.
613    Callers must create a copy if they want a non-temporary object.
614  */
615 static svn_error_t *
get_root_node(dag_node_t ** node_p,svn_fs_root_t * root,svn_fs_x__change_set_t change_set,apr_pool_t * scratch_pool)616 get_root_node(dag_node_t **node_p,
617               svn_fs_root_t *root,
618               svn_fs_x__change_set_t change_set,
619               apr_pool_t *scratch_pool)
620 {
621   svn_fs_t *fs = root->fs;
622   svn_fs_x__data_t *ffd = fs->fsap_data;
623   cache_entry_t *bucket;
624   const svn_string_t path = { "", 0 };
625 
626   /* Auto-insert the node in the cache. */
627   auto_clear_dag_cache(ffd->dag_node_cache);
628   bucket = cache_lookup(ffd->dag_node_cache, change_set, &path);
629 
630   /* If it is not already cached, construct the DAG node object for NODE_ID.
631      Let it live in the cache.  Sadly, we often can't reuse txn DAG nodes. */
632   if (bucket->node == NULL)
633     SVN_ERR(svn_fs_x__dag_root(&bucket->node, fs, change_set,
634                                ffd->dag_node_cache->pool, scratch_pool));
635 
636   /* Return a reference to the cached object. */
637   *node_p = bucket->node;
638   return SVN_NO_ERROR;
639 }
640 
641 /* Walk the DAG starting at ROOT, following PATH and return a reference to
642    the target node in *NODE_P.   Use SCRATCH_POOL for temporary allocations.
643 
644    NOTE: *NODE_P will live within the DAG cache and we merely return a
645    reference to it.  Hence, it will invalid upon the next cache insertion.
646    Callers must create a copy if they want a non-temporary object.
647 */
648 static svn_error_t *
walk_dag_path(dag_node_t ** node_p,svn_fs_root_t * root,svn_string_t * path,apr_pool_t * scratch_pool)649 walk_dag_path(dag_node_t **node_p,
650               svn_fs_root_t *root,
651               svn_string_t *path,
652               apr_pool_t *scratch_pool)
653 {
654   dag_node_t *here = NULL; /* The directory we're currently looking at.  */
655   apr_pool_t *iterpool;
656   svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
657   const char *entry;
658   svn_string_t directory;
659   svn_stringbuf_t *entry_buffer;
660 
661   /* Special case: root directory.
662      We will later assume that all paths have at least one parent level,
663      so we must check here for those that don't. */
664   if (path->len == 0)
665     return svn_error_trace(get_root_node(node_p, root, change_set,
666                                          scratch_pool));
667 
668   /* Callers often traverse the DAG in some path-based order or along the
669      history segments.  That allows us to try a few guesses about where to
670      find the next item.  This is only useful if the caller didn't request
671      the full parent chain. */
672 
673   /* First attempt: Assume that we access the DAG for the same path as
674      in the last lookup but for a different revision that happens to be
675      the last revision that touched the respective node.  This is a
676      common pattern when e.g. checking out over ra_serf.  Note that this
677      will only work for committed data as the revision info for nodes in
678      txns is bogus.
679 
680      This shortcut is quick and will exit this function upon success.
681      So, try it first. */
682   if (!root->is_txn_root)
683     {
684       SVN_ERR(try_match_last_node(node_p, root, path));
685 
686       /* Did the shortcut work? */
687       if (*node_p)
688           return SVN_NO_ERROR;
689     }
690 
691   /* Second attempt: Try starting the lookup immediately at the parent
692      node.  We will often have recently accessed either a sibling or
693      said parent directory itself for the same revision.  ENTRY will
694      point to the last '/' in PATH. */
695   entry_buffer = svn_stringbuf_create_ensure(64, scratch_pool);
696   if (extract_last_segment(path, &directory, entry_buffer))
697     {
698       here = dag_node_cache_get(root, &directory);
699 
700       /* Did the shortcut work? */
701       if (here)
702         return svn_error_trace(dag_step(node_p, root, here,
703                                         entry_buffer->data, path,
704                                         change_set, FALSE, scratch_pool));
705     }
706 
707   /* Now there is something to iterate over. Thus, create the ITERPOOL. */
708   iterpool = svn_pool_create(scratch_pool);
709 
710   /* Make a parent_path item for the root node, using its own current
711      copy id.  */
712   SVN_ERR(get_root_node(&here, root, change_set, iterpool));
713   path->len = 0;
714 
715   /* Walk the path segment by segment. */
716   for (entry = next_entry_name(path, entry_buffer);
717        entry;
718        entry = next_entry_name(path, entry_buffer))
719     {
720       svn_pool_clear(iterpool);
721 
722       /* Note that HERE is allocated from the DAG node cache and will
723          therefore survive the ITERPOOL cleanup. */
724       SVN_ERR(dag_step(&here, root, here, entry, path, change_set, FALSE,
725                        iterpool));
726     }
727 
728   svn_pool_destroy(iterpool);
729   *node_p = here;
730 
731   return SVN_NO_ERROR;
732 }
733 
734 
735 /* Return a text string describing the absolute path of parent path
736    DAG_PATH.  It will be allocated in POOL. */
737 static const char *
parent_path_path(svn_fs_x__dag_path_t * dag_path,apr_pool_t * pool)738 parent_path_path(svn_fs_x__dag_path_t *dag_path,
739                  apr_pool_t *pool)
740 {
741   const char *path_so_far = "/";
742   if (dag_path->parent)
743     path_so_far = parent_path_path(dag_path->parent, pool);
744   return dag_path->entry
745     ? svn_fspath__join(path_so_far, dag_path->entry, pool)
746     : path_so_far;
747 }
748 
749 
750 /* Choose a copy ID inheritance method *INHERIT_P to be used in the
751    event that immutable node CHILD in FS needs to be made mutable.  If
752    the inheritance method is copy_id_inherit_new, also return a
753    *COPY_SRC_PATH on which to base the new copy ID (else return NULL
754    for that path).  CHILD must have a parent (it cannot be the root
755    node).  Temporary allocations are taken from SCRATCH_POOL. */
756 static svn_error_t *
get_copy_inheritance(svn_fs_x__copy_id_inherit_t * inherit_p,const char ** copy_src_path,svn_fs_t * fs,svn_fs_x__dag_path_t * child,apr_pool_t * scratch_pool)757 get_copy_inheritance(svn_fs_x__copy_id_inherit_t *inherit_p,
758                      const char **copy_src_path,
759                      svn_fs_t *fs,
760                      svn_fs_x__dag_path_t *child,
761                      apr_pool_t *scratch_pool)
762 {
763   svn_fs_x__id_t child_copy_id, parent_copy_id;
764   const char *id_path = NULL;
765   svn_fs_root_t *copyroot_root;
766   dag_node_t *copyroot_node;
767   svn_revnum_t copyroot_rev;
768   const char *copyroot_path;
769 
770   SVN_ERR_ASSERT(child && child->parent);
771 
772   /* Initialize some convenience variables. */
773   child_copy_id = *svn_fs_x__dag_get_copy_id(child->node);
774   parent_copy_id = *svn_fs_x__dag_get_copy_id(child->parent->node);
775 
776   /* By default, there is no copy source. */
777   *copy_src_path = NULL;
778 
779   /* If this child is already mutable, we have nothing to do. */
780   if (svn_fs_x__dag_check_mutable(child->node))
781     {
782       *inherit_p = svn_fs_x__copy_id_inherit_self;
783       return SVN_NO_ERROR;
784     }
785 
786   /* From this point on, we'll assume that the child will just take
787      its copy ID from its parent. */
788   *inherit_p = svn_fs_x__copy_id_inherit_parent;
789 
790   /* Special case: if the child's copy ID is '0', use the parent's
791      copy ID. */
792   if (svn_fs_x__id_is_root(&child_copy_id))
793     return SVN_NO_ERROR;
794 
795   /* Compare the copy IDs of the child and its parent.  If they are
796      the same, then the child is already on the same branch as the
797      parent, and should use the same mutability copy ID that the
798      parent will use. */
799   if (svn_fs_x__id_eq(&child_copy_id, &parent_copy_id))
800     return SVN_NO_ERROR;
801 
802   /* If the child is on the same branch that the parent is on, the
803      child should just use the same copy ID that the parent would use.
804      Else, the child needs to generate a new copy ID to use should it
805      need to be made mutable.  We will claim that child is on the same
806      branch as its parent if the child itself is not a branch point,
807      or if it is a branch point that we are accessing via its original
808      copy destination path. */
809   svn_fs_x__dag_get_copyroot(&copyroot_rev, &copyroot_path, child->node);
810   SVN_ERR(svn_fs_x__revision_root(&copyroot_root, fs, copyroot_rev,
811                                   scratch_pool));
812   SVN_ERR(svn_fs_x__get_temp_dag_node(&copyroot_node, copyroot_root,
813                                       copyroot_path, scratch_pool));
814 
815   if (!svn_fs_x__dag_related_node(copyroot_node, child->node))
816     return SVN_NO_ERROR;
817 
818   /* Determine if we are looking at the child via its original path or
819      as a subtree item of a copied tree. */
820   id_path = svn_fs_x__dag_get_created_path(child->node);
821   if (strcmp(id_path, parent_path_path(child, scratch_pool)) == 0)
822     {
823       *inherit_p = svn_fs_x__copy_id_inherit_self;
824       return SVN_NO_ERROR;
825     }
826 
827   /* We are pretty sure that the child node is an unedited nested
828      branched node.  When it needs to be made mutable, it should claim
829      a new copy ID. */
830   *inherit_p = svn_fs_x__copy_id_inherit_new;
831   *copy_src_path = id_path;
832   return SVN_NO_ERROR;
833 }
834 
835 /* Allocate a new svn_fs_x__dag_path_t node from RESULT_POOL, containing
836    NODE, ENTRY and PARENT; NODE and ENTRY are copied into RESULT_POOL.  */
837 static svn_fs_x__dag_path_t *
make_parent_path(dag_node_t * node,const svn_stringbuf_t * entry,svn_fs_x__dag_path_t * parent,apr_pool_t * result_pool)838 make_parent_path(dag_node_t *node,
839                  const svn_stringbuf_t *entry,
840                  svn_fs_x__dag_path_t *parent,
841                  apr_pool_t *result_pool)
842 {
843   svn_fs_x__dag_path_t *dag_path
844     = apr_pcalloc(result_pool, sizeof(*dag_path));
845   if (node)
846     dag_path->node = svn_fs_x__dag_dup(node, result_pool);
847   dag_path->entry = apr_pstrmemdup(result_pool, entry->data, entry->len);
848   dag_path->parent = parent;
849   dag_path->copy_inherit = svn_fs_x__copy_id_inherit_unknown;
850   dag_path->copy_src_path = NULL;
851   return dag_path;
852 }
853 
854 svn_error_t *
svn_fs_x__get_dag_path(svn_fs_x__dag_path_t ** dag_path_p,svn_fs_root_t * root,const char * fs_path,int flags,svn_boolean_t is_txn_path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)855 svn_fs_x__get_dag_path(svn_fs_x__dag_path_t **dag_path_p,
856                        svn_fs_root_t *root,
857                        const char *fs_path,
858                        int flags,
859                        svn_boolean_t is_txn_path,
860                        apr_pool_t *result_pool,
861                        apr_pool_t *scratch_pool)
862 {
863   svn_fs_t *fs = root->fs;
864   dag_node_t *here = NULL; /* The directory we're currently looking at.  */
865   svn_fs_x__dag_path_t *dag_path; /* The path from HERE up to the root. */
866   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
867 
868   svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
869   const char *entry;
870   svn_string_t path;
871   svn_stringbuf_t *entry_buffer = svn_stringbuf_create_ensure(64,
872                                                               scratch_pool);
873   apr_size_t path_len;
874 
875   /* Normalize the FS_PATH to be compatible with our DAG walk utils. */
876   normalize_path(&path, fs_path); /* "" */
877 
878   /* Make a DAG_PATH for the root node, using its own current copy id.  */
879   SVN_ERR(get_root_node(&here, root, change_set, iterpool));
880   dag_path = make_parent_path(here, entry_buffer, NULL, result_pool);
881   dag_path->copy_inherit = svn_fs_x__copy_id_inherit_self;
882 
883   path_len = path.len;
884   path.len = 0;
885 
886   /* Walk the path segment by segment.  Add to the DAG_PATH as we go. */
887   for (entry = next_entry_name(&path, entry_buffer);
888        entry;
889        entry = next_entry_name(&path, entry_buffer))
890     {
891       svn_pool_clear(iterpool);
892 
893       /* If the current node is not a directory and we are just here to
894        * check for the path's existence, then that's o.k.
895        * Otherwise, non-dir nodes will cause an error in dag_step. */
896       if (   (flags & svn_fs_x__dag_path_allow_null)
897           && (svn_fs_x__dag_node_kind(dag_path->node) != svn_node_dir))
898         {
899           dag_path = NULL;
900           break;
901         }
902 
903       /* Find the sub-node. */
904       SVN_ERR(dag_step(&here, root, dag_path->node, entry, &path, change_set,
905                        TRUE, iterpool));
906 
907       /* "node not found" requires special handling.  */
908       if (here == NULL)
909         {
910           /* If this was the last path component, and the caller
911              said it was optional, then don't return an error;
912              just put a NULL node pointer in the path.
913            */
914           if ((flags & svn_fs_x__dag_path_last_optional)
915               && (path_len == path.len))
916             {
917               dag_path = make_parent_path(NULL, entry_buffer, dag_path,
918                                           result_pool);
919               break;
920             }
921           else if (flags & svn_fs_x__dag_path_allow_null)
922             {
923               dag_path = NULL;
924               break;
925             }
926           else
927             {
928               /* Build a better error message than svn_fs_x__dag_open
929                  can provide, giving the root and full path name.  */
930               return SVN_FS__NOT_FOUND(root, fs_path);
931             }
932         }
933 
934       /* Now, make a parent_path item for CHILD. */
935       dag_path = make_parent_path(here, entry_buffer, dag_path, result_pool);
936       if (is_txn_path)
937         {
938           SVN_ERR(get_copy_inheritance(&dag_path->copy_inherit,
939                                        &dag_path->copy_src_path,
940                                        fs, dag_path, iterpool));
941         }
942     }
943 
944   svn_pool_destroy(iterpool);
945   *dag_path_p = dag_path;
946   return SVN_NO_ERROR;
947 }
948 
949 /* Set *NODE_P to a mutable root directory for ROOT, cloning if
950    necessary, allocating in RESULT_POOL.  ROOT must be a transaction root.
951    Use ERROR_PATH in error messages.  Use SCRATCH_POOL for temporaries.*/
952 static svn_error_t *
mutable_root_node(dag_node_t ** node_p,svn_fs_root_t * root,const char * error_path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)953 mutable_root_node(dag_node_t **node_p,
954                   svn_fs_root_t *root,
955                   const char *error_path,
956                   apr_pool_t *result_pool,
957                   apr_pool_t *scratch_pool)
958 {
959   /* If it's not a transaction root, we can't change its contents.  */
960   if (!root->is_txn_root)
961     return SVN_FS__ERR_NOT_MUTABLE(root->fs, root->rev, error_path);
962 
963   /* It's a transaction root.
964      Get the appropriate DAG root node and copy it into RESULT_POOL. */
965   SVN_ERR(get_root_node(node_p, root, svn_fs_x__root_change_set(root),
966                         scratch_pool));
967   *node_p = svn_fs_x__dag_dup(*node_p, result_pool);
968 
969   return SVN_NO_ERROR;
970 }
971 
972 svn_error_t *
svn_fs_x__make_path_mutable(svn_fs_root_t * root,svn_fs_x__dag_path_t * parent_path,const char * error_path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)973 svn_fs_x__make_path_mutable(svn_fs_root_t *root,
974                             svn_fs_x__dag_path_t *parent_path,
975                             const char *error_path,
976                             apr_pool_t *result_pool,
977                             apr_pool_t *scratch_pool)
978 {
979   dag_node_t *clone;
980   svn_fs_x__txn_id_t txn_id = svn_fs_x__root_txn_id(root);
981 
982   /* Is the node mutable already?  */
983   if (svn_fs_x__dag_check_mutable(parent_path->node))
984     return SVN_NO_ERROR;
985 
986   /* Are we trying to clone the root, or somebody's child node?  */
987   if (parent_path->parent)
988     {
989       svn_fs_x__id_t copy_id = { SVN_INVALID_REVNUM, 0 };
990       svn_fs_x__id_t *copy_id_ptr = &copy_id;
991       svn_fs_x__copy_id_inherit_t inherit = parent_path->copy_inherit;
992       const char *clone_path, *copyroot_path;
993       svn_revnum_t copyroot_rev;
994       svn_boolean_t is_parent_copyroot = FALSE;
995       svn_fs_root_t *copyroot_root;
996       dag_node_t *copyroot_node;
997       apr_pool_t *subpool;
998 
999       /* We're trying to clone somebody's child.  Make sure our parent
1000          is mutable.  */
1001       SVN_ERR(svn_fs_x__make_path_mutable(root, parent_path->parent,
1002                                           error_path, result_pool,
1003                                           scratch_pool));
1004 
1005       /* Allocate all temporaries in a sub-pool that we control locally.
1006          That way, we keep only the data of one level of recursion around
1007          at any time. */
1008       subpool = svn_pool_create(scratch_pool);
1009       switch (inherit)
1010         {
1011         case svn_fs_x__copy_id_inherit_parent:
1012           copy_id = *svn_fs_x__dag_get_copy_id(parent_path->parent->node);
1013           break;
1014 
1015         case svn_fs_x__copy_id_inherit_new:
1016           SVN_ERR(svn_fs_x__reserve_copy_id(&copy_id, root->fs, txn_id,
1017                                             subpool));
1018           break;
1019 
1020         case svn_fs_x__copy_id_inherit_self:
1021           copy_id_ptr = NULL;
1022           break;
1023 
1024         case svn_fs_x__copy_id_inherit_unknown:
1025         default:
1026           SVN_ERR_MALFUNCTION(); /* uh-oh -- somebody didn't calculate copy-ID
1027                       inheritance data. */
1028         }
1029 
1030       /* Determine what copyroot our new child node should use. */
1031       svn_fs_x__dag_get_copyroot(&copyroot_rev, &copyroot_path,
1032                                  parent_path->node);
1033       SVN_ERR(svn_fs_x__revision_root(&copyroot_root, root->fs,
1034                                       copyroot_rev, subpool));
1035       SVN_ERR(svn_fs_x__get_temp_dag_node(&copyroot_node, copyroot_root,
1036                                           copyroot_path, subpool));
1037 
1038       if (!svn_fs_x__dag_related_node(copyroot_node, parent_path->node))
1039         is_parent_copyroot = TRUE;
1040 
1041       /* Now make this node mutable.  */
1042       clone_path = parent_path_path(parent_path->parent, subpool);
1043       SVN_ERR(svn_fs_x__dag_clone_child(&clone,
1044                                         parent_path->parent->node,
1045                                         clone_path,
1046                                         parent_path->entry,
1047                                         copy_id_ptr, txn_id,
1048                                         is_parent_copyroot,
1049                                         result_pool,
1050                                         subpool));
1051 
1052       /* Update the path cache. */
1053       svn_fs_x__update_dag_cache(clone);
1054       svn_pool_destroy(subpool);
1055     }
1056   else
1057     {
1058       /* We're trying to clone the root directory.  */
1059       SVN_ERR(mutable_root_node(&clone, root, error_path, result_pool,
1060                                 scratch_pool));
1061     }
1062 
1063   /* Update the PARENT_PATH link to refer to the clone.  */
1064   parent_path->node = clone;
1065 
1066   return SVN_NO_ERROR;
1067 }
1068 
1069 
1070 svn_error_t *
svn_fs_x__get_temp_dag_node(dag_node_t ** node_p,svn_fs_root_t * root,const char * path,apr_pool_t * scratch_pool)1071 svn_fs_x__get_temp_dag_node(dag_node_t **node_p,
1072                             svn_fs_root_t *root,
1073                             const char *path,
1074                             apr_pool_t *scratch_pool)
1075 {
1076   svn_string_t normalized;
1077 
1078   /* First we look for the DAG in our cache. */
1079   *node_p = dag_node_cache_get(root, normalize_path(&normalized, path));
1080 
1081   /* If it is not there, walk the DAG and fill the cache. */
1082   if (! *node_p)
1083     SVN_ERR(walk_dag_path(node_p, root, &normalized, scratch_pool));
1084 
1085   return SVN_NO_ERROR;
1086 }
1087 
1088 
1089 svn_error_t *
svn_fs_x__get_dag_node(dag_node_t ** dag_node_p,svn_fs_root_t * root,const char * path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1090 svn_fs_x__get_dag_node(dag_node_t **dag_node_p,
1091                        svn_fs_root_t *root,
1092                        const char *path,
1093                        apr_pool_t *result_pool,
1094                        apr_pool_t *scratch_pool)
1095 {
1096   dag_node_t *node = NULL;
1097   SVN_ERR(svn_fs_x__get_temp_dag_node(&node, root, path, scratch_pool));
1098 
1099   /* We want the returned node to live in POOL. */
1100   *dag_node_p = svn_fs_x__dag_dup(node, result_pool);
1101 
1102   return SVN_NO_ERROR;
1103 }
1104