1 /*
2  * missing-branches.c -- Efficiently scan for missing branches.
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 /* ==================================================================== */
25 
26 
27 
28 /*** Includes. ***/
29 
30 #include <assert.h>
31 
32 #include "svn_hash.h"
33 #include "svn_pools.h"
34 #include "private/svn_sorts_private.h"
35 #include "private/svn_subr_private.h"
36 
37 #include "mergeinfo-normalizer.h"
38 
39 
40 /*** Code. ***/
41 
42 struct svn_min__branch_lookup_t
43 {
44   /* Connection to the repository where we are looking for paths.
45      If this is NULL, then only local lookups may be performed. */
46   svn_ra_session_t *session;
47 
48   /* Keyed by const char * FS paths that are known not to exist.
49      It is implied that sub-paths won't and can't exist either. */
50   apr_hash_t *deleted;
51 
52   /* Keyed by const char * FS paths that are known to exist. */
53   apr_hash_t *existing;
54 };
55 
56 /* Return the location of the last '/' in PATH before LEN.
57    Return 0 for root and empty paths.  PATH must be a canonical FS path. */
58 static apr_size_t
parent_segment(const char * path,apr_size_t len)59 parent_segment(const char *path,
60                apr_size_t len)
61 {
62   assert(path[0] == '/');
63 
64   if (len <= 1)
65     return 0;
66 
67   --len;
68   while (path[len] != '/')
69     --len;
70 
71   return len;
72 }
73 
74 /* Look for BRANCH in LOOKUP without connecting to the server.  Return
75  * svn_tristate_true, if it is known to exist, svn_tristate_false if it is
76  * known to not exist.  Otherwise return svn_tristate_unknown. */
77 static svn_tristate_t
local_lookup(const svn_min__branch_lookup_t * lookup,const char * branch)78 local_lookup(const svn_min__branch_lookup_t *lookup,
79              const char *branch)
80 {
81   apr_size_t len;
82 
83   /* Non-canonical paths are bad but we let the remote lookup take care of
84    * them.  Our hashes simply have no info on them. */
85   if (branch[0] != '/')
86     return svn_tristate_unknown;
87 
88   /* Hard-coded: "/" always exists. */
89   if (branch[1] == '\0')
90     return svn_tristate_true;
91 
92   /* For every existing path that we encountered, there is an entry in the
93      EXISITING hash.  So, we can just use that. */
94   len = strlen(branch);
95   if (apr_hash_get(lookup->existing, branch, len))
96     return svn_tristate_true;
97 
98   /* Not known to exist and might be known to not exist.  We only record
99      the top level deleted directory for DELETED branches, so we need to
100      walk up the path until we either find that deletion or an existing
101      path.  In the latter case, we don't know what happened to the levels
102      below that, including BRANCH. */
103   while (len > 0)
104     {
105       /* Known deleted?  Note that we checked BRANCH for existence but not
106          for deletion, yet. */
107       if (apr_hash_get(lookup->deleted, branch, len))
108         return svn_tristate_false;
109 
110       /* Parent known to exist?
111          Then, we don't know what happened to the BRANCH. */
112       len = parent_segment(branch, len);
113 
114       if (apr_hash_get(lookup->existing, branch, len))
115         return svn_tristate_unknown;
116     }
117 
118   /* We don't know. */
119   return svn_tristate_unknown;
120 }
121 
122 /* Set *DELETED to TRUE, if PATH can't be found at HEAD in SESSION.
123    Use SCRATCH_POOL for temporary allocations. */
124 static svn_error_t *
path_deleted(svn_boolean_t * deleted,svn_ra_session_t * session,const char * path,apr_pool_t * scratch_pool)125 path_deleted(svn_boolean_t *deleted,
126             svn_ra_session_t *session,
127             const char *path,
128             apr_pool_t *scratch_pool)
129 {
130   svn_node_kind_t kind;
131 
132   SVN_ERR_ASSERT(*path == '/');
133   SVN_ERR(svn_ra_check_path(session, path + 1, SVN_INVALID_REVNUM, &kind,
134                             scratch_pool));
135   *deleted = kind == svn_node_none;
136 
137   return SVN_NO_ERROR;
138 }
139 
140 /* Chop the last segment off PATH.  PATH must be a canonical FS path.
141    No-op for the root path. */
142 static void
to_parent(svn_stringbuf_t * path)143 to_parent(svn_stringbuf_t *path)
144 {
145   path->len = parent_segment(path->data, path->len);
146   if (path->len == 0)
147     path->len = 1;
148 
149   path->data[path->len] = '\0';
150 }
151 
152 /* Contact the repository used by LOOKUP and set *DELETED to TRUE, if path
153    BRANCH does not exist at HEAD.  Cache the lookup results in LOOKUP and
154    use SCRATCH_POOL for temporary allocations.  Call this only if
155    local_lookup returned svn_tristate_unknown. */
156 static svn_error_t *
remote_lookup(svn_boolean_t * deleted,const svn_min__branch_lookup_t * lookup,const char * branch,apr_pool_t * scratch_pool)157 remote_lookup(svn_boolean_t *deleted,
158               const svn_min__branch_lookup_t *lookup,
159               const char *branch,
160               apr_pool_t *scratch_pool)
161 {
162   svn_stringbuf_t *path = svn_stringbuf_create(branch, scratch_pool);
163 
164   /* We shall call this function only after the local lookup failed. */
165   assert(local_lookup(lookup, branch) == svn_tristate_unknown);
166 
167   /* Actual repository lookup. */
168   SVN_ERR(path_deleted(deleted, lookup->session, branch, scratch_pool));
169 
170   /* If the path did not exist, store the furthest non-existent parent. */
171   if (*deleted)
172     {
173       apr_pool_t *iterpool;
174       svn_boolean_t is_deleted;
175       const char *deleted_path;
176       apr_size_t len;
177 
178       /* Find the closest parent that exists.  Often, that is something like
179          "branches" and the next level already does not exist.  So, use that
180          as a heuristics to minimize the number of lookups. */
181 
182       /* Set LEN to the length of the last unknown to exist sub-path. */
183       svn_stringbuf_t *temp = svn_stringbuf_dup(path, scratch_pool);
184       do
185         {
186           len = temp->len;
187           to_parent(temp);
188         }
189       while (local_lookup(lookup, temp->data) != svn_tristate_true);
190 
191       /* Check whether that path actually does not exist. */
192       if (len == path->len)
193         {
194           /* We already know that the full PATH does not exist.
195              We get here if the immediate parent of PATH is known to exist. */
196           is_deleted = TRUE;
197         }
198       else
199         {
200           temp = svn_stringbuf_ncreate(branch, len, scratch_pool);
201           SVN_ERR(path_deleted(&is_deleted, lookup->session, temp->data,
202                                scratch_pool));
203         }
204 
205       /* Whether or not that path does not exist, we know now and should
206          store that in LOOKUP. */
207       if (is_deleted)
208         {
209           /* We are almost done here. The existing parent is already in
210              LOOKUP and we only need to add the deleted path. */
211           deleted_path = apr_pstrmemdup(apr_hash_pool_get(lookup->deleted),
212                                         branch, len);
213           apr_hash_set(lookup->deleted, deleted_path, len, deleted_path);
214 
215           return SVN_NO_ERROR;
216         }
217       else
218         {
219           /* We just learned that TEMP does exist. Remember this fact and
220              later continue the search for the deletion boundary. */
221           const char *hash_path
222             = apr_pstrmemdup(apr_hash_pool_get(lookup->existing), temp->data,
223                              temp->len);
224 
225           /* Only add HASH_PATH.  Its parents are already in that hash. */
226           apr_hash_set(lookup->existing, hash_path, temp->len, hash_path);
227         }
228 
229       /* Find the closest parent that does exist.
230         "/" exists, hence, this will terminate. */
231       iterpool = svn_pool_create(scratch_pool);
232       do
233         {
234           svn_pool_clear(iterpool);
235 
236           len = path->len;
237           to_parent(path);
238 
239           /* We often know that "/branches" etc. exist.  So, we can skip
240              the final lookup in that case. */
241           if (local_lookup(lookup, path->data) == svn_tristate_true)
242             break;
243 
244           /* Get the info from the repository. */
245           SVN_ERR(path_deleted(&is_deleted, lookup->session, path->data,
246                               iterpool));
247         }
248       while (is_deleted);
249       svn_pool_destroy(iterpool);
250 
251       /* PATH exists, it's sub-path of length LEN does not. */
252       deleted_path = apr_pstrmemdup(apr_hash_pool_get(lookup->deleted),
253                                     branch, len);
254       apr_hash_set(lookup->deleted, deleted_path, len, deleted_path);
255     }
256 
257   /* PATH and all its parents exist. Add them to the EXISITING hash.
258      Make sure to allocate only the longest path and then reference
259      sub-sequences of it to keep memory usage in check. */
260   if (!apr_hash_get(lookup->existing, path->data, path->len))
261     {
262       const char *hash_path
263         = apr_pstrmemdup(apr_hash_pool_get(lookup->existing), path->data,
264                          path->len);
265 
266       /* Note that we don't need to check for exiting entries here because
267          the APR hash will reuse existing nodes and we are not allocating
268          anything else here.  So, this does not allocate duplicate nodes. */
269       for (; path->len > 1; to_parent(path))
270         apr_hash_set(lookup->existing, hash_path, path->len, hash_path);
271     }
272 
273   return SVN_NO_ERROR;
274 }
275 
276 svn_min__branch_lookup_t *
svn_min__branch_lookup_create(svn_ra_session_t * session,apr_pool_t * result_pool)277 svn_min__branch_lookup_create(svn_ra_session_t *session,
278                               apr_pool_t *result_pool)
279 {
280   svn_min__branch_lookup_t *result = apr_pcalloc(result_pool,
281                                                  sizeof(*result));
282   result->session = session;
283   result->deleted = svn_hash__make(result_pool);
284   result->existing = svn_hash__make(result_pool);
285 
286   return result;
287 }
288 
289 svn_min__branch_lookup_t *
svn_min__branch_lookup_from_paths(apr_array_header_t * paths,apr_pool_t * result_pool)290 svn_min__branch_lookup_from_paths(apr_array_header_t *paths,
291                                   apr_pool_t *result_pool)
292 {
293   svn_min__branch_lookup_t *result
294     = svn_min__branch_lookup_create(NULL, result_pool);
295 
296   int i;
297   for (i = 0; i < paths->nelts; ++i)
298     {
299       const char *path = APR_ARRAY_IDX(paths, i, const char *);
300       if (strlen(path) > 0)
301         {
302           path = apr_pstrdup(result_pool, path);
303           svn_hash_sets(result->deleted, path, path);
304         }
305     }
306 
307   return result;
308 }
309 
310 svn_error_t *
svn_min__branch_lookup(svn_boolean_t * deleted,svn_min__branch_lookup_t * lookup,const char * branch,svn_boolean_t local_only,apr_pool_t * scratch_pool)311 svn_min__branch_lookup(svn_boolean_t *deleted,
312                        svn_min__branch_lookup_t *lookup,
313                        const char *branch,
314                        svn_boolean_t local_only,
315                        apr_pool_t *scratch_pool)
316 {
317   switch (local_lookup(lookup, branch))
318     {
319       case svn_tristate_false:
320         *deleted = TRUE;
321         return SVN_NO_ERROR;
322 
323       case svn_tristate_true:
324         *deleted = FALSE;
325         return SVN_NO_ERROR;
326 
327       default:
328         /* If the state is unknown and we are only allowed to do a local
329            lookup, default to a possible false negative.  Note that not
330            having the session available implies local-only lookup. */
331         if (local_only || !lookup->session)
332           {
333             *deleted = FALSE;
334             return SVN_NO_ERROR;
335           }
336     }
337 
338   return svn_error_trace(remote_lookup(deleted, lookup, branch,
339                                        scratch_pool));
340 }
341 
342 apr_array_header_t *
svn_min__branch_deleted_list(svn_min__branch_lookup_t * lookup,apr_pool_t * result_pool,apr_pool_t * scratch_pool)343 svn_min__branch_deleted_list(svn_min__branch_lookup_t *lookup,
344                              apr_pool_t *result_pool,
345                              apr_pool_t *scratch_pool)
346 {
347   apr_array_header_t *result = apr_array_make(result_pool,
348                                               apr_hash_count(lookup->deleted),
349                                               sizeof(const char *));
350   apr_hash_index_t *hi;
351   for (hi = apr_hash_first(scratch_pool, lookup->deleted);
352        hi;
353        hi = apr_hash_next(hi))
354     {
355       const char *path = apr_hash_this_key(hi);
356       apr_size_t len = apr_hash_this_key_len(hi);
357 
358       APR_ARRAY_PUSH(result, const char *) = apr_pstrmemdup(result_pool,
359                                                             path, len);
360     }
361 
362   svn_sort__array(result, svn_sort_compare_paths);
363 
364   return result;
365 }
366