1 /*
2  * scanlog.c:  scanning the log for moves
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 /* ==================================================================== */
25 
26 
27 
28 /*** Includes. ***/
29 
30 #include "svn_hash.h"
31 #include "svn_wc.h"
32 #include "svn_client.h"
33 #include "svn_error.h"
34 #include "svn_config.h"
35 #include "svn_time.h"
36 #include "svn_dirent_uri.h"
37 #include "svn_path.h"
38 #include "svn_pools.h"
39 #include "svn_io.h"
40 
41 #include "private/svn_client_private.h"
42 #include "private/svn_wc_private.h"
43 #include "private/svn_ra_private.h"
44 
45 #include "svnmover.h"
46 
47 #include "svn_private_config.h"
48 
49 
50 /* From moves-scan-log branch */
51 
52 svn_repos_move_info_t *
svn_repos_move_info_create(const char * moved_from_repos_relpath,const char * moved_to_repos_relpath,svn_revnum_t revision,svn_revnum_t copyfrom_rev,svn_repos_move_info_t * prev,svn_repos_move_info_t * next,apr_pool_t * result_pool)53 svn_repos_move_info_create(const char *moved_from_repos_relpath,
54                            const char *moved_to_repos_relpath,
55                            svn_revnum_t revision,
56                            svn_revnum_t copyfrom_rev,
57                            svn_repos_move_info_t *prev,
58                            svn_repos_move_info_t *next,
59                            apr_pool_t *result_pool)
60 {
61   svn_repos_move_info_t *move = apr_palloc(result_pool, sizeof(*move));
62 
63   move->moved_from_repos_relpath = moved_from_repos_relpath;
64   move->moved_to_repos_relpath = moved_to_repos_relpath;
65   move->revision = revision;
66   move->copyfrom_rev = copyfrom_rev;
67   move->prev = prev;
68   move->next = next;
69 
70   return move;
71 }
72 
73 const char *
svn_client__format_move_chain_for_display(svn_repos_move_info_t * first_move,const char * indent,apr_pool_t * result_pool)74 svn_client__format_move_chain_for_display(svn_repos_move_info_t *first_move,
75                                           const char *indent,
76                                           apr_pool_t *result_pool)
77 {
78   const char *s;
79   svn_repos_move_info_t *last_move;
80 
81   last_move = first_move;
82   while (last_move->next)
83     last_move = last_move->next;
84 
85   if (last_move != first_move)
86     {
87       svn_repos_move_info_t *this_move;
88 
89       s = apr_psprintf(result_pool,
90                         _("Combined move:\n%s  %s@%ld -> %s\n"
91                        "%sIndividual moves:\n"),
92                        indent, first_move->moved_from_repos_relpath,
93                        first_move->copyfrom_rev,
94                        last_move->moved_to_repos_relpath, indent);
95 
96       this_move = first_move;
97       do
98         {
99           s = apr_pstrcat(result_pool, s,
100                           apr_psprintf(
101                             result_pool, _("%s  [r%ld] %s@%ld -> %s\n"),
102                             indent,
103                             this_move->revision,
104                             this_move->moved_from_repos_relpath,
105                             this_move->copyfrom_rev,
106                             this_move->moved_to_repos_relpath),
107                           (char *)NULL);
108           this_move = this_move->next;
109         }
110       while (this_move);
111     }
112   else
113     s = apr_psprintf(result_pool, _("  [r%ld] %s@%ld -> %s\n"),
114                      first_move->revision,
115                      first_move->moved_from_repos_relpath,
116                      first_move->copyfrom_rev,
117                      first_move->moved_to_repos_relpath);
118 
119   return s;
120 }
121 
122 typedef struct scan_moves_log_receiver_baton {
123   /*const char *anchor_abspath;*/
124   svn_client_ctx_t *ctx;
125   svn_revnum_t start;
126   svn_revnum_t end;
127   svn_ra_session_t *ra_session;
128 
129   /* The moved nodes hash to be populated.
130    * Maps a revision number to an array of svn_repos_move_info_t
131    * objects describing moves which happened in the revision.
132    *
133    * Given a sequence of moves which happened in given revisions, such as:
134    *   rA: mv x->z
135    *   rA: mv a->b
136    *   rB: mv b->c
137    *   rC: mv c->d
138    * we map each revision number to all moves which happened in the
139    * revision, which looks as follows:
140    *   rA : [(rA, x->z), (rA, a->b)]
141    *   rB : [(rB, b->c)]
142    *   rC : [(rC, c->d)]
143    * This allows an update to find relevant moves based on the base
144    * revision of a node (during updates the base revision of each node
145    * in the working copy is arbitrary so we might not know the nodes 'a'
146    * and 'x' under these names).
147    * Additionally, all moves pertaining to the same node are chained into a
148    * doubly-linked list via 'next' and 'prev' pointers (see definition of
149    * svn_repos_move_info_t).
150    * This way, an update can look up all moves relevant to a node, forwards
151    * or backwards in history, once it has located a relevant move in the chain.
152    * This can be visualized as follows:
153    *   rA : [(rA, x->z, prev=>NULL, next=>NULL),
154    *         (rA, a->b, prev=>NULL, next=>(rB, b->c))]
155    *   rB : [(rB, b->c), prev=>(rA, a->b), next=>(rC, c->d)]
156    *   rC : [(rC, c->d), prev=>(rB, c->d), next=>NULL]
157    */
158   apr_hash_t *moves;
159 
160   /* Temporary map of move-target paths to repos_move_info_t.
161    * Used to link multiple moves of the same node across revisions. */
162   apr_hash_t *moves_by_target_path;
163 } scan_moves_log_receiver_baton;
164 
165 typedef struct copy_info {
166   const char *copyto_path;
167   const char *copyfrom_path;
168   svn_revnum_t copyfrom_rev;
169 } copy_info;
170 
171 
172 /* Set *RELATED to true if the deleted node at repository relpath
173  * DELETED_PATH@DELETED_REV is ancestrally related to the node at
174  * repository relpath COPYFROM_PATH@COPYFROM_REV, else set it to false.
175  *
176  * ### JAF:  In practice this attempts to trace back, starting from
177  *       DELETED_PATH@(DELETED_REV-1).  What if that does not exist?
178  */
179 static svn_error_t *
check_ancestry(svn_boolean_t * related,const char * session_url,const char * repos_root_url,const char * deleted_path,svn_revnum_t deleted_rev,const char * copyfrom_path,svn_revnum_t copyfrom_rev,svn_client_ctx_t * ctx,apr_pool_t * scratch_pool)180 check_ancestry(svn_boolean_t *related,
181                const char *session_url,
182                const char *repos_root_url,
183                const char *deleted_path,
184                svn_revnum_t deleted_rev,
185                const char *copyfrom_path,
186                svn_revnum_t copyfrom_rev,
187                svn_client_ctx_t *ctx,
188                apr_pool_t *scratch_pool)
189 {
190   apr_hash_t *locations;
191   const char *old_url;
192   const char *old_location;
193   const char *relpath;
194   svn_ra_session_t *ra_session2;
195   apr_array_header_t *location_revisions;
196 
197   *related = FALSE;
198 
199   location_revisions = apr_array_make(scratch_pool, 1, sizeof(svn_revnum_t));
200   APR_ARRAY_PUSH(location_revisions, svn_revnum_t) = copyfrom_rev;
201   old_url = svn_uri_canonicalize(apr_pstrcat(scratch_pool,
202                                              repos_root_url, "/",
203                                              deleted_path, NULL),
204                                  scratch_pool);
205   relpath = svn_uri_skip_ancestor(session_url, old_url, scratch_pool);
206   SVN_ERR(svn_client_open_ra_session2(&ra_session2, session_url, NULL,
207                                       ctx, scratch_pool, scratch_pool));
208   if (relpath == NULL)
209     {
210       svn_error_t *err;
211 
212       /* The deleted path is outside of the baton's RA session URL.
213        * Try to open the new RA session to the repository root. */
214       SVN_ERR(svn_ra_reparent(ra_session2, repos_root_url, scratch_pool));
215       relpath = svn_uri_skip_ancestor(repos_root_url, old_url, scratch_pool);
216       if (relpath == NULL)
217         return SVN_NO_ERROR;
218       err = svn_ra_get_locations(ra_session2, &locations, relpath,
219                                  deleted_rev - 1, location_revisions,
220                                  scratch_pool);
221       if (err)
222         {
223           if (err->apr_err == SVN_ERR_RA_NOT_AUTHORIZED ||
224               err->apr_err == SVN_ERR_RA_DAV_FORBIDDEN)
225             {
226               svn_error_clear(err);
227               return SVN_NO_ERROR;
228             }
229           else
230             return svn_error_trace(err);
231         }
232     }
233   else
234     SVN_ERR(svn_ra_get_locations(ra_session2, &locations, relpath,
235                                  deleted_rev - 1, location_revisions,
236                                  scratch_pool));
237 
238   old_location = apr_hash_get(locations, &copyfrom_rev, sizeof(svn_revnum_t));
239   *related = (old_location &&
240               strcmp(old_location[0] == '/' ? old_location + 1 : old_location,
241                      copyfrom_path) == 0);
242 
243   return SVN_NO_ERROR;
244 }
245 
246 static svn_error_t *
scan_moves_log_receiver(void * baton,svn_log_entry_t * log_entry,apr_pool_t * scratch_pool)247 scan_moves_log_receiver(void *baton,
248                         svn_log_entry_t *log_entry,
249                         apr_pool_t *scratch_pool)
250 {
251   apr_hash_index_t *hi;
252   apr_hash_t *copies;
253   apr_array_header_t *deleted_paths;
254   struct scan_moves_log_receiver_baton *b = baton;
255   apr_pool_t *result_pool = apr_hash_pool_get(b->moves);
256   apr_pool_t *iterpool;
257   int i;
258   const char *session_url;
259   const char *repos_root_url;
260   apr_array_header_t *moves;
261 
262   if (b->ctx->notify_func2)
263     {
264 #if 0
265       svn_wc_notify_t *notify;
266       notify = svn_wc_create_notify(b->anchor_abspath,
267                                     svn_wc_notify_moves_scan_log_in_progress,
268                                     scratch_pool);
269       notify->moves_scan_log_start_rev = b->start;
270       notify->moves_scan_log_end_rev = b->end;
271       notify->moves_scan_log_current_rev = log_entry->revision;
272       b->ctx->notify_func2(b->ctx->notify_baton2, notify, scratch_pool);
273 #endif
274     }
275 
276   if (log_entry->changed_paths2 == NULL)
277     return SVN_NO_ERROR;
278 
279   copies = apr_hash_make(scratch_pool);
280   deleted_paths = apr_array_make(scratch_pool, 0, sizeof(const char *));
281 
282   /* Scan for copied and deleted nodes in this revision. */
283   for (hi = apr_hash_first(scratch_pool, log_entry->changed_paths2);
284        hi; hi = apr_hash_next(hi))
285     {
286       const char *path = apr_hash_this_key(hi);
287       svn_log_changed_path2_t *data = apr_hash_this_val(hi);
288 
289       if ((data->action == 'A' || data->action == 'R') && data->copyfrom_path)
290         {
291           struct copy_info *copy;
292           apr_array_header_t *copies_with_same_source_path;
293 
294           SVN_ERR_ASSERT(path[0] == '/');
295 
296           if (data->copyfrom_path[0] == '/')
297             data->copyfrom_path++;
298 
299           copy = apr_palloc(scratch_pool, sizeof(*copy));
300           copy->copyto_path = path + 1; /* Strip leading '/' */
301           copy->copyfrom_path = data->copyfrom_path;
302           copy->copyfrom_rev = data->copyfrom_rev;
303           copies_with_same_source_path = apr_hash_get(copies,
304                                                       data->copyfrom_path,
305                                                       APR_HASH_KEY_STRING);
306           if (copies_with_same_source_path == NULL)
307             {
308               copies_with_same_source_path = apr_array_make(
309                                                result_pool, 1,
310                                                sizeof(struct copy_info *));
311               apr_hash_set(copies, copy->copyfrom_path, APR_HASH_KEY_STRING,
312                            copies_with_same_source_path);
313             }
314           APR_ARRAY_PUSH(copies_with_same_source_path,
315                          struct copy_info *) = copy;
316         }
317 
318       if (data->action == 'D' || data->action == 'R')
319         {
320           const char *parent_path;
321 
322           /* ### Is this true?  What does the API guarantee?  Is it
323              ### true that copyfrom_path is a relpath? */
324           SVN_ERR_ASSERT(path[0] == '/');
325 
326           /* When a delete is within a copy the deleted path in the
327              changed_paths2 hash is the copied path, but for the purposes
328              of move detection we want the pre-copy path.
329 
330              ### Not sure if this is the correct thing to do.  Yes, it
331              ### allows us to detect moves in copies/moves but will it
332              ### lead to false positives?  Does it matter that the
333              ### adjusted path may not have been committed?  Does it
334              ### matter that the adjusted path may be the same as
335              ### another committed path? */
336           parent_path = svn_dirent_dirname(path, scratch_pool);
337           while(strcmp(parent_path, "/"))
338             {
339               svn_log_changed_path2_t *data2
340                 = apr_hash_get(log_entry->changed_paths2, parent_path,
341                                APR_HASH_KEY_STRING);
342 
343               if (data2 && data2->action == 'A')
344                 {
345                   const char *relpath = svn_dirent_skip_ancestor(parent_path,
346                                                                  path);
347                   path = svn_dirent_join_many(scratch_pool, "/",
348                                               data2->copyfrom_path, relpath,
349                                               NULL);
350                   break;
351                 }
352               else
353                 parent_path = svn_dirent_dirname(parent_path, scratch_pool);
354             }
355           APR_ARRAY_PUSH(deleted_paths, const char *) = path + 1;
356         }
357     }
358 
359   /* If a node was deleted at one location and copied from the deleted
360    * location to a new location within the same revision, put the node
361    * on the moved-nodes list. */
362   SVN_ERR(svn_ra_get_session_url(b->ra_session, &session_url, scratch_pool));
363   SVN_ERR(svn_ra_get_repos_root2(b->ra_session, &repos_root_url, scratch_pool));
364   iterpool = svn_pool_create(scratch_pool);
365   for (i = 0; i < deleted_paths->nelts; i++)
366     {
367       const char *deleted_path;
368       apr_array_header_t *copies_with_same_source_path;
369       svn_repos_move_info_t *new_move;
370       svn_repos_move_info_t *prior_move;
371       svn_boolean_t related;
372       int j;
373 
374       deleted_path = APR_ARRAY_IDX(deleted_paths, i, const char *);
375       copies_with_same_source_path = apr_hash_get(copies, deleted_path,
376                                                   APR_HASH_KEY_STRING);
377       if (copies_with_same_source_path == NULL)
378         continue;
379 
380       svn_pool_clear(iterpool);
381 
382       for (j = 0; j < copies_with_same_source_path->nelts; j++)
383         {
384           struct copy_info *copy;
385 
386           copy = APR_ARRAY_IDX(copies_with_same_source_path, j,
387                                struct copy_info *);
388 
389           /* We found a deleted node which matches the copyfrom path of
390            * a copied node. Verify that the deleted node is an ancestor
391            * of the copied node. Tracing back history of the deleted node
392            * from revision log_entry->revision-1 to the copyfrom-revision
393            * we must end up at the copyfrom-path. */
394           SVN_ERR(check_ancestry(&related, session_url, repos_root_url,
395                                  deleted_path, log_entry->revision,
396                                  copy->copyfrom_path,
397                                  copy->copyfrom_rev,
398                                  b->ctx, iterpool));
399           if (!related)
400             continue;
401 
402           /* ### TODO:
403            * If the node was not copied from the most recent last-changed
404            * revision of the deleted node, this is not a move but a
405            * "copy from the past + delete". */
406 
407           /* Remember details of this move. */
408           new_move = svn_repos_move_info_create(
409                        apr_pstrdup(result_pool, deleted_path),
410                        apr_pstrdup(result_pool, copy->copyto_path),
411                        log_entry->revision, copy->copyfrom_rev,
412                        NULL, NULL, result_pool);
413 
414           /* Link together multiple moves of the same node. */
415           prior_move = apr_hash_get(b->moves_by_target_path,
416                                     new_move->moved_from_repos_relpath,
417                                     APR_HASH_KEY_STRING);
418           if (prior_move)
419             {
420               /* Tracing back history of the delete-half of the new move
421                * to the copyfrom-revision of the prior move we must end up
422                * at the delete-half of the prior move. */
423               SVN_ERR(check_ancestry(&related, session_url, repos_root_url,
424                                      new_move->moved_from_repos_relpath,
425                                      new_move->revision,
426                                      prior_move->moved_from_repos_relpath,
427                                      prior_move->copyfrom_rev,
428                                      b->ctx, iterpool));
429               if (related)
430                 {
431                   prior_move->next = new_move;
432                   new_move->prev = prior_move;
433                 }
434             }
435           apr_hash_set(b->moves_by_target_path,
436                        new_move->moved_to_repos_relpath,
437                        APR_HASH_KEY_STRING, new_move);
438 
439           /* Add this move to the list of moves in this revision. */
440           moves = apr_hash_get(b->moves, &new_move->revision,
441                                sizeof(svn_revnum_t));
442           if (moves == NULL)
443             {
444               moves = apr_array_make(result_pool,  1,
445                                      sizeof(svn_repos_move_info_t *));
446               APR_ARRAY_PUSH(moves, svn_repos_move_info_t *) = new_move;
447               apr_hash_set(b->moves, &new_move->revision, sizeof(svn_revnum_t),
448                            moves);
449             }
450           else
451             APR_ARRAY_PUSH(moves, svn_repos_move_info_t *) = new_move;
452         }
453     }
454   svn_pool_destroy(iterpool);
455 
456   return SVN_NO_ERROR;
457 }
458 
459 svn_error_t *
svn_client__get_repos_moves(apr_hash_t ** moves,const char * anchor_abspath,svn_ra_session_t * ra_session,svn_revnum_t start,svn_revnum_t end,svn_client_ctx_t * ctx,apr_pool_t * result_pool,apr_pool_t * scratch_pool)460 svn_client__get_repos_moves(apr_hash_t **moves,
461                             const char *anchor_abspath,
462                             svn_ra_session_t *ra_session,
463                             svn_revnum_t start,
464                             svn_revnum_t end,
465                             svn_client_ctx_t *ctx,
466                             apr_pool_t *result_pool,
467                             apr_pool_t *scratch_pool)
468 {
469   struct scan_moves_log_receiver_baton lrb;
470 
471   /*lrb.anchor_abspath = anchor_abspath;*/
472   lrb.ctx = ctx;
473   lrb.moves = apr_hash_make(result_pool);
474   lrb.start = start;
475   lrb.end = end;
476   lrb.ra_session = ra_session;
477   lrb.moves_by_target_path = apr_hash_make(scratch_pool);
478 
479   if (ctx->notify_func2)
480     {
481 #if 0
482       svn_wc_notify_t *notify;
483       notify = svn_wc_create_notify(b->anchor_abspath,
484                                     svn_wc_notify_moves_scan_log_start,
485                                     scratch_pool);
486       notify->moves_scan_log_start_rev = start;
487       notify->moves_scan_log_end_rev = end;
488       notify->moves_scan_log_current_rev = start;
489       ctx->notify_func2(b->ctx->notify_baton2, notify, scratch_pool);
490 #endif
491     }
492 
493   SVN_ERR(svn_ra_get_log2(ra_session, NULL, start, end, 0, TRUE, FALSE,
494                           FALSE, apr_array_make(scratch_pool, 0,
495                                                 sizeof(const char *)),
496                           scan_moves_log_receiver, &lrb, scratch_pool));
497 
498   if (ctx->notify_func2)
499     {
500 #if 0
501       svn_wc_notify_t *notify;
502       notify = svn_wc_create_notify(b->anchor_abspath,
503                                     svn_wc_notify_moves_scan_log_done,
504                                     scratch_pool);
505       notify->moves_scan_log_start_rev = start;
506       notify->moves_scan_log_end_rev = end;
507       notify->moves_scan_log_current_rev = end;
508       b->ctx->notify_func2(b->ctx->notify_baton2, notify, scratch_pool);
509 #endif
510     }
511 
512   if (moves)
513     *moves = lrb.moves;
514 
515   return SVN_NO_ERROR;
516 }
517 
518