1 #[macro_use]
2 extern crate slog;
3 use anyhow::{anyhow, Result};
4 
5 mod commute;
6 mod owned;
7 mod stack;
8 
9 use std::io::Write;
10 
/// Options controlling a single invocation of [`run`].
pub struct Config<'a> {
    /// When `true`, no commits are created; `run` only logs what it would
    /// have committed.
    pub dry_run: bool,
    /// Forwarded unchanged to `stack::working_stack`.
    // NOTE(review): presumably relaxes the safety checks made while building
    // the stack -- confirm against `stack::working_stack`.
    pub force: bool,
    /// Optional base revision, forwarded to `stack::working_stack`. The
    /// warning emitted by `run` suggests it bounds how far back the search
    /// for fixup targets goes.
    pub base: Option<&'a str>,
    /// When `true`, `run` finishes by invoking
    /// `git rebase --interactive --autosquash` over the stack.
    pub and_rebase: bool,
    /// Logger that receives all diagnostics produced by `run`.
    pub logger: &'a slog::Logger,
}
18 
run(config: &Config) -> Result<()>19 pub fn run(config: &Config) -> Result<()> {
20     let repo = git2::Repository::open_from_env()?;
21     debug!(config.logger, "repository found"; "path" => repo.path().to_str());
22 
23     let stack = stack::working_stack(&repo, config.base, config.force, config.logger)?;
24     if stack.is_empty() {
25         crit!(config.logger, "No commits available to fix up, exiting");
26         return Ok(());
27     }
28 
29     let mut diff_options = Some({
30         let mut ret = git2::DiffOptions::new();
31         ret.context_lines(0)
32             .id_abbrev(40)
33             .ignore_filemode(true)
34             .ignore_submodules(true);
35         ret
36     });
37 
38     let (stack, summary_counts): (Vec<_>, _) = {
39         let mut diffs = Vec::with_capacity(stack.len());
40         for commit in &stack {
41             let diff = owned::Diff::new(
42                 &repo.diff_tree_to_tree(
43                     if commit.parents().len() == 0 {
44                         None
45                     } else {
46                         Some(commit.parent(0)?.tree()?)
47                     }
48                     .as_ref(),
49                     Some(&commit.tree()?),
50                     diff_options.as_mut(),
51                 )?,
52             )?;
53             trace!(config.logger, "parsed commit diff";
54                    "commit" => commit.id().to_string(),
55                    "diff" => format!("{:?}", diff),
56             );
57             diffs.push(diff);
58         }
59 
60         let summary_counts = stack::summary_counts(&stack);
61         (
62             stack.into_iter().zip(diffs.into_iter()).collect(),
63             summary_counts,
64         )
65     };
66 
67     let mut head_tree = repo.head()?.peel_to_tree()?;
68     let index = owned::Diff::new(&repo.diff_tree_to_index(
69         Some(&head_tree),
70         None,
71         diff_options.as_mut(),
72     )?)?;
73     trace!(config.logger, "parsed index";
74            "index" => format!("{:?}", index),
75     );
76 
77     let signature = repo
78         .signature()
79         .or_else(|_| git2::Signature::now("nobody", "nobody@example.com"))?;
80     let mut head_commit = repo.head()?.peel_to_commit()?;
81 
82     let mut patches_considered = 0usize;
83     'patch: for index_patch in index.iter() {
84         let old_path = index_patch.new_path.as_slice();
85         if index_patch.status != git2::Delta::Modified {
86             debug!(config.logger, "skipped non-modified hunk";
87                     "path" => String::from_utf8_lossy(old_path).into_owned(),
88                     "status" => format!("{:?}", index_patch.status),
89             );
90             continue 'patch;
91         }
92 
93         patches_considered += 1;
94 
95         let mut preceding_hunks_offset = 0isize;
96         let mut applied_hunks_offset = 0isize;
97         'hunk: for index_hunk in &index_patch.hunks {
98             debug!(config.logger, "next hunk";
99                    "header" => index_hunk.header(),
100                    "path" => String::from_utf8_lossy(old_path).into_owned(),
101             );
102 
103             // To properly handle files ("patches" in libgit2 lingo) with multiple hunks, we
104             // need to find the updated line coordinates (`header`) of the current hunk in
105             // two cases:
106             // 1) As if it were the only hunk in the index. This only involves shifting the
107             // "added" side *up* by the offset introduced by the preceding hunks:
108             let isolated_hunk = index_hunk
109                 .clone()
110                 .shift_added_block(-preceding_hunks_offset);
111 
112             // 2) When applied on top of the previously committed hunks. This requires shifting
113             // both the "added" and the "removed" sides of the previously isolated hunk *down*
114             // by the offset of the committed hunks:
115             let hunk_to_apply = isolated_hunk
116                 .clone()
117                 .shift_both_blocks(applied_hunks_offset);
118 
119             // The offset is the number of lines added minus the number of lines removed by a hunk:
120             let hunk_offset = index_hunk.changed_offset();
121 
122             // To aid in understanding these arithmetics, here's an illustration.
123             // There are two hunks in the original patch, each adding one line ("line2" and
124             // "line5"). Assuming the first hunk (with offset = -1) was already proceesed
125             // and applied, the table shows the three versions of the patch, with line numbers
126             // on the <A>dded and <R>emoved sides for each:
127             // |----------------|-----------|------------------|
128             // |                |           | applied on top   |
129             // | original patch | isolated  | of the preceding |
130             // |----------------|-----------|------------------|
131             // | <R> <A>        | <R> <A>   | <R> <A>          |
132             // |----------------|-----------|------------------|
133             // |  1   1  line1  |  1   1    |  1   1   line1   |
134             // |  2      line2  |  2   2    |  2   2   line3   |
135             // |  3   2  line3  |  3   3    |  3   3   line4   |
136             // |  4   3  line4  |  4   4    |  4       line5   |
137             // |  5      line5  |  5        |                  |
138             // |----------------|-----------|------------------|
139             // |       So the second hunk's `header` is:       |
140             // |   -5,1 +3,0    | -5,1 +4,0 |    -4,1 +3,0     |
141             // |----------------|-----------|------------------|
142 
143             debug!(config.logger, "";
144                 "to apply" => hunk_to_apply.header(),
145                 "to commute" => isolated_hunk.header(),
146                 "preceding hunks" => format!("{}/{}", applied_hunks_offset, preceding_hunks_offset),
147             );
148 
149             preceding_hunks_offset += hunk_offset;
150 
151             // find the newest commit that the hunk cannot commute with
152             let mut dest_commit = None;
153             let mut commuted_old_path = old_path;
154             let mut commuted_index_hunk = isolated_hunk;
155 
156             'commit: for &(ref commit, ref diff) in &stack {
157                 let c_logger = config.logger.new(o!(
158                     "commit" => commit.id().to_string(),
159                 ));
160                 let next_patch = match diff.by_new(commuted_old_path) {
161                     Some(patch) => patch,
162                     // this commit doesn't touch the hunk's file, so
163                     // they trivially commute, and the next commit
164                     // should be considered
165                     None => {
166                         debug!(c_logger, "skipped commit with no path");
167                         continue 'commit;
168                     }
169                 };
170                 if next_patch.status == git2::Delta::Added {
171                     debug!(c_logger, "found noncommutative commit by add");
172                     dest_commit = Some(commit);
173                     break 'commit;
174                 }
175                 if commuted_old_path != next_patch.old_path.as_slice() {
176                     debug!(c_logger, "changed commute path";
177                            "path" => String::from_utf8_lossy(&next_patch.old_path).into_owned(),
178                     );
179                     commuted_old_path = next_patch.old_path.as_slice();
180                 }
181                 commuted_index_hunk = match commute::commute_diff_before(
182                     &commuted_index_hunk,
183                     &next_patch.hunks,
184                 ) {
185                     Some(hunk) => {
186                         debug!(c_logger, "commuted hunk with commit";
187                                "offset" => (hunk.added.start as i64) - (commuted_index_hunk.added.start as i64),
188                         );
189                         hunk
190                     }
191                     // this commit contains a hunk that cannot
192                     // commute with the hunk being absorbed
193                     None => {
194                         debug!(c_logger, "found noncommutative commit by conflict");
195                         dest_commit = Some(commit);
196                         break 'commit;
197                     }
198                 };
199             }
200             let dest_commit = match dest_commit {
201                 Some(commit) => commit,
202                 // the hunk commutes with every commit in the stack,
203                 // so there is no commit to absorb it into
204                 None => {
205                     warn!(
206                         config.logger,
207                         "Could not find a commit to fix up, use \
208                          --base to increase the search range."
209                     );
210                     continue 'hunk;
211                 }
212             };
213 
214             // TODO: the git2 api only supports utf8 commit messages,
215             // so it's okay to use strings instead of bytes here
216             // https://docs.rs/git2/0.7.5/src/git2/repo.rs.html#998
217             // https://libgit2.org/libgit2/#HEAD/group/commit/git_commit_create
218             let dest_commit_id = dest_commit.id().to_string();
219             let dest_commit_locator = dest_commit
220                 .summary()
221                 .filter(|&msg| summary_counts[msg] == 1)
222                 .unwrap_or(&dest_commit_id);
223             if !config.dry_run {
224                 head_tree =
225                     apply_hunk_to_tree(&repo, &head_tree, &hunk_to_apply, &index_patch.old_path)?;
226                 head_commit = repo.find_commit(repo.commit(
227                     Some("HEAD"),
228                     &signature,
229                     &signature,
230                     &format!("fixup! {}\n", dest_commit_locator),
231                     &head_tree,
232                     &[&head_commit],
233                 )?)?;
234                 info!(config.logger, "committed";
235                       "commit" => head_commit.id().to_string(),
236                       "header" => hunk_to_apply.header(),
237                 );
238             } else {
239                 info!(config.logger, "would have committed";
240                       "fixup" => dest_commit_locator,
241                       "header" => hunk_to_apply.header(),
242                 );
243             }
244             applied_hunks_offset += hunk_offset;
245         }
246     }
247 
248     if patches_considered == 0 {
249         warn!(
250             config.logger,
251             "No additions staged, try adding something to the index."
252         );
253     } else if config.and_rebase {
254         use std::process::Command;
255         // unwrap() is safe here, as we exit early if the stack is empty
256         let last_commit_in_stack = &stack.last().unwrap().0;
257         // The stack isn't supposed to have any merge commits, per the check in working_stack()
258         let number_of_parents = last_commit_in_stack.parents().len();
259         assert!(number_of_parents <= 1);
260 
261         let mut command = Command::new("git");
262         command.args(&["rebase", "--interactive", "--autosquash"]);
263 
264         if number_of_parents == 0 {
265             command.arg("--root");
266         } else {
267             // Use a range that is guaranteed to include all the commits we might have
268             // committed "fixup!" commits for.
269             let base_commit_sha = last_commit_in_stack.parent(0)?.id().to_string();
270             command.arg(&base_commit_sha);
271         }
272 
273         // Don't check that we have successfully absorbed everything, nor git's
274         // exit code -- as git will print helpful messages on its own.
275         command.status().expect("could not run git rebase");
276     }
277 
278     Ok(())
279 }
280 
apply_hunk_to_tree<'repo>( repo: &'repo git2::Repository, base: &git2::Tree, hunk: &owned::Hunk, path: &[u8], ) -> Result<git2::Tree<'repo>>281 fn apply_hunk_to_tree<'repo>(
282     repo: &'repo git2::Repository,
283     base: &git2::Tree,
284     hunk: &owned::Hunk,
285     path: &[u8],
286 ) -> Result<git2::Tree<'repo>> {
287     let mut treebuilder = repo.treebuilder(Some(base))?;
288 
289     // recurse into nested tree if applicable
290     if let Some(slash) = path.iter().position(|&x| x == b'/') {
291         let (first, rest) = path.split_at(slash);
292         let rest = &rest[1..];
293 
294         let (subtree, submode) = {
295             let entry = treebuilder
296                 .get(first)?
297                 .ok_or_else(|| anyhow!("couldn't find tree entry in tree for path"))?;
298             (repo.find_tree(entry.id())?, entry.filemode())
299         };
300         // TODO: loop instead of recursing to avoid potential stack overflow
301         let result_subtree = apply_hunk_to_tree(repo, &subtree, hunk, rest)?;
302 
303         treebuilder.insert(first, result_subtree.id(), submode)?;
304         return Ok(repo.find_tree(treebuilder.write()?)?);
305     }
306 
307     let (blob, mode) = {
308         let entry = treebuilder
309             .get(path)?
310             .ok_or_else(|| anyhow!("couldn't find blob entry in tree for path"))?;
311         (repo.find_blob(entry.id())?, entry.filemode())
312     };
313 
314     // TODO: convert path to OsStr and pass it during blob_writer
315     // creation, to get gitattributes handling (note that converting
316     // &[u8] to &std::path::Path is only possible on unixy platforms)
317     let mut blobwriter = repo.blob_writer(None)?;
318     let old_content = blob.content();
319     let (old_start, _, _, _) = hunk.anchors();
320 
321     // first, write the lines from the old content that are above the
322     // hunk
323     let old_content = {
324         let (pre, post) = split_lines_after(old_content, old_start);
325         blobwriter.write_all(pre)?;
326         post
327     };
328     // next, write the added side of the hunk
329     for line in &*hunk.added.lines {
330         blobwriter.write_all(line)?;
331     }
332     // if this hunk removed lines from the old content, those must be
333     // skipped
334     let (_, old_content) = split_lines_after(old_content, hunk.removed.lines.len());
335     // finally, write the remaining lines of the old content
336     blobwriter.write_all(old_content)?;
337 
338     treebuilder.insert(path, blobwriter.commit()?, mode)?;
339     Ok(repo.find_tree(treebuilder.write()?)?)
340 }
341 
342 /// Return slices for lines [1..n] and [n+1; ...]
split_lines_after(content: &[u8], n: usize) -> (&[u8], &[u8])343 fn split_lines_after(content: &[u8], n: usize) -> (&[u8], &[u8]) {
344     let split_index = if n > 0 {
345         memchr::Memchr::new(b'\n', content)
346             .fuse() // TODO: is fuse necessary here?
347             .nth(n - 1) // the position of '\n' ending the `n`-th line
348             .map(|x| x + 1)
349             .unwrap_or_else(|| content.len())
350     } else {
351         0
352     };
353     content.split_at(split_index)
354 }
355