1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #ifndef INCLUDE_git_diff_h__
8 #define INCLUDE_git_diff_h__
9 
10 #include "common.h"
11 #include "types.h"
12 #include "oid.h"
13 #include "tree.h"
14 #include "refs.h"
15 
16 /**
17  * @file git2/diff.h
18  * @brief Git tree and file differencing routines.
19  *
20  * Overview
21  * --------
22  *
23  * Calculating diffs is generally done in two phases: building a list of
24  * diffs then traversing it.  This makes it easier to share logic across
25  * the various types of diffs (tree vs tree, workdir vs index, etc.), and
26  * also allows you to insert optional diff post-processing phases,
27  * such as rename detection, in between the steps.  When you are done with
28  * a diff object, it must be freed.
29  *
30  * Terminology
31  * -----------
32  *
33  * To understand the diff APIs, you should know the following terms:
34  *
35  * - A `diff` represents the cumulative list of differences between two
36  *   snapshots of a repository (possibly filtered by a set of file name
37  *   patterns).  This is the `git_diff` object.
38  *
39  * - A `delta` is a file pair with an old and new revision.  The old version
40  *   may be absent if the file was just created and the new version may be
41  *   absent if the file was deleted.  A diff is mostly just a list of deltas.
42  *
43  * - A `binary` file / delta is a file (or pair) for which no text diffs
44  *   should be generated.  A diff can contain delta entries that are
45  *   binary, but no diff content will be output for those files.  There is
46  *   a base heuristic for binary detection and you can further tune the
47  *   behavior with git attributes or diff flags and option settings.
48  *
49  * - A `hunk` is a span of modified lines in a delta along with some stable
50  *   surrounding context.  You can configure the amount of context and other
51  *   properties of how hunks are generated.  Each hunk also comes with a
52  *   header that describes where it starts and ends in both the old and new
53  *   versions in the delta.
54  *
55  * - A `line` is a range of characters inside a hunk.  It could be a context
56  *   line (i.e. in both old and new versions), an added line (i.e. only in
57  *   the new version), or a removed line (i.e. only in the old version).
58  *   Unfortunately, we don't know anything about the encoding of data in the
59  *   file being diffed, so we cannot tell you much about the line content.
60  *   Line data will not be NUL-byte terminated, however, because it will be
61  *   just a span of bytes inside the larger file.
62  *
63  * @ingroup Git
64  * @{
65  */
66 GIT_BEGIN_DECL
67 
68 /**
69  * Flags for diff options.  A combination of these flags can be passed
70  * in via the `flags` value in the `git_diff_options`.
71  */
72 typedef enum {
73 	/** Normal diff, the default */
74 	GIT_DIFF_NORMAL = 0,
75 
76 	/*
77 	 * Options controlling which files will be in the diff
78 	 */
79 
80 	/** Reverse the sides of the diff */
81 	GIT_DIFF_REVERSE = (1u << 0),
82 
83 	/** Include ignored files in the diff */
84 	GIT_DIFF_INCLUDE_IGNORED = (1u << 1),
85 
86 	/** Even with GIT_DIFF_INCLUDE_IGNORED, an entire ignored directory
87 	 *  will be marked with only a single entry in the diff; this flag
88 	 *  adds all files under the directory as IGNORED entries, too.
89 	 */
90 	GIT_DIFF_RECURSE_IGNORED_DIRS = (1u << 2),
91 
92 	/** Include untracked files in the diff */
93 	GIT_DIFF_INCLUDE_UNTRACKED = (1u << 3),
94 
95 	/** Even with GIT_DIFF_INCLUDE_UNTRACKED, an entire untracked
96 	 *  directory will be marked with only a single entry in the diff
97 	 *  (a la what core Git does in `git status`); this flag adds *all*
98 	 *  files under untracked directories as UNTRACKED entries, too.
99 	 */
100 	GIT_DIFF_RECURSE_UNTRACKED_DIRS = (1u << 4),
101 
102 	/** Include unmodified files in the diff */
103 	GIT_DIFF_INCLUDE_UNMODIFIED = (1u << 5),
104 
105 	/** Normally, a type change between files will be converted into a
106 	 *  DELETED record for the old and an ADDED record for the new; this
107 	 *  option enables the generation of TYPECHANGE delta records.
108 	 */
109 	GIT_DIFF_INCLUDE_TYPECHANGE = (1u << 6),
110 
111 	/** Even with GIT_DIFF_INCLUDE_TYPECHANGE, blob->tree changes still
112 	 *  generally show as a DELETED blob.  This flag tries to correctly
113 	 *  label blob->tree transitions as TYPECHANGE records with new_file's
114 	 *  mode set to tree.  Note: the tree SHA will not be available.
115 	 */
116 	GIT_DIFF_INCLUDE_TYPECHANGE_TREES = (1u << 7),
117 
118 	/** Ignore file mode changes */
119 	GIT_DIFF_IGNORE_FILEMODE = (1u << 8),
120 
121 	/** Treat all submodules as unmodified */
122 	GIT_DIFF_IGNORE_SUBMODULES = (1u << 9),
123 
124 	/** Use case insensitive filename comparisons */
125 	GIT_DIFF_IGNORE_CASE = (1u << 10),
126 
127 	/** May be combined with `GIT_DIFF_IGNORE_CASE` to specify that a file
128 	 *  that has changed case will be returned as an add/delete pair.
129 	 */
130 	GIT_DIFF_INCLUDE_CASECHANGE = (1u << 11),
131 
132 	/** If the pathspec is set in the diff options, this flag indicates
133 	 *  that the paths will be treated as literal paths instead of
134 	 *  fnmatch patterns.  Each path in the list must either be a full
135 	 *  path to a file or a directory.  (A trailing slash indicates that
136 	 *  the path will _only_ match a directory).  If a directory is
137 	 *  specified, all children will be included.
138 	 */
139 	GIT_DIFF_DISABLE_PATHSPEC_MATCH = (1u << 12),
140 
141 	/** Disable updating of the `binary` flag in delta records.  This is
142 	 *  useful when iterating over a diff if you don't need hunk and data
143 	 *  callbacks and want to avoid having to load the file completely.
144 	 */
145 	GIT_DIFF_SKIP_BINARY_CHECK = (1u << 13),
146 
147 	/** When diff finds an untracked directory, to match the behavior of
148 	 *  core Git, it scans the contents for IGNORED and UNTRACKED files.
149 	 *  If *all* contents are IGNORED, then the directory is IGNORED; if
150 	 *  any contents are not IGNORED, then the directory is UNTRACKED.
151 	 *  This is extra work that may not matter in many cases.  This flag
152 	 *  turns off that scan and immediately labels an untracked directory
153 	 *  as UNTRACKED (changing the behavior to not match core Git).
154 	 */
155 	GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS = (1u << 14),
156 
157 	/** When diff finds a file in the working directory with stat
158 	 * information different from the index, but the OID ends up being the
159 	 * same, write the correct stat information into the index.  Note:
160 	 * without this flag, diff will always leave the index untouched.
161 	 */
162 	GIT_DIFF_UPDATE_INDEX = (1u << 15),
163 
164 	/** Include unreadable files in the diff */
165 	GIT_DIFF_INCLUDE_UNREADABLE = (1u << 16),
166 
167 	/** Include unreadable files in the diff as UNTRACKED (per the flag name; TODO confirm) */
168 	GIT_DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED = (1u << 17),
169 
170 	/*
171 	 * Options controlling how output will be generated
172 	 */
173 
174 	/** Treat all files as text, disabling binary attributes & detection */
175 	GIT_DIFF_FORCE_TEXT = (1u << 20),
176 	/** Treat all files as binary, disabling text diffs */
177 	GIT_DIFF_FORCE_BINARY = (1u << 21),
178 
179 	/** Ignore all whitespace */
180 	GIT_DIFF_IGNORE_WHITESPACE = (1u << 22),
181 	/** Ignore changes in amount of whitespace */
182 	GIT_DIFF_IGNORE_WHITESPACE_CHANGE = (1u << 23),
183 	/** Ignore whitespace at end of line */
184 	GIT_DIFF_IGNORE_WHITESPACE_EOL = (1u << 24),
185 
186 	/** When generating patch text, include the content of untracked
187 	 *  files.  This automatically turns on GIT_DIFF_INCLUDE_UNTRACKED but
188 	 *  it does not turn on GIT_DIFF_RECURSE_UNTRACKED_DIRS.  Add that
189 	 *  flag if you want the content of every single UNTRACKED file.
190 	 */
191 	GIT_DIFF_SHOW_UNTRACKED_CONTENT = (1u << 25),
192 
193 	/** When generating output, include the names of unmodified files if
194 	 *  they are included in the git_diff.  Normally these are skipped in
195 	 *  the formats that list files (e.g. name-only, name-status, raw).
196 	 *  Even with this, these will not be included in patch format.
197 	 */
198 	GIT_DIFF_SHOW_UNMODIFIED = (1u << 26),
199 
200 	/** Use the "patience diff" algorithm */
201 	GIT_DIFF_PATIENCE = (1u << 28),
202 	/** Take extra time to find minimal diff */
203 	GIT_DIFF_MINIMAL = (1u << 29),
204 
205 	/** Include the necessary deflate / delta information so that `git-apply`
206 	 *  can apply given diff information to binary files.
207 	 */
208 	GIT_DIFF_SHOW_BINARY = (1u << 30),
209 
210 	/** Use a heuristic that takes indentation and whitespace into account,
211 	 * which can produce better diffs for ambiguous diff hunks.
212 	 * NOTE(review): (1u << 31) does not fit in a 32-bit `int` -- confirm
213 	 * every supported compiler accepts it as an enumerator value. */
214 	GIT_DIFF_INDENT_HEURISTIC = (1u << 31),
215 } git_diff_option_t;
216 
217 /**
218  * The diff object that contains all individual file deltas.
219  *
220  * This is an opaque structure which will be allocated by one of the diff
221  * generator functions below (such as `git_diff_tree_to_tree()`).  You are
222  * responsible for releasing the object memory when done, using the
223  * `git_diff_free()` function; the diff cannot be used after it is freed.
224  */
225 typedef struct git_diff git_diff;
226 
227 /**
228  * Flags for the delta object and the file objects on each side.
229  *
230  * These bit flags are combined in both the `flags` value of the
231  * `git_diff_delta` and the `flags` of the `git_diff_file` objects for the
232  * old and new sides of the delta.  Values outside of this public range
233  * should be considered reserved for internal or future use.
234  */
235 typedef enum {
236 	GIT_DIFF_FLAG_BINARY     = (1u << 0), /**< file(s) treated as binary data */
237 	GIT_DIFF_FLAG_NOT_BINARY = (1u << 1), /**< file(s) treated as text data */
238 	GIT_DIFF_FLAG_VALID_ID   = (1u << 2), /**< `id` value is known correct */
239 	GIT_DIFF_FLAG_EXISTS     = (1u << 3), /**< file exists at this side of the delta */
240 } git_diff_flag_t;
241 
242 /**
243  * What type of change is described by a git_diff_delta?
244  *
245  * `GIT_DELTA_RENAMED` and `GIT_DELTA_COPIED` will only show up if you run
246  * `git_diff_find_similar()` on the diff object.
247  *
248  * `GIT_DELTA_TYPECHANGE` only shows up given `GIT_DIFF_INCLUDE_TYPECHANGE`
249  * in the option flags (otherwise type changes will be split into ADDED /
250  * DELETED pairs).
251  */
252 typedef enum {
253 	GIT_DELTA_UNMODIFIED = 0,  /**< no changes */
254 	GIT_DELTA_ADDED = 1,	   /**< entry does not exist in the old version */
255 	GIT_DELTA_DELETED = 2,	   /**< entry does not exist in the new version */
256 	GIT_DELTA_MODIFIED = 3,    /**< entry content changed between old and new */
257 	GIT_DELTA_RENAMED = 4,     /**< entry was renamed between old and new */
258 	GIT_DELTA_COPIED = 5,      /**< entry was copied from another old entry */
259 	GIT_DELTA_IGNORED = 6,     /**< entry is an ignored item in the workdir */
260 	GIT_DELTA_UNTRACKED = 7,   /**< entry is an untracked item in the workdir */
261 	GIT_DELTA_TYPECHANGE = 8,  /**< type of entry changed between old and new */
262 	GIT_DELTA_UNREADABLE = 9,  /**< entry is unreadable */
263 	GIT_DELTA_CONFLICTED = 10, /**< entry in the index is conflicted */
264 } git_delta_t;
265 
266 /**
267  * Description of one side of a delta.
268  *
269  * Although this is called a "file", it could represent a file, a symbolic
270  * link, a submodule commit id, or even a tree (although that only if you
271  * are tracking type changes or ignored/untracked directories).
272  *
273  * The `id` is the `git_oid` of the item.  If the entry represents an
274  * absent side of a diff (e.g. the `old_file` of a `GIT_DELTA_ADDED` delta),
275  * then the oid will be zeroes.
276  *
277  * `path` is the NUL-terminated path to the entry relative to the working
278  * directory of the repository.
279  *
280  * `size` is the size of the entry in bytes.
281  *
282  * `flags` is a combination of the `git_diff_flag_t` values.
283  *
284  * `mode` is, roughly, the stat() `st_mode` value for the item.  This will
285  * be restricted to one of the `git_filemode_t` values.
286  *
287  * The `id_abbrev` represents the known length of the `id` field, when
288  * converted to a hex string.  It is generally `GIT_OID_HEXSZ`, unless this
289  * delta was created from reading a patch file, in which case it may be
290  * abbreviated to something reasonable, like 7 characters.
291  */
292 typedef struct {
293 	git_oid     id;        /**< oid of the item; zeroes if this side is absent */
294 	const char *path;      /**< NUL-terminated path relative to the workdir */
295 	git_off_t   size;      /**< size of the entry in bytes */
296 	uint32_t    flags;     /**< combination of `git_diff_flag_t` values */
297 	uint16_t    mode;      /**< roughly stat() `st_mode`; a `git_filemode_t` value */
298 	uint16_t    id_abbrev; /**< known length of `id` as a hex string */
299 } git_diff_file;
300 
301 /**
302  * Description of changes to one entry.
303  *
304  * When iterating over a diff, this will be passed to most callbacks and
305  * you can use the contents to understand exactly what has changed.
306  *
307  * The `old_file` represents the "from" side of the diff and the `new_file`
308  * represents the "to" side of the diff.  What those mean depends on the
309  * function that was used to generate the diff and will be documented below.
310  * You can also use the `GIT_DIFF_REVERSE` flag to flip it around.
311  *
312  * Although the two sides of the delta are named "old_file" and "new_file",
313  * they actually may correspond to entries that represent a file, a symbolic
314  * link, a submodule commit id, or even a tree (if you are tracking type
315  * changes or ignored/untracked directories).
316  *
317  * Under some circumstances, in the name of efficiency, not all fields will
318  * be filled in, but we generally try to fill in as much as possible.  One
319  * example is that the "flags" field may not have either the `BINARY` or the
320  * `NOT_BINARY` flag set to avoid examining file contents if you do not pass
321  * in hunk and/or line callbacks to the diff foreach iteration function.  It
322  * will just use the git attributes for those files.
323  *
324  * The similarity score is zero unless you call `git_diff_find_similar()`
325  * which does a similarity analysis of files in the diff.  Use that
326  * function to do rename and copy detection, and to split heavily modified
327  * files into add/delete pairs.  After that call, deltas with a status of
328  * GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
329  * between 0 and 100 indicating how similar the old and new sides are.
330  *
331  * If you ask `git_diff_find_similar` to find heavily modified files to
332  * break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
333  * records may have a non-zero similarity score if the self-similarity is
334  * below the split threshold.  To display this value like core Git, invert
335  * the score (a la `printf("M%03d", 100 - delta->similarity)`).
336  */
337 typedef struct {
338 	git_delta_t   status;      /**< kind of change this delta describes */
339 	uint32_t      flags;	   /**< git_diff_flag_t values */
340 	uint16_t      similarity;  /**< for RENAMED and COPIED, value 0-100 */
341 	uint16_t      nfiles;	   /**< number of files in this delta */
342 	git_diff_file old_file;    /**< the "from" side of the diff */
343 	git_diff_file new_file;    /**< the "to" side of the diff */
344 } git_diff_delta;
345 
346 /**
347  * Diff notification callback function.
348  *
349  * The callback will be called for each file, just before the
350  * `git_diff_delta` gets inserted into the diff.
351  *
352  * When the callback:
353  * - returns < 0, the diff process will be aborted.
354  * - returns > 0, the delta will not be inserted into the diff, but the
355  *   diff process continues.
356  * - returns 0, the delta is inserted into the diff, and the diff process
357  *   continues.
358  */
359 typedef int (*git_diff_notify_cb)(
360 	const git_diff *diff_so_far,
361 	const git_diff_delta *delta_to_add,
362 	const char *matched_pathspec,
363 	void *payload);
364 
365 /**
366  * Diff progress callback.
367  *
368  * Called before each file comparison.
369  *
370  * @param diff_so_far The diff being generated.
371  * @param old_path The path to the old file or NULL.
372  * @param new_path The path to the new file or NULL.
373  * @return Non-zero to abort the diff.
374  */
375 typedef int (*git_diff_progress_cb)(
376 	const git_diff *diff_so_far,
377 	const char *old_path,
378 	const char *new_path,
379 	void *payload); /* presumably the `payload` set in `git_diff_options` -- confirm */
380 
381 /**
382  * Structure describing options about how the diff should be executed.
383  *
384  * Setting all values of the structure to zero will yield the default
385  * values.  Similarly, passing NULL for the options structure will
386  * give the defaults.  The default values are marked below.
387  *
388  * - `flags` is a combination of the `git_diff_option_t` values above
389  * - `context_lines` is the number of unchanged lines that define the
390  *    boundary of a hunk (and to display before and after)
391  * - `interhunk_lines` is the maximum number of unchanged lines between
392  *    hunk boundaries before the hunks will be merged into one.
393  * - `old_prefix` is the virtual "directory" to prefix to old file names
394  *   in hunk headers (default "a")
395  * - `new_prefix` is the virtual "directory" to prefix to new file names
396  *   in hunk headers (default "b")
397  * - `pathspec` is an array of paths / fnmatch patterns to constrain diff
398  * - `max_size` is a file size (in bytes) above which a blob will be marked
399  *   as binary automatically; pass a negative value to disable.
400  * - `notify_cb` is an optional callback function, notifying the consumer of
401  *   changes to the diff as new deltas are added.
402  * - `progress_cb` is an optional callback function, notifying the consumer of
403  *   which files are being examined as the diff is generated.
404  * - `payload` is the payload to pass to the callback functions.
405  * - `ignore_submodules` overrides the submodule ignore setting for all
406  *   submodules in the diff.
407  */
408 typedef struct {
409 	unsigned int version;      /**< version for the struct */
410 	uint32_t flags;            /**< defaults to GIT_DIFF_NORMAL */
411 
412 	/* options controlling which files are in the diff */
413 
414 	git_submodule_ignore_t ignore_submodules; /**< submodule ignore rule */
415 	git_strarray       pathspec;     /**< defaults to include all paths */
416 	git_diff_notify_cb   notify_cb;   /**< optional; called as deltas are added */
417 	git_diff_progress_cb progress_cb; /**< optional; called as files are examined */
418 	void                *payload;     /**< passed to the callback functions */
419 
420 	/* options controlling how the diff text is generated */
421 
422 	uint32_t    context_lines;    /**< defaults to 3 */
423 	uint32_t    interhunk_lines;  /**< defaults to 0 */
424 	uint16_t    id_abbrev;       /**< default 'core.abbrev' or 7 if unset */
425 	git_off_t   max_size;         /**< defaults to 512MB */
426 	const char *old_prefix;       /**< defaults to "a" */
427 	const char *new_prefix;       /**< defaults to "b" */
428 } git_diff_options;
429 
430 /* The current version of the diff options structure */
431 #define GIT_DIFF_OPTIONS_VERSION 1
432 
433 /* Stack initializer for diff options.  Alternatively use
434  * `git_diff_init_options` for programmatic initialization.
435  */
436 #define GIT_DIFF_OPTIONS_INIT \
437 	{GIT_DIFF_OPTIONS_VERSION, 0, GIT_SUBMODULE_IGNORE_UNSPECIFIED, {NULL,0}, NULL, NULL, NULL, 3}
438 
439 /**
440  * Initializes a `git_diff_options` with default values. Equivalent to
441  * creating an instance with `GIT_DIFF_OPTIONS_INIT`.
442  *
443  * @param opts The `git_diff_options` struct to initialize
444  * @param version Version of struct; pass `GIT_DIFF_OPTIONS_VERSION`
445  * @return Zero on success; -1 on failure.
446  */
447 GIT_EXTERN(int) git_diff_init_options(
448 	git_diff_options *opts,
449 	unsigned int version);
450 
451 /**
452  * When iterating over a diff, callback that will be made per file.
453  *
454  * @param delta A pointer to the delta data for the file
455  * @param progress Progress through the diff, going from 0 to 1
456  * @param payload User-specified pointer from the foreach function
457  */
458 typedef int (*git_diff_file_cb)(
459 	const git_diff_delta *delta,
460 	float progress,
461 	void *payload);
462 
463 #define GIT_DIFF_HUNK_HEADER_SIZE	128 /**< size of the `header` buffer in `git_diff_hunk` */
464 
465 /**
466  * When producing a binary diff, the binary data returned will be
467  * either the deflated full ("literal") contents of the file, or
468  * the deflated binary delta between the two sides (whichever is
469  * smaller).  This enum identifies which of those forms is present.
470  */
471 typedef enum {
472 	/** There is no binary delta. */
473 	GIT_DIFF_BINARY_NONE,
474 
475 	/** The binary data is the literal contents of the file. */
476 	GIT_DIFF_BINARY_LITERAL,
477 
478 	/** The binary data is the delta from one side to the other. */
479 	GIT_DIFF_BINARY_DELTA,
480 } git_diff_binary_t;
481 
482 /** The contents of one of the files in a binary diff. */
483 typedef struct {
484 	/** The type of binary data for this file. */
485 	git_diff_binary_t type;
486 
487 	/** The binary data, deflated. */
488 	const char *data;
489 
490 	/** The length of the deflated binary data in `data`. */
491 	size_t datalen;
492 
493 	/** The length of the binary data after inflation. */
494 	size_t inflatedlen;
495 } git_diff_binary_file;
496 
497 /** Structure describing the binary contents of a diff. */
498 typedef struct {
499 	/**
500 	 * Whether there is data in this binary structure or not.  If this
501 	 * is `1`, then this was produced with, and includes, binary content.
502 	 * If this is `0` then this was generated knowing only that a binary
503 	 * file changed but without providing the data, probably from a patch
504 	 * that said `Binary files a/file.txt and b/file.txt differ`.
505 	 */
506 	unsigned int contains_data;
507 	git_diff_binary_file old_file; /**< The contents of the old file. */
508 	git_diff_binary_file new_file; /**< The contents of the new file. */
509 } git_diff_binary;
510 
511 /**
512  * When iterating over a diff, callback that will be made for
513  * binary content within the diff.
514  */
515 typedef int(*git_diff_binary_cb)(
516 	const git_diff_delta *delta,   /**< delta that contains this binary content */
517 	const git_diff_binary *binary, /**< binary content description */
518 	void *payload);                /**< user reference data */
519 
520 /**
521  * Structure describing a hunk of a diff: its line ranges and header text.
522  */
523 typedef struct {
524 	int    old_start;     /**< Starting line number in old_file */
525 	int    old_lines;     /**< Number of lines in old_file */
526 	int    new_start;     /**< Starting line number in new_file */
527 	int    new_lines;     /**< Number of lines in new_file */
528 	size_t header_len;    /**< Number of bytes in header text */
529 	char   header[GIT_DIFF_HUNK_HEADER_SIZE];   /**< Header text, NUL-byte terminated */
530 } git_diff_hunk;
531 
532 /**
533  * When iterating over a diff, callback that will be made per hunk.
534  */
535 typedef int (*git_diff_hunk_cb)(
536 	const git_diff_delta *delta, /**< delta that contains this hunk */
537 	const git_diff_hunk *hunk,   /**< the hunk being visited */
538 	void *payload);              /**< user reference data */
539 
540 /**
541  * Line origin constants.
542  *
543  * These values describe where a line came from and will be passed to
544  * the git_diff_line_cb when iterating over a diff.  There are some
545  * special origin constants at the end that are used for the text
546  * output callbacks to demarcate lines that are actually part of
547  * the file or hunk headers.
548  */
549 typedef enum {
550 	/* These values will be sent to `git_diff_line_cb` along with the line */
551 	GIT_DIFF_LINE_CONTEXT   = ' ', /**< Line is in both old and new versions */
552 	GIT_DIFF_LINE_ADDITION  = '+', /**< Line is only in the new version */
553 	GIT_DIFF_LINE_DELETION  = '-', /**< Line is only in the old version */
554 
555 	GIT_DIFF_LINE_CONTEXT_EOFNL = '=', /**< Both files have no LF at end */
556 	GIT_DIFF_LINE_ADD_EOFNL = '>',     /**< Old has no LF at end, new does */
557 	GIT_DIFF_LINE_DEL_EOFNL = '<',     /**< Old has LF at end, new does not */
558 
559 	/* The following values will only be sent to a `git_diff_line_cb` when
560 	 * the content of a diff is being formatted through `git_diff_print`.
561 	 */
562 	GIT_DIFF_LINE_FILE_HDR  = 'F', /**< Line is part of a file header */
563 	GIT_DIFF_LINE_HUNK_HDR  = 'H', /**< Line is part of a hunk header */
564 	GIT_DIFF_LINE_BINARY    = 'B' /**< For "Binary files x and y differ" */
565 } git_diff_line_t;
566 
567 /**
568  * Structure describing a line (or data span) of a diff.
569  */
570 typedef struct {
571 	char   origin;       /**< A git_diff_line_t value */
572 	int    old_lineno;   /**< Line number in old file or -1 for added line */
573 	int    new_lineno;   /**< Line number in new file or -1 for deleted line */
574 	int    num_lines;    /**< Number of newline characters in content */
575 	size_t content_len;  /**< Number of bytes of data in `content` */
576 	git_off_t content_offset; /**< Offset in the original file to the content */
577 	const char *content; /**< Pointer to diff text, not NUL-byte terminated (use content_len) */
578 } git_diff_line;
579 
580 /**
581  * When iterating over a diff, callback made per text diff line.  In this
582  * context, the provided "range" (presumably `hunk` -- confirm) will be NULL.
583  *
584  * When printing a diff, callback that will be made to output each line
585  * of text.  This uses some extra GIT_DIFF_LINE_... constants for output
586  * of lines of file and hunk headers.
587  */
588 typedef int (*git_diff_line_cb)(
589 	const git_diff_delta *delta, /**< delta that contains this data */
590 	const git_diff_hunk *hunk,   /**< hunk containing this data */
591 	const git_diff_line *line,   /**< line data */
592 	void *payload);              /**< user reference data */
593 
594 /**
595  * Flags to control the behavior of diff rename/copy detection.
596  */
597 typedef enum {
598 	/** Obey `diff.renames`. Overridden by any other GIT_DIFF_FIND_... flag. */
599 	GIT_DIFF_FIND_BY_CONFIG = 0,
600 
601 	/** Look for renames? (`--find-renames`) */
602 	GIT_DIFF_FIND_RENAMES = (1u << 0),
603 
604 	/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
605 	GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
606 
607 	/** Look for copies? (a la `--find-copies`). */
608 	GIT_DIFF_FIND_COPIES = (1u << 2),
609 
610 	/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
611 	 *
612 	 * For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
613 	 * the initial `git_diff` is being generated.
614 	 */
615 	GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
616 
617 	/** Mark significant rewrites for split (`--break-rewrites=/M`) */
618 	GIT_DIFF_FIND_REWRITES = (1u << 4),
619 	/** Actually split large rewrites into delete/add pairs */
620 	GIT_DIFF_BREAK_REWRITES = (1u << 5),
621 	/** Mark rewrites for split and break into delete/add pairs */
622 	GIT_DIFF_FIND_AND_BREAK_REWRITES =
623 		(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
624 
625 	/** Find renames/copies for UNTRACKED items in working directory.
626 	 *
627 	 * For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
628 	 * initial `git_diff` is being generated (and obviously the diff must
629 	 * be against the working directory for this to make sense).
630 	 */
631 	GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
632 
633 	/** Turn on all finding features (mask covering bits 0 through 7). */
634 	GIT_DIFF_FIND_ALL = (0x0ff),
635 
636 	/** Measure similarity ignoring leading whitespace (default) */
637 	GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
638 	/** Measure similarity ignoring all whitespace */
639 	GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
640 	/** Measure similarity including all data */
641 	GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
642 	/** Measure similarity only by comparing SHAs (fast and cheap) */
643 	GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
644 
645 	/** Do not break rewrites unless they contribute to a rename.
646 	 *
647 	 * Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
648 	 * similarity of modified files and split the ones that have changed a
649 	 * lot into a DELETE / ADD pair.  Then the sides of that pair will be
650 	 * considered candidates for rename and copy detection.
651 	 *
652 	 * If you add this flag in and the split pair is *not* used for an
653 	 * actual rename or copy, then the modified record will be restored to
654 	 * a regular MODIFIED record instead of being split.
655 	 */
656 	GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY  = (1u << 15),
657 
658 	/** Remove any UNMODIFIED deltas after find_similar is done.
659 	 *
660 	 * Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
661 	 * --find-copies-harder behavior requires building a diff with the
662 	 * GIT_DIFF_INCLUDE_UNMODIFIED flag.  If you do not want UNMODIFIED
663 	 * records in the final result, pass this flag to have them removed.
664 	 */
665 	GIT_DIFF_FIND_REMOVE_UNMODIFIED = (1u << 16),
666 } git_diff_find_t;
667 
668 /**
669  * Pluggable similarity metric (a set of callbacks and a user `payload`).
670  */
671 typedef struct {
672 	int (*file_signature)(
673 		void **out, const git_diff_file *file,
674 		const char *fullpath, void *payload);
675 	int (*buffer_signature)(
676 		void **out, const git_diff_file *file,
677 		const char *buf, size_t buflen, void *payload);
678 	void (*free_signature)(void *sig, void *payload);
679 	int (*similarity)(int *score, void *siga, void *sigb, void *payload);
680 	void *payload; /* presumably handed to each callback above -- confirm */
681 } git_diff_similarity_metric;
682 
683 /**
684  * Control behavior of rename and copy detection
685  *
686  * These options mostly mimic parameters that can be passed to git-diff.
687  *
688  * - `rename_threshold` is the same as the -M option with a value
689  * - `copy_threshold` is the same as the -C option with a value
690  * - `rename_from_rewrite_threshold` matches the top of the -B option
691  * - `break_rewrite_threshold` matches the bottom of the -B option
692  * - `rename_limit` is the maximum number of matches to consider for
693  *   a particular file.  This is a little different from the `-l` option
694  *   to regular Git because we will still process up to this many matches
695  *   before abandoning the search.
696  *
697  * The `metric` option allows you to plug in a custom similarity metric.
698  * Set it to NULL for the default internal metric which is based on sampling
699  * hashes of ranges of data in the file.  The default metric is a pretty
700  * good similarity approximation that should work fairly well for both text
701  * and binary data, and is pretty fast with fixed memory overhead.
702  */
703 typedef struct {
704 	unsigned int version; /**< struct version; see `GIT_DIFF_FIND_OPTIONS_VERSION` */
705 
706 	/**
707 	 * Combination of git_diff_find_t values (default GIT_DIFF_FIND_BY_CONFIG).
708 	 * NOTE: if you don't explicitly set this, `diff.renames` could be set
709 	 * to false, resulting in `git_diff_find_similar` doing nothing.
710 	 */
711 	uint32_t flags;
712 
713 	/** Similarity to consider a file renamed (default 50) */
714 	uint16_t rename_threshold;
715 	/** Similarity of modified to be eligible rename source (default 50) */
716 	uint16_t rename_from_rewrite_threshold;
717 	/** Similarity to consider a file a copy (default 50) */
718 	uint16_t copy_threshold;
719 	/** Similarity to split modify into delete/add pair (default 60) */
720 	uint16_t break_rewrite_threshold;
721 
722 	/** Maximum similarity sources to examine for a file (somewhat like
723 	 *  git-diff's `-l` option or `diff.renameLimit` config) (default 200)
724 	 */
725 	size_t rename_limit;
726 
727 	/** Pluggable similarity metric; pass NULL to use internal metric */
728 	git_diff_similarity_metric *metric;
729 } git_diff_find_options;
730 
731 #define GIT_DIFF_FIND_OPTIONS_VERSION 1 /**< current version of `git_diff_find_options` */
732 #define GIT_DIFF_FIND_OPTIONS_INIT {GIT_DIFF_FIND_OPTIONS_VERSION} /**< stack initializer; sets `version`, zeroes the rest */
733 
734 /**
735  * Initializes a `git_diff_find_options` with default values. Equivalent to
736  * creating an instance with `GIT_DIFF_FIND_OPTIONS_INIT`.
737  *
738  * @param opts The `git_diff_find_options` struct to initialize
739  * @param version Version of struct; pass `GIT_DIFF_FIND_OPTIONS_VERSION`
740  * @return Zero on success; -1 on failure.
741  */
742 GIT_EXTERN(int) git_diff_find_init_options(
743 	git_diff_find_options *opts,
744 	unsigned int version);
745 
746 /** @name Diff Generator Functions
747  *
748  * These are the functions you would use to create (or destroy) a
749  * git_diff from various objects in a repository.
750  */
751 /**@{*/
752 
753 /**
754  * Deallocate a diff.
755  *
756  * @param diff The previously created diff; it cannot be used after this call.
757  */
758 GIT_EXTERN(void) git_diff_free(git_diff *diff);
759 
760 /**
761  * Create a diff with the difference between two tree objects.
762  *
763  * This is equivalent to `git diff <old-tree> <new-tree>`.
764  *
765  * The first tree will be used for the "old_file" side of the delta and the
766  * second tree will be used for the "new_file" side of the delta.  You can
767  * pass NULL to indicate an empty tree, although it is an error to pass
768  * NULL for both the `old_tree` and `new_tree`.
769  *
770  * @param diff Output pointer to a git_diff to be allocated; free with `git_diff_free`.
771  * @param repo The repository containing the trees.
772  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
773  * @param new_tree A git_tree object to diff to, or NULL for empty tree.
774  * @param opts Structure with options to influence diff or NULL for defaults.
775  */
776 GIT_EXTERN(int) git_diff_tree_to_tree(
777 	git_diff **diff,
778 	git_repository *repo,
779 	git_tree *old_tree,
780 	git_tree *new_tree,
781 	const git_diff_options *opts); /**< can be NULL for defaults */
782 
783 /**
784  * Create a diff between a tree and a repository index.
785  *
786  * This is equivalent to `git diff --cached <treeish>` or, if you pass
787  * the HEAD tree, to `git diff --cached`.
788  *
789  * The tree you pass will be used for the "old_file" side of the delta, and
790  * the index will be used for the "new_file" side of the delta.
791  *
792  * If you pass NULL for the index, then the existing index of the `repo`
793  * will be used.  In this case, the index will be refreshed from disk
794  * (if it has changed) before the diff is generated.
795  *
796  * @param diff Output pointer to a git_diff to be allocated; free with `git_diff_free`.
797  * @param repo The repository containing the tree and index.
798  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
799  * @param index The index to diff with; repo index used if NULL.
800  * @param opts Structure with options to influence diff or NULL for defaults.
801  */
802 GIT_EXTERN(int) git_diff_tree_to_index(
803 	git_diff **diff,
804 	git_repository *repo,
805 	git_tree *old_tree,
806 	git_index *index,
807 	const git_diff_options *opts); /**< can be NULL for defaults */
808 
809 /**
810  * Create a diff between the repository index and the working directory.
811  *
812  * This matches the `git diff` command.  See the note below on
813  * `git_diff_tree_to_workdir` for a discussion of the difference between
814  * `git diff` and `git diff HEAD` and how to emulate a `git diff <treeish>`
815  * using libgit2.
816  *
817  * The index will be used for the "old_file" side of the delta, and the
818  * working directory will be used for the "new_file" side of the delta.
819  *
820  * If you pass NULL for the index, then the existing index of the `repo`
821  * will be used.  In this case, the index will be refreshed from disk
822  * (if it has changed) before the diff is generated.
823  *
824  * @param diff Output pointer to a git_diff to be allocated; free with `git_diff_free`.
825  * @param repo The repository.
826  * @param index The index to diff from; repo index used if NULL.
827  * @param opts Structure with options to influence diff or NULL for defaults.
828  */
829 GIT_EXTERN(int) git_diff_index_to_workdir(
830 	git_diff **diff,
831 	git_repository *repo,
832 	git_index *index,
833 	const git_diff_options *opts); /**< can be NULL for defaults */
834 
835 /**
836  * Create a diff between a tree and the working directory.
837  *
838  * The tree you provide will be used for the "old_file" side of the delta,
839  * and the working directory will be used for the "new_file" side.
840  *
841  * This is not the same as `git diff <treeish>` or `git diff-index
842  * <treeish>`.  Those commands use information from the index, whereas this
843  * function strictly returns the differences between the tree and the files
844  * in the working directory, regardless of the state of the index.  Use
845  * `git_diff_tree_to_workdir_with_index` to emulate those commands.
846  *
847  * To see the difference between this and `git_diff_tree_to_workdir_with_index`,
848  * consider the example of a staged file deletion where the file has then
849  * been put back into the working dir and further modified.  The
850  * tree-to-workdir diff for that file is 'modified', but `git diff` would
851  * show status 'deleted' since there is a staged delete.
852  *
853  * @param diff Output pointer to a git_diff to be allocated; free with `git_diff_free`.
854  * @param repo The repository containing the tree.
855  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
856  * @param opts Structure with options to influence diff or NULL for defaults.
857  */
858 GIT_EXTERN(int) git_diff_tree_to_workdir(
859 	git_diff **diff,
860 	git_repository *repo,
861 	git_tree *old_tree,
862 	const git_diff_options *opts); /**< can be NULL for defaults */
863 
864 /**
865  * Create a diff between a tree and the working directory using index data
866  * to account for staged deletes, tracked files, etc.
867  *
868  * This emulates `git diff <tree>` by diffing the tree to the index and
869  * the index to the working directory and blending the results into a
870  * single diff that includes staged deletes, etc.
871  *
872  * @param diff Output pointer to a git_diff to be allocated; free with `git_diff_free`.
873  * @param repo The repository containing the tree.
874  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
875  * @param opts Structure with options to influence diff or NULL for defaults.
876  */
877 GIT_EXTERN(int) git_diff_tree_to_workdir_with_index(
878 	git_diff **diff,
879 	git_repository *repo,
880 	git_tree *old_tree,
881 	const git_diff_options *opts); /**< can be NULL for defaults */
882 
883 /**
884  * Create a diff with the difference between two index objects.
885  *
886  * The first index will be used for the "old_file" side of the delta and the
887  * second index will be used for the "new_file" side of the delta.
888  *
889  * @param diff Output pointer to a git_diff to be allocated; free with `git_diff_free`.
890  * @param repo The repository containing the indexes.
891  * @param old_index A git_index object to diff from.
892  * @param new_index A git_index object to diff to.
893  * @param opts Structure with options to influence diff or NULL for defaults.
894  */
895 GIT_EXTERN(int) git_diff_index_to_index(
896 	git_diff **diff,
897 	git_repository *repo,
898 	git_index *old_index,
899 	git_index *new_index,
900 	const git_diff_options *opts); /**< can be NULL for defaults */
901 
902 /**
903  * Merge one diff into another.
904  *
905  * This merges items from the "from" list into the "onto" list.  The
906  * resulting diff will have all items that appear in either list.
907  * If an item appears in both lists, then it will be "merged" to appear
908  * as if the old version was from the "onto" list and the new version
909  * is from the "from" list (with the exception that if the item has a
910  * pending DELETE in the middle, then it will show as deleted).
911  *
912  * @param onto Diff to merge into; this is the diff that receives the items.
913  * @param from Diff to merge; it is taken as `const` and not modified.
914  */
915 GIT_EXTERN(int) git_diff_merge(
916 	git_diff *onto,
917 	const git_diff *from);
918 
919 /**
920  * Transform a diff marking file renames, copies, etc.
921  *
922  * This modifies a diff in place, replacing old entries that look
923  * like renames or copies with new entries reflecting those changes.
924  * This also will, if requested, break modified files into add/remove
925  * pairs if the amount of change is above a threshold.
926  *
927  * @param diff Diff to run detection algorithms on; it is modified in place
928  * @param options Control how detection should be run, NULL for defaults
929  * @return 0 on success, -1 on failure
930  */
931 GIT_EXTERN(int) git_diff_find_similar(
932 	git_diff *diff,
933 	const git_diff_find_options *options);
934 
935 /**@}*/
936 
937 
938 /** @name Diff Processor Functions
939  *
940  * These are the functions you apply to a diff to process it
941  * or read it in some way.
942  */
943 /**@{*/
944 
945 /**
946  * Query how many deltas there are in a diff.
947  *
948  * @param diff A git_diff generated by one of the above functions
949  * @return Count of number of deltas in the list
950  */
951 GIT_EXTERN(size_t) git_diff_num_deltas(const git_diff *diff);
952 
953 /**
954  * Query how many deltas there are in a diff, filtered by type.
955  *
956  * This works just like `git_diff_num_deltas()` with an extra parameter
957  * that is a `git_delta_t` and returns just the count of how many deltas
958  * match that particular type.
959  *
960  * @param diff A git_diff generated by one of the above functions
961  * @param type A git_delta_t value to filter the count
962  * @return Count of number of deltas matching delta_t type
963  */
964 GIT_EXTERN(size_t) git_diff_num_deltas_of_type(
965 	const git_diff *diff, git_delta_t type);
966 
967 /**
968  * Return the diff delta for an entry in the diff list.
969  *
970  * The `git_diff_delta` pointer points to internal data and you do not
971  * have to release it when you are done with it.  It will go away when
972  * the `git_diff` (or any associated `git_patch`) goes away.
973  *
974  * Note that the flags on the delta related to whether it has binary
975  * content or not may not be set if there are no attributes set for the
976  * file and there has been no reason to load the file data at this point.
977  * For now, if you need those flags to be up to date, your only option is
978  * to either use `git_diff_foreach` or create a `git_patch`.
979  *
980  * @param diff Diff list object
981  * @param idx Index into diff list
982  * @return Pointer to git_diff_delta (or NULL if `idx` out of range)
983  */
984 GIT_EXTERN(const git_diff_delta *) git_diff_get_delta(
985 	const git_diff *diff, size_t idx);
986 
987 /**
988  * Check if deltas are sorted case sensitively or insensitively.
989  *
990  * @param diff The diff to check
991  * @return 0 if case sensitive, 1 if case is ignored
992  */
993 GIT_EXTERN(int) git_diff_is_sorted_icase(const git_diff *diff);
994 
995 /**
996  * Loop over all deltas in a diff issuing callbacks.
997  *
998  * This will iterate through all of the files described in a diff.  You
999  * should provide a file callback to learn about each file.
1000  *
1001  * The "hunk" and "line" callbacks are optional, and the text diff of the
1002  * files will only be calculated if they are not NULL.  Of course, these
1003  * callbacks will not be invoked for binary files in the diff or for
1004  * files whose only change is a file mode change.
1005  *
1006  * Returning a non-zero value from any of the callbacks will terminate
1007  * the iteration and return the value to the user.
1008  *
1009  * @param diff A git_diff generated by one of the above functions.
1010  * @param file_cb Callback function to make per file in the diff.
1011  * @param binary_cb Optional callback to make for binary files.
1012  * @param hunk_cb Optional callback to make per hunk of text diff.  This
1013  *                callback is called to describe a range of lines in the
1014  *                diff.  It will not be issued for binary files.
1015  * @param line_cb Optional callback to make per line of diff text.  This
1016  *                same callback will be made for context lines, added, and
1017  *                removed lines, and even for a deleted trailing newline.
1018  * @param payload Reference pointer that will be passed to your callbacks.
1019  * @return 0 on success, non-zero callback return value, or error code
1020  */
1021 GIT_EXTERN(int) git_diff_foreach(
1022 	git_diff *diff,
1023 	git_diff_file_cb file_cb,
1024 	git_diff_binary_cb binary_cb,
1025 	git_diff_hunk_cb hunk_cb,
1026 	git_diff_line_cb line_cb,
1027 	void *payload);
1028 
1029 /**
1030  * Look up the single-character abbreviation for a delta status code.
1031  *
1032  * When you run `git diff --name-status` it uses single-letter codes in
1033  * the output such as 'A' for added, 'D' for deleted, 'M' for modified,
1034  * etc.  This function converts a git_delta_t value into these letters for
1035  * your own purposes.  GIT_DELTA_UNTRACKED will return a space (i.e. ' ').
1036  *
1037  * @param status The git_delta_t value to look up
1038  * @return The single-character label for that code
1039  */
1040 GIT_EXTERN(char) git_diff_status_char(git_delta_t status);
1041 
1042 /**
1043  * Possible output formats for diff data.
1044  */
1045 typedef enum {
1046 	GIT_DIFF_FORMAT_PATCH        = 1u, /**< full git diff */
1047 	GIT_DIFF_FORMAT_PATCH_HEADER = 2u, /**< just the file headers of patch */
1048 	GIT_DIFF_FORMAT_RAW          = 3u, /**< like git diff --raw */
1049 	GIT_DIFF_FORMAT_NAME_ONLY    = 4u, /**< like git diff --name-only */
1050 	GIT_DIFF_FORMAT_NAME_STATUS  = 5u, /**< like git diff --name-status */
1051 } git_diff_format_t;
1052 
1053 /**
1054  * Iterate over a diff generating formatted text output.
1055  *
1056  * Returning a non-zero value from the callback will terminate the
1057  * iteration and return the non-zero value to the caller.
1058  *
1059  * @param diff A git_diff generated by one of the above functions.
1060  * @param format A git_diff_format_t value to pick the text format.
1061  * @param print_cb Callback to make per line of diff text.
1062  * @param payload Reference pointer that will be passed to your callback.
1063  * @return 0 on success, non-zero callback return value, or error code
1064  */
1065 GIT_EXTERN(int) git_diff_print(
1066 	git_diff *diff,
1067 	git_diff_format_t format,
1068 	git_diff_line_cb print_cb,
1069 	void *payload);
1070 
1071 /**
1072  * Produce the complete formatted text output from a diff into a
1073  * buffer.
1074  *
1075  * @param out A pointer to a user-allocated git_buf that will
1076  *            contain the diff text
1077  * @param diff A git_diff generated by one of the above functions.
1078  * @param format A git_diff_format_t value to pick the text format.
1079  * @return 0 on success or an error code
1080  */
1081 GIT_EXTERN(int) git_diff_to_buf(
1082 	git_buf *out,
1083 	git_diff *diff,
1084 	git_diff_format_t format);
1085 
1086 /**@}*/
1087 
1088 
1089 /*
1090  * Misc
1091  */
1092 
1093 /**
1094  * Directly run a diff on two blobs.
1095  *
1096  * Compared to a file, a blob lacks some contextual information. As such,
1097  * the `git_diff_file` given to the callback will have some fake data; i.e.
1098  * `mode` will be 0 and `path` will be NULL.
1099  *
1100  * NULL is allowed for either `old_blob` or `new_blob` and will be treated
1101  * as an empty blob, with the `oid` set to NULL in the `git_diff_file` data.
1102  * Passing NULL for both blobs is a no-op; no callbacks will be made at all.
1103  *
1104  * We do run a binary content check on the blob content and if either blob
1105  * looks like binary data, the `git_diff_delta` binary attribute will be set
1106  * to 1 and no calls to `hunk_cb` or `line_cb` will be made (unless you pass
1107  * `GIT_DIFF_FORCE_TEXT` of course).
1108  *
1109  * @param old_blob Blob for old side of diff, or NULL for empty blob
1110  * @param old_as_path Treat old blob as if it had this filename; can be NULL
1111  * @param new_blob Blob for new side of diff, or NULL for empty blob
1112  * @param new_as_path Treat new blob as if it had this filename; can be NULL
1113  * @param options Options for diff, or NULL for default options
1114  * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1115  * @param binary_cb Callback for binary files; can be NULL
1116  * @param hunk_cb Callback for each hunk in diff; can be NULL
1117  * @param line_cb Callback for each line in diff; can be NULL
1118  * @param payload Payload passed to each callback function
1119  * @return 0 on success, non-zero callback return value, or error code
1120  */
1121 GIT_EXTERN(int) git_diff_blobs(
1122 	const git_blob *old_blob,
1123 	const char *old_as_path,
1124 	const git_blob *new_blob,
1125 	const char *new_as_path,
1126 	const git_diff_options *options,
1127 	git_diff_file_cb file_cb,
1128 	git_diff_binary_cb binary_cb,
1129 	git_diff_hunk_cb hunk_cb,
1130 	git_diff_line_cb line_cb,
1131 	void *payload);
1132 
1133 /**
1134  * Directly run a diff between a blob and a buffer.
1135  *
1136  * As with `git_diff_blobs`, comparing a blob and buffer lacks some context,
1137  * so the `git_diff_file` parameters to the callbacks will be faked a la the
1138  * rules for `git_diff_blobs()`.
1139  *
1140  * Passing NULL for `old_blob` will be treated as an empty blob (i.e. the
1141  * `file_cb` will be invoked with GIT_DELTA_ADDED and the diff will be the
1142  * entire content of the buffer added).  Passing NULL to the buffer will do
1143  * the reverse, with GIT_DELTA_DELETED and blob content removed.
1144  *
1145  * @param old_blob Blob for old side of diff, or NULL for empty blob
1146  * @param old_as_path Treat old blob as if it had this filename; can be NULL
1147  * @param buffer Raw data for new side of diff, or NULL for empty
1148  * @param buffer_len Length of raw data for new side of diff
1149  * @param buffer_as_path Treat buffer as if it had this filename; can be NULL
1150  * @param options Options for diff, or NULL for default options
1151  * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1152  * @param binary_cb Callback for binary files; can be NULL
1153  * @param hunk_cb Callback for each hunk in diff; can be NULL
1154  * @param line_cb Callback for each line in diff; can be NULL
1155  * @param payload Payload passed to each callback function
1156  * @return 0 on success, non-zero callback return value, or error code
1157  */
1158 GIT_EXTERN(int) git_diff_blob_to_buffer(
1159 	const git_blob *old_blob,
1160 	const char *old_as_path,
1161 	const char *buffer,
1162 	size_t buffer_len,
1163 	const char *buffer_as_path,
1164 	const git_diff_options *options,
1165 	git_diff_file_cb file_cb,
1166 	git_diff_binary_cb binary_cb,
1167 	git_diff_hunk_cb hunk_cb,
1168 	git_diff_line_cb line_cb,
1169 	void *payload);
1170 
1171 /**
1172  * Directly run a diff between two buffers.
1173  *
1174  * Even more than with `git_diff_blobs`, comparing two buffers lacks
1175  * context, so the `git_diff_file` parameters to the callbacks will be
1176  * faked a la the rules for `git_diff_blobs()`.
1177  *
1178  * @param old_buffer Raw data for old side of diff, or NULL for empty
1179  * @param old_len Length of the raw data for old side of the diff
1180  * @param old_as_path Treat old buffer as if it had this filename; can be NULL
1181  * @param new_buffer Raw data for new side of diff, or NULL for empty
1182  * @param new_len Length of raw data for new side of diff
1183  * @param new_as_path Treat new buffer as if it had this filename; can be NULL
1184  * @param options Options for diff, or NULL for default options
1185  * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1186  * @param binary_cb Callback for binary files; can be NULL
1187  * @param hunk_cb Callback for each hunk in diff; can be NULL
1188  * @param line_cb Callback for each line in diff; can be NULL
1189  * @param payload Payload passed to each callback function
1190  * @return 0 on success, non-zero callback return value, or error code
1191  */
1192 GIT_EXTERN(int) git_diff_buffers(
1193 	const void *old_buffer,
1194 	size_t old_len,
1195 	const char *old_as_path,
1196 	const void *new_buffer,
1197 	size_t new_len,
1198 	const char *new_as_path,
1199 	const git_diff_options *options,
1200 	git_diff_file_cb file_cb,
1201 	git_diff_binary_cb binary_cb,
1202 	git_diff_hunk_cb hunk_cb,
1203 	git_diff_line_cb line_cb,
1204 	void *payload);
1205 
1206 /**
1207  * Read the contents of a git patch file into a `git_diff` object.
1208  *
1209  * The diff object produced is similar to the one that would be
1210  * produced if you actually produced it computationally by comparing
1211  * two trees; however, there may be subtle differences.  For example,
1212  * a patch file likely contains abbreviated object IDs, so the
1213  * object IDs in a `git_diff_delta` produced by this function will
1214  * also be abbreviated.
1215  *
1216  * This function will only read patch files created by a git
1217  * implementation; it will not read unified diffs produced by
1218  * the `diff` program, nor any other types of patch files.
1219  *
1220  * @param out Output pointer to a git_diff to be allocated; free with `git_diff_free`.
1221  * @param content The contents of a patch file
1222  * @param content_len The length of the patch file contents
1223  * @return 0 or an error code
1224  */
1225 GIT_EXTERN(int) git_diff_from_buffer(
1226 	git_diff **out,
1227 	const char *content,
1228 	size_t content_len);
1229 
1230 /**
1231  * This is an opaque structure which is allocated by `git_diff_get_stats()`.
1232  * You are responsible for releasing the object memory when done, using the
1233  * `git_diff_stats_free()` function.
1234  */
1235 typedef struct git_diff_stats git_diff_stats;
1236 
1237 /**
1238  * Formatting options for diff stats.
1239  */
1240 typedef enum {
1241 	/** No stats */
1242 	GIT_DIFF_STATS_NONE = 0,
1243 
1244 	/** Full statistics, equivalent of `--stat` */
1245 	GIT_DIFF_STATS_FULL = (1u << 0),
1246 
1247 	/** Short statistics, equivalent of `--shortstat` */
1248 	GIT_DIFF_STATS_SHORT = (1u << 1),
1249 
1250 	/** Number statistics, equivalent of `--numstat` */
1251 	GIT_DIFF_STATS_NUMBER = (1u << 2),
1252 
1253 	/** Extended header information such as creations, renames and mode changes, equivalent of `--summary` */
1254 	GIT_DIFF_STATS_INCLUDE_SUMMARY = (1u << 3),
1255 } git_diff_stats_format_t;
1256 
1257 /**
1258  * Accumulate diff statistics for all patches.
1259  *
1260  * @param out Structure containing the diff statistics; free with `git_diff_stats_free`.
1261  * @param diff A git_diff generated by one of the above functions.
1262  * @return 0 on success; non-zero on error
1263  */
1264 GIT_EXTERN(int) git_diff_get_stats(
1265 	git_diff_stats **out,
1266 	git_diff *diff);
1267 
1268 /**
1269  * Get the total number of files changed in a diff.
1270  *
1271  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1272  * @return total number of files changed in the diff
1273  */
1274 GIT_EXTERN(size_t) git_diff_stats_files_changed(
1275 	const git_diff_stats *stats);
1276 
1277 /**
1278  * Get the total number of insertions in a diff.
1279  *
1280  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1281  * @return total number of insertions in the diff
1282  */
1283 GIT_EXTERN(size_t) git_diff_stats_insertions(
1284 	const git_diff_stats *stats);
1285 
1286 /**
1287  * Get the total number of deletions in a diff.
1288  *
1289  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1290  * @return total number of deletions in the diff
1291  */
1292 GIT_EXTERN(size_t) git_diff_stats_deletions(
1293 	const git_diff_stats *stats);
1294 
1295 /**
1296  * Print diff statistics to a `git_buf`.
1297  *
1298  * @param out buffer to store the formatted diff statistics in.
1299  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1300  * @param format Formatting option.
1301  * @param width Target width for output (only affects GIT_DIFF_STATS_FULL)
1302  * @return 0 on success; non-zero on error
1303  */
1304 GIT_EXTERN(int) git_diff_stats_to_buf(
1305 	git_buf *out,
1306 	const git_diff_stats *stats,
1307 	git_diff_stats_format_t format,
1308 	size_t width);
1309 
1310 /**
1311  * Deallocate a `git_diff_stats`.
1312  *
1313  * @param stats The previously created statistics object;
1314  * must not be used after it is freed.
1315  */
1316 GIT_EXTERN(void) git_diff_stats_free(git_diff_stats *stats);
1317 
1318 /**
1319  * Formatting options for diff e-mail generation.
1320  */
1321 typedef enum {
1322 	/** Normal patch, the default */
1323 	GIT_DIFF_FORMAT_EMAIL_NONE = 0,
1324 
1325 	/** Don't insert "[PATCH]" in the subject header */
1326 	GIT_DIFF_FORMAT_EMAIL_EXCLUDE_SUBJECT_PATCH_MARKER = (1 << 0),
1327 
1328 } git_diff_format_email_flags_t;
1329 
1330 /**
1331  * Options for controlling the formatting of the generated e-mail.
1332  */
1333 typedef struct {
1334 	unsigned int version; /**< struct version; see GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION */
1335 
1336 	git_diff_format_email_flags_t flags; /**< see `git_diff_format_email_flags_t` */
1337 
1338 	/** This patch number */
1339 	size_t patch_no;
1340 
1341 	/** Total number of patches in this series */
1342 	size_t total_patches;
1343 
1344 	/** id to use for the commit */
1345 	const git_oid *id;
1346 
1347 	/** Summary of the change */
1348 	const char *summary;
1349 
1350 	/** Commit message's body */
1351 	const char *body;
1352 
1353 	/** Author of the change */
1354 	const git_signature *author;
1355 } git_diff_format_email_options;
1356 
1357 #define GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION 1 /**< current version of `git_diff_format_email_options` */
1358 #define GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT {GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION, 0, 1, 1, NULL, NULL, NULL, NULL} /**< static initializer: no flags, patch 1 of 1, all pointers NULL */
1359 
1360 /**
1361  * Create an e-mail ready patch from a diff.
1362  *
1363  * @param out buffer to store the e-mail patch in
1364  * @param diff the diff to create the patch from
1365  * @param opts structure with options to influence content and formatting.
1366  * @return 0 or an error code
1367  */
1368 GIT_EXTERN(int) git_diff_format_email(
1369 	git_buf *out,
1370 	git_diff *diff,
1371 	const git_diff_format_email_options *opts);
1372 
1373 /**
1374  * Create an e-mail ready patch for a commit.
1375  *
1376  * Does not support creating patches for merge commits (yet).
1377  *
1378  * @param out buffer to store the e-mail patch in
1379  * @param repo repository containing the commit
1380  * @param commit pointer to the commit
1381  * @param patch_no patch number of the commit
1382  * @param total_patches total number of patches in the patch set
1383  * @param flags determines the formatting of the e-mail
1384  * @param diff_opts structure with options to influence diff or NULL for defaults.
1385  * @return 0 or an error code
1386  */
1387 GIT_EXTERN(int) git_diff_commit_as_email(
1388 	git_buf *out,
1389 	git_repository *repo,
1390 	git_commit *commit,
1391 	size_t patch_no,
1392 	size_t total_patches,
1393 	git_diff_format_email_flags_t flags,
1394 	const git_diff_options *diff_opts);
1395 
1396 /**
1397  * Initialize a `git_diff_format_email_options` struct with default values.
1398  *
1399  * Equivalent to creating an instance with GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT.
1400  *
1401  * @param opts The `git_diff_format_email_options` struct to initialize
1402  * @param version Version of struct; pass `GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION`
1403  * @return Zero on success; -1 on failure.
1404  */
1405 GIT_EXTERN(int) git_diff_format_email_init_options(
1406 	git_diff_format_email_options *opts,
1407 	unsigned int version);
1408 
1409 /**
1410  * Patch ID options structure
1411  *
1412  * Initialize with the `GIT_DIFF_PATCHID_OPTIONS_INIT` macro to
1413  * correctly set the default values and version.
1414  */
1415 typedef struct git_diff_patchid_options {
1416 	unsigned int version; /**< struct version; see GIT_DIFF_PATCHID_OPTIONS_VERSION */
1417 } git_diff_patchid_options;
1418 
1419 #define GIT_DIFF_PATCHID_OPTIONS_VERSION 1 /**< current version of `git_diff_patchid_options` */
1420 #define GIT_DIFF_PATCHID_OPTIONS_INIT { GIT_DIFF_PATCHID_OPTIONS_VERSION } /**< static initializer; sets `version` */
1421 
1422 /**
1423  * Initialize a `git_diff_patchid_options` structure.
1424  *
1425  * Initializes the structure with default values. Equivalent to
1426  * creating an instance with `GIT_DIFF_PATCHID_OPTIONS_INIT`.
1427  */
1428 GIT_EXTERN(int) git_diff_patchid_init_options(
1429 	git_diff_patchid_options *opts, /**< the struct to initialize */
1430 	unsigned int version); /**< struct version; presumably `GIT_DIFF_PATCHID_OPTIONS_VERSION`, as with the other init functions */
1431 
1432 /**
1433  * Calculate the patch ID for the given patch.
1434  *
1435  * Calculate a stable patch ID for the given patch by summing the
1436  * hash of the file diffs, ignoring whitespace and line numbers.
1437  * This can be used to derive whether two diffs are the same with
1438  * a high probability.
1439  *
1440  * Currently, this function only calculates stable patch IDs, as
1441  * defined in git-patch-id(1), and should in fact generate the
1442  * same IDs as the upstream git project does.
1443  *
1444  * @param out Pointer where the calculated patch ID should be
1445  *  stored
1446  * @param diff The diff to calculate the ID for
1447  * @param opts Options for how to calculate the patch ID. This is
1448  *  intended for future changes, as currently no options are
1449  *  available.
1450  * @return 0 on success, an error code otherwise.
1451  */
1452 GIT_EXTERN(int) git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts);
1453 
1454 GIT_END_DECL
1455 
1456 /** @} */
1457 
1458 #endif
1459