1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #ifndef INCLUDE_git_diff_h__
8 #define INCLUDE_git_diff_h__
9 
10 #include "common.h"
11 #include "types.h"
12 #include "oid.h"
13 #include "tree.h"
14 #include "refs.h"
15 
16 /**
17  * @file git2/diff.h
18  * @brief Git tree and file differencing routines.
19  * @ingroup Git
20  * @{
21  */
22 GIT_BEGIN_DECL
23 
24 /**
25  * Flags for diff options.  A combination of these flags can be passed
26  * in via the `flags` value in the `git_diff_options`.
27  */
28 typedef enum {
29 	/** Normal diff, the default */
30 	GIT_DIFF_NORMAL = 0,
31 
32 	/*
33 	 * Options controlling which files will be in the diff
34 	 */
35 
36 	/** Reverse the sides of the diff */
37 	GIT_DIFF_REVERSE = (1u << 0),
38 
39 	/** Include ignored files in the diff */
40 	GIT_DIFF_INCLUDE_IGNORED = (1u << 1),
41 
42 	/** Even with GIT_DIFF_INCLUDE_IGNORED, an entire ignored directory
43 	 *  will be marked with only a single entry in the diff; this flag
44 	 *  adds all files under the directory as IGNORED entries, too.
45 	 */
46 	GIT_DIFF_RECURSE_IGNORED_DIRS = (1u << 2),
47 
48 	/** Include untracked files in the diff */
49 	GIT_DIFF_INCLUDE_UNTRACKED = (1u << 3),
50 
51 	/** Even with GIT_DIFF_INCLUDE_UNTRACKED, an entire untracked
52 	 *  directory will be marked with only a single entry in the diff
53 	 *  (a la what core Git does in `git status`); this flag adds *all*
54 	 *  files under untracked directories as UNTRACKED entries, too.
55 	 */
56 	GIT_DIFF_RECURSE_UNTRACKED_DIRS = (1u << 4),
57 
58 	/** Include unmodified files in the diff */
59 	GIT_DIFF_INCLUDE_UNMODIFIED = (1u << 5),
60 
61 	/** Normally, a type change between files will be converted into a
62 	 *  DELETED record for the old and an ADDED record for the new; this
63 	 *  option enables the generation of TYPECHANGE delta records.
64 	 */
65 	GIT_DIFF_INCLUDE_TYPECHANGE = (1u << 6),
66 
67 	/** Even with GIT_DIFF_INCLUDE_TYPECHANGE, blob->tree changes still
68 	 *  generally show as a DELETED blob.  This flag tries to correctly
69 	 *  label blob->tree transitions as TYPECHANGE records with new_file's
70 	 *  mode set to tree.  Note: the tree SHA will not be available.
71 	 */
72 	GIT_DIFF_INCLUDE_TYPECHANGE_TREES = (1u << 7),
73 
74 	/** Ignore file mode changes */
75 	GIT_DIFF_IGNORE_FILEMODE = (1u << 8),
76 
77 	/** Treat all submodules as unmodified */
78 	GIT_DIFF_IGNORE_SUBMODULES = (1u << 9),
79 
80 	/** Use case insensitive filename comparisons */
81 	GIT_DIFF_IGNORE_CASE = (1u << 10),
82 
83 	/** May be combined with `GIT_DIFF_IGNORE_CASE` to specify that a file
84 	 *  that has changed case will be returned as an add/delete pair.
85 	 */
86 	GIT_DIFF_INCLUDE_CASECHANGE = (1u << 11),
87 
88 	/** If the pathspec is set in the diff options, this flag indicates
89 	 *  that the paths will be treated as literal paths instead of
90 	 *  fnmatch patterns.  Each path in the list must either be a full
91 	 *  path to a file or a directory.  (A trailing slash indicates that
92 	 *  the path will _only_ match a directory).  If a directory is
93 	 *  specified, all children will be included.
94 	 */
95 	GIT_DIFF_DISABLE_PATHSPEC_MATCH = (1u << 12),
96 
97 	/** Disable updating of the `binary` flag in delta records.  This is
98 	 *  useful when iterating over a diff if you don't need hunk and data
99 	 *  callbacks and want to avoid having to load the file completely.
100 	 */
101 	GIT_DIFF_SKIP_BINARY_CHECK = (1u << 13),
102 
103 	/** When diff finds an untracked directory, to match the behavior of
104 	 *  core Git, it scans the contents for IGNORED and UNTRACKED files.
105 	 *  If *all* contents are IGNORED, then the directory is IGNORED; if
106 	 *  any contents are not IGNORED, then the directory is UNTRACKED.
107 	 *  This is extra work that may not matter in many cases.  This flag
108 	 *  turns off that scan and immediately labels an untracked directory
109 	 *  as UNTRACKED (changing the behavior to not match core Git).
110 	 */
111 	GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS = (1u << 14),
112 
113 	/** When diff finds a file in the working directory with stat
114 	 * information different from the index, but the OID ends up being the
115 	 * same, write the correct stat information into the index.  Note:
116 	 * without this flag, diff will always leave the index untouched.
117 	 */
118 	GIT_DIFF_UPDATE_INDEX = (1u << 15),
119 
120 	/** Include unreadable files in the diff */
121 	GIT_DIFF_INCLUDE_UNREADABLE = (1u << 16),
122 
123 	/** Include unreadable files in the diff, as UNTRACKED entries */
124 	GIT_DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED = (1u << 17),
125 
126 	/*
127 	 * Options controlling how output will be generated
128 	 */
129 
130 	/** Use a heuristic that takes indentation and whitespace into account
131 	 * which generally can produce better diffs when dealing with ambiguous
132 	 * diff hunks.
133 	 */
134 	GIT_DIFF_INDENT_HEURISTIC = (1u << 18),
135 
136 	/** Treat all files as text, disabling binary attributes & detection */
137 	GIT_DIFF_FORCE_TEXT = (1u << 20),
138 	/** Treat all files as binary, disabling text diffs */
139 	GIT_DIFF_FORCE_BINARY = (1u << 21),
140 
141 	/** Ignore all whitespace */
142 	GIT_DIFF_IGNORE_WHITESPACE = (1u << 22),
143 	/** Ignore changes in amount of whitespace */
144 	GIT_DIFF_IGNORE_WHITESPACE_CHANGE = (1u << 23),
145 	/** Ignore whitespace at end of line */
146 	GIT_DIFF_IGNORE_WHITESPACE_EOL = (1u << 24),
147 
148 	/** When generating patch text, include the content of untracked
149 	 *  files.  This automatically turns on GIT_DIFF_INCLUDE_UNTRACKED but
150 	 *  it does not turn on GIT_DIFF_RECURSE_UNTRACKED_DIRS.  Add that
151 	 *  flag if you want the content of every single UNTRACKED file.
152 	 */
153 	GIT_DIFF_SHOW_UNTRACKED_CONTENT = (1u << 25),
154 
155 	/** When generating output, include the names of unmodified files if
156 	 *  they are included in the git_diff.  Normally these are skipped in
157 	 *  the formats that list files (e.g. name-only, name-status, raw).
158 	 *  Even with this, these will not be included in patch format.
159 	 */
160 	GIT_DIFF_SHOW_UNMODIFIED = (1u << 26),
161 
162 	/** Use the "patience diff" algorithm */
163 	GIT_DIFF_PATIENCE = (1u << 28),
164 	/** Take extra time to find minimal diff */
165 	GIT_DIFF_MINIMAL = (1u << 29),
166 
167 	/** Include the necessary deflate / delta information so that `git-apply`
168 	 *  can apply given diff information to binary files.
169 	 */
170 	GIT_DIFF_SHOW_BINARY = (1u << 30),
171 } git_diff_option_t;
172 
173 /**
174  * The diff object that contains all individual file deltas.
175  *
176  * A `diff` represents the cumulative list of differences between two
177  * snapshots of a repository (possibly filtered by a set of file name
178  * patterns).
179  *
180  * Calculating diffs is generally done in two phases: building a list of
181  * diffs then traversing it. This makes it easier to share logic across
182  * the various types of diffs (tree vs tree, workdir vs index, etc.), and
183  * also allows you to insert optional diff post-processing phases,
184  * such as rename detection, in between the steps. When you are done with
185  * a diff object, it must be freed.
186  *
187  * This is an opaque structure which will be allocated by one of the diff
188  * generator functions below (such as `git_diff_tree_to_tree`). You are
189  * responsible for releasing the object memory when done, using the
190  * `git_diff_free()` function.
191  *
192  */
193 typedef struct git_diff git_diff;
194 
195 /**
196  * Flags for the delta object and the file objects on each side.
197  *
198  * These flags are used for both the `flags` value of the `git_diff_delta`
199  * and the flags for the `git_diff_file` objects representing the old and
200  * new sides of the delta.  Values outside of this public range should be
201  * considered reserved for internal or future use.
202  */
203 typedef enum {
204 	GIT_DIFF_FLAG_BINARY     = (1u << 0), /**< file(s) treated as binary data */
205 	GIT_DIFF_FLAG_NOT_BINARY = (1u << 1), /**< file(s) treated as text data */
206 	GIT_DIFF_FLAG_VALID_ID   = (1u << 2), /**< `id` value is known to be correct */
207 	GIT_DIFF_FLAG_EXISTS     = (1u << 3), /**< file exists on this side of the delta */
208 } git_diff_flag_t;
209 
210 /**
211  * What type of change is described by a git_diff_delta?
212  *
213  * `GIT_DELTA_RENAMED` and `GIT_DELTA_COPIED` will only show up if you run
214  * `git_diff_find_similar()` on the diff object.
215  *
216  * `GIT_DELTA_TYPECHANGE` only shows up given `GIT_DIFF_INCLUDE_TYPECHANGE`
217  * in the option flags (otherwise type changes will be split into ADDED /
218  * DELETED pairs).
219  */
220 typedef enum {
221 	GIT_DELTA_UNMODIFIED = 0,  /**< no changes */
222 	GIT_DELTA_ADDED = 1,	   /**< entry does not exist in old version */
223 	GIT_DELTA_DELETED = 2,	   /**< entry does not exist in new version */
224 	GIT_DELTA_MODIFIED = 3,    /**< entry content changed between old and new */
225 	GIT_DELTA_RENAMED = 4,     /**< entry was renamed between old and new */
226 	GIT_DELTA_COPIED = 5,      /**< entry was copied from another old entry */
227 	GIT_DELTA_IGNORED = 6,     /**< entry is an ignored item in workdir */
228 	GIT_DELTA_UNTRACKED = 7,   /**< entry is an untracked item in workdir */
229 	GIT_DELTA_TYPECHANGE = 8,  /**< type of entry changed between old and new */
230 	GIT_DELTA_UNREADABLE = 9,  /**< entry is unreadable */
231 	GIT_DELTA_CONFLICTED = 10, /**< entry in the index is conflicted */
232 } git_delta_t;
233 
234 /**
235  * Description of one side of a delta.
236  *
237  * Although this is called a "file", it could represent a file, a symbolic
238  * link, a submodule commit id, or even a tree (although that only if you
239  * are tracking type changes or ignored/untracked directories).
240  *
241  * The `id` is the `git_oid` of the item.  If the entry represents an
242  * absent side of a diff (e.g. the `old_file` of a `GIT_DELTA_ADDED` delta),
243  * then the oid will be zeroes.
244  *
245  * `path` is the NUL-terminated path to the entry relative to the working
246  * directory of the repository.
247  *
248  * `size` is the size of the entry in bytes.
249  *
250  * `flags` is a combination of the `git_diff_flag_t` types
251  *
252  * `mode` is, roughly, the stat() `st_mode` value for the item.  This will
253  * be restricted to one of the `git_filemode_t` values.
254  *
255  * The `id_abbrev` represents the known length of the `id` field, when
256  * converted to a hex string.  It is generally `GIT_OID_HEXSZ`, unless this
257  * delta was created from reading a patch file, in which case it may be
258  * abbreviated to something reasonable, like 7 characters.
259  */
260 typedef struct {
261 	git_oid            id;        /**< oid of the item; all zeroes for an absent side */
262 	const char        *path;      /**< NUL-terminated path, relative to the working directory */
263 	git_object_size_t  size;      /**< size of the entry in bytes */
264 	uint32_t           flags;     /**< combination of `git_diff_flag_t` values */
265 	uint16_t           mode;      /**< roughly the stat() `st_mode`; one of the `git_filemode_t` values */
266 	uint16_t           id_abbrev; /**< known length of `id` when converted to a hex string */
267 } git_diff_file;
268 
269 /**
270  * Description of changes to one entry.
271  *
272  * A `delta` is a file pair with an old and new revision.  The old version
273  * may be absent if the file was just created and the new version may be
274  * absent if the file was deleted.  A diff is mostly just a list of deltas.
275  *
276  * When iterating over a diff, this will be passed to most callbacks and
277  * you can use the contents to understand exactly what has changed.
278  *
279  * The `old_file` represents the "from" side of the diff and the `new_file`
280  * represents the "to" side of the diff.  What those mean depends on the
281  * function that was used to generate the diff and will be documented below.
282  * You can also use the `GIT_DIFF_REVERSE` flag to flip it around.
283  *
284  * Although the two sides of the delta are named "old_file" and "new_file",
285  * they actually may correspond to entries that represent a file, a symbolic
286  * link, a submodule commit id, or even a tree (if you are tracking type
287  * changes or ignored/untracked directories).
288  *
289  * Under some circumstances, in the name of efficiency, not all fields will
290  * be filled in, but we generally try to fill in as much as possible.  One
291  * example is that the "flags" field may not have either the `BINARY` or the
292  * `NOT_BINARY` flag set to avoid examining file contents if you do not pass
293  * in hunk and/or line callbacks to the diff foreach iteration function.  It
294  * will just use the git attributes for those files.
295  *
296  * The similarity score is zero unless you call `git_diff_find_similar()`
297  * which does a similarity analysis of files in the diff.  Use that
298  * function to do rename and copy detection, and to split heavily modified
299  * files into add/delete pairs.  After that call, deltas with a status of
300  * GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
301  * between 0 and 100 indicating how similar the old and new sides are.
302  *
303  * If you ask `git_diff_find_similar` to find heavily modified files to
304  * break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
305  * records may have a non-zero similarity score if the self-similarity is
306  * below the split threshold.  To display this value like core Git, invert
307  * the score (a la `printf("M%03d", 100 - delta->similarity)`).
308  */
309 typedef struct {
310 	git_delta_t   status;      /**< type of change described by this delta */
311 	uint32_t      flags;	   /**< git_diff_flag_t values */
312 	uint16_t      similarity;  /**< for RENAMED and COPIED, value 0-100 */
313 	uint16_t      nfiles;	   /**< number of files in this delta */
314 	git_diff_file old_file;    /**< the "from" side of the diff */
315 	git_diff_file new_file;    /**< the "to" side of the diff */
316 } git_diff_delta;
317 
318 /**
319  * Diff notification callback function.
320  *
321  * The callback will be called for each file, just before the `git_diff_delta`
322  * gets inserted into the diff.
323  *
324  * When the callback:
325  * - returns < 0, the diff process will be aborted.
326  * - returns > 0, the delta will not be inserted into the diff, but the
327  *		diff process continues.
328  * - returns 0, the delta is inserted into the diff, and the diff process
329  *		continues.
330  */
331 typedef int GIT_CALLBACK(git_diff_notify_cb)(
332 	const git_diff *diff_so_far,        /**< diff the delta is about to be inserted into */
333 	const git_diff_delta *delta_to_add, /**< delta about to be inserted */
334 	const char *matched_pathspec,       /**< NOTE(review): presumably the pathspec entry that matched this delta -- confirm */
335 	void *payload);                     /**< NOTE(review): presumably the `payload` from `git_diff_options` -- confirm */
336 
337 /**
338  * Diff progress callback.
339  *
340  * Called before each file comparison.
341  *
342  * @param diff_so_far The diff being generated.
343  * @param old_path The path to the old file or NULL.
344  * @param new_path The path to the new file or NULL.
345  * @return Non-zero to abort the diff.
346  */
347 typedef int GIT_CALLBACK(git_diff_progress_cb)(
348 	const git_diff *diff_so_far,
349 	const char *old_path,
350 	const char *new_path,
351 	void *payload); /**< NOTE(review): presumably the `payload` from `git_diff_options` -- confirm */
352 
353 /**
354  * Structure describing options about how the diff should be executed.
355  *
356  * Setting all values of the structure to zero will yield the default
357  * values.  Similarly, passing NULL for the options structure will
358  * give the defaults.  The default values are marked below.
359  *
360  */
361 typedef struct {
362 	unsigned int version;      /**< version for the struct */
363 
364 	/**
365 	 * A combination of `git_diff_option_t` values above.
366 	 * Defaults to GIT_DIFF_NORMAL
367 	 */
368 	uint32_t flags;
369 
370 	/* options controlling which files are in the diff */
371 
372 	/** Overrides the submodule ignore setting for all submodules in the diff. */
373 	git_submodule_ignore_t ignore_submodules;
374 
375 	/**
376 	 * An array of paths / fnmatch patterns to constrain diff.
377 	 * All paths are included by default.
378 	 */
379 	git_strarray       pathspec;
380 
381 	/**
382 	 * An optional callback function, notifying the consumer of changes to
383 	 * the diff as new deltas are added.
384 	 */
385 	git_diff_notify_cb   notify_cb;
386 
387 	/**
388 	 * An optional callback function, notifying the consumer of which files
389 	 * are being examined as the diff is generated.
390 	 */
391 	git_diff_progress_cb progress_cb;
392 
393 	/** The payload to pass to the callback functions. */
394 	void                *payload;
395 
396 	/* options controlling how the diff text is generated */
397 
398 	/**
399 	 * The number of unchanged lines that define the boundary of a hunk
400 	 * (and to display before and after). Defaults to 3.
401 	 */
402 	uint32_t    context_lines;
403 	/**
404 	 * The maximum number of unchanged lines between hunk boundaries before
405 	 * the hunks will be merged into one. Defaults to 0.
406 	 */
407 	uint32_t    interhunk_lines;
408 
409 	/**
410 	 * The abbreviation length to use when formatting object ids.
411 	 * Defaults to the value of 'core.abbrev' from the config, or 7 if unset.
412 	 */
413 	uint16_t    id_abbrev;
414 
415 	/**
416 	 * A size (in bytes) above which a blob will be marked as binary
417 	 * automatically; pass a negative value to disable.
418 	 * Defaults to 512MB.
419 	 */
420 	git_off_t   max_size;
421 
422 	/**
423 	 * The virtual "directory" prefix for old file names in hunk headers.
424 	 * Defaults to "a".
425 	 */
426 	const char *old_prefix;
427 
428 	/**
429 	 * The virtual "directory" prefix for new file names in hunk headers.
430 	 * Defaults to "b".
431 	 */
432 	const char *new_prefix;
433 } git_diff_options;
434 
435 /* The current version of the diff options structure */
436 #define GIT_DIFF_OPTIONS_VERSION 1
437 
438 /* Stack initializer for diff options: sets the version, leaves the submodule
439  * ignore rule unspecified, uses 3 context lines and zeroes everything else.
440  * Alternatively use `git_diff_options_init` for programmatic initialization. */
441 #define GIT_DIFF_OPTIONS_INIT \
442 	{GIT_DIFF_OPTIONS_VERSION, 0, GIT_SUBMODULE_IGNORE_UNSPECIFIED, {NULL,0}, NULL, NULL, NULL, 3}
443 
444 /**
445  * Initialize a git_diff_options structure.
446  *
447  * Initializes a `git_diff_options` with default values. Equivalent to creating
448  * an instance with GIT_DIFF_OPTIONS_INIT.
449  *
450  * @param opts The `git_diff_options` struct to initialize.
451  * @param version The struct version; pass `GIT_DIFF_OPTIONS_VERSION`.
452  * @return Zero on success; -1 on failure.
453  */
454 GIT_EXTERN(int) git_diff_options_init(
455 	git_diff_options *opts,
456 	unsigned int version);
457 
458 /**
459  * Callback that will be made once per file when iterating over a diff.
460  *
461  * @param delta A pointer to the delta data for the file
462  * @param progress Progress through the diff, going from 0 to 1
463  * @param payload User-specified pointer from foreach function
464  */
465 typedef int GIT_CALLBACK(git_diff_file_cb)(
466 	const git_diff_delta *delta,
467 	float progress,
468 	void *payload);
469 
470 #define GIT_DIFF_HUNK_HEADER_SIZE	128 /**< size of the `header` buffer in `git_diff_hunk` */
471 
472 /**
473  * When producing a binary diff, the binary data returned will be
474  * either the deflated full ("literal") contents of the file, or
475  * the deflated binary delta between the two sides (whichever is
476  * smaller).
477  */
478 typedef enum {
479 	/** There is no binary delta (this is the zero value). */
480 	GIT_DIFF_BINARY_NONE,
481 
482 	/** The binary data is the literal (full) contents of the file. */
483 	GIT_DIFF_BINARY_LITERAL,
484 
485 	/** The binary data is the delta from one side to the other. */
486 	GIT_DIFF_BINARY_DELTA,
487 } git_diff_binary_t;
488 
489 /** The contents of one of the files in a binary diff. */
490 typedef struct {
491 	/** The type of binary data for this file. */
492 	git_diff_binary_t type;
493 
494 	/** The binary data, deflated. */
495 	const char *data;
496 
497 	/** The length of the deflated binary data in `data`. */
498 	size_t datalen;
499 
500 	/** The length of the binary data after inflation. */
501 	size_t inflatedlen;
502 } git_diff_binary_file;
503 
504 /**
505  * Structure describing the binary contents of a diff.
506  *
507  * A `binary` file / delta is a file (or pair) for which no text diffs
508  * should be generated. A diff can contain delta entries that are
509  * binary, but no diff content will be output for those files. There is
510  * a base heuristic for binary detection and you can further tune the
511  * behavior with git attributes or diff flags and option settings.
512  */
513 typedef struct {
514 	/**
515 	 * Whether there is data in this binary structure or not.
516 	 *
517 	 * If this is `1`, then this was produced with binary content included.
518 	 * If this is `0` then this was generated knowing only that a binary
519 	 * file changed but without providing the data, probably from a patch
520 	 * that said `Binary files a/file.txt and b/file.txt differ`.
521 	 */
522 	unsigned int contains_data;
523 	git_diff_binary_file old_file; /**< The contents of the old file. */
524 	git_diff_binary_file new_file; /**< The contents of the new file. */
525 } git_diff_binary;
526 
527 /**
528  * When iterating over a diff, callback that will be made for
529  * binary content within the diff.
530  */
531 typedef int GIT_CALLBACK(git_diff_binary_cb)(
532 	const git_diff_delta *delta,   /**< NOTE(review): presumably the delta the binary content belongs to -- confirm */
533 	const git_diff_binary *binary, /**< binary content within the diff */
534 	void *payload);                /**< NOTE(review): presumably user reference data, as in `git_diff_line_cb` -- confirm */
535 
536 /**
537  * Structure describing a hunk of a diff.
538  *
539  * A `hunk` is a span of modified lines in a delta along with some stable
540  * surrounding context. You can configure the amount of context and other
541  * properties of how hunks are generated. Each hunk also comes with a
542  * header that describes where it starts and ends in both the old and new
543  * versions in the delta.
544  */
545 typedef struct {
546 	int    old_start;     /**< Starting line number in old_file */
547 	int    old_lines;     /**< Number of lines in old_file */
548 	int    new_start;     /**< Starting line number in new_file */
549 	int    new_lines;     /**< Number of lines in new_file */
550 	size_t header_len;    /**< Number of bytes in header text */
551 	char   header[GIT_DIFF_HUNK_HEADER_SIZE];   /**< Header text, NUL-byte terminated; buffer holds GIT_DIFF_HUNK_HEADER_SIZE bytes */
552 } git_diff_hunk;
553 
554 /**
555  * When iterating over a diff, callback that will be made per hunk.
556  */
557 typedef int GIT_CALLBACK(git_diff_hunk_cb)(
558 	const git_diff_delta *delta, /**< NOTE(review): presumably the delta that contains the hunk -- confirm */
559 	const git_diff_hunk *hunk,   /**< the hunk this call is made for */
560 	void *payload);              /**< NOTE(review): presumably user reference data, as in `git_diff_line_cb` -- confirm */
561 
562 /**
563  * Line origin constants.
564  *
565  * These values describe where a line came from and will be passed to
566  * the git_diff_line_cb when iterating over a diff.  There are some
567  * special origin constants at the end that are used for the text
568  * output callbacks to demarcate lines that are actually part of
569  * the file or hunk headers.
570  */
571 typedef enum {
572 	/* These values will be sent to `git_diff_line_cb` along with the line */
573 	GIT_DIFF_LINE_CONTEXT   = ' ', /**< Context line, in both old and new versions */
574 	GIT_DIFF_LINE_ADDITION  = '+', /**< Added line, only in the new version */
575 	GIT_DIFF_LINE_DELETION  = '-', /**< Removed line, only in the old version */
576 
577 	GIT_DIFF_LINE_CONTEXT_EOFNL = '=', /**< Both files have no LF at end */
578 	GIT_DIFF_LINE_ADD_EOFNL = '>',     /**< Old has no LF at end, new does */
579 	GIT_DIFF_LINE_DEL_EOFNL = '<',     /**< Old has LF at end, new does not */
580 
581 	/* The following values will only be sent to a `git_diff_line_cb` when
582 	 * the content of a diff is being formatted through `git_diff_print`.
583 	 */
584 	GIT_DIFF_LINE_FILE_HDR  = 'F', /**< Line is part of the file header */
585 	GIT_DIFF_LINE_HUNK_HDR  = 'H', /**< Line is part of the hunk header */
586 	GIT_DIFF_LINE_BINARY    = 'B' /**< For "Binary files x and y differ" */
587 } git_diff_line_t;
588 
589 /**
590  * Structure describing a line (or data span) of a diff.
591  *
592  * A `line` is a range of characters inside a hunk.  It could be a context
593  * line (i.e. in both old and new versions), an added line (i.e. only in
594  * the new version), or a removed line (i.e. only in the old version).
595  * Unfortunately, we don't know anything about the encoding of data in the
596  * file being diffed, so we cannot tell you much about the line content.
597  * Line data will not be NUL-byte terminated, however, because it will be
598  * just a span of bytes inside the larger file.
599  */
600 typedef struct {
601 	char   origin;       /**< A git_diff_line_t value */
602 	int    old_lineno;   /**< Line number in old file, or -1 for an added line */
603 	int    new_lineno;   /**< Line number in new file, or -1 for a deleted line */
604 	int    num_lines;    /**< Number of newline characters in content */
605 	size_t content_len;  /**< Number of bytes of data in `content` */
606 	git_off_t content_offset; /**< Offset in the original file to the content */
607 	const char *content; /**< Pointer to diff text, not NUL-byte terminated */
608 } git_diff_line;
609 
610 /**
611  * When iterating over a diff, callback that will be made per text diff
612  * line. In this context, the provided range will be NULL.
613  *
614  * When printing a diff, callback that will be made to output each line
615  * of text.  This uses some extra GIT_DIFF_LINE_... constants for output
616  * of lines of file and hunk headers.
617  */
618 typedef int GIT_CALLBACK(git_diff_line_cb)(
619 	const git_diff_delta *delta, /**< delta that contains this data */
620 	const git_diff_hunk *hunk,   /**< hunk containing this data; NOTE(review): the doc above mentions a "range" but no parameter has that name -- possibly this one, confirm */
621 	const git_diff_line *line,   /**< line data */
622 	void *payload);              /**< user reference data */
623 
624 /**
625  * Flags to control the behavior of diff rename/copy detection.
626  */
627 typedef enum {
628 	/** Obey `diff.renames`. Overridden by any other GIT_DIFF_FIND_... flag. */
629 	GIT_DIFF_FIND_BY_CONFIG = 0,
630 
631 	/** Look for renames? (`--find-renames`) */
632 	GIT_DIFF_FIND_RENAMES = (1u << 0),
633 
634 	/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
635 	GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
636 
637 	/** Look for copies? (a la `--find-copies`). */
638 	GIT_DIFF_FIND_COPIES = (1u << 2),
639 
640 	/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
641 	 *
642 	 * For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
643 	 * the initial `git_diff` is being generated.
644 	 */
645 	GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
646 
647 	/** Mark significant rewrites for split (`--break-rewrites=/M`) */
648 	GIT_DIFF_FIND_REWRITES = (1u << 4),
649 	/** Actually split large rewrites into delete/add pairs */
650 	GIT_DIFF_BREAK_REWRITES = (1u << 5),
651 	/** Mark rewrites for split and break into delete/add pairs */
652 	GIT_DIFF_FIND_AND_BREAK_REWRITES =
653 		(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
654 
655 	/** Find renames/copies for UNTRACKED items in working directory.
656 	 *
657 	 * For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
658 	 * initial `git_diff` is being generated (and obviously the diff must
659 	 * be against the working directory for this to make sense).
660 	 */
661 	GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
662 
663 	/** Turn on all finding features (mask of bits 0-7, covering every flag above). */
664 	GIT_DIFF_FIND_ALL = (0x0ff),
665 
666 	/** Measure similarity ignoring leading whitespace (default) */
667 	GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
668 	/** Measure similarity ignoring all whitespace */
669 	GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
670 	/** Measure similarity including all data */
671 	GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
672 	/** Measure similarity only by comparing SHAs (fast and cheap) */
673 	GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
674 
675 	/** Do not break rewrites unless they contribute to a rename.
676 	 *
677 	 * Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
678 	 * similarity of modified files and split the ones that have changed a
679 	 * lot into a DELETE / ADD pair.  Then the sides of that pair will be
680 	 * considered candidates for rename and copy detection.
681 	 *
682 	 * If you add this flag in and the split pair is *not* used for an
683 	 * actual rename or copy, then the modified record will be restored to
684 	 * a regular MODIFIED record instead of being split.
685 	 */
686 	GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY  = (1u << 15),
687 
688 	/** Remove any UNMODIFIED deltas after find_similar is done.
689 	 *
690 	 * Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
691 	 * --find-copies-harder behavior requires building a diff with the
692 	 * GIT_DIFF_INCLUDE_UNMODIFIED flag.  If you do not want UNMODIFIED
693 	 * records in the final result, pass this flag to have them removed.
694 	 */
695 	GIT_DIFF_FIND_REMOVE_UNMODIFIED = (1u << 16),
696 } git_diff_find_t;
697 
698 /**
699  * Pluggable similarity metric
700  */
701 typedef struct {
702 	int GIT_CALLBACK(file_signature)(
703 		void **out, const git_diff_file *file,
704 		const char *fullpath, void *payload); /**< NOTE(review): presumably builds a signature in `*out` from the file at `fullpath` -- confirm */
705 	int GIT_CALLBACK(buffer_signature)(
706 		void **out, const git_diff_file *file,
707 		const char *buf, size_t buflen, void *payload); /**< NOTE(review): presumably builds a signature in `*out` from `buflen` bytes at `buf` -- confirm */
708 	void GIT_CALLBACK(free_signature)(void *sig, void *payload); /**< NOTE(review): presumably releases a signature made by the callbacks above -- confirm */
709 	int GIT_CALLBACK(similarity)(int *score, void *siga, void *sigb, void *payload); /**< NOTE(review): presumably writes the similarity of two signatures to `*score` -- confirm */
710 	void *payload; /**< NOTE(review): presumably forwarded as the `payload` argument of each callback -- confirm */
711 } git_diff_similarity_metric;
712 
713 /**
714  * Control behavior of rename and copy detection
715  *
716  * These options mostly mimic parameters that can be passed to git-diff.
717  */
718 typedef struct {
719 	unsigned int version; /**< version for the struct; see GIT_DIFF_FIND_OPTIONS_VERSION */
720 
721 	/**
722 	 * Combination of git_diff_find_t values (default GIT_DIFF_FIND_BY_CONFIG).
723 	 * NOTE: if you don't explicitly set this, `diff.renames` could be set
724 	 * to false, resulting in `git_diff_find_similar` doing nothing.
725 	 */
726 	uint32_t flags;
727 
728 	/**
729 	 * Threshold above which similar files will be considered renames.
730 	 * This is equivalent to the -M option. Defaults to 50.
731 	 */
732 	uint16_t rename_threshold;
733 
734 	/**
735 	 * Threshold below which similar files will be eligible to be a rename source.
736 	 * This is equivalent to the first part of the -B option. Defaults to 50.
737 	 */
738 	uint16_t rename_from_rewrite_threshold;
739 
740 	/**
741 	 * Threshold above which similar files will be considered copies.
742 	 * This is equivalent to the -C option. Defaults to 50.
743 	 */
744 	uint16_t copy_threshold;
745 
746 	/**
747 	 * Threshold below which similar files will be split into a delete/add pair.
748 	 * This is equivalent to the last part of the -B option. Defaults to 60.
749 	 */
750 	uint16_t break_rewrite_threshold;
751 
752 	/**
753 	 * Maximum number of matches to consider for a particular file.
754 	 *
755 	 * This is a little different from the `-l` option from Git because we
756 	 * will still process up to this many matches before abandoning the search.
757 	 * Defaults to 200.
758 	 */
759 	size_t rename_limit;
760 
761 	/**
762 	 * The `metric` option allows you to plug in a custom similarity metric.
763 	 *
764 	 * Set it to NULL to use the default internal metric.
765 	 *
766 	 * The default metric is based on sampling hashes of ranges of data in
767 	 * the file, which is a pretty good similarity approximation that should
768 	 * work fairly well for both text and binary data while still being
769 	 * pretty fast with a fixed memory overhead.
770 	 */
771 	git_diff_similarity_metric *metric;
772 } git_diff_find_options;
773 
774 #define GIT_DIFF_FIND_OPTIONS_VERSION 1 /**< current version of `git_diff_find_options` */
775 #define GIT_DIFF_FIND_OPTIONS_INIT {GIT_DIFF_FIND_OPTIONS_VERSION} /**< stack initializer: sets `version`, zeroes every other field */
776 
777 /**
778  * Initialize a git_diff_find_options structure.
779  *
780  * Initializes a `git_diff_find_options` with default values. Equivalent to creating
781  * an instance with GIT_DIFF_FIND_OPTIONS_INIT.
782  *
783  * @param opts The `git_diff_find_options` struct to initialize.
784  * @param version The struct version; pass `GIT_DIFF_FIND_OPTIONS_VERSION`.
785  * @return Zero on success; -1 on failure.
786  */
787 GIT_EXTERN(int) git_diff_find_options_init(
788 	git_diff_find_options *opts,
789 	unsigned int version);
790 
791 /** @name Diff Generator Functions
792  *
793  * These are the functions you would use to create (or destroy) a
794  * git_diff from various objects in a repository.
795  */
796 /**@{*/
797 
798 /**
799  * Deallocate a diff, releasing the memory of the diff object.
800  *
801  * @param diff The previously created diff; cannot be used after free.
802  */
803 GIT_EXTERN(void) git_diff_free(git_diff *diff);
804 
805 /**
806  * Create a diff with the difference between two tree objects.
807  *
808  * This is equivalent to `git diff <old-tree> <new-tree>`
809  *
810  * The first tree will be used for the "old_file" side of the delta and the
811  * second tree will be used for the "new_file" side of the delta.  You can
812  * pass NULL to indicate an empty tree, although it is an error to pass
813  * NULL for both the `old_tree` and `new_tree`.
814  *
815  * @param diff Output pointer to a git_diff pointer to be allocated.
816  * @param repo The repository containing the trees.
817  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
818  * @param new_tree A git_tree object to diff to, or NULL for empty tree.
819  * @param opts Structure with options to influence diff or NULL for defaults.
 * @return 0 on success or an error code.
820  */
821 GIT_EXTERN(int) git_diff_tree_to_tree(
822 	git_diff **diff,
823 	git_repository *repo,
824 	git_tree *old_tree,
825 	git_tree *new_tree,
826 	const git_diff_options *opts);
827 
828 /**
829  * Create a diff between a tree and repository index.
830  *
831  * This is equivalent to `git diff --cached <treeish>` or if you pass
832  * the HEAD tree, then like `git diff --cached`.
833  *
834  * The tree you pass will be used for the "old_file" side of the delta, and
835  * the index will be used for the "new_file" side of the delta.
836  *
837  * If you pass NULL for the index, then the existing index of the `repo`
838  * will be used.  In this case, the index will be refreshed from disk
839  * (if it has changed) before the diff is generated.
840  *
841  * @param diff Output pointer to a git_diff pointer to be allocated.
842  * @param repo The repository containing the tree and index.
843  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
844  * @param index The index to diff with; repo index used if NULL.
845  * @param opts Structure with options to influence diff or NULL for defaults.
 * @return 0 on success or an error code.
846  */
847 GIT_EXTERN(int) git_diff_tree_to_index(
848 	git_diff **diff,
849 	git_repository *repo,
850 	git_tree *old_tree,
851 	git_index *index,
852 	const git_diff_options *opts);
853 
854 /**
855  * Create a diff between the repository index and the workdir directory.
856  *
857  * This matches the `git diff` command.  See the note below on
858  * `git_diff_tree_to_workdir` for a discussion of the difference between
859  * `git diff` and `git diff HEAD` and how to emulate a `git diff <treeish>`
860  * using libgit2.
861  *
862  * The index will be used for the "old_file" side of the delta, and the
863  * working directory will be used for the "new_file" side of the delta.
864  *
865  * If you pass NULL for the index, then the existing index of the `repo`
866  * will be used.  In this case, the index will be refreshed from disk
867  * (if it has changed) before the diff is generated.
868  *
869  * @param diff Output pointer to a git_diff pointer to be allocated.
870  * @param repo The repository.
871  * @param index The index to diff from; repo index used if NULL.
872  * @param opts Structure with options to influence diff or NULL for defaults.
 * @return 0 on success or an error code.
873  */
874 GIT_EXTERN(int) git_diff_index_to_workdir(
875 	git_diff **diff,
876 	git_repository *repo,
877 	git_index *index,
878 	const git_diff_options *opts);
879 
880 /**
881  * Create a diff between a tree and the working directory.
882  *
883  * The tree you provide will be used for the "old_file" side of the delta,
884  * and the working directory will be used for the "new_file" side.
885  *
886  * This is not the same as `git diff <treeish>` or `git diff-index
887  * <treeish>`.  Those commands use information from the index, whereas this
888  * function strictly returns the differences between the tree and the files
889  * in the working directory, regardless of the state of the index.  Use
890  * `git_diff_tree_to_workdir_with_index` to emulate those commands.
891  *
892  * To see the difference between this and `git_diff_tree_to_workdir_with_index`,
893  * consider the example of a staged file deletion where the file has then
894  * been put back into the working dir and further modified.  The
895  * tree-to-workdir diff for that file is 'modified', but `git diff` would
896  * show status 'deleted' since there is a staged delete.
897  *
898  * @param diff A pointer to a git_diff pointer that will be allocated.
899  * @param repo The repository containing the tree.
900  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
901  * @param opts Structure with options to influence diff or NULL for defaults.
 * @return 0 on success or an error code.
902  */
903 GIT_EXTERN(int) git_diff_tree_to_workdir(
904 	git_diff **diff,
905 	git_repository *repo,
906 	git_tree *old_tree,
907 	const git_diff_options *opts);
908 
909 /**
910  * Create a diff between a tree and the working directory using index data
911  * to account for staged deletes, tracked files, etc.
912  *
913  * This emulates `git diff <tree>` by diffing the tree to the index and
914  * the index to the working directory and blending the results into a
915  * single diff that includes staged deletes, etc.
916  *
917  * @param diff A pointer to a git_diff pointer that will be allocated.
918  * @param repo The repository containing the tree.
919  * @param old_tree A git_tree object to diff from, or NULL for empty tree.
920  * @param opts Structure with options to influence diff or NULL for defaults.
 * @return 0 on success or an error code.
921  */
922 GIT_EXTERN(int) git_diff_tree_to_workdir_with_index(
923 	git_diff **diff,
924 	git_repository *repo,
925 	git_tree *old_tree,
926 	const git_diff_options *opts);
927 
928 /**
929  * Create a diff with the difference between two index objects.
930  *
931  * The first index will be used for the "old_file" side of the delta and the
932  * second index will be used for the "new_file" side of the delta.
933  *
934  * @param diff Output pointer to a git_diff pointer to be allocated.
935  * @param repo The repository containing the indexes.
936  * @param old_index A git_index object to diff from.
937  * @param new_index A git_index object to diff to.
938  * @param opts Structure with options to influence diff or NULL for defaults.
 * @return 0 on success or an error code.
939  */
940 GIT_EXTERN(int) git_diff_index_to_index(
941 	git_diff **diff,
942 	git_repository *repo,
943 	git_index *old_index,
944 	git_index *new_index,
945 	const git_diff_options *opts);
946 
947 /**
948  * Merge one diff into another.
949  *
950  * This merges items from the "from" list into the "onto" list.  The
951  * resulting diff will have all items that appear in either list.
952  * If an item appears in both lists, then it will be "merged" to appear
953  * as if the old version was from the "onto" list and the new version
954  * is from the "from" list (with the exception that if the item has a
955  * pending DELETE in the middle, then it will show as deleted).
956  *
957  * @param onto Diff to merge into.
958  * @param from Diff to merge.
 * @return 0 on success or an error code.
959  */
960 GIT_EXTERN(int) git_diff_merge(
961 	git_diff *onto,
962 	const git_diff *from);
963 
964 /**
965  * Transform a diff marking file renames, copies, etc.
966  *
967  * This modifies a diff in place, replacing old entries that look
968  * like renames or copies with new entries reflecting those changes.
969  * This also will, if requested, break modified files into add/remove
970  * pairs if the amount of change is above a threshold.
971  *
972  * @param diff diff to run detection algorithms on
973  * @param options Control how detection should be run, NULL for defaults
974  * @return 0 on success, -1 on failure
975  */
976 GIT_EXTERN(int) git_diff_find_similar(
977 	git_diff *diff,
978 	const git_diff_find_options *options);
979 
980 /**@}*/
981 
982 
983 /** @name Diff Processor Functions
984  *
985  * These are the functions you apply to a diff to process it
986  * or read it in some way.
987  */
988 /**@{*/
989 
990 /**
991  * Query how many diff records are there in a diff.
992  *
993  * @param diff A git_diff generated by one of the above functions
994  * @return Count of number of deltas in the list
995  */
996 GIT_EXTERN(size_t) git_diff_num_deltas(const git_diff *diff);
997 
998 /**
999  * Query how many diff deltas are there in a diff filtered by type.
1000  *
1001  * This works just like `git_diff_num_deltas()` with an extra parameter
1002  * that is a `git_delta_t` and returns just the count of how many deltas
1003  * match that particular type.
1004  *
1005  * @param diff A git_diff generated by one of the above functions
1006  * @param type A git_delta_t value to filter the count
1007  * @return Count of number of deltas matching delta_t type
1008  */
1009 GIT_EXTERN(size_t) git_diff_num_deltas_of_type(
1010 	const git_diff *diff, git_delta_t type);
1011 
1012 /**
1013  * Return the diff delta for an entry in the diff list.
1014  *
1015  * The `git_diff_delta` pointer points to internal data and you do not
1016  * have to release it when you are done with it.  It will go away when
1017  * the `git_diff` (or any associated `git_patch`) goes away.
1018  *
1019  * Note that the flags on the delta related to whether it has binary
1020  * content or not may not be set if there are no attributes set for the
1021  * file and there has been no reason to load the file data at this point.
1022  * For now, if you need those flags to be up to date, your only option is
1023  * to either use `git_diff_foreach` or create a `git_patch`.
1024  *
1025  * @param diff Diff list object
1026  * @param idx Index into diff list
1027  * @return Pointer to git_diff_delta (or NULL if `idx` out of range)
1028  */
1029 GIT_EXTERN(const git_diff_delta *) git_diff_get_delta(
1030 	const git_diff *diff, size_t idx);
1031 
1032 /**
1033  * Check if deltas are sorted case sensitively or insensitively.
1034  *
1035  * @param diff diff to check
1036  * @return 0 if case sensitive, 1 if case is ignored
1037  */
1038 GIT_EXTERN(int) git_diff_is_sorted_icase(const git_diff *diff);
1039 
1040 /**
1041  * Loop over all deltas in a diff issuing callbacks.
1042  *
1043  * This will iterate through all of the files described in a diff.  You
1044  * should provide a file callback to learn about each file.
1045  *
1046  * The "hunk" and "line" callbacks are optional, and the text diff of the
1047  * files will only be calculated if they are not NULL.  Of course, these
1048  * callbacks will not be invoked for binary files on the diff or for
1049  * files whose only change is a file mode change.
1050  *
1051  * Returning a non-zero value from any of the callbacks will terminate
1052  * the iteration and return the value to the user.
1053  *
1054  * @param diff A git_diff generated by one of the above functions.
1055  * @param file_cb Callback function to make per file in the diff.
1056  * @param binary_cb Optional callback to make for binary files.
1057  * @param hunk_cb Optional callback to make per hunk of text diff.  This
1058  *                callback is called to describe a range of lines in the
1059  *                diff.  It will not be issued for binary files.
1060  * @param line_cb Optional callback to make per line of diff text.  This
1061  *                same callback will be made for context lines, added, and
1062  *                removed lines, and even for a deleted trailing newline.
1063  * @param payload Reference pointer that will be passed to your callbacks.
1064  * @return 0 on success, non-zero callback return value, or error code
1065  */
1066 GIT_EXTERN(int) git_diff_foreach(
1067 	git_diff *diff,
1068 	git_diff_file_cb file_cb,
1069 	git_diff_binary_cb binary_cb,
1070 	git_diff_hunk_cb hunk_cb,
1071 	git_diff_line_cb line_cb,
1072 	void *payload);
1073 
1074 /**
1075  * Look up the single character abbreviation for a delta status code.
1076  *
1077  * When you run `git diff --name-status` it uses single letter codes in
1078  * the output such as 'A' for added, 'D' for deleted, 'M' for modified,
1079  * etc.  This function converts a git_delta_t value into these letters for
1080  * your own purposes.  GIT_DELTA_UNTRACKED will return a space (i.e. ' ').
1081  *
1082  * @param status The git_delta_t value to look up
1083  * @return The single character label for that code
1084  */
1085 GIT_EXTERN(char) git_diff_status_char(git_delta_t status);
1086 
1087 /**
1088  * Possible output formats for diff data
1089  */
1090 typedef enum {
1091 	GIT_DIFF_FORMAT_PATCH        = 1u, /**< full git diff */
1092 	GIT_DIFF_FORMAT_PATCH_HEADER = 2u, /**< just the file headers of patch */
1093 	GIT_DIFF_FORMAT_RAW          = 3u, /**< like git diff --raw */
1094 	GIT_DIFF_FORMAT_NAME_ONLY    = 4u, /**< like git diff --name-only */
1095 	GIT_DIFF_FORMAT_NAME_STATUS  = 5u, /**< like git diff --name-status */
1096 	GIT_DIFF_FORMAT_PATCH_ID     = 6u, /**< git diff as used by git patch-id */
1097 } git_diff_format_t;
1098 
1099 /**
1100  * Iterate over a diff generating formatted text output.
1101  *
1102  * Returning a non-zero value from the callbacks will terminate the
1103  * iteration and return the non-zero value to the caller.
1104  *
1105  * @param diff A git_diff generated by one of the above functions.
1106  * @param format A git_diff_format_t value to pick the text format.
1107  * @param print_cb Callback to make per line of diff text.
1108  * @param payload Reference pointer that will be passed to your callback.
1109  * @return 0 on success, non-zero callback return value, or error code
1110  */
1111 GIT_EXTERN(int) git_diff_print(
1112 	git_diff *diff,
1113 	git_diff_format_t format,
1114 	git_diff_line_cb print_cb,
1115 	void *payload);
1116 
1117 /**
1118  * Produce the complete formatted text output from a diff into a
1119  * buffer.
1120  *
1121  * @param out A pointer to a user-allocated git_buf that will
1122  *            contain the diff text
1123  * @param diff A git_diff generated by one of the above functions.
1124  * @param format A git_diff_format_t value to pick the text format.
1125  * @return 0 on success or error code
1126  */
1127 GIT_EXTERN(int) git_diff_to_buf(
1128 	git_buf *out,
1129 	git_diff *diff,
1130 	git_diff_format_t format);
1131 
1132 /**@}*/
1133 
1134 
1135 /*
1136  * Misc
1137  */
1138 
1139 /**
1140  * Directly run a diff on two blobs.
1141  *
1142  * Compared to a file, a blob lacks some contextual information. As such,
1143  * the `git_diff_file` given to the callback will have some fake data; i.e.
1144  * `mode` will be 0 and `path` will be NULL.
1145  *
1146  * NULL is allowed for either `old_blob` or `new_blob` and will be treated
1147  * as an empty blob, with the `oid` set to NULL in the `git_diff_file` data.
1148  * Passing NULL for both blobs is a noop; no callbacks will be made at all.
1149  *
1150  * We do run a binary content check on the blob content and if either blob
1151  * looks like binary data, the `git_diff_delta` binary attribute will be set
1152  * to 1 and no call to the hunk_cb nor line_cb will be made (unless you pass
1153  * `GIT_DIFF_FORCE_TEXT` of course).
1154  *
1155  * @param old_blob Blob for old side of diff, or NULL for empty blob
1156  * @param old_as_path Treat old blob as if it had this filename; can be NULL
1157  * @param new_blob Blob for new side of diff, or NULL for empty blob
1158  * @param new_as_path Treat new blob as if it had this filename; can be NULL
1159  * @param options Options for diff, or NULL for default options
1160  * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1161  * @param binary_cb Callback for binary files; can be NULL
1162  * @param hunk_cb Callback for each hunk in diff; can be NULL
1163  * @param line_cb Callback for each line in diff; can be NULL
1164  * @param payload Payload passed to each callback function
1165  * @return 0 on success, non-zero callback return value, or error code
1166  */
1167 GIT_EXTERN(int) git_diff_blobs(
1168 	const git_blob *old_blob,
1169 	const char *old_as_path,
1170 	const git_blob *new_blob,
1171 	const char *new_as_path,
1172 	const git_diff_options *options,
1173 	git_diff_file_cb file_cb,
1174 	git_diff_binary_cb binary_cb,
1175 	git_diff_hunk_cb hunk_cb,
1176 	git_diff_line_cb line_cb,
1177 	void *payload);
1178 
1179 /**
1180  * Directly run a diff between a blob and a buffer.
1181  *
1182  * As with `git_diff_blobs`, comparing a blob and buffer lacks some context,
1183  * so the `git_diff_file` parameters to the callbacks will be faked a la the
1184  * rules for `git_diff_blobs()`.
1185  *
1186  * Passing NULL for `old_blob` will be treated as an empty blob (i.e. the
1187  * `file_cb` will be invoked with GIT_DELTA_ADDED and the diff will be the
1188  * entire content of the buffer added).  Passing NULL to the buffer will do
1189  * the reverse, with GIT_DELTA_DELETED and blob content removed.
1190  *
1191  * @param old_blob Blob for old side of diff, or NULL for empty blob
1192  * @param old_as_path Treat old blob as if it had this filename; can be NULL
1193  * @param buffer Raw data for new side of diff, or NULL for empty
1194  * @param buffer_len Length of raw data for new side of diff
1195  * @param buffer_as_path Treat buffer as if it had this filename; can be NULL
1196  * @param options Options for diff, or NULL for default options
1197  * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1198  * @param binary_cb Callback for binary files; can be NULL
1199  * @param hunk_cb Callback for each hunk in diff; can be NULL
1200  * @param line_cb Callback for each line in diff; can be NULL
1201  * @param payload Payload passed to each callback function
1202  * @return 0 on success, non-zero callback return value, or error code
1203  */
1204 GIT_EXTERN(int) git_diff_blob_to_buffer(
1205 	const git_blob *old_blob,
1206 	const char *old_as_path,
1207 	const char *buffer,
1208 	size_t buffer_len,
1209 	const char *buffer_as_path,
1210 	const git_diff_options *options,
1211 	git_diff_file_cb file_cb,
1212 	git_diff_binary_cb binary_cb,
1213 	git_diff_hunk_cb hunk_cb,
1214 	git_diff_line_cb line_cb,
1215 	void *payload);
1216 
1217 /**
1218  * Directly run a diff between two buffers.
1219  *
1220  * Even more than with `git_diff_blobs`, comparing two buffers lacks
1221  * context, so the `git_diff_file` parameters to the callbacks will be
1222  * faked a la the rules for `git_diff_blobs()`.
1223  *
1224  * @param old_buffer Raw data for old side of diff, or NULL for empty
1225  * @param old_len Length of the raw data for old side of the diff
1226  * @param old_as_path Treat old buffer as if it had this filename; can be NULL
1227  * @param new_buffer Raw data for new side of diff, or NULL for empty
1228  * @param new_len Length of raw data for new side of diff
1229  * @param new_as_path Treat buffer as if it had this filename; can be NULL
1230  * @param options Options for diff, or NULL for default options
1231  * @param file_cb Callback for "file"; made once if there is a diff; can be NULL
1232  * @param binary_cb Callback for binary files; can be NULL
1233  * @param hunk_cb Callback for each hunk in diff; can be NULL
1234  * @param line_cb Callback for each line in diff; can be NULL
1235  * @param payload Payload passed to each callback function
1236  * @return 0 on success, non-zero callback return value, or error code
1237  */
1238 GIT_EXTERN(int) git_diff_buffers(
1239 	const void *old_buffer,
1240 	size_t old_len,
1241 	const char *old_as_path,
1242 	const void *new_buffer,
1243 	size_t new_len,
1244 	const char *new_as_path,
1245 	const git_diff_options *options,
1246 	git_diff_file_cb file_cb,
1247 	git_diff_binary_cb binary_cb,
1248 	git_diff_hunk_cb hunk_cb,
1249 	git_diff_line_cb line_cb,
1250 	void *payload);
1251 
1252 /**
1253  * Read the contents of a git patch file into a `git_diff` object.
1254  *
1255  * The diff object produced is similar to the one that would be
1256  * produced if you actually produced it computationally by comparing
1257  * two trees, however there may be subtle differences.  For example,
1258  * a patch file likely contains abbreviated object IDs, so the
1259  * object IDs in a `git_diff_delta` produced by this function will
1260  * also be abbreviated.
1261  *
1262  * This function will only read patch files created by a git
1263  * implementation, it will not read unified diffs produced by
1264  * the `diff` program, nor any other types of patch files.
1265  *
1266  * @param out A pointer to a git_diff pointer that will be allocated.
1267  * @param content The contents of a patch file
1268  * @param content_len The length of the patch file contents
1269  * @return 0 or an error code
1270  */
1271 GIT_EXTERN(int) git_diff_from_buffer(
1272 	git_diff **out,
1273 	const char *content,
1274 	size_t content_len);
1275 
1276 /**
1277  * This is an opaque structure which is allocated by `git_diff_get_stats`.
1278  * You are responsible for releasing the object memory when done, using the
1279  * `git_diff_stats_free()` function.
1280  */
1281 typedef struct git_diff_stats git_diff_stats;
1282 
1283 /**
1284  * Formatting options for diff stats
1285  */
1286 typedef enum {
1287 	/** No stats*/
1288 	GIT_DIFF_STATS_NONE = 0,
1289 
1290 	/** Full statistics, equivalent of `--stat` */
1291 	GIT_DIFF_STATS_FULL = (1u << 0),
1292 
1293 	/** Short statistics, equivalent of `--shortstat` */
1294 	GIT_DIFF_STATS_SHORT = (1u << 1),
1295 
1296 	/** Number statistics, equivalent of `--numstat` */
1297 	GIT_DIFF_STATS_NUMBER = (1u << 2),
1298 
1299 	/** Extended header information such as creations, renames and mode changes, equivalent of `--summary` */
1300 	GIT_DIFF_STATS_INCLUDE_SUMMARY = (1u << 3),
1301 } git_diff_stats_format_t;
1302 
1303 /**
1304  * Accumulate diff statistics for all patches.
1305  *
1306  * @param out Structure containing the diff statistics; free with `git_diff_stats_free()`.
1307  * @param diff A git_diff generated by one of the above functions.
1308  * @return 0 on success; non-zero on error
1309  */
1310 GIT_EXTERN(int) git_diff_get_stats(
1311 	git_diff_stats **out,
1312 	git_diff *diff);
1313 
1314 /**
1315  * Get the total number of files changed in a diff
1316  *
1317  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1318  * @return total number of files changed in the diff
1319  */
1320 GIT_EXTERN(size_t) git_diff_stats_files_changed(
1321 	const git_diff_stats *stats);
1322 
1323 /**
1324  * Get the total number of insertions in a diff
1325  *
1326  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1327  * @return total number of insertions in the diff
1328  */
1329 GIT_EXTERN(size_t) git_diff_stats_insertions(
1330 	const git_diff_stats *stats);
1331 
1332 /**
1333  * Get the total number of deletions in a diff
1334  *
1335  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1336  * @return total number of deletions in the diff
1337  */
1338 GIT_EXTERN(size_t) git_diff_stats_deletions(
1339 	const git_diff_stats *stats);
1340 
1341 /**
1342  * Print diff statistics to a `git_buf`.
1343  *
1344  * @param out buffer to store the formatted diff statistics in.
1345  * @param stats A `git_diff_stats` generated by `git_diff_get_stats`.
1346  * @param format Formatting option.
1347  * @param width Target width for output (only affects GIT_DIFF_STATS_FULL)
1348  * @return 0 on success; non-zero on error
1349  */
1350 GIT_EXTERN(int) git_diff_stats_to_buf(
1351 	git_buf *out,
1352 	const git_diff_stats *stats,
1353 	git_diff_stats_format_t format,
1354 	size_t width);
1355 
1356 /**
1357  * Deallocate a `git_diff_stats`.
1358  *
1359  * @param stats The previously created statistics object;
1360  * cannot be used after free.
1361  */
1362 GIT_EXTERN(void) git_diff_stats_free(git_diff_stats *stats);
1363 
1364 /**
1365  * Formatting options for diff e-mail generation
1366  */
1367 typedef enum {
1368 	/** Normal patch, the default */
1369 	GIT_DIFF_FORMAT_EMAIL_NONE = 0,
1370 
1371 	/** Don't insert "[PATCH]" in the subject header*/
1372 	GIT_DIFF_FORMAT_EMAIL_EXCLUDE_SUBJECT_PATCH_MARKER = (1 << 0),
1373 
1374 } git_diff_format_email_flags_t;
1375 
1376 /**
1377  * Options for controlling the formatting of the generated e-mail.
1378  */
1379 typedef struct {
1380 	unsigned int version;
1381 
1382 	/** see `git_diff_format_email_flags_t` above */
1383 	uint32_t flags;
1384 
1385 	/** This patch number */
1386 	size_t patch_no;
1387 
1388 	/** Total number of patches in this series */
1389 	size_t total_patches;
1390 
1391 	/** id to use for the commit */
1392 	const git_oid *id;
1393 
1394 	/** Summary of the change */
1395 	const char *summary;
1396 
1397 	/** Commit message's body */
1398 	const char *body;
1399 
1400 	/** Author of the change */
1401 	const git_signature *author;
1402 } git_diff_format_email_options;
1403 
/** Current version of the `git_diff_format_email_options` structure. */
1404 #define GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION 1
/**
 * Static initializer for `git_diff_format_email_options`: no flags, patch
 * number 1 of 1 total, and NULL for the id, summary, body and author.
 */
1405 #define GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT {GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION, 0, 1, 1, NULL, NULL, NULL, NULL}
1406 
1407 /**
1408  * Create an e-mail ready patch from a diff.
1409  *
1410  * @param out buffer to store the e-mail patch in
1411  * @param diff containing the commit
1412  * @param opts structure with options to influence content and formatting.
1413  * @return 0 or an error code
1414  */
1415 GIT_EXTERN(int) git_diff_format_email(
1416 	git_buf *out,
1417 	git_diff *diff,
1418 	const git_diff_format_email_options *opts);
1419 
1420 /**
1421  * Create an e-mail ready patch for a commit.
1422  *
1423  * Does not support creating patches for merge commits (yet).
1424  *
1425  * @param out buffer to store the e-mail patch in
1426  * @param repo containing the commit
1427  * @param commit pointer to the commit
1428  * @param patch_no patch number of the commit
1429  * @param total_patches total number of patches in the patch set
1430  * @param flags determines the formatting of the e-mail
1431  * @param diff_opts structure with options to influence diff or NULL for defaults.
1432  * @return 0 or an error code
1433  */
1434 GIT_EXTERN(int) git_diff_commit_as_email(
1435 	git_buf *out,
1436 	git_repository *repo,
1437 	git_commit *commit,
1438 	size_t patch_no,
1439 	size_t total_patches,
1440 	uint32_t flags,
1441 	const git_diff_options *diff_opts);
1442 
1443 /**
1444  * Initialize git_diff_format_email_options structure
1445  *
1446  * Initializes a `git_diff_format_email_options` with default values. Equivalent
1447  * to creating an instance with GIT_DIFF_FORMAT_EMAIL_OPTIONS_INIT.
1448  *
1449  * @param opts The `git_diff_format_email_options` struct to initialize.
1450  * @param version The struct version; pass `GIT_DIFF_FORMAT_EMAIL_OPTIONS_VERSION`.
1451  * @return Zero on success; -1 on failure.
1452  */
1453 GIT_EXTERN(int) git_diff_format_email_options_init(
1454 	git_diff_format_email_options *opts,
1455 	unsigned int version);
1456 
1457 /**
1458  * Patch ID options structure
1459  *
1460  * Initialize with `GIT_DIFF_PATCHID_OPTIONS_INIT`. Alternatively, you can
1461  * use `git_diff_patchid_options_init`.
1462  *
1463  */
1464 typedef struct git_diff_patchid_options {
1465 	unsigned int version;
1466 } git_diff_patchid_options;
1467 
/** Current version of the `git_diff_patchid_options` structure. */
1468 #define GIT_DIFF_PATCHID_OPTIONS_VERSION 1
/** Static initializer for `git_diff_patchid_options`; sets `version`. */
1469 #define GIT_DIFF_PATCHID_OPTIONS_INIT { GIT_DIFF_PATCHID_OPTIONS_VERSION }
1470 
1471 /**
1472  * Initialize git_diff_patchid_options structure
1473  *
1474  * Initializes a `git_diff_patchid_options` with default values. Equivalent to
1475  * creating an instance with `GIT_DIFF_PATCHID_OPTIONS_INIT`.
1476  *
1477  * @param opts The `git_diff_patchid_options` struct to initialize.
1478  * @param version The struct version; pass `GIT_DIFF_PATCHID_OPTIONS_VERSION`.
1479  * @return Zero on success; -1 on failure.
1480  */
1481 GIT_EXTERN(int) git_diff_patchid_options_init(
1482 	git_diff_patchid_options *opts,
1483 	unsigned int version);
1484 
1485 /**
1486  * Calculate the patch ID for the given patch.
1487  *
1488  * Calculate a stable patch ID for the given patch by summing the
1489  * hash of the file diffs, ignoring whitespace and line numbers.
1490  * This can be used to derive whether two diffs are the same with
1491  * a high probability.
1492  *
1493  * Currently, this function only calculates stable patch IDs, as
1494  * defined in git-patch-id(1), and should in fact generate the
1495  * same IDs as the upstream git project does.
1496  *
1497  * @param out Pointer where the calculated patch ID should be stored
1498  * @param diff The diff to calculate the ID for
1499  * @param opts Options for how to calculate the patch ID. This is
1500  *  intended for future changes, as currently no options are
1501  *  available.
1502  * @return 0 on success, an error code otherwise.
1503  */
1504 GIT_EXTERN(int) git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts);
1505 
1506 GIT_END_DECL
1507 
1508 /** @} */
1509 
1510 #endif
1511