1 /*
2  Shared definitions for GNU DIFF
3     Modified for KDiff3 by Joachim Eibl <joachim.eibl at gmx.de> 2003, 2004, 2005.
4     The original file was part of GNU DIFF.
5 
6     Part of KDiff3 - Text Diff And Merge Tool
7 
8     SPDX-FileCopyrightText: 1988-2002 Free Software Foundation, Inc.
9     SPDX-FileCopyrightText: 2002-2011 Joachim Eibl, joachim.eibl at gmx.de
10     SPDX-FileCopyrightText: 2018-2020 Michael Reeves reeves.87@gmail.com
11     SPDX-License-Identifier: GPL-2.0-or-later
12 */
13 
14 #ifndef GNUDIFF_DIFF_H
15 #define GNUDIFF_DIFF_H
16 
#include "LineRef.h"
#include "Utils.h"

#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <ctype.h>
#include <limits.h>
#include <limits>
#include <stdlib.h>
#include <string.h>
#include <type_traits>

#include <stdio.h>

#include <QString>
#include <QtGlobal>
34 
35 /* The integer type of a line number. */
36 typedef qint64 GNULineRef;
37 #define GNULINEREF_MAX std::numeric_limits<GNULineRef>::max()
38 static_assert(std::is_signed<GNULineRef>::value, "GNULineRef must be signed.");
39 static_assert(sizeof(GNULineRef) >= sizeof(size_t), "GNULineRef must be able to receive size_t values.");
40 
41 class GnuDiff
42 {
43   public:
44     /* Variables for command line options */
45 
46     /* Nonzero if output cannot be generated for identical files.  */
47     bool no_diff_means_no_output;
48 
49     /* Number of lines of context to show in each set of diffs.
50    This is zero when context is not to be shown.  */
51     GNULineRef context;
52 
53     /* The significance of white space during comparisons.  */
54     enum
55     {
56         /* All white space is significant (the default).  */
57         IGNORE_NO_WHITE_SPACE,
58 
59         /* Ignore changes due to tab expansion (-E).  */
60         IGNORE_TAB_EXPANSION,
61 
62         /* Ignore changes in horizontal white space (-b).  */
63         IGNORE_SPACE_CHANGE,
64 
65         /* Ignore all horizontal white space (-w).  */
66         IGNORE_ALL_SPACE
67     } ignore_white_space;
68 
69     /* Ignore changes that affect only numbers. (J. Eibl)  */
70     bool bIgnoreNumbers;
71     bool bIgnoreWhiteSpace;
72 
73     /* Files can be compared byte-by-byte, as if they were binary.
74    This depends on various options.  */
75     bool files_can_be_treated_as_binary;
76 
77     /* Ignore differences in case of letters (-i).  */
78     bool ignore_case;
79 
80     /* Use heuristics for better speed with large files with a small
81    density of changes.  */
82     bool speed_large_files;
83 
84     /* Don't discard lines.  This makes things slower (sometimes much
85    slower) but will find a guaranteed minimal set of changes.  */
86     bool minimal;
87 
88     /* The result of comparison is an "edit script": a chain of `struct change'.
89    Each `struct change' represents one place where some lines are deleted
90    and some are inserted.
91 
92    LINE0 and LINE1 are the first affected lines in the two files (origin 0).
93    DELETED is the number of lines deleted here from file 0.
94    INSERTED is the number of lines inserted here in file 1.
95 
96    If DELETED is 0 then LINE0 is the number of the line before
97    which the insertion was done; vice versa for INSERTED and LINE1.  */
98 
99     struct change {
100         change *link; /* Previous or next edit command  */
101         GNULineRef inserted; /* # lines of file 1 changed here.  */
102         GNULineRef deleted;  /* # lines of file 0 changed here.  */
103         GNULineRef line0;    /* Line number of 1st deleted line.  */
104         GNULineRef line1;    /* Line number of 1st inserted line.  */
105         bool ignore;         /* Flag used in context.c.  */
106     };
107 
108     /* Structures that describe the input files.  */
109 
110     /* Data on one input file being compared.  */
111 
112     struct file_data {
113         /* Buffer in which text of file is read.  */
114         const QChar *buffer;
115 
116         /* Allocated size of buffer, in QChars.  Always a multiple of
117        sizeof(*buffer).  */
118         size_t bufsize;
119 
120         /* Number of valid bytes now in the buffer.  */
121         size_t buffered;
122 
123         /* Array of pointers to lines in the file.  */
124         const QChar **linbuf;
125 
126         /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
127        linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
128        linebuf[linbuf_base ... valid_lines - 1] contain valid data.
129        linebuf[linbuf_base ... alloc_lines - 1] are allocated.  */
130         GNULineRef linbuf_base, buffered_lines, valid_lines, alloc_lines;
131 
132         /* Pointer to end of prefix of this file to ignore when hashing.  */
133         const QChar *prefix_end;
134 
135         /* Count of lines in the prefix.
136        There are this many lines in the file before linbuf[0].  */
137         GNULineRef prefix_lines;
138 
139         /* Pointer to start of suffix of this file to ignore when hashing.  */
140         const QChar *suffix_begin;
141 
142         /* Vector, indexed by line number, containing an equivalence code for
143        each line.  It is this vector that is actually compared with that
144        of another file to generate differences.  */
145         GNULineRef *equivs;
146 
147         /* Vector, like the previous one except that
148        the elements for discarded lines have been squeezed out.  */
149         GNULineRef *undiscarded;
150 
151         /* Vector mapping virtual line numbers (not counting discarded lines)
152        to real ones (counting those lines).  Both are origin-0.  */
153         GNULineRef *realindexes;
154 
155         /* Total number of nondiscarded lines.  */
156         GNULineRef nondiscarded_lines;
157 
158         /* Vector, indexed by real origin-0 line number,
159        containing TRUE for a line that is an insertion or a deletion.
160        The results of comparison are stored here.  */
161         bool *changed;
162 
163         /* 1 if at end of file.  */
164         bool eof;
165 
166         /* 1 more than the maximum equivalence value used for this or its
167        sibling file.  */
168         GNULineRef equiv_max;
169     };
170 
171     /* Data on two input files being compared.  */
172 
173     struct comparison {
174         file_data file[2];
175         comparison const *parent; /* parent, if a recursive comparison */
176     };
177 
178     /* Describe the two files currently being compared.  */
179 
180     file_data files[2];
181 
182     /* Declare various functions.  */
183 
184     /* analyze.c */
185     change *diff_2_files(comparison *);
186     /* io.c */
187     bool read_files(file_data[], bool);
188 
189     /* util.c */
190     bool lines_differ(const QChar *, size_t, const QChar *, size_t);
191     void *zalloc(size_t);
192 
193   private:
194     // gnudiff_analyze.cpp
195     GNULineRef diag(GNULineRef xoff, GNULineRef xlim, GNULineRef yoff, GNULineRef ylim, bool find_minimal, struct partition *part) const;
196     void compareseq(GNULineRef xoff, GNULineRef xlim, GNULineRef yoff, GNULineRef ylim, bool find_minimal);
197     void discard_confusing_lines(file_data filevec[]);
198     void shift_boundaries(file_data filevec[]);
199     change *add_change(GNULineRef line0, GNULineRef line1, GNULineRef deleted, GNULineRef inserted, change *old);
200     change *build_reverse_script(file_data const filevec[]);
201     change *build_script(file_data const filevec[]);
202 
203     // gnudiff_io.cpp
204     GNULineRef guess_lines(GNULineRef n, size_t s, size_t t);
205     void find_and_hash_each_line(file_data *current);
206     void find_identical_ends(file_data filevec[]);
207 
208     // gnudiff_xmalloc.cpp
209     void *xmalloc(size_t n);
210     void *xrealloc(void *p, size_t n);
211     void xalloc_die();
212 }; // class GnuDiff
213 
214 #endif
215