1 /*
2  * This file is part of the LibreOffice project.
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  *
8  * This file incorporates work covered by the following license notice:
9  *
10  *   Licensed to the Apache Software Foundation (ASF) under one or more
11  *   contributor license agreements. See the NOTICE file distributed
12  *   with this work for additional information regarding copyright
13  *   ownership. The ASF licenses this file to you under the Apache
14  *   License, Version 2.0 (the "License"); you may not use this file
15  *   except in compliance with the License. You may obtain a copy of
16  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
17  */
18 
19 package org.openoffice.xmerge.merger.diff;
20 
21 import java.util.ArrayList;
22 import org.openoffice.xmerge.merger.DiffAlgorithm;
23 import org.openoffice.xmerge.merger.Difference;
24 import org.openoffice.xmerge.merger.Iterator;
25 import org.openoffice.xmerge.util.Debug;
26 
27 /**
28  * This is one of the implementations of {@code DiffAlgorithm} interface.
29  *
30  * <p>Using Longest Common Subsequence (LCS). The algorithm here is based on the
31  * book "Introduction to Algorithms" by Thomas H.Cormen, Charles E.Leiserson and
32  * Ronald L.Riverst (MIT Press 1990) page 314.</p>
33  */
34 public class IteratorLCSAlgorithm implements DiffAlgorithm {
35 
computeDiffs(Iterator orgSeq, Iterator modSeq)36     public Difference[] computeDiffs(Iterator orgSeq, Iterator modSeq) {
37 
38         int orgSeqlen = orgSeq.elementCount();
39         int modSeqlen = modSeq.elementCount();
40 
41         // Diff table is used to keep track which element is the same or not
42         // in those 2 sequences
43         int[][] diffTable = createDiffTable(orgSeq, modSeq);
44 
45         // debug purpose...
46         if (Debug.isFlagSet(Debug.INFO)) {
47             printDiffTable(diffTable);
48         }
49 
50         ArrayList<Difference> diffResult = new ArrayList<Difference>();
51 
52         generateResult(diffTable, orgSeqlen, modSeqlen, diffResult);
53 
54         // convert the vector to array, it has to do in here as
55         // generateResult is called recursively
56         Difference[] diffArray = new Difference[diffResult.size()];
57         diffResult.toArray(diffArray);
58 
59         return diffArray;
60     }
61 
62     /**
63      * Debug function used to print out the nicely formatted difference table.
64      *
65      * @param  diffTable  The difference table to display.
66      */
printDiffTable(int[][] diffTable)67     private void printDiffTable(int[][] diffTable) {
68 
69         for (int i = 0; i < diffTable.length; i++) {
70             StringBuilder sb = new StringBuilder();
71             for (int j = 0; j < diffTable[i].length; j++) {
72                sb.append(" ").append(diffTable[i][j]).append(" ");
73             }
74             Debug.log(Debug.INFO, sb.toString());
75         }
76     }
77 
78     /**
79      * Create the difference table.
80      *
81      * <p>The difference table is used internal to keep track what elements are
82      * common or different in the two sequences.</p>
83      *
84      * @param   orgSeq  The original sequence to be used as a base.
85      * @param   modSeq  The modified sequence to compare.
86      *
87      * @return  A difference table as a two-dimensional array of integers.
88      */
createDiffTable(Iterator orgSeq, Iterator modSeq)89     private int[][] createDiffTable(Iterator orgSeq, Iterator modSeq) {
90         int orgSeqlen = orgSeq.elementCount() + 1;
91         int modSeqlen = modSeq.elementCount() + 1;
92         int[][] diffTable;
93 
94         // initialize the diffTable
95         diffTable = new int[orgSeqlen][];
96         for (int i = 0; i < orgSeqlen; i++) {
97             diffTable[i] = new int[modSeqlen];
98         }
99 
100         // compute the diff Table using LCS algorithm, refer to the book
101         // mentioned at the top of the program
102 
103         int i, j;
104 
105         Object orgSeqObject, modSeqObject;
106 
107         for (orgSeqObject = orgSeq.start(), i = 1;
108              orgSeqObject != null;
109              orgSeqObject = orgSeq.next(), i++) {
110 
111             for (modSeqObject = modSeq.start(), j = 1;
112                  modSeqObject != null;
113                  modSeqObject = modSeq.next(), j++) {
114 
115                 if (orgSeq.equivalent(orgSeqObject, modSeqObject)) {
116                     diffTable[i][j] = diffTable[i-1][j-1]+1;
117                 } else {
118                     if (diffTable[i-1][j] >= diffTable[i][j-1]) {
119                         diffTable[i][j] = diffTable[i-1][j];
120                     } else {
121                         diffTable[i][j] = diffTable[i][j-1];
122                     }
123                 }
124             }
125         }
126 
127         return diffTable;
128     }
129 
130     /**
131      * Generate the {@code Difference} object result vector.
132      *
133      * <p>This method will be called recursively to backtrack the difference
134      * table to get the difference result (and also the LCS).</p>
135      *
136      * @param   diffTable   The difference table containing the {@code Difference}
137      *                      result.
138      * @param   i           The nth element in original sequence to compare. This
139      *                      method is called recursively with {@code i} and
140      *                      {@code j} decreased until {@code 0}.
141      * @param   j           The nth element in modified sequence to compare.
142      * @param   diffVector  A vector to output the {@code Difference} result.
143      *                      Can not use a return variable as it is a recursive
144      *                      method.  The vector will contain {@code Difference}
145      *                      objects with operation and positions fill in.
146      */
generateResult(int[][] diffTable, int i, int j, ArrayList<Difference> diffVector)147     private void generateResult(int[][] diffTable,
148                                 int i, int j, ArrayList<Difference> diffVector) {
149 
150         // handle the first element
151         if (i == 0 && j == 0) {
152             return;
153 
154         } else if (j == 0) {
155             for (int cnt = 0; cnt < i; cnt++) {
156                 Difference diff =
157                     new Difference(Difference.DELETE, cnt, j);
158                 diffVector.add(diff);
159             }
160             return;
161 
162         } else if (i == 0) {
163             for (int cnt = 0; cnt < j; cnt++) {
164                 Difference diff =
165                     new Difference(Difference.ADD, i, cnt);
166                 diffVector.add(diff);
167             }
168             return;
169         }
170 
171         // for the detail of this algorithm, refer to the book mentioned on
172         // the top and page 317 and 318.
173         if ((diffTable[i-1][j-1] == diffTable[i][j] -1) &&
174             (diffTable[i-1][j-1] == diffTable[i-1][j]) &&
175             (diffTable[i-1][j-1] == diffTable[i][j-1])) {
176 
177             // the element of ith and jth in org and mod sequence is the same
178             generateResult(diffTable, i-1, j-1, diffVector);
179         } else {
180             if (diffTable[i-1][j] > diffTable[i][j-1]) {
181 
182                 // recursively call first, then add the result so that
183                 // the beginning of the diffs will be stored first
184                 generateResult(diffTable, i-1, j, diffVector);
185 
186                 Difference diff =
187                     new Difference(Difference.DELETE, i-1, j);
188                 diffVector.add(diff);
189             } else if (diffTable[i-1][j] < diffTable[i][j-1]) {
190 
191                 // recursively call first, then add the result so that
192                 // the beginning of the diffs will be stored first
193                 generateResult(diffTable, i, j-1, diffVector);
194 
195                 Difference diff =
196                     new Difference(Difference.ADD, i, j-1);
197                 diffVector.add(diff);
198             } else { // diffTable[i-1][j] == diffTable[i][j-1]
199                 // recursively call first, then add the result so that
200                 // the beginning of the diffs will be stored first
201                 generateResult(diffTable, i-1, j-1, diffVector);
202 
203                 Difference diff =
204                     new Difference(Difference.CHANGE, i-1, j-1);
205                 diffVector.add(diff);
206 
207             }
208         }
209     }
210 }
211