1 /* 2 * This file is part of the LibreOffice project. 3 * 4 * This Source Code Form is subject to the terms of the Mozilla Public 5 * License, v. 2.0. If a copy of the MPL was not distributed with this 6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 * 8 * This file incorporates work covered by the following license notice: 9 * 10 * Licensed to the Apache Software Foundation (ASF) under one or more 11 * contributor license agreements. See the NOTICE file distributed 12 * with this work for additional information regarding copyright 13 * ownership. The ASF licenses this file to you under the Apache 14 * License, Version 2.0 (the "License"); you may not use this file 15 * except in compliance with the License. You may obtain a copy of 16 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 17 */ 18 19 package org.openoffice.xmerge.merger.diff; 20 21 import java.util.ArrayList; 22 import org.openoffice.xmerge.merger.DiffAlgorithm; 23 import org.openoffice.xmerge.merger.Difference; 24 import org.openoffice.xmerge.merger.Iterator; 25 import org.openoffice.xmerge.util.Debug; 26 27 /** 28 * This is one of the implementations of {@code DiffAlgorithm} interface. 29 * 30 * <p>Using Longest Common Subsequence (LCS). The algorithm here is based on the 31 * book "Introduction to Algorithms" by Thomas H.Cormen, Charles E.Leiserson and 32 * Ronald L.Riverst (MIT Press 1990) page 314.</p> 33 */ 34 public class IteratorLCSAlgorithm implements DiffAlgorithm { 35 computeDiffs(Iterator orgSeq, Iterator modSeq)36 public Difference[] computeDiffs(Iterator orgSeq, Iterator modSeq) { 37 38 int orgSeqlen = orgSeq.elementCount(); 39 int modSeqlen = modSeq.elementCount(); 40 41 // Diff table is used to keep track which element is the same or not 42 // in those 2 sequences 43 int[][] diffTable = createDiffTable(orgSeq, modSeq); 44 45 // debug purpose... 46 if (Debug.isFlagSet(Debug.INFO)) { 47 printDiffTable(diffTable); 48 } 49 50 ArrayList<Difference> diffResult = new ArrayList<Difference>(); 51 52 generateResult(diffTable, orgSeqlen, modSeqlen, diffResult); 53 54 // convert the vector to array, it has to do in here as 55 // generateResult is called recursively 56 Difference[] diffArray = new Difference[diffResult.size()]; 57 diffResult.toArray(diffArray); 58 59 return diffArray; 60 } 61 62 /** 63 * Debug function used to print out the nicely formatted difference table. 64 * 65 * @param diffTable The difference table to display. 66 */ printDiffTable(int[][] diffTable)67 private void printDiffTable(int[][] diffTable) { 68 69 for (int i = 0; i < diffTable.length; i++) { 70 StringBuilder sb = new StringBuilder(); 71 for (int j = 0; j < diffTable[i].length; j++) { 72 sb.append(" ").append(diffTable[i][j]).append(" "); 73 } 74 Debug.log(Debug.INFO, sb.toString()); 75 } 76 } 77 78 /** 79 * Create the difference table. 80 * 81 * <p>The difference table is used internal to keep track what elements are 82 * common or different in the two sequences.</p> 83 * 84 * @param orgSeq The original sequence to be used as a base. 85 * @param modSeq The modified sequence to compare. 86 * 87 * @return A difference table as a two-dimensional array of integers. 88 */ createDiffTable(Iterator orgSeq, Iterator modSeq)89 private int[][] createDiffTable(Iterator orgSeq, Iterator modSeq) { 90 int orgSeqlen = orgSeq.elementCount() + 1; 91 int modSeqlen = modSeq.elementCount() + 1; 92 int[][] diffTable; 93 94 // initialize the diffTable 95 diffTable = new int[orgSeqlen][]; 96 for (int i = 0; i < orgSeqlen; i++) { 97 diffTable[i] = new int[modSeqlen]; 98 } 99 100 // compute the diff Table using LCS algorithm, refer to the book 101 // mentioned at the top of the program 102 103 int i, j; 104 105 Object orgSeqObject, modSeqObject; 106 107 for (orgSeqObject = orgSeq.start(), i = 1; 108 orgSeqObject != null; 109 orgSeqObject = orgSeq.next(), i++) { 110 111 for (modSeqObject = modSeq.start(), j = 1; 112 modSeqObject != null; 113 modSeqObject = modSeq.next(), j++) { 114 115 if (orgSeq.equivalent(orgSeqObject, modSeqObject)) { 116 diffTable[i][j] = diffTable[i-1][j-1]+1; 117 } else { 118 if (diffTable[i-1][j] >= diffTable[i][j-1]) { 119 diffTable[i][j] = diffTable[i-1][j]; 120 } else { 121 diffTable[i][j] = diffTable[i][j-1]; 122 } 123 } 124 } 125 } 126 127 return diffTable; 128 } 129 130 /** 131 * Generate the {@code Difference} object result vector. 132 * 133 * <p>This method will be called recursively to backtrack the difference 134 * table to get the difference result (and also the LCS).</p> 135 * 136 * @param diffTable The difference table containing the {@code Difference} 137 * result. 138 * @param i The nth element in original sequence to compare. This 139 * method is called recursively with {@code i} and 140 * {@code j} decreased until {@code 0}. 141 * @param j The nth element in modified sequence to compare. 142 * @param diffVector A vector to output the {@code Difference} result. 143 * Can not use a return variable as it is a recursive 144 * method. The vector will contain {@code Difference} 145 * objects with operation and positions fill in. 146 */ generateResult(int[][] diffTable, int i, int j, ArrayList<Difference> diffVector)147 private void generateResult(int[][] diffTable, 148 int i, int j, ArrayList<Difference> diffVector) { 149 150 // handle the first element 151 if (i == 0 && j == 0) { 152 return; 153 154 } else if (j == 0) { 155 for (int cnt = 0; cnt < i; cnt++) { 156 Difference diff = 157 new Difference(Difference.DELETE, cnt, j); 158 diffVector.add(diff); 159 } 160 return; 161 162 } else if (i == 0) { 163 for (int cnt = 0; cnt < j; cnt++) { 164 Difference diff = 165 new Difference(Difference.ADD, i, cnt); 166 diffVector.add(diff); 167 } 168 return; 169 } 170 171 // for the detail of this algorithm, refer to the book mentioned on 172 // the top and page 317 and 318. 173 if ((diffTable[i-1][j-1] == diffTable[i][j] -1) && 174 (diffTable[i-1][j-1] == diffTable[i-1][j]) && 175 (diffTable[i-1][j-1] == diffTable[i][j-1])) { 176 177 // the element of ith and jth in org and mod sequence is the same 178 generateResult(diffTable, i-1, j-1, diffVector); 179 } else { 180 if (diffTable[i-1][j] > diffTable[i][j-1]) { 181 182 // recursively call first, then add the result so that 183 // the beginning of the diffs will be stored first 184 generateResult(diffTable, i-1, j, diffVector); 185 186 Difference diff = 187 new Difference(Difference.DELETE, i-1, j); 188 diffVector.add(diff); 189 } else if (diffTable[i-1][j] < diffTable[i][j-1]) { 190 191 // recursively call first, then add the result so that 192 // the beginning of the diffs will be stored first 193 generateResult(diffTable, i, j-1, diffVector); 194 195 Difference diff = 196 new Difference(Difference.ADD, i, j-1); 197 diffVector.add(diff); 198 } else { // diffTable[i-1][j] == diffTable[i][j-1] 199 // recursively call first, then add the result so that 200 // the beginning of the diffs will be stored first 201 generateResult(diffTable, i-1, j-1, diffVector); 202 203 Difference diff = 204 new Difference(Difference.CHANGE, i-1, j-1); 205 diffVector.add(diff); 206 207 } 208 } 209 } 210 } 211