1----------------------------------------------------------------------------- 2-- | 3-- Module : Data.Algorithm.Diff 4-- Copyright : (c) Sterling Clover 2008-2011, Kevin Charter 2011 5-- License : BSD 3 Clause 6-- Maintainer : s.clover@gmail.com 7-- Stability : experimental 8-- Portability : portable 9-- 10-- This is an implementation of the O(ND) diff algorithm as described in 11-- \"An O(ND) Difference Algorithm and Its Variations (1986)\" 12-- <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927>. It is O(mn) in space. 13-- The algorithm is the same one used by standared Unix diff. 14----------------------------------------------------------------------------- 15 16module Data.Algorithm.Diff 17 ( Diff, PolyDiff(..) 18 -- * Comparing lists for differences 19 , getDiff 20 , getDiffBy 21 22 -- * Finding chunks of differences 23 , getGroupedDiff 24 , getGroupedDiffBy 25 ) where 26 27import Prelude hiding (pi) 28 29import Data.Array (listArray, (!)) 30 31data DI = F | S | B deriving (Show, Eq) 32 33-- | A value is either from the 'First' list, the 'Second' or from 'Both'. 34-- 'Both' contains both the left and right values, in case you are using a form 35-- of equality that doesn't check all data (for example, if you are using a 36-- newtype to only perform equality on side of a tuple). 37data PolyDiff a b = First a | Second b | Both a b 38 deriving (Show, Eq) 39 40-- | This is 'PolyDiff' specialized so both sides are the same type. 41type Diff a = PolyDiff a a 42 43data DL = DL {poi :: !Int, poj :: !Int, path::[DI]} deriving (Show, Eq) 44 45instance Ord DL 46 where x <= y = if poi x == poi y 47 then poj x > poj y 48 else poi x <= poi y 49 50canDiag :: (a -> b -> Bool) -> [a] -> [b] -> Int -> Int -> Int -> Int -> Bool 51canDiag eq as bs lena lenb = \ i j -> 52 if i < lena && j < lenb then (arAs ! i) `eq` (arBs ! j) else False 53 where arAs = listArray (0,lena - 1) as 54 arBs = listArray (0,lenb - 1) bs 55 56dstep :: (Int -> Int -> Bool) -> [DL] -> [DL] 57dstep cd dls = hd:pairMaxes rst 58 where (hd:rst) = nextDLs dls 59 nextDLs [] = [] 60 nextDLs (dl:rest) = dl':dl'':nextDLs rest 61 where dl' = addsnake cd $ dl {poi=poi dl + 1, path=(F : pdl)} 62 dl'' = addsnake cd $ dl {poj=poj dl + 1, path=(S : pdl)} 63 pdl = path dl 64 pairMaxes [] = [] 65 pairMaxes [x] = [x] 66 pairMaxes (x:y:rest) = max x y:pairMaxes rest 67 68addsnake :: (Int -> Int -> Bool) -> DL -> DL 69addsnake cd dl 70 | cd pi pj = addsnake cd $ 71 dl {poi = pi + 1, poj = pj + 1, path=(B : path dl)} 72 | otherwise = dl 73 where pi = poi dl; pj = poj dl 74 75lcs :: (a -> b -> Bool) -> [a] -> [b] -> [DI] 76lcs eq as bs = path . head . dropWhile (\dl -> poi dl /= lena || poj dl /= lenb) . 77 concat . iterate (dstep cd) . (:[]) . addsnake cd $ 78 DL {poi=0,poj=0,path=[]} 79 where cd = canDiag eq as bs lena lenb 80 lena = length as; lenb = length bs 81 82-- | Takes two lists and returns a list of differences between them. This is 83-- 'getDiffBy' with '==' used as predicate. 84getDiff :: (Eq a) => [a] -> [a] -> [Diff a] 85getDiff = getDiffBy (==) 86 87-- | Takes two lists and returns a list of differences between them, grouped 88-- into chunks. This is 'getGroupedDiffBy' with '==' used as predicate. 89getGroupedDiff :: (Eq a) => [a] -> [a] -> [Diff [a]] 90getGroupedDiff = getGroupedDiffBy (==) 91 92-- | A form of 'getDiff' with no 'Eq' constraint. Instead, an equality predicate 93-- is taken as the first argument. 94getDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff a b] 95getDiffBy eq a b = markup a b . reverse $ lcs eq a b 96 where markup (x:xs) ys (F:ds) = First x : markup xs ys ds 97 markup xs (y:ys) (S:ds) = Second y : markup xs ys ds 98 markup (x:xs) (y:ys) (B:ds) = Both x y : markup xs ys ds 99 markup _ _ _ = [] 100 101getGroupedDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff [a] [b]] 102getGroupedDiffBy eq a b = go $ getDiffBy eq a b 103 where go (First x : xs) = let (fs, rest) = goFirsts xs in First (x:fs) : go rest 104 go (Second x : xs) = let (fs, rest) = goSeconds xs in Second (x:fs) : go rest 105 go (Both x y : xs) = let (fs, rest) = goBoth xs 106 (fxs, fys) = unzip fs 107 in Both (x:fxs) (y:fys) : go rest 108 go [] = [] 109 110 goFirsts (First x : xs) = let (fs, rest) = goFirsts xs in (x:fs, rest) 111 goFirsts xs = ([],xs) 112 113 goSeconds (Second x : xs) = let (fs, rest) = goSeconds xs in (x:fs, rest) 114 goSeconds xs = ([],xs) 115 116 goBoth (Both x y : xs) = let (fs, rest) = goBoth xs in ((x,y):fs, rest) 117 goBoth xs = ([],xs) 118