1-----------------------------------------------------------------------------
2-- |
3-- Module      :  Data.Algorithm.Diff
4-- Copyright   :  (c) Sterling Clover 2008-2011, Kevin Charter 2011
5-- License     :  BSD 3 Clause
6-- Maintainer  :  s.clover@gmail.com
7-- Stability   :  experimental
8-- Portability :  portable
9--
10-- This is an implementation of the O(ND) diff algorithm as described in
11-- \"An O(ND) Difference Algorithm and Its Variations (1986)\"
12-- <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927>. It is O(mn) in space.
13-- The algorithm is the same one used by standared Unix diff.
14-----------------------------------------------------------------------------
15
16module Data.Algorithm.Diff
17    ( Diff, PolyDiff(..)
18    -- * Comparing lists for differences
19    , getDiff
20    , getDiffBy
21
22    -- * Finding chunks of differences
23    , getGroupedDiff
24    , getGroupedDiffBy
25    ) where
26
27import Prelude hiding (pi)
28
29import Data.Array (listArray, (!))
30
31data DI = F | S | B deriving (Show, Eq)
32
33-- | A value is either from the 'First' list, the 'Second' or from 'Both'.
34-- 'Both' contains both the left and right values, in case you are using a form
35-- of equality that doesn't check all data (for example, if you are using a
36-- newtype to only perform equality on side of a tuple).
37data PolyDiff a b = First a | Second b | Both a b
38    deriving (Show, Eq)
39
40-- | This is 'PolyDiff' specialized so both sides are the same type.
41type Diff a = PolyDiff a a
42
43data DL = DL {poi :: !Int, poj :: !Int, path::[DI]} deriving (Show, Eq)
44
45instance Ord DL
46        where x <= y = if poi x == poi y
47                then  poj x > poj y
48                else poi x <= poi y
49
50canDiag :: (a -> b -> Bool) -> [a] -> [b] -> Int -> Int -> Int -> Int -> Bool
51canDiag eq as bs lena lenb = \ i j ->
52   if i < lena && j < lenb then (arAs ! i) `eq` (arBs ! j) else False
53    where arAs = listArray (0,lena - 1) as
54          arBs = listArray (0,lenb - 1) bs
55
56dstep :: (Int -> Int -> Bool) -> [DL] -> [DL]
57dstep cd dls = hd:pairMaxes rst
58  where (hd:rst) = nextDLs dls
59        nextDLs [] = []
60        nextDLs (dl:rest) = dl':dl'':nextDLs rest
61          where dl'  = addsnake cd $ dl {poi=poi dl + 1, path=(F : pdl)}
62                dl'' = addsnake cd $ dl {poj=poj dl + 1, path=(S : pdl)}
63                pdl = path dl
64        pairMaxes [] = []
65        pairMaxes [x] = [x]
66        pairMaxes (x:y:rest) = max x y:pairMaxes rest
67
68addsnake :: (Int -> Int -> Bool) -> DL -> DL
69addsnake cd dl
70    | cd pi pj = addsnake cd $
71                 dl {poi = pi + 1, poj = pj + 1, path=(B : path dl)}
72    | otherwise   = dl
73    where pi = poi dl; pj = poj dl
74
75lcs :: (a -> b -> Bool) -> [a] -> [b] -> [DI]
76lcs eq as bs = path . head . dropWhile (\dl -> poi dl /= lena || poj dl /= lenb) .
77            concat . iterate (dstep cd) . (:[]) . addsnake cd $
78            DL {poi=0,poj=0,path=[]}
79            where cd = canDiag eq as bs lena lenb
80                  lena = length as; lenb = length bs
81
82-- | Takes two lists and returns a list of differences between them. This is
83-- 'getDiffBy' with '==' used as predicate.
84getDiff :: (Eq a) => [a] -> [a] -> [Diff a]
85getDiff = getDiffBy (==)
86
87-- | Takes two lists and returns a list of differences between them, grouped
88-- into chunks. This is 'getGroupedDiffBy' with '==' used as predicate.
89getGroupedDiff :: (Eq a) => [a] -> [a] -> [Diff [a]]
90getGroupedDiff = getGroupedDiffBy (==)
91
92-- | A form of 'getDiff' with no 'Eq' constraint. Instead, an equality predicate
93-- is taken as the first argument.
94getDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff a b]
95getDiffBy eq a b = markup a b . reverse $ lcs eq a b
96    where markup (x:xs)   ys   (F:ds) = First x  : markup xs ys ds
97          markup   xs   (y:ys) (S:ds) = Second y : markup xs ys ds
98          markup (x:xs) (y:ys) (B:ds) = Both x y : markup xs ys ds
99          markup _ _ _ = []
100
101getGroupedDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff [a] [b]]
102getGroupedDiffBy eq a b = go $ getDiffBy eq a b
103    where go (First x  : xs) = let (fs, rest) = goFirsts  xs in First  (x:fs)     : go rest
104          go (Second x : xs) = let (fs, rest) = goSeconds xs in Second (x:fs)     : go rest
105          go (Both x y : xs) = let (fs, rest) = goBoth    xs
106                                   (fxs, fys) = unzip fs
107                               in Both (x:fxs) (y:fys) : go rest
108          go [] = []
109
110          goFirsts  (First x  : xs) = let (fs, rest) = goFirsts  xs in (x:fs, rest)
111          goFirsts  xs = ([],xs)
112
113          goSeconds (Second x : xs) = let (fs, rest) = goSeconds xs in (x:fs, rest)
114          goSeconds xs = ([],xs)
115
116          goBoth    (Both x y : xs) = let (fs, rest) = goBoth xs    in ((x,y):fs, rest)
117          goBoth    xs = ([],xs)
118