1-- 2-- Copyright (c) 2013-2019 Nicola Bonelli <nicola@pfq.io> 3-- 4-- This program is free software; you can redistribute it and/or modify 5-- it under the terms of the GNU General Public License as published by 6-- the Free Software Foundation; either version 2 of the License, or 7-- (at your option) any later version. 8-- 9-- This program is distributed in the hope that it will be useful, 10-- but WITHOUT ANY WARRANTY; without even the implied warranty of 11-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12-- GNU General Public License for more details. 13-- 14-- You should have received a copy of the GNU General Public License 15-- along with this program; if not, write to the Free Software 16-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17-- 18 19 20module CGrep.Distance (distance, (~==)) where 21 22-- from http://www.haskell.org/haskellwiki/Edit_distance 23-- 24 25distance :: Eq a => [a] -> [a] -> Int 26distance a b 27 = last (if lab == 0 then mainDiag 28 else if lab > 0 then lowers !! (lab - 1) 29 else {- < 0 -} uppers !! (-1 - lab)) 30 where mainDiag = oneDiag a b (head uppers) (-1 : head lowers) 31 uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals 32 lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals 33 eachDiag _a [] _diags = [] 34 eachDiag a' (_bch:bs) (lastDiag:diags) = oneDiag a' bs nextDiag lastDiag : eachDiag a' bs diags 35 where nextDiag = head (tail diags) 36 eachDiag _ _ [] = undefined -- the original implementation does not cover this case... 37 oneDiag a' b' diagAbove diagBelow = thisdiag 38 where doDiag [] _b _nw _n _w = [] 39 doDiag _a [] _nw _n _w = [] 40 doDiag (ach:as) (bch:bs) nw n w = me : doDiag as bs me (tail n) (tail w) 41 where me = if ach == bch then nw else 1 + min3 (head w) nw (head n) 42 firstelt = 1 + head diagBelow 43 thisdiag = firstelt : doDiag a' b' firstelt diagAbove (tail diagBelow) 44 lab = length a - length b 45 min3 x y z = if x < y then x else min y z 46 47 48(~==) :: String -> String -> Bool 49a ~== b | len < 5 = dist < 3 50 | otherwise = dist < (len * 40 `div` 100) 51 where len = fromIntegral (length a `min` length b) 52 dist = distance a b 53 54