1{-# LANGUAGE PatternGuards #-} 2module Text.EditDistance.Tests.EditOperationOntology where 3 4import Text.EditDistance.EditCosts 5 6import Test.QuickCheck 7import Control.Monad 8 9class Arbitrary ops => EditOperation ops where 10 edit :: String -> ops -> Gen (String, EditCosts -> Int) 11 containsTransposition :: ops -> Bool 12 13instance EditOperation op => EditOperation [op] where 14 edit ys ops = foldM (\(xs, c) op -> fmap (\(xs', cost') -> (xs', \ecs -> c ecs + cost' ecs)) $ edit xs op) (ys, const 0) ops 15 containsTransposition = any containsTransposition 16 17 18data EditedString ops = MkEditedString { 19 oldString :: String, 20 newString :: String, 21 operations :: ops, 22 esCost :: EditCosts -> Int 23} 24 25instance Show ops => Show (EditedString ops) where 26 show (MkEditedString old_string new_string ops _cost) = show old_string ++ " ==> " ++ show new_string ++ " (by " ++ show ops ++ ")" 27 28instance EditOperation ops => Arbitrary (EditedString ops) where 29 arbitrary = do 30 old_string <- arbitrary 31 edit_operations <- arbitrary 32 (new_string, cost) <- edit old_string edit_operations 33 return $ MkEditedString { 34 oldString = old_string, 35 newString = new_string, 36 operations = edit_operations, 37 esCost = cost 38 } 39 40 41data ExtendedEditOperation = Deletion 42 | Insertion Char 43 | Substitution Char 44 | Transposition 45 deriving (Show) 46 47instance Arbitrary ExtendedEditOperation where 48 arbitrary = oneof [return Deletion, fmap Insertion arbitrary, fmap Substitution arbitrary, return Transposition] 49 50instance EditOperation ExtendedEditOperation where 51 edit str op = do 52 let max_split_ix | Transposition <- op = length str - 1 53 | otherwise = length str 54 split_ix <- choose (1, max_split_ix) 55 let (str_l, str_r) = splitAt split_ix str 56 non_null = not $ null str 57 transposable = length str > 1 58 case op of 59 Deletion | non_null -> do 60 let old_ch = last str_l 61 return (init str_l ++ str_r, \ec -> deletionCost ec old_ch) 62 Insertion new_ch | non_null -> do 63 return (str_l ++ new_ch : str_r, \ec -> insertionCost ec new_ch) 64 Insertion new_ch | otherwise -> return ([new_ch], \ec -> insertionCost ec new_ch) -- Need special case because randomR (1, 0) is undefined 65 Substitution new_ch | non_null -> do 66 let old_ch = last str_l 67 return (init str_l ++ new_ch : str_r, \ec -> substitutionCost ec old_ch new_ch) 68 Transposition | transposable -> do -- Need transposable rather than non_null because randomR (1, 0) is undefined 69 let backwards_ch = head str_r 70 forwards_ch = last str_l 71 return (init str_l ++ backwards_ch : forwards_ch : tail str_r, \ec -> transpositionCost ec backwards_ch forwards_ch) 72 _ -> return (str, const 0) 73 74 containsTransposition Transposition = True 75 containsTransposition _ = False 76 77 78-- This all really sucks but I can't think of something better right now 79newtype BasicEditOperation = MkBasic ExtendedEditOperation 80 81instance Show BasicEditOperation where 82 show (MkBasic x) = show x 83 84instance Arbitrary BasicEditOperation where 85 arbitrary = fmap MkBasic $ oneof [return Deletion, fmap Insertion arbitrary, fmap Substitution arbitrary] 86 87instance EditOperation BasicEditOperation where 88 edit str (MkBasic op) = edit str op 89 containsTransposition _ = False 90