1{-# LANGUAGE PatternGuards #-}
2module Text.EditDistance.Tests.EditOperationOntology where
3
4import Text.EditDistance.EditCosts
5
6import Test.QuickCheck
7import Control.Monad
8
9class Arbitrary ops => EditOperation ops where
10    edit :: String -> ops -> Gen (String, EditCosts -> Int)
11    containsTransposition :: ops -> Bool
12
13instance EditOperation op => EditOperation [op] where
14   edit ys ops = foldM (\(xs, c) op -> fmap (\(xs', cost') -> (xs', \ecs -> c ecs + cost' ecs)) $ edit xs op) (ys, const 0) ops
15   containsTransposition = any containsTransposition
16
17
18data EditedString ops = MkEditedString {
19    oldString :: String,
20    newString :: String,
21    operations :: ops,
22    esCost :: EditCosts -> Int
23}
24
25instance Show ops => Show (EditedString ops) where
26    show (MkEditedString old_string new_string ops _cost) = show old_string ++ " ==> " ++ show new_string ++ " (by " ++ show ops ++ ")"
27
28instance EditOperation ops => Arbitrary (EditedString ops) where
29    arbitrary = do
30        old_string <- arbitrary
31        edit_operations <- arbitrary
32        (new_string, cost) <- edit old_string edit_operations
33        return $ MkEditedString {
34            oldString = old_string,
35            newString = new_string,
36            operations = edit_operations,
37            esCost = cost
38        }
39
40
41data ExtendedEditOperation = Deletion
42                           | Insertion Char
43                           | Substitution Char
44                           | Transposition
45                           deriving (Show)
46
47instance Arbitrary ExtendedEditOperation where
48    arbitrary = oneof [return Deletion, fmap Insertion arbitrary, fmap Substitution arbitrary, return Transposition]
49
50instance EditOperation ExtendedEditOperation where
51    edit str op = do
52        let max_split_ix | Transposition <- op = length str - 1
53                         | otherwise           = length str
54        split_ix <- choose (1, max_split_ix)
55        let (str_l, str_r) = splitAt split_ix str
56            non_null = not $ null str
57            transposable = length str > 1
58        case op of
59            Deletion | non_null -> do
60                let old_ch = last str_l
61                return (init str_l ++ str_r, \ec -> deletionCost ec old_ch)
62            Insertion new_ch | non_null -> do
63                return (str_l ++ new_ch : str_r, \ec -> insertionCost ec new_ch)
64            Insertion new_ch | otherwise -> return ([new_ch], \ec -> insertionCost ec new_ch)   -- Need special case because randomR (1, 0) is undefined
65            Substitution new_ch | non_null -> do
66                let old_ch = last str_l
67                return (init str_l ++ new_ch : str_r, \ec -> substitutionCost ec old_ch new_ch)
68            Transposition | transposable -> do                  -- Need transposable rather than non_null because randomR (1, 0) is undefined
69                let backwards_ch = head str_r
70                    forwards_ch = last str_l
71                return (init str_l ++ backwards_ch : forwards_ch : tail str_r, \ec -> transpositionCost ec backwards_ch forwards_ch)
72            _ -> return (str, const 0)
73
74    containsTransposition Transposition = True
75    containsTransposition _             = False
76
77
78-- This all really sucks but I can't think of something better right now
79newtype BasicEditOperation = MkBasic ExtendedEditOperation
80
81instance Show BasicEditOperation where
82    show (MkBasic x) = show x
83
84instance Arbitrary BasicEditOperation where
85    arbitrary = fmap MkBasic $ oneof [return Deletion, fmap Insertion arbitrary, fmap Substitution arbitrary]
86
87instance EditOperation BasicEditOperation where
88    edit str (MkBasic op) = edit str op
89    containsTransposition _ = False
90