1{-# LANGUAGE CPP                  #-}
2{-# LANGUAGE OverloadedStrings    #-}
3{-# OPTIONS_GHC -fno-warn-orphans #-}
4
5module Main (main) where
6
7import Data.Ratio
8import Data.Text (Text)
9import Data.Text.Metrics
10import Test.Hspec
11import Test.QuickCheck
12import qualified Data.Text as T
13
14#if !MIN_VERSION_base(4,8,0)
15import Control.Applicative
16#endif
17
18instance Arbitrary Text where
19  arbitrary = T.pack <$> arbitrary
20
21main :: IO ()
22main = hspec spec
23
24spec :: Spec
25spec = do
26  describe "levenshtein" $ do
27    testSwap levenshtein
28    context "with concrete examples" $ do
29      testPair levenshtein "kitten"   "sitting" 3
30      testPair levenshtein "cake"     "drake"   2
31      testPair levenshtein "saturday" "sunday"  3
32      testPair levenshtein "red"      "wax"     3
33#if __GLASGOW_HASKELL__ >= 710
34      testPair levenshtein "a��c"     "abc"     1
35#endif
36      testPair levenshtein "lucky"    "lucky"   0
37      testPair levenshtein ""         ""        0
38  describe "levenshteinNorm" $ do
39    testSwap levenshteinNorm
40    testPair levenshteinNorm "kitten"   "sitting" (4 % 7)
41    testPair levenshteinNorm "cake"     "drake"   (3 % 5)
42    testPair levenshteinNorm "saturday" "sunday"  (5 % 8)
43    testPair levenshteinNorm "red"      "wax"     (0 % 1)
44#if __GLASGOW_HASKELL__ >= 710
45    testPair levenshteinNorm "a��c"     "abc"     (2 % 3)
46#endif
47    testPair levenshteinNorm "lucky"    "lucky"   (1 % 1)
48    testPair levenshteinNorm ""         ""        (1 % 1)
49  describe "damerauLevenshtein" $ do
50    testSwap damerauLevenshtein
51    testPair damerauLevenshtein "veryvery long" "very long" 4
52    testPair damerauLevenshtein "thing"         "think"     1
53    testPair damerauLevenshtein "nose"          "ones"      2
54    testPair damerauLevenshtein "thing"         "sign"      3
55    testPair damerauLevenshtein "red"           "wax"       3
56#if __GLASGOW_HASKELL__ >= 710
57    testPair damerauLevenshtein "a��c"          "abc"       1
58#endif
59    testPair damerauLevenshtein "lucky"         "lucky"     0
60    testPair damerauLevenshtein ""              ""          0
61  describe "damerauLevenshteinNorm" $ do
62    testSwap damerauLevenshteinNorm
63    testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
64    testPair damerauLevenshteinNorm "thing"         "think"     (4 % 5)
65    testPair damerauLevenshteinNorm "nose"          "ones"      (1 % 2)
66    testPair damerauLevenshteinNorm "thing"         "sign"      (2 % 5)
67    testPair damerauLevenshteinNorm "red"           "wax"       (0 % 1)
68#if __GLASGOW_HASKELL__ >= 710
69    testPair damerauLevenshteinNorm "a��c"          "abc"       (2 % 3)
70#endif
71    testPair damerauLevenshteinNorm "lucky"         "lucky"     (1 % 1)
72    testPair damerauLevenshteinNorm ""              ""          (1 % 1)
73  describe "hamming" $ do
74    testSwap hamming
75    testPair hamming "karolin" "kathrin" (Just 3)
76    testPair hamming "karolin" "kerstin" (Just 3)
77    testPair hamming "1011101" "1001001" (Just 2)
78    testPair hamming "2173896" "2233796" (Just 3)
79    testPair hamming "toned"   "roses"   (Just 3)
80    testPair hamming "red"     "wax"     (Just 3)
81#if __GLASGOW_HASKELL__ >= 710
82    testPair hamming "a��c"    "abc"      (Just 1)
83#endif
84    testPair hamming "lucky"   "lucky"   (Just 0)
85    testPair hamming ""        ""        (Just 0)
86    testPair hamming "small"   "big"     Nothing
87  describe "jaro" $ do
88    testPair jaro "aa"     "a"        (5  % 6)
89    testPair jaro "a"      "aa"       (5  % 6)
90    testPair jaro "martha" "marhta"   (17 % 18)
91    testPair jaro "marhta" "martha"   (17 % 18)
92    testPair jaro "dwayne" "duane"    (37 % 45)
93    testPair jaro "duane"  "dwayne"   (37 % 45)
94    testPair jaro "dixon"  "dicksonx" (23 % 30)
95    testPair jaro "dicksonx" "dixon"  (23 % 30)
96    testPair jaro "jones"  "johnson"  (83 % 105)
97    testPair jaro "johnson" "jones"   (83 % 105)
98    testPair jaro "brain"  "brian"    (14 % 15)
99    testPair jaro "brian"  "brain"    (14 % 15)
100    testPair jaro "five"   "ten"      (0  % 1)
101    testPair jaro "ten"    "five"     (0  % 1)
102    testPair jaro "lucky"  "lucky"    (1  % 1)
103#if __GLASGOW_HASKELL__ >= 710
104    testPair jaro "a��c"   "abc"      (7  % 9)
105#endif
106    testPair jaro ""       ""         (0  % 1)
107  describe "jaroWinkler" $ do
108    testPair jaroWinkler "aa" "a"            (17 % 20)
109    testPair jaroWinkler "a"  "aa"           (17 % 20)
110    testPair jaroWinkler "martha" "marhta"   (173 % 180)
111    testPair jaroWinkler "marhta" "martha"   (173 % 180)
112    testPair jaroWinkler "dwayne" "duane"    (21 % 25)
113    testPair jaroWinkler "duane"  "dwayne"   (21 % 25)
114    testPair jaroWinkler "dixon"  "dicksonx" (61 % 75)
115    testPair jaroWinkler "dicksonx" "dixon"  (61 % 75)
116    testPair jaroWinkler "jones"  "johnson"  (437 % 525)
117    testPair jaroWinkler "johnson" "jones"   (437 % 525)
118    testPair jaroWinkler "brain"  "brian"    (71 % 75)
119    testPair jaroWinkler "brian"  "brain"    (71 % 75)
120    testPair jaroWinkler "five"   "ten"      (0  % 1)
121    testPair jaroWinkler "ten"    "five"     (0  % 1)
122    testPair jaroWinkler "lucky"  "lucky"    (1  % 1)
123#if __GLASGOW_HASKELL__ >= 710
124    testPair jaroWinkler "a��c"   "abc"      (4  % 5)
125#endif
126    testPair jaroWinkler ""       ""         (0  % 1)
127  describe "overlap" $ do
128    testSwap overlap
129    testPair overlap "fly"     "butterfly" (1 % 1)
130    testPair overlap "night"   "nacht"     (3 % 5)
131    testPair overlap "context" "contact"   (5 % 7)
132    testPair overlap "red"     "wax"       (0 % 1)
133#if __GLASGOW_HASKELL__ >= 710
134    testPair overlap "a��c"   "abc"   (2 % 3)
135#endif
136    testPair overlap "lucky" "lucky" (1 % 1)
137  describe "jaccard" $ do
138    testSwap jaccard
139    testPair jaccard "xxx"     "xyx"     (1 % 2)
140    testPair jaccard "night"   "nacht"   (3 % 7)
141    testPair jaccard "context" "contact" (5 % 9)
142#if __GLASGOW_HASKELL__ >= 710
143    testPair overlap "a��c"     "abc"     (2 % 3)
144#endif
145    testPair jaccard "lucky"   "lucky"   (1 % 1)
146
147-- | Test that given function returns the same results when order of
148-- arguments is swapped.
149
150testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
151testSwap f = context "if we swap the arguments" $
152  it "produces the same result" $
153    property $ \a b -> f a b === f b a
154
155-- | Create spec for given metric function applying it to two 'Text' values
156-- and comparing the result with expected one.
157
158testPair :: (Eq a, Show a)
159  => (Text -> Text -> a) -- ^ Function to test
160  -> Text              -- ^ First input
161  -> Text              -- ^ Second input
162  -> a                 -- ^ Expected result
163  -> SpecWith ()
164testPair f a b r = it ("‘" ++ T.unpack a ++ "’ and ‘" ++ T.unpack b ++ "’") $
165  f a b `shouldBe` r
166