1{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, MagicHash,
2    Rank2Types, UnboxedTuples #-}
3{-# LANGUAGE DeriveGeneric, ScopedTypeVariables #-}
4
5-- | QuickCheck tests for the 'Data.Hashable' module.  We test
6-- functions by comparing the C and Haskell implementations.
7
8module Properties (properties) where
9
10import Data.Hashable (Hashable, hash, hashByteArray, hashPtr,
11         Hashed, hashed, unhashed, hashWithSalt)
12import Data.Hashable.Generic (genericHashWithSalt)
13import Data.Hashable.Lifted (hashWithSalt1)
14import qualified Data.ByteString as B
15import qualified Data.ByteString.Lazy as BL
16import qualified Data.Text as T
17import qualified Data.Text.Lazy as TL
18import Data.List (nub)
19import Control.Monad (ap, liftM)
20import System.IO.Unsafe (unsafePerformIO)
21import Foreign.Marshal.Array (withArray)
22import GHC.Base (ByteArray#, Int(..), newByteArray#, unsafeCoerce#,
23                 writeWord8Array#)
24import GHC.ST (ST(..), runST)
25import GHC.Word (Word8(..))
26import Test.QuickCheck hiding ((.&.))
27import Test.Framework (Test, testGroup)
28import Test.Framework.Providers.QuickCheck2 (testProperty)
29import GHC.Generics
30
31#if MIN_VERSION_bytestring(0,10,4)
32import qualified Data.ByteString.Short as BS
33#endif
34
35------------------------------------------------------------------------
36-- * Properties
37
38instance Arbitrary T.Text where
39    arbitrary = T.pack `fmap` arbitrary
40
41instance Arbitrary TL.Text where
42    arbitrary = TL.pack `fmap` arbitrary
43
44instance Arbitrary B.ByteString where
45    arbitrary   = B.pack `fmap` arbitrary
46
47instance Arbitrary BL.ByteString where
48    arbitrary   = sized $ \n -> resize (round (sqrt (toEnum n :: Double)))
49                  ((BL.fromChunks . map (B.pack . nonEmpty)) `fmap` arbitrary)
50      where nonEmpty (NonEmpty a) = a
51
52#if MIN_VERSION_bytestring(0,10,4)
53instance Arbitrary BS.ShortByteString where
54    arbitrary   = BS.pack `fmap` arbitrary
55#endif
56
57-- | Validate the implementation by comparing the C and Haskell
58-- versions.
59pHash :: [Word8] -> Bool
60pHash xs = unsafePerformIO $ withArray xs $ \ p ->
61    (hashByteArray (fromList xs) 0 len ==) `fmap` hashPtr p len
62  where len = length xs
63
64-- | Content equality implies hash equality.
65pText :: T.Text -> T.Text -> Bool
66pText a b = if (a == b) then (hash a == hash b) else True
67
68-- | Content equality implies hash equality.
69pTextLazy :: TL.Text -> TL.Text -> Bool
70pTextLazy a b = if (a == b) then (hash a == hash b) else True
71
72-- | A small positive integer.
73newtype ChunkSize = ChunkSize { unCS :: Int }
74    deriving (Eq, Ord, Num, Integral, Real, Enum)
75
76instance Show ChunkSize where show = show . unCS
77
78instance Arbitrary ChunkSize where
79    arbitrary = (ChunkSize . (`mod` maxChunkSize)) `fmap`
80                (arbitrary `suchThat` ((/=0) . (`mod` maxChunkSize)))
81        where maxChunkSize = 16
82
83-- | Ensure that the rechunk function causes a rechunked string to
84-- still match its original form.
85pTextRechunk :: T.Text -> NonEmptyList ChunkSize -> Bool
86pTextRechunk t cs = TL.fromStrict t == rechunkText t cs
87
88-- | Lazy strings must hash to the same value no matter how they are
89-- chunked.
90pTextLazyRechunked :: T.Text
91                   -> NonEmptyList ChunkSize -> NonEmptyList ChunkSize -> Bool
92pTextLazyRechunked t cs0 cs1 =
93    hash (rechunkText t cs0) == hash (rechunkText t cs1)
94
95-- | Break up a string into chunks of different sizes.
96rechunkText :: T.Text -> NonEmptyList ChunkSize -> TL.Text
97rechunkText t0 (NonEmpty cs0) = TL.fromChunks . go t0 . cycle $ cs0
98  where
99    go t _ | T.null t = []
100    go t (c:cs)       = a : go b cs
101      where (a,b)     = T.splitAt (unCS c) t
102    go _ []           = error "Properties.rechunk - The 'impossible' happened!"
103
104#if MIN_VERSION_bytestring(0,10,4)
105-- | Content equality implies hash equality.
106pBSShort :: BS.ShortByteString -> BS.ShortByteString -> Bool
107pBSShort a b = if (a == b) then (hash a == hash b) else True
108#endif
109
110-- | Content equality implies hash equality.
111pBS :: B.ByteString -> B.ByteString -> Bool
112pBS a b = if (a == b) then (hash a == hash b) else True
113
114-- | Content equality implies hash equality.
115pBSLazy :: BL.ByteString -> BL.ByteString -> Bool
116pBSLazy a b = if (a == b) then (hash a == hash b) else True
117
118-- | Break up a string into chunks of different sizes.
119rechunkBS :: B.ByteString -> NonEmptyList ChunkSize -> BL.ByteString
120rechunkBS t0 (NonEmpty cs0) = BL.fromChunks . go t0 . cycle $ cs0
121  where
122    go t _ | B.null t = []
123    go t (c:cs)       = a : go b cs
124      where (a,b)     = B.splitAt (unCS c) t
125    go _ []           = error "Properties.rechunkBS - The 'impossible' happened!"
126
127-- | Ensure that the rechunk function causes a rechunked string to
128-- still match its original form.
129pBSRechunk :: B.ByteString -> NonEmptyList ChunkSize -> Bool
130pBSRechunk t cs = fromStrict t == rechunkBS t cs
131
132-- | Lazy bytestrings must hash to the same value no matter how they
133-- are chunked.
134pBSLazyRechunked :: B.ByteString
135                 -> NonEmptyList ChunkSize -> NonEmptyList ChunkSize -> Bool
136pBSLazyRechunked t cs1 cs2 = hash (rechunkBS t cs1) == hash (rechunkBS t cs2)
137
138-- This wrapper is required by 'runST'.
139data ByteArray = BA { unBA :: ByteArray# }
140
141-- | Create a 'ByteArray#' from a list of 'Word8' values.
142fromList :: [Word8] -> ByteArray#
143fromList xs0 = unBA (runST $ ST $ \ s1# ->
144    case newByteArray# len# s1# of
145        (# s2#, marr# #) -> case go s2# 0 marr# xs0 of
146            s3# -> (# s3#, BA (unsafeCoerce# marr#) #))
147  where
148    !(I# len#) = length xs0
149    go s# _         _     []           = s#
150    go s# i@(I# i#) marr# ((W8# x):xs) =
151        case writeWord8Array# marr# i# x s# of
152            s2# -> go s2# (i + 1) marr# xs
153
154-- Generics
155
156data Product2 a b = Product2 a b
157                    deriving (Generic)
158
159instance (Arbitrary a, Arbitrary b) => Arbitrary (Product2 a b) where
160    arbitrary = Product2 `liftM` arbitrary `ap` arbitrary
161
162instance (Hashable a, Hashable b) => Hashable (Product2 a b)
163
164data Product3 a b c = Product3 a b c
165                    deriving (Generic)
166
167instance (Arbitrary a, Arbitrary b, Arbitrary c) =>
168    Arbitrary (Product3 a b c) where
169    arbitrary = Product3 `liftM` arbitrary `ap` arbitrary `ap` arbitrary
170
171instance (Hashable a, Hashable b, Hashable c) => Hashable (Product3 a b c)
172
173-- Hashes of all product types of the same shapes should be the same.
174
175pProduct2 :: Int -> String -> Bool
176pProduct2 x y = hash (x, y) == hash (Product2 x y)
177
178pProduct3 :: Double -> Maybe Bool -> (Int, String) -> Bool
179pProduct3 x y z = hash (x, y, z) == hash (Product3 x y z)
180
181data Sum2 a b = S2a a | S2b b
182                deriving (Eq, Ord, Show, Generic)
183
184instance (Hashable a, Hashable b) => Hashable (Sum2 a b)
185
186data Sum3 a b c = S3a a | S3b b | S3c c
187                  deriving (Eq, Ord, Show, Generic)
188
189instance (Arbitrary a, Arbitrary b, Arbitrary c) => Arbitrary (Sum3 a b c) where
190  arbitrary = oneof
191    [ fmap S3a arbitrary
192    , fmap S3b arbitrary
193    , fmap S3c arbitrary
194    ]
195
196instance (Hashable a, Hashable b, Hashable c) => Hashable (Sum3 a b c)
197
198-- Hashes of the same parameter, but with different sum constructors,
199-- should differ. (They might legitimately collide, but that's
200-- vanishingly unlikely.)
201
202pSum2_differ :: Int -> Bool
203pSum2_differ x = nub hs == hs
204  where hs = [ hash (S2a x :: Sum2 Int Int)
205             , hash (S2b x :: Sum2 Int Int) ]
206
207pSum3_differ :: Int -> Bool
208pSum3_differ x = nub hs == hs
209  where hs = [ hash (S3a x :: Sum3 Int Int Int)
210             , hash (S3b x :: Sum3 Int Int Int)
211             , hash (S3c x :: Sum3 Int Int Int) ]
212
213pGeneric :: Sum3 Int Bool String -> Int -> Bool
214pGeneric x salt = hashWithSalt salt x == genericHashWithSalt salt x
215
216instance (Arbitrary a, Hashable a) => Arbitrary (Hashed a) where
217  arbitrary = fmap hashed arbitrary
218  shrink xs = map hashed $ shrink $ unhashed xs
219
220pLiftedHashed :: Int -> Hashed (Either Int String) -> Bool
221pLiftedHashed s h = hashWithSalt s h == hashWithSalt1 s h
222
223properties :: [Test]
224properties =
225    [ testProperty "bernstein" pHash
226    , testGroup "text"
227      [ testProperty "text/strict" pText
228      , testProperty "text/lazy" pTextLazy
229      , testProperty "text/rechunk" pTextRechunk
230      , testProperty "text/rechunked" pTextLazyRechunked
231      ]
232    , testGroup "bytestring"
233      [ testProperty "bytestring/strict" pBS
234      , testProperty "bytestring/lazy" pBSLazy
235#if MIN_VERSION_bytestring(0,10,4)
236      , testProperty "bytestring/short" pBSShort
237#endif
238      , testProperty "bytestring/rechunk" pBSRechunk
239      , testProperty "bytestring/rechunked" pBSLazyRechunked
240      ]
241    , testGroup "generics"
242      [
243      -- Note: "product2" and "product3" have been temporarily
244      -- disabled until we have added a 'hash' method to the GHashable
245      -- class. Until then (a,b) hashes to a different value than (a
246      -- :*: b). While this is not incorrect, it would be nicer if
247      -- they didn't. testProperty "product2" pProduct2 , testProperty
248      -- "product3" pProduct3
249        testProperty "sum2_differ" pSum2_differ
250      , testProperty "sum3_differ" pSum3_differ
251      , testProperty "genericHashWithSalt" pGeneric
252      ]
253    , testGroup "lifted law"
254      [ testProperty "Hashed" pLiftedHashed
255      ]
256    ]
257
258------------------------------------------------------------------------
259-- Utilities
260
261fromStrict :: B.ByteString -> BL.ByteString
262#if MIN_VERSION_bytestring(0,10,0)
263fromStrict = BL.fromStrict
264#else
265fromStrict b = BL.fromChunks [b]
266#endif
267