1{-# LANGUAGE BangPatterns, ScopedTypeVariables, ForeignFunctionInterface #-} 2{-# OPTIONS_GHC -fwarn-unused-imports #-} 3 4-- | A simple script to do some very basic timing of the RNGs. 5 6module Main where 7 8import System.Exit (exitSuccess, exitFailure) 9import System.Environment 10import System.Random 11import System.CPUTime (getCPUTime) 12import System.CPUTime.Rdtsc 13import System.Console.GetOpt 14 15import GHC.Conc 16import Control.Concurrent 17import Control.Monad 18import Control.Exception 19 20import Data.IORef 21import Data.Word 22import Data.List hiding (last,sum) 23import Data.Int 24import Data.List.Split hiding (split) 25import Text.Printf 26 27import Foreign.Ptr 28import Foreign.C.Types 29import Foreign.ForeignPtr 30import Foreign.Storable (peek,poke) 31 32import Prelude hiding (last,sum) 33import BinSearch 34 35---------------------------------------------------------------------------------------------------- 36-- Miscellaneous helpers: 37 38-- Readable large integer printing: 39commaint :: Show a => a -> String 40commaint n = reverse $ concat $ intersperse "," $ chunk 3 $ reverse (show n) 41 42padleft :: Int -> String -> String 43padleft n str | length str >= n = str 44padleft n str | otherwise = take (n - length str) (repeat ' ') ++ str 45 46padright :: Int -> String -> String 47padright n str | length str >= n = str 48padright n str | otherwise = str ++ take (n - length str) (repeat ' ') 49 50fmt_num :: (RealFrac a, PrintfArg a) => a -> String 51fmt_num n = 52 if n < 100 53 then printf "%.2f" n 54 else commaint (round n :: Integer) 55 56 57-- Measure clock frequency, spinning rather than sleeping to try to 58-- stay on the same core. 59measureFreq :: IO Int64 60measureFreq = do 61 let second = 1000 * 1000 * 1000 * 1000 -- picoseconds are annoying 62 t1 <- rdtsc 63 start <- getCPUTime 64 let loop !n !last = do 65 t2 <- rdtsc 66 when (t2 < last) $ putStrLn $ "COUNTERS WRAPPED " ++ show (last, t2) 67 cput <- getCPUTime 68 if cput - start < second 69 then loop (n + 1) t2 70 else return (n, t2) 71 (n, t2) <- loop 0 t1 72 putStrLn $ " Approx getCPUTime calls per second: " ++ commaint (n :: Int64) 73 when (t2 < t1) $ 74 putStrLn $ 75 "WARNING: rdtsc not monotonically increasing, first " ++ 76 show t1 ++ " then " ++ show t2 ++ " on the same OS thread" 77 return $ fromIntegral (t2 - t1) 78 79---------------------------------------------------------------------------------------------------- 80 81-- Test overheads without actually generating any random numbers: 82data NoopRNG = NoopRNG 83instance RandomGen NoopRNG where 84 next g = (0, g) 85 genRange _ = (0, 0) 86 split g = (g, g) 87 88-- An RNG generating only 0 or 1: 89data BinRNG = BinRNG StdGen 90instance RandomGen BinRNG where 91 next (BinRNG g) = (x `mod` 2, BinRNG g') 92 where 93 (x, g') = next g 94 genRange _ = (0, 1) 95 split (BinRNG g) = (BinRNG g1, BinRNG g2) 96 where 97 (g1, g2) = split g 98 99 100---------------------------------------------------------------------------------------------------- 101-- Drivers to get random numbers repeatedly. 102 103type Kern = Int -> Ptr Int -> IO () 104 105-- [2011.01.28] Changing this to take "count" and "accumulator ptr" as arguments: 106-- foreign import ccall "cbits/c_test.c" blast_rands :: Kern 107-- foreign import ccall "cbits/c_test.c" store_loop :: Kern 108-- foreign import ccall unsafe "stdlib.hs" rand :: IO Int 109 110{-# INLINE timeit #-} 111timeit :: (Random a, RandomGen g) => Int -> Int64 -> String -> g -> (g -> (a,g)) -> IO () 112timeit numthreads freq msg gen nxt = do 113 counters <- forM [1 .. numthreads] (const $ newIORef (1 :: Int64)) 114 tids <- forM counters $ \counter -> forkIO $ infloop counter (nxt gen) 115 threadDelay (1000 * 1000) -- One second 116 mapM_ killThread tids 117 finals <- mapM readIORef counters 118 let mean :: Double = 119 fromIntegral (foldl1 (+) finals) / fromIntegral numthreads 120 cycles_per :: Double = fromIntegral freq / mean 121 printResult (round mean :: Int64) msg cycles_per 122 where 123 infloop !counter (!_, !g) = do 124 incr counter 125 infloop counter (nxt g) 126 incr !counter 127 -- modifyIORef counter (+1) -- Not strict enough! 128 = do 129 c <- readIORef counter 130 let c' = c + 1 131 _ <- evaluate c' 132 writeIORef counter c' 133 134 135-- This function times an IO function on one or more threads. Rather 136-- than running a fixed number of iterations, it uses a binary search 137-- to find out how many iterations can be completed in a second. 138timeit_foreign :: Int -> Int64 -> String -> (Int -> Ptr Int -> IO ()) -> IO Int64 139timeit_foreign numthreads freq msg ffn = do 140 ptr :: ForeignPtr Int <- mallocForeignPtr 141 let kern = 142 if numthreads == 1 143 then ffn 144 else replicate_kernel numthreads ffn 145 wrapped n = withForeignPtr ptr (kern $ fromIntegral n) 146 (n, t) <- binSearch False 1 (1.0, 1.05) wrapped 147 let total_per_second = round $ fromIntegral n * (1 / t) 148 cycles_per = fromIntegral freq * t / fromIntegral n 149 printResult total_per_second msg cycles_per 150 return total_per_second 151 -- This lifts a C kernel to operate simultaneously on N threads. 152 where 153 replicate_kernel :: Int -> Kern -> Kern 154 replicate_kernel nthreads kern n ptr = do 155 ptrs <- forM [1 .. nthreads] (const mallocForeignPtr) 156 tmpchan <- newChan 157 -- let childwork = ceiling$ fromIntegral n / fromIntegral nthreads 158 let childwork = n -- Keep it the same.. interested in per-thread throughput. 159 -- Fork/join pattern: 160 forM_ ptrs $ \pt -> 161 forkIO $ 162 withForeignPtr pt $ \p -> do 163 kern (fromIntegral childwork) p 164 result <- peek p 165 writeChan tmpchan result 166 results <- forM [1 .. nthreads] $ \_ -> readChan tmpchan 167 -- Meaningless semantics here... sum the child ptrs and write to the input one: 168 poke ptr (foldl1 (+) results) 169 170 171printResult :: Int64 -> String -> Double -> IO () 172printResult total msg cycles_per = 173 putStrLn $ 174 " " ++ 175 padleft 11 (commaint total) ++ 176 " randoms generated " ++ 177 padright 27 ("[" ++ msg ++ "]") ++ 178 " ~ " ++ fmt_num cycles_per ++ " cycles/int" 179 180---------------------------------------------------------------------------------------------------- 181-- Main Script 182 183data Flag = NoC | Help 184 deriving (Show, Eq) 185 186options :: [OptDescr Flag] 187options = 188 [ Option ['h'] ["help"] (NoArg Help) "print program help" 189 , Option [] ["noC"] (NoArg NoC) "omit C benchmarks, haskell only" 190 ] 191 192main :: IO () 193main = do 194 argv <- getArgs 195 let (opts,_,other) = getOpt Permute options argv 196 197 unless (null other) $ do 198 putStrLn "ERROR: Unrecognized options: " 199 mapM_ putStr other 200 exitFailure 201 202 when (Help `elem` opts) $ do 203 putStr $ usageInfo "Benchmark random number generation" options 204 exitSuccess 205 206 putStrLn "\nHow many random numbers can we generate in a second on one thread?" 207 208 t1 <- rdtsc 209 t2 <- rdtsc 210 putStrLn (" Cost of rdtsc (ffi call): " ++ show (t2 - t1)) 211 212 freq <- measureFreq 213 putStrLn $ " Approx clock frequency: " ++ commaint freq 214 215 let randInt = random :: RandomGen g => g -> (Int,g) 216 randWord16 = random :: RandomGen g => g -> (Word16,g) 217 randFloat = random :: RandomGen g => g -> (Float,g) 218 randCFloat = random :: RandomGen g => g -> (CFloat,g) 219 randDouble = random :: RandomGen g => g -> (Double,g) 220 randCDouble = random :: RandomGen g => g -> (CDouble,g) 221 randInteger = random :: RandomGen g => g -> (Integer,g) 222 randBool = random :: RandomGen g => g -> (Bool,g) 223 randChar = random :: RandomGen g => g -> (Char,g) 224 225 gen = mkStdGen 23852358661234 226 gamut th = do 227 putStrLn " First, timing System.Random.next:" 228 timeit th freq "constant zero gen" NoopRNG next 229 timeit th freq "System.Random stdGen/next" gen next 230 231 putStrLn "\n Second, timing System.Random.random at different types:" 232 timeit th freq "System.Random Ints" gen randInt 233 timeit th freq "System.Random Word16" gen randWord16 234 timeit th freq "System.Random Floats" gen randFloat 235 timeit th freq "System.Random CFloats" gen randCFloat 236 timeit th freq "System.Random Doubles" gen randDouble 237 timeit th freq "System.Random CDoubles" gen randCDouble 238 timeit th freq "System.Random Integers" gen randInteger 239 timeit th freq "System.Random Bools" gen randBool 240 timeit th freq "System.Random Chars" gen randChar 241 242 putStrLn "\n Next timing range-restricted System.Random.randomR:" 243 timeit th freq "System.Random Ints" gen (randomR (-100, 100::Int)) 244 timeit th freq "System.Random Word16s" gen (randomR ( 100, 300::Word16)) 245 timeit th freq "System.Random Floats" gen (randomR (-100, 100::Float)) 246 timeit th freq "System.Random CFloats" gen (randomR (-100, 100::CFloat)) 247 timeit th freq "System.Random Doubles" gen (randomR (-100, 100::Double)) 248 timeit th freq "System.Random CDoubles" gen (randomR (-100, 100::CDouble)) 249 timeit th freq "System.Random Integers" gen (randomR (-100, 100::Integer)) 250 timeit th freq "System.Random Bools" gen (randomR (False, True::Bool)) 251 timeit th freq "System.Random Chars" gen (randomR ('a', 'z')) 252 timeit th freq "System.Random BIG Integers" gen (randomR (0, (2::Integer) ^ (5000::Int))) 253 254 -- when (not$ NoC `elem` opts) $ do 255 -- putStrLn$ " Comparison to C's rand():" 256 -- timeit_foreign th freq "ptr store in C loop" store_loop 257 -- timeit_foreign th freq "rand/store in C loop" blast_rands 258 -- timeit_foreign th freq "rand in Haskell loop" (\n ptr -> forM_ [1..n]$ \_ -> rand ) 259 -- timeit_foreign th freq "rand/store in Haskell loop" (\n ptr -> forM_ [1..n]$ \_ -> do n <- rand; poke ptr n ) 260 -- return () 261 262 -- Test with 1 thread and numCapabilities threads: 263 gamut 1 264 when (numCapabilities > 1) $ do 265 putStrLn $ "\nNow "++ show numCapabilities ++" threads, reporting mean randoms-per-second-per-thread:" 266 void $ gamut numCapabilities 267 268 putStrLn "Finished." 269 270