1----------------------------------------------------------------------------- 2-- | 3-- TODO: Document module. 4-- 5-- Exports C types for dynamic characters and their constructors allong with 6-- an FFI binding for the memoizing TCM structure. 7----------------------------------------------------------------------------- 8 9{-# LANGUAGE BangPatterns, DeriveGeneric, FlexibleInstances, ForeignFunctionInterface, TypeSynonymInstances #-} 10 11{-# OPTIONS_GHC -fno-warn-orphans #-} 12 13module Data.TCM.Memoized.FFI 14 ( CBufferUnit 15 , CDynamicChar(..) 16 , DCElement(..) 17 , ForeignVoid() 18 , MemoizedCostMatrix(costMatrix) 19 , getMemoizedCostMatrix 20 , getMedianAndCost 21 -- * Utility functions 22 , calculateBufferLength 23 , coerceEnum 24 , constructCharacterFromExportable 25 , constructElementFromExportable 26 , constructEmptyElement 27 ) where 28 29import Bio.Character.Exportable.Class 30import Data.Bits 31import Foreign hiding (alignPtr) 32import Foreign.C.Types 33import GHC.Generics (Generic) 34import System.IO.Unsafe 35 36-- import Debug.Trace 37 38#include "costMatrixWrapper.h" 39#include "dynamicCharacterOperations.h" 40 41 42-- | 43-- A convient type alias for improved clairity of use. 44type CBufferUnit = CULong -- This will be compatible with uint64_t 45 46 47-- | 48-- Type of a dynamic character to pass back and forth across the FFI interface. 49data CDynamicChar 50 = CDynamicChar 51 { alphabetSizeChar :: CSize 52 , numElements :: CSize 53 , dynCharLen :: CSize 54 , dynChar :: Ptr CBufferUnit 55 } 56 57 58-- | 59-- Represents a single element in a dynamic character in an exportable form. 60data DCElement = DCElement 61 { alphabetSizeElem :: CSize 62 , characterElement :: Ptr CBufferUnit 63 } deriving (Show) 64 65 66-- | 67-- A closed type wrapping a void pointer in C to the C++ memoized TCM. 68data ForeignVoid deriving (Generic) 69 70 71-- | 72-- A type-safe wrapper for the mutable, memoized TCm. 73newtype MemoizedCostMatrix 74 = MemoizedCostMatrix 75 { costMatrix :: StablePtr ForeignVoid 76 } deriving (Eq, Generic) 77 78 79{- 80-- | (✔) 81instance Show CDynamicChar where 82 show (CDynamicChar alphSize dcLen numElems dChar) = 83 mconcat 84 ["alphabetSize: " 85 , show intAlphSize 86 , "\ndynCharLen: " 87 , show intLen 88 , "\nbuffer length: " 89 , show bufferLength 90 , "\ndynChar: " 91 , show $ unsafePerformIO printedArr 92 ] 93 where 94 bufferLength = fromEnum numElems 95 intAlphSize = fromEnum alphSize 96 intLen = fromEnum dcLen 97 printedArr = show <$> peekArray bufferLength dChar 98 99-} 100 101 102instance Storable CDynamicChar where 103 104 sizeOf _ = (#size struct dynChar_t) -- #size is a built-in that works with arrays, as are #peek and #poke, below 105 106 alignment _ = alignment (undefined :: CBufferUnit) 107 108 peek ptr = do -- to get values from the C app 109 alphLen <- (#peek struct dynChar_t, alphSize ) ptr 110 nElems <- (#peek struct dynChar_t, numElems ) ptr 111 seqLen <- (#peek struct dynChar_t, dynCharLen) ptr 112 seqVal <- (#peek struct dynChar_t, dynChar ) ptr 113 pure CDynamicChar 114 { alphabetSizeChar = alphLen 115 , numElements = nElems 116 , dynCharLen = seqLen 117 , dynChar = seqVal 118 } 119 120 poke ptr (CDynamicChar alphLen nElems seqLen seqVal) = do -- to modify values in the C app 121 (#poke struct dynChar_t, alphSize ) ptr alphLen 122 (#poke struct dynChar_t, numElems ) ptr nElems 123 (#poke struct dynChar_t, dynCharLen) ptr seqLen 124 (#poke struct dynChar_t, dynChar ) ptr seqVal 125 126 127-- | (✔) 128instance Storable DCElement where 129 130 sizeOf _ = (#size struct dcElement_t) 131 132 alignment _ = alignment (undefined :: CBufferUnit) 133 134 peek ptr = do 135 alphLen <- (#peek struct dcElement_t, alphSize) ptr 136 element <- (#peek struct dcElement_t, element ) ptr 137 pure DCElement 138 { alphabetSizeElem = alphLen 139 , characterElement = element 140 } 141 142 poke ptr (DCElement alphLen element) = do 143 (#poke struct dcElement_t, alphSize) ptr alphLen 144 (#poke struct dcElement_t, element ) ptr element 145 146 147 148-- TODO: For now we only allocate 2d matrices. 3d will come later. 149-- | 150-- Create and allocate cost matrix. 151-- The first argument, TCM, is only for non-ambiguous nucleotides, and it used to 152-- generate the entire cost matrix, which includes ambiguous elements. TCM is 153-- row-major, with each row being the left character element. It is therefore 154-- indexed not by powers of two, but by cardinal integer. 155foreign import ccall unsafe "costMatrixWrapper matrixInit" 156 initializeMemoizedCMfn_c :: CSize 157 -> Ptr CInt 158 -> IO (StablePtr ForeignVoid) 159 160 161foreign import ccall unsafe "costMatrix getCostAndMedian" 162 getCostAndMedianFn_c :: Ptr DCElement 163 -> Ptr DCElement 164 -> Ptr DCElement 165-- -> CSize 166 -> StablePtr ForeignVoid 167 -> IO CInt 168 169 170-- | 171-- Set up and return a cost matrix. 172-- 173-- The cost matrix is allocated strictly. 174getMemoizedCostMatrix :: Word 175 -> (Word -> Word -> Word) 176 -> MemoizedCostMatrix 177getMemoizedCostMatrix alphabetSize costFn = unsafePerformIO . withArray rowMajorList $ \allocedTCM -> do 178 !resultPtr <- initializeMemoizedCMfn_c (coerceEnum alphabetSize) allocedTCM 179 pure $ MemoizedCostMatrix resultPtr 180 where 181 rowMajorList = [ coerceEnum $ costFn i j | i <- range, j <- range ] 182 range = [0 .. alphabetSize - 1] 183 184 185-- | 186-- /O(1)/ amortized. 187-- 188-- Calculate the median symbol set and transition cost between the two input 189-- symbol sets. 190-- 191-- *Note:* This operation is lazily evaluated and memoized for future calls. 192getMedianAndCost :: Exportable s => MemoizedCostMatrix -> s -> s -> (s, Word) 193getMedianAndCost memo lhs rhs = unsafePerformIO $ do 194 medianPtr <- constructEmptyElement alphabetSize 195 lhs' <- constructElementFromExportable lhs 196 rhs' <- constructElementFromExportable rhs 197 !cost <- getCostAndMedianFn_c lhs' rhs' medianPtr (costMatrix memo) 198 medianElement <- peek medianPtr 199 medianValue <- fmap buildExportable . peekArray bufferLength $ characterElement medianElement 200 pure (medianValue, coerceEnum cost) 201 where 202 alphabetSize = exportedElementWidthSequence $ toExportableBuffer lhs 203 buildExportable = fromExportableBuffer . ExportableCharacterSequence 1 alphabetSize 204 bufferLength = calculateBufferLength alphabetSize 1 205 206 207-- | 208-- /O(1)/ 209-- 210-- Calculate the buffer length based on the element count and element bit width. 211calculateBufferLength :: Enum b 212 => Int -- ^ Element count 213 -> Int -- ^ Element bit width 214 -> b 215calculateBufferLength count width = coerceEnum $ q + if r == 0 then 0 else 1 216 where 217 (q,r) = (count * width) `divMod` finiteBitSize (undefined :: CULong) 218 219 220-- | 221-- Coerce one 'Enum' value to another through the type's corresponding 'Int' 222-- values. 223coerceEnum :: (Enum a, Enum b) => a -> b 224coerceEnum = toEnum . fromEnum 225 226 227-- | 228-- /O(n)/ where @n@ is the length of the dynamic character. 229-- 230-- Malloc and populate a pointer to an exportable representation of the 231-- 'Exportable' value. The supplied value is assumed to be a dynamic character 232-- and the result is a pointer to a C representation of a dynamic character. 233constructCharacterFromExportable :: Exportable s => s -> IO (Ptr CDynamicChar) 234constructCharacterFromExportable exChar = do 235 valueBuffer <- newArray $ exportedBufferChunks exportableBuffer 236 charPointer <- malloc :: IO (Ptr CDynamicChar) 237 let charValue = CDynamicChar (coerceEnum width) (coerceEnum count) bufLen valueBuffer 238 !_ <- poke charPointer charValue 239 pure charPointer 240 where 241 count = exportedElementCountSequence exportableBuffer 242 width = exportedElementWidthSequence exportableBuffer 243 bufLen = calculateBufferLength count width 244 exportableBuffer = toExportableBuffer exChar 245 246 247-- | 248-- /O(1)/ 249-- 250-- Malloc and populate a pointer to an exportable representation of the 251-- 'Exportable' value. The supplied value is assumed to be a dynamic character 252-- element and the result is a pointer to a C representation of a dynamic 253-- character element. 254constructElementFromExportable :: Exportable s => s -> IO (Ptr DCElement) 255constructElementFromExportable exChar = do 256 valueBuffer <- newArray $ exportedBufferChunks exportableBuffer 257 elementPointer <- malloc :: IO (Ptr DCElement) 258 let elementValue = DCElement (coerceEnum width) valueBuffer 259 !_ <- poke elementPointer elementValue 260 pure elementPointer 261 where 262 width = exportedElementWidthSequence exportableBuffer 263 exportableBuffer = toExportableBuffer exChar 264 265 266-- | 267-- /O(1)/ 268-- 269-- Malloc and populate a pointer to a C representation of a dynamic character. 270-- The buffer of the resulting value is intentially zeroed out. 271constructEmptyElement :: Int -- ^ Bit width of a dynamic character element. 272 -> IO (Ptr DCElement) 273constructEmptyElement alphabetSize = do 274 elementPointer <- malloc :: IO (Ptr DCElement) 275 valueBuffer <- mallocArray bufferLength 276 let elementValue = DCElement (coerceEnum alphabetSize) valueBuffer 277 !_ <- poke elementPointer elementValue 278 pure elementPointer 279 where 280 bufferLength = calculateBufferLength alphabetSize 1 281