1-----------------------------------------------------------------------------
2-- |
3-- TODO: Document module.
4--
5-- Exports C types for dynamic characters and their constructors allong with
6-- an FFI binding for the memoizing TCM structure.
7-----------------------------------------------------------------------------
8
9{-# LANGUAGE BangPatterns, DeriveGeneric, FlexibleInstances, ForeignFunctionInterface, TypeSynonymInstances #-}
10
11{-# OPTIONS_GHC -fno-warn-orphans #-}
12
13module Data.TCM.Memoized.FFI
14  ( CBufferUnit
15  , CDynamicChar(..)
16  , DCElement(..)
17  , ForeignVoid()
18  , MemoizedCostMatrix(costMatrix)
19  , getMemoizedCostMatrix
20  , getMedianAndCost
21  -- * Utility functions
22  , calculateBufferLength
23  , coerceEnum
24  , constructCharacterFromExportable
25  , constructElementFromExportable
26  , constructEmptyElement
27  ) where
28
29import Bio.Character.Exportable.Class
30import Data.Bits
31import Foreign         hiding (alignPtr)
32import Foreign.C.Types
33import GHC.Generics           (Generic)
34import System.IO.Unsafe
35
36-- import Debug.Trace
37
38#include "costMatrixWrapper.h"
39#include "dynamicCharacterOperations.h"
40
41
42-- |
43-- A convient type alias for improved clairity of use.
44type CBufferUnit  = CULong -- This will be compatible with uint64_t
45
46
47-- |
48-- Type of a dynamic character to pass back and forth across the FFI interface.
49data CDynamicChar
50   = CDynamicChar
51   { alphabetSizeChar :: CSize
52   , numElements      :: CSize
53   , dynCharLen       :: CSize
54   , dynChar          :: Ptr CBufferUnit
55   }
56
57
58-- |
59-- Represents a single element in a dynamic character in an exportable form.
60data DCElement = DCElement
61    { alphabetSizeElem :: CSize
62    , characterElement :: Ptr CBufferUnit
63    } deriving (Show)
64
65
66-- |
67-- A closed type wrapping a void pointer in C to the C++ memoized TCM.
68data ForeignVoid deriving (Generic)
69
70
71-- |
72-- A type-safe wrapper for the mutable, memoized TCm.
73newtype MemoizedCostMatrix
74      = MemoizedCostMatrix
75      { costMatrix :: StablePtr ForeignVoid
76      } deriving (Eq, Generic)
77
78
79{-
80-- | (✔)
81instance Show CDynamicChar where
82    show (CDynamicChar alphSize dcLen numElems dChar) =
83       mconcat
84         ["alphabetSize:  "
85         , show intAlphSize
86         , "\ndynCharLen: "
87         , show intLen
88         , "\nbuffer length: "
89         , show bufferLength
90         , "\ndynChar:    "
91         , show $ unsafePerformIO printedArr
92         ]
93        where
94            bufferLength = fromEnum numElems
95            intAlphSize  = fromEnum alphSize
96            intLen       = fromEnum dcLen
97            printedArr   = show <$> peekArray bufferLength dChar
98
99-}
100
101
102instance Storable CDynamicChar where
103
104    sizeOf    _ = (#size struct dynChar_t) -- #size is a built-in that works with arrays, as are #peek and #poke, below
105
106    alignment _ = alignment (undefined :: CBufferUnit)
107
108    peek ptr    = do -- to get values from the C app
109        alphLen <- (#peek struct dynChar_t, alphSize  ) ptr
110        nElems  <- (#peek struct dynChar_t, numElems  ) ptr
111        seqLen  <- (#peek struct dynChar_t, dynCharLen) ptr
112        seqVal  <- (#peek struct dynChar_t, dynChar   ) ptr
113        pure CDynamicChar
114             { alphabetSizeChar = alphLen
115             , numElements      = nElems
116             , dynCharLen       = seqLen
117             , dynChar          = seqVal
118             }
119
120    poke ptr (CDynamicChar alphLen nElems seqLen seqVal) = do -- to modify values in the C app
121        (#poke struct dynChar_t, alphSize  ) ptr alphLen
122        (#poke struct dynChar_t, numElems  ) ptr nElems
123        (#poke struct dynChar_t, dynCharLen) ptr seqLen
124        (#poke struct dynChar_t, dynChar   ) ptr seqVal
125
126
127-- | (✔)
128instance Storable DCElement where
129
130    sizeOf    _ = (#size struct dcElement_t)
131
132    alignment _ = alignment (undefined :: CBufferUnit)
133
134    peek ptr    = do
135        alphLen <- (#peek struct dcElement_t, alphSize) ptr
136        element <- (#peek struct dcElement_t, element ) ptr
137        pure DCElement
138            { alphabetSizeElem = alphLen
139            , characterElement = element
140            }
141
142    poke ptr (DCElement alphLen element) = do
143        (#poke struct dcElement_t, alphSize) ptr alphLen
144        (#poke struct dcElement_t, element ) ptr element
145
146
147
148-- TODO: For now we only allocate 2d matrices. 3d will come later.
149-- |
150-- Create and allocate cost matrix.
151-- The first argument, TCM, is only for non-ambiguous nucleotides, and it used to
152-- generate the entire cost matrix, which includes ambiguous elements. TCM is
153-- row-major, with each row being the left character element. It is therefore
154-- indexed not by powers of two, but by cardinal integer.
155foreign import ccall unsafe "costMatrixWrapper matrixInit"
156    initializeMemoizedCMfn_c :: CSize
157                             -> Ptr CInt
158                             -> IO (StablePtr ForeignVoid)
159
160
161foreign import ccall unsafe "costMatrix getCostAndMedian"
162    getCostAndMedianFn_c :: Ptr DCElement
163                         -> Ptr DCElement
164                         -> Ptr DCElement
165--                         -> CSize
166                         -> StablePtr ForeignVoid
167                         -> IO CInt
168
169
170-- |
171-- Set up and return a cost matrix.
172--
173-- The cost matrix is allocated strictly.
174getMemoizedCostMatrix :: Word
175                      -> (Word -> Word -> Word)
176                      -> MemoizedCostMatrix
177getMemoizedCostMatrix alphabetSize costFn = unsafePerformIO . withArray rowMajorList $ \allocedTCM -> do
178    !resultPtr <- initializeMemoizedCMfn_c (coerceEnum alphabetSize) allocedTCM
179    pure $ MemoizedCostMatrix resultPtr
180  where
181    rowMajorList = [ coerceEnum $ costFn i j | i <- range,  j <- range ]
182    range = [0 .. alphabetSize - 1]
183
184
185-- |
186-- /O(1)/ amortized.
187--
188-- Calculate the median symbol set and transition cost between the two input
189-- symbol sets.
190--
191-- *Note:* This operation is lazily evaluated and memoized for future calls.
192getMedianAndCost :: Exportable s => MemoizedCostMatrix -> s -> s -> (s, Word)
193getMedianAndCost memo lhs rhs = unsafePerformIO $ do
194    medianPtr     <- constructEmptyElement alphabetSize
195    lhs'          <- constructElementFromExportable lhs
196    rhs'          <- constructElementFromExportable rhs
197    !cost         <- getCostAndMedianFn_c lhs' rhs' medianPtr (costMatrix memo)
198    medianElement <- peek medianPtr
199    medianValue   <- fmap buildExportable . peekArray bufferLength $ characterElement medianElement
200    pure (medianValue, coerceEnum cost)
201  where
202    alphabetSize    = exportedElementWidthSequence $ toExportableBuffer lhs
203    buildExportable = fromExportableBuffer . ExportableCharacterSequence 1 alphabetSize
204    bufferLength    = calculateBufferLength alphabetSize 1
205
206
207-- |
208-- /O(1)/
209--
210-- Calculate the buffer length based on the element count and element bit width.
211calculateBufferLength :: Enum b
212                      => Int -- ^ Element count
213                      -> Int -- ^ Element bit width
214                      -> b
215calculateBufferLength count width = coerceEnum $ q + if r == 0 then 0 else 1
216   where
217    (q,r)  = (count * width) `divMod` finiteBitSize (undefined :: CULong)
218
219
220-- |
221-- Coerce one 'Enum' value to another through the type's corresponding 'Int'
222-- values.
223coerceEnum :: (Enum a, Enum b) => a -> b
224coerceEnum = toEnum . fromEnum
225
226
227-- |
228-- /O(n)/ where @n@ is the length of the dynamic character.
229--
230-- Malloc and populate a pointer to an exportable representation of the
231-- 'Exportable' value. The supplied value is assumed to be a dynamic character
232-- and the result is a pointer to a C representation of a dynamic character.
233constructCharacterFromExportable :: Exportable s => s -> IO (Ptr CDynamicChar)
234constructCharacterFromExportable exChar = do
235    valueBuffer <- newArray $ exportedBufferChunks exportableBuffer
236    charPointer <- malloc :: IO (Ptr CDynamicChar)
237    let charValue = CDynamicChar (coerceEnum width) (coerceEnum count) bufLen valueBuffer
238    !_ <- poke charPointer charValue
239    pure charPointer
240  where
241    count  = exportedElementCountSequence exportableBuffer
242    width  = exportedElementWidthSequence exportableBuffer
243    bufLen = calculateBufferLength count width
244    exportableBuffer = toExportableBuffer exChar
245
246
247-- |
248-- /O(1)/
249--
250-- Malloc and populate a pointer to an exportable representation of the
251-- 'Exportable' value. The supplied value is assumed to be a dynamic character
252-- element and the result is a pointer to a C representation of a dynamic
253-- character element.
254constructElementFromExportable :: Exportable s => s -> IO (Ptr DCElement)
255constructElementFromExportable exChar = do
256    valueBuffer    <- newArray $ exportedBufferChunks exportableBuffer
257    elementPointer <- malloc :: IO (Ptr DCElement)
258    let elementValue = DCElement (coerceEnum width) valueBuffer
259    !_ <- poke elementPointer elementValue
260    pure elementPointer
261  where
262    width  = exportedElementWidthSequence exportableBuffer
263    exportableBuffer = toExportableBuffer exChar
264
265
266-- |
267-- /O(1)/
268--
269-- Malloc and populate a pointer to a C representation of a dynamic character.
270-- The buffer of the resulting value is intentially zeroed out.
271constructEmptyElement :: Int -- ^ Bit width of a dynamic character element.
272                      -> IO (Ptr DCElement)
273constructEmptyElement alphabetSize = do
274    elementPointer <- malloc :: IO (Ptr DCElement)
275    valueBuffer    <- mallocArray bufferLength
276    let elementValue = DCElement (coerceEnum alphabetSize) valueBuffer
277    !_ <- poke elementPointer elementValue
278    pure elementPointer
279  where
280    bufferLength = calculateBufferLength alphabetSize 1
281