1 // $Id: dlcell.h,v 1.36 2011/03/08 08:16:33 bobgian Exp $
2 
3 /*
4   Copyright 2002  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 /******************************************************************
12  This file defines the branch data-likelihood storage object.  It maintains the data
13  likelihoods internally as a bare array for speed reasons (ability to use memcpy).
14 
15  The DLCell constructors and Clone make fully functional *empty* DLCells suitable
16  for use on internal branches.  Copy() can be used to make a filled-up DLCell,
17  or you can Initialize() it with a sequence.
18 
19  The base DLCell class maintains a private internal store of allocated arrays, as a speed-up.
20  This store should be cleared at the beginning of every region (by calling ClearStore()) or
21  it will turn into a major memory leak.  The freestore-management code assumes that all of
22  the data-likelihood arrays are three-dimensional contiguous allocation using new[].  If you
23  write one that isn't, derive from Cell, not from DLCell!
24 
25  The file also contains the simple helper class 'triplet', analogous to std::pair.
26 
27  NB:  It would be more efficient to do MSCells in some other way, but it would mean writing
28  a lot of new code, so I didn't.  If Microsats are too space-intensive to use, review this code.
29 
30  Written by Jim Sloan, revised by Mary Kuhner
31     added datalikelihood branchwise-normalization -- Jon 2001/03/09
32     added NullCell -- Mary 2002/05/03
33     deleted SNPCell (replaced with a pair of DNACells) -- Mary 2002/05/28
34     put SNPCell back, much simplified -- Mary 2002/06/11
35     added KCell -- Mary 2002/07/08
36     collapsed the AlleleCell subclasses -- Mary 2002/07/22
37 
38 ********************************************************************/
39 
40 #ifndef DLCELL_H
41 #define DLCELL_H
42 
43 #include <cassert>                      // May be needed for inline definitions.
44 #include <iostream>
45 #include <stdlib.h>
46 #include <string>
47 
48 #include "constants.h"
49 #include "types.h"
50 #include "vectorx.h"
51 
52 //------------------------------------------------------------------------------------
53 //------------------------------------------------------------------------------------
54 
55 // This struct is similar to std::pair<long>
56 
57 struct triplet
58 {
59   public:
60     long first;
61     long second;
62     long third;
63 
64     triplet();
65     triplet(long f, long s, long t);
66     bool operator<(const triplet& rhs) const;
67 
68     // We accept default destructor, copy constructor and operator=
69 };
70 
71 //------------------------------------------------------------------------------------
72 //------------------------------------------------------------------------------------
73 
74 class Cell
75 {
76   protected:
Cell()77     Cell() {};
78 
79   public:
~Cell()80     virtual          ~Cell() {};
81     virtual Cell*    Clone()         const                = 0;
82     virtual Cell*    Copy()          const                = 0;
83 
84     // Initialize from data.
85     virtual void     Initialize(const StringVec1d& sequence, const DataModel_ptr trans) = 0;
86 #if 1
87     virtual void     CopyInitialize(const Cell& src)         = 0;
88 #endif
89     virtual void     EmptyCell()                             = 0;
90 
91     // Retrieve individual marker DLs.
92     virtual void     SetSiteDLs(long posn, double** siteDLs)   = 0;
93     virtual void     AddToSiteDLs(long posn, double** siteDLs) = 0;
94     virtual double** GetSiteDLs(long posn) const               = 0;
95 
96     virtual double** GetNextMarker(double** marker)          = 0;
97 
98     // Manage the normalization coefficients.
99     virtual double   Normalize(double** siteDLs)             = 0;
100     virtual void     SetNorms(double val, long pos)          = 0;
101     virtual double   GetNorms(long pos) const                = 0;
102 
103     virtual void SumMarkers(long startpos, long endpos, bool normalize) = 0;
104 
105     // Swap two markers..
106     virtual void     SwapDLs(Cell_ptr other, long pos)          = 0;
107 
108     // Compare DLCell contents.
109     virtual bool     IsSameAs(const Cell_ptr othercell, long pos) const = 0;
110     virtual long     DiffersFrom(Cell_ptr othercell) const = 0;
111 
112     // Simulation functions.
113     virtual void     SetAllCategoriesTo(DoubleVec1d& state, long posn) = 0;
114     virtual DoubleVec1d GetStateFor(long posn, long cat) const = 0;
115     virtual void     SetStateTo(long posn, long cat, DoubleVec1d state) = 0;
116     virtual void     AddTo(const Cell_ptr othercell) = 0;
117     virtual void     SubtractFrom(const Cell_ptr othercell) = 0;
118     virtual void     MultiplyBy(double mult) = 0;
119     virtual void     MultiplyBy(const Cell_ptr othercell) = 0;
120 
121     // For output.
122     virtual LongVec1d GetOnes(long marker) const = 0;
123 
124     // Debugging functions.
125     virtual string   DLsToString(long start, long end) const = 0;
126     virtual long     GetNMarkers() = 0;
127     virtual long     GetNCats() = 0;
128     virtual long     GetNBins() = 0;
129 
130 }; // Cell
131 
132 //------------------------------------------------------------------------------------
133 //------------------------------------------------------------------------------------
134 
135 class DLCell : public Cell
136 {
137   private:
138     DLCell(const DLCell&);               // undefined
139     DLCell&    operator=(const DLCell&); // undefined
140 
141   protected:
142     triplet     m_identifier;           // used to manage free store
143     long        m_nmarkers;             // number of markers
144     long        m_ncats;                // number of rate categories
145     long        m_nbins;                // number of allelic states
146     DoubleVec1d m_norms;                // normalization coefficients
147     cellarray   m_DLs;                  // array of data likelihoods: position X category X bin
148 
149     // Error checking (debugging).
150     bool       IsValidPosition(long pos) const;
151 
152   public:
153 
154     DLCell(long markers, long cats, long bins);
155     virtual          ~DLCell();
156     virtual Cell*    Copy()          const;
157     cellarray  MakeArray();             // NB:  must not be virtual--it's called by base class constructor!
158     void EmptyCell();
159 
160     virtual void     CopyInitialize(const Cell& src); // Initialize from data.
161 
162     // Retrieve individual marker DLs.
163     virtual void     SetSiteDLs(long posn, double** siteDLs);
164     virtual void     AddToSiteDLs(long posn, double** siteDLs);
GetSiteDLs(long posn)165     virtual double** GetSiteDLs(long posn) const {assert(IsValidPosition(posn)); return m_DLs[posn];};
GetNextMarker(double ** marker)166     virtual double** GetNextMarker(double** marker) {return marker + m_ncats;};
167 
168     // Manage the normalization coefficients.
169     double   Normalize(double** siteDLs);
SetNorms(double val,long pos)170     void     SetNorms(double val, long pos)  {assert(IsValidPosition(pos)); m_norms[pos] = val; };
GetNorms(long pos)171     double   GetNorms(long pos) const        {assert(IsValidPosition(pos)); return m_norms[pos]; };
172 
173     // This function accumulates all the values from startpos, up to but not including endpos, *INTO* endpos.  Careful!
174     void     SumMarkers(long startpos, long endpos, bool normalize);
175 
176     // Swap two markers.
177     virtual void     SwapDLs(Cell_ptr other, long pos);
178 
179     // Clear the free store.
180     // This is called by control code when a new region is begun, so that
181     // the old cellarrays, which are no longer useful, can be discarded.
182     static  void     ClearStore();
183 
184     // Compare DLCell contents.
185     virtual bool     IsSameAs(const Cell_ptr othercell, long pos) const;
186 
187     // DiffersFrom returns the first position that IsSameAs() thinks
188     // the 2 cells differ at, FLAGLONG if no position is found.
189     virtual long     DiffersFrom(Cell_ptr othercell) const;
190 
191     // Simulation functions.
192     virtual void     SetAllCategoriesTo(DoubleVec1d& state, long posn);
193     virtual DoubleVec1d GetStateFor(long posn, long cat) const;
194     virtual void     SetStateTo(long posn, long cat, DoubleVec1d state);
195     virtual void     AddTo(const Cell_ptr othercell);
196     virtual void     SubtractFrom(const Cell_ptr othercell);
197     virtual void     MultiplyBy(double mult);
198     virtual void     MultiplyBy(const Cell_ptr othercell);
199 
200     // For output.
201     virtual LongVec1d GetOnes(long marker) const;
202 
203     // Debugging functions.
204     virtual string   DLsToString(long start, long end) const;
GetNMarkers()205     virtual long     GetNMarkers() {return m_nmarkers;};
GetNCats()206     virtual long     GetNCats() {return m_ncats;};
GetNBins()207     virtual long     GetNBins() {return m_nbins;};
208 
209 }; // DLCell
210 
211 //------------------------------------------------------------------------------------
212 //------------------------------------------------------------------------------------
213 
214 // NullCell is a Cell that does nothing, useful in simplifying certain algorithms
215 // in DLCalculator.  They are not currently used in the Tree.
216 
217 class NullCell : public Cell
218 {
219   public:
220     NullCell();
~NullCell()221     virtual          ~NullCell() {};
Clone()222     virtual Cell*    Clone() const { return new NullCell; };
Copy()223     virtual Cell*    Copy() const { return new NullCell; };
224     virtual void     Initialize(const StringVec1d&, const DataModel_ptr trans);
225 #if 1
CopyInitialize(const Cell &)226     virtual void     CopyInitialize(const Cell&) { assert(false); }; // no! it's Null!
227 #endif
EmptyCell()228     virtual void     EmptyCell() {};
SetSiteDLs(long,double **)229     virtual void     SetSiteDLs(long, double**) { assert(false); };  // no! it's Null!
AddToSiteDLs(long,double **)230     virtual void     AddToSiteDLs(long, double**) {};
GetSiteDLs(long)231     virtual double** GetSiteDLs(long) const    { return NULL; };
Normalize(double **)232     virtual double   Normalize(double**) {return 0; };
SetNorms(double,long)233     virtual void     SetNorms(double, long) {};
GetNorms(long)234     virtual double   GetNorms(long) const {return 0; };
SumMarkers(long,long,bool)235     virtual void     SumMarkers(long, long, bool) { assert(false); };
GetNextMarker(double **)236     virtual double** GetNextMarker(double**) { assert(false); return NULL; };
SwapDLs(Cell_ptr,long)237     virtual void     SwapDLs(Cell_ptr, long) { assert(false); };
238 
IsSameAs(const Cell_ptr,long)239     virtual bool     IsSameAs(const Cell_ptr, long) const
240     { assert(false); return false; };   // no! it's Null!
241 
DiffersFrom(Cell_ptr)242     virtual long     DiffersFrom(Cell_ptr) const { assert(false); return 0; };
243 
244     // Simulation functions.
SetAllCategoriesTo(DoubleVec1d & state,long posn)245     virtual void     SetAllCategoriesTo(DoubleVec1d& state, long posn) {};
246     virtual DoubleVec1d GetStateFor(long posn, long cat) const;
SetStateTo(long posn,long cat,DoubleVec1d state)247     virtual void     SetStateTo(long posn, long cat, DoubleVec1d state) {};
AddTo(const Cell_ptr othercell)248     virtual void     AddTo(const Cell_ptr othercell) {};
SubtractFrom(const Cell_ptr othercell)249     virtual void     SubtractFrom(const Cell_ptr othercell) {};
MultiplyBy(double mult)250     virtual void     MultiplyBy(double mult) {};
MultiplyBy(const Cell_ptr othercell)251     virtual void     MultiplyBy(const Cell_ptr othercell) {};
252 
253     // For output.
GetOnes(long marker)254     virtual LongVec1d GetOnes(long marker) const { return LongVec1d();};
255 
256     // Debugging functions.
DLsToString(long,long)257     virtual string   DLsToString(long, long) const { return string(""); };
GetNMarkers()258     virtual long     GetNMarkers() {return 0;};
GetNCats()259     virtual long     GetNCats() {return 0;};
GetNBins()260     virtual long     GetNBins() {return 0;};
261 }; // NullCell
262 
263 //------------------------------------------------------------------------------------
264 //------------------------------------------------------------------------------------
265 
266 class NucCell : public DLCell
267 {
268   public:
269     NucCell(long markers, long cats);
~NucCell()270     virtual          ~NucCell()          {};
271     virtual void     Initialize(const StringVec1d& sequence, const DataModel_ptr trans);
272 
273 }; // NucCell
274 
275 //------------------------------------------------------------------------------------
276 //------------------------------------------------------------------------------------
277 
278 class DNACell : public NucCell
279 {
280   public:
281     DNACell(long markers, long cats);
~DNACell()282     virtual         ~DNACell()            {};
283     virtual Cell* Clone()    const;
284 };
285 
286 //------------------------------------------------------------------------------------
287 //------------------------------------------------------------------------------------
288 
289 class SNPCell : public NucCell
290 {
291   public:
292     SNPCell(long markers, long cats);
293     virtual           ~SNPCell();
294     virtual  Cell*    Clone() const;
295     virtual  void     Initialize(const StringVec1d&, const DataModel_ptr trans);
296 
297 }; // SNPCell
298 
299 //------------------------------------------------------------------------------------
300 //------------------------------------------------------------------------------------
301 
302 class AlleleCell : public DLCell
303 {
304   public:
305     AlleleCell(long markers, long cats, long bins);
~AlleleCell()306     virtual          ~AlleleCell()               {};
307     virtual  Cell*   Clone() const;
308     virtual void     Initialize(const StringVec1d& sequence, const DataModel_ptr trans);
309 
310 }; // AlleleCell
311 
312 #endif // DLCELL_H
313 
314 //____________________________________________________________________________________
315