1 // $Id: dlcell.cpp,v 1.43 2012/05/22 18:17:01 ewalkup Exp $
2
3 /*
4 Copyright 2002 Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 // This file contains the implementation code for the data-likelihood
12 // storage object.
13
14 #include <cassert>
15 #include <cmath>
16 #include <cstring>
17 #include <iostream> //debug
18
19 #include "local_build.h"
20
21 #include "dlcell.h"
22 #include "dlmodel.h"
23 #include "errhandling.h"
24 #include "definitions.h"
25 #include "registry.h"
26 #include "cellmanager.h"
27 #include "stringx.h" // for ToString() use in debug function DLsToString()
28
29 #ifdef DMALLOC_FUNC_CHECK
30 #include "/usr/local/include/dmalloc.h"
31 #endif
32
33 // this turns on the data likelihood details for testing
34 //#define DATA_LIKELIHOOD_DETAILS
35
36 using namespace std;
37
38 //------------------------------------------------------------------------------------
39 // DLCell
40 //------------------------------------------------------------------------------------
41
DLCell(long markers,long cats,long bins)42 DLCell::DLCell(long markers, long cats, long bins)
43 : Cell(),
44 m_nmarkers(markers),
45 m_ncats(cats),
46 m_nbins(bins),
47 m_norms(markers, 0.0)
48 {
49 // obtain an internal array from the free store
50 m_identifier.first = m_nmarkers;
51 m_identifier.second = m_ncats;
52 m_identifier.third = m_nbins;
53
54 m_DLs = registry.GetCellManager().GetArray(m_identifier, *this);
55 } // DLCell constructor
56
57 //------------------------------------------------------------------------------------
58
~DLCell()59 DLCell::~DLCell()
60 {
61 // return the internal array to the free store
62 registry.GetCellManager().FreeArray(m_identifier, m_DLs);
63 m_DLs = NULL;
64 } // DLCell destructor
65
66 //------------------------------------------------------------------------------------
67
Copy() const68 Cell* DLCell::Copy() const
69 {
70 Cell* pCell = Clone();
71 pCell->CopyInitialize(*this);
72 return pCell;
73
74 } // Copy
75
76 //------------------------------------------------------------------------------------
77
EmptyCell()78 void DLCell::EmptyCell()
79 {
80 for (long posn = 0; posn < m_nmarkers; ++posn)
81 {
82 for (long cat = 0; cat < m_ncats; ++cat)
83 {
84 for (long base = 0; base < m_nbins; ++base)
85 {
86 m_DLs[posn][cat][base] = 0;
87 }
88 }
89 }
90 }
91
92 //------------------------------------------------------------------------------------
93
CopyInitialize(const Cell & src)94 void DLCell::CopyInitialize(const Cell& src)
95 {
96
97 const DLCell& srcell = dynamic_cast<const DLCell&>(src);
98
99 assert(m_nmarkers == srcell.m_nmarkers);
100 assert(m_ncats == srcell.m_ncats);
101 assert(m_nbins == srcell.m_nbins);
102
103
104 m_norms = srcell.m_norms;
105 // m_nmarkers+1 because there is an extra cell at the end, to
106 // allow an STL-like interface where one past the end is a valid
107 // address
108
109 long arraySize = (m_nmarkers+1) * m_ncats * m_nbins;
110 // cout << "arraysize=" << arraySize << endl;
111 // memcpy for speed--be careful!
112 memcpy(m_DLs[0][0], srcell.m_DLs[0][0], arraySize*sizeof(double));
113
114 } // CopyInitialize
115
116 //------------------------------------------------------------------------------------
117 // Debugging function.
118
IsValidPosition(long pos) const119 bool DLCell::IsValidPosition(long pos) const
120 {
121 // for some purposes, pos == m_nmarkers is in fact valid; this
122 // allows "one past the end" logic emulating the STL
123
124 if (0 <= pos && pos <= m_nmarkers) return true;
125 return false;
126 } // IsValidPosition
127
128 //------------------------------------------------------------------------------------
129 // Make a datalikelihood array appropriately sized for this Cell
130
MakeArray()131 cellarray DLCell::MakeArray()
132 {
133 long len = m_nmarkers + 1;
134 // provide a STL like interface into the m_DLs array.
135 // (i.e. provides a legal reference one past the normal end
136 // of the array)
137
138 // set up a 3-dimensional array in which all positions are
139 // contiguous, so that memcpy can be used on it (a speed
140 // optimization).
141
142 long site, category;
143 cellarray dl;
144
145 dl = new double**[len];
146
147 dl[0] = new double*[len * m_ncats];
148 for (site = 0; site < len; ++site)
149 {
150 dl[site] = dl[0] + site * m_ncats;
151 }
152
153 dl[0][0] = new double[len * m_ncats * m_nbins];
154 for (site = 0; site < len; ++site)
155 {
156 for (category = 0; category < m_ncats; ++category)
157 {
158 dl[site][category] = dl[0][0] + site * m_ncats * m_nbins + category * m_nbins;
159 }
160 }
161
162 return dl;
163
164 } // MakeArray
165
166 //------------------------------------------------------------------------------------
167
SwapDLs(Cell_ptr othercell,long pos)168 void DLCell::SwapDLs(Cell_ptr othercell, long pos)
169 {
170 // This creates a 2D array (categories x bins) and uses it to
171 // swap the information for a single marker between two DLCells.
172 // All bets are off if they are not the same type!
173
174 // WARNING -- Not exception safe due to raw new.
175
176 // take pointers to the two things to be swapped
177 double** otherDLs = othercell->GetSiteDLs(pos);
178 double** myDLs = GetSiteDLs(pos);
179
180 // create temporary storage for the swap
181 double** newDLs = new double* [m_ncats];
182 newDLs[0] = new double [m_ncats * m_nbins];
183 long cat;
184 for(cat = 1; cat < m_ncats; ++cat)
185 newDLs[cat] = newDLs[0] + cat * m_nbins;
186
187 // memcpy for speed
188 memcpy(newDLs[0], myDLs[0], m_ncats * m_nbins * sizeof(double));
189
190 // swap
191 SetSiteDLs(pos, otherDLs);
192 othercell->SetSiteDLs(pos, newDLs);
193
194 // release temporary storage
195 delete [] newDLs[0];
196 delete [] newDLs;
197
198 } // SwapDLs
199
200 //------------------------------------------------------------------------------------
201
SumMarkers(long startpos,long endpos,bool normalize)202 void DLCell::SumMarkers(long startpos, long endpos, bool normalize)
203 {
204 long cat, bin, pos;
205
206 if (normalize)
207 {
208
209 for (cat = 0; cat < m_ncats; ++cat)
210 {
211 for (bin = 0; bin < m_nbins; ++bin)
212 {
213 double result = 0.0;
214 for (pos = startpos; pos != endpos; ++pos)
215 {
216 double markerval = log(m_DLs[pos][cat][bin]) + GetNorms(pos);
217 if (markerval > EXPMIN) result += exp(markerval);
218 }
219 #ifdef DATA_LIKELIHOOD_DETAILS
220 cout<< "SumMarkers-normalized cat:" << cat
221 << " bin:" << bin
222 << " result:" << result
223 <<endl;
224 #endif
225 if (result == 0.0) // never found a good value?
226 m_DLs[endpos][cat][bin] = exp(EXPMIN);
227 else
228 m_DLs[endpos][cat][bin] = result;
229 }
230 }
231
232 // renormalize and re-set norms
233 double newnorm = Normalize(m_DLs[endpos]);
234 SetNorms(newnorm, endpos);
235
236 }
237 else // no normalization
238 {
239
240 for (cat = 0; cat < m_ncats; ++cat)
241 {
242 for (bin = 0; bin < m_nbins; ++bin)
243 {
244 double result = 0.0;
245 for (pos = startpos; pos != endpos; ++pos)
246 {
247 result += m_DLs[pos][cat][bin];
248 }
249 #ifdef DATA_LIKELIHOOD_DETAILS
250 cout<< "SumMarkers-not normalized cat:" << cat
251 << " bin:" << bin
252 << " result:" << result
253 <<endl;
254 #endif
255
256 m_DLs[endpos][cat][bin] = result;
257 }
258 }
259 }
260
261 } // SumMarkers
262
263 //------------------------------------------------------------------------------------
264
IsSameAs(const Cell_ptr othercell,long pos) const265 bool DLCell::IsSameAs(const Cell_ptr othercell, long pos) const
266 {
267 double** otherDLs = othercell->GetSiteDLs(pos);
268 double** myDLs = GetSiteDLs(pos);
269 long cat, bin;
270 for(cat = 0; cat < m_ncats; ++cat)
271 for (bin = 0; bin < m_nbins; ++bin)
272 if (myDLs[cat][bin] != otherDLs[cat][bin]) return false;
273
274 return true;
275 } // DLCell::IsSameAs
276
277 //------------------------------------------------------------------------------------
278
DiffersFrom(Cell_ptr othercell) const279 long DLCell::DiffersFrom(Cell_ptr othercell) const
280 {
281 long marker;
282 for(marker = 0; marker < m_nmarkers; ++marker)
283 if (!IsSameAs(othercell,marker)) return marker;
284
285 return FLAGLONG;
286
287 } // DLCell::DiffersFrom
288
289 //------------------------------------------------------------------------------------
290
SetAllCategoriesTo(DoubleVec1d & state,long posn)291 void DLCell::SetAllCategoriesTo(DoubleVec1d& state, long posn)
292 {
293 for (long cat = 0; cat < m_ncats; cat++)
294 {
295 for (unsigned long nstate=0; nstate < state.size(); nstate++)
296 {
297 m_DLs[posn][cat][nstate] = state[nstate];
298 }
299 }
300 }
301
302 //------------------------------------------------------------------------------------
303
GetStateFor(long posn,long cat) const304 DoubleVec1d DLCell::GetStateFor(long posn, long cat) const
305 {
306 DoubleVec1d state;
307 for (long nstate = 0; nstate < m_nbins; nstate++)
308 {
309 state.push_back(m_DLs[posn][cat][nstate]);
310 }
311 return state;
312 }
313
314
315 //------------------------------------------------------------------------------------
316
SetStateTo(long posn,long cat,DoubleVec1d state)317 void DLCell::SetStateTo (long posn, long cat, DoubleVec1d state)
318 {
319 for (long nstate = 0; nstate < m_nbins; nstate++)
320 {
321 m_DLs[posn][cat][nstate] = state[nstate];
322 }
323 }
324
325 //------------------------------------------------------------------------------------
326
AddTo(const Cell_ptr othercell)327 void DLCell::AddTo(const Cell_ptr othercell)
328 {
329 for (long pos = 0; pos < m_nmarkers; pos++)
330 {
331 double** otherDLs = othercell->GetSiteDLs(pos);
332 double** myDLs = GetSiteDLs(pos);
333 long cat, bin;
334 for(cat = 0; cat < m_ncats; ++cat)
335 {
336 for (bin = 0; bin < m_nbins; ++bin)
337 {
338 myDLs[cat][bin] += otherDLs[cat][bin];
339 }
340 }
341 }
342 }
343
344 //------------------------------------------------------------------------------------
345
SubtractFrom(const Cell_ptr othercell)346 void DLCell::SubtractFrom(const Cell_ptr othercell)
347 {
348 for (long pos = 0; pos < m_nmarkers; pos++)
349 {
350 double** otherDLs = othercell->GetSiteDLs(pos);
351 double** myDLs = GetSiteDLs(pos);
352 long cat, bin;
353 for(cat = 0; cat < m_ncats; ++cat)
354 {
355 for (bin = 0; bin < m_nbins; ++bin)
356 {
357 myDLs[cat][bin] -= otherDLs[cat][bin];
358 }
359 }
360 }
361 }
362
363 //------------------------------------------------------------------------------------
364
MultiplyBy(double mult)365 void DLCell::MultiplyBy(double mult)
366 {
367 for (long pos = 0; pos < m_nmarkers; pos++)
368 {
369 double** myDLs = GetSiteDLs(pos);
370 long cat, bin;
371 for(cat = 0; cat < m_ncats; ++cat)
372 {
373 for (bin = 0; bin < m_nbins; ++bin)
374 {
375 myDLs[cat][bin] *= mult;
376 }
377 }
378 }
379 }
380
381 //------------------------------------------------------------------------------------
382
MultiplyBy(const Cell_ptr othercell)383 void DLCell::MultiplyBy(const Cell_ptr othercell)
384 {
385 for (long pos = 0; pos < m_nmarkers; pos++)
386 {
387 double** otherDLs = othercell->GetSiteDLs(pos);
388 double** myDLs = GetSiteDLs(pos);
389 long cat, bin;
390 for(cat = 0; cat < m_ncats; ++cat)
391 {
392 for (bin = 0; bin < m_nbins; ++bin)
393 {
394 myDLs[cat][bin] *= otherDLs[cat][bin];
395 }
396 }
397 }
398 }
399
400 //------------------------------------------------------------------------------------
401
GetOnes(long marker) const402 LongVec1d DLCell::GetOnes(long marker) const
403 {
404 LongVec1d ones;
405 double** dls = GetSiteDLs(marker);
406 //Just mess with the first category
407 for (long bin = 0; bin < m_nbins; ++bin)
408 {
409 if (dls[0][bin] == 1.0)
410 {
411 ones.push_back(bin);
412 }
413 }
414 return ones;
415 }
416
417 //------------------------------------------------------------------------------------
418 // Debugging function.
419
DLsToString(long start,long end) const420 string DLCell::DLsToString(long start, long end) const
421 {
422 string lines;
423
424 long line;
425 for(line = start; line <= end; ++line)
426 {
427 lines += "marker " + ToString(line) + ": ";
428 double** dls = GetSiteDLs(line);
429 long cat;
430 for(cat = 0; cat < m_ncats; ++cat)
431 {
432 lines += "{";
433 long bin;
434 for(bin = 0; bin < m_nbins; ++bin)
435 {
436 lines += ToString(dls[cat][bin]);
437 if (bin != m_nbins - 1) lines += ",";
438 }
439 lines += "}";
440 if (cat != m_ncats - 1) lines += ", ";
441 }
442 lines += "\n";
443 }
444
445 return lines;
446
447 } // DLCell::DLsToString
448
449 //------------------------------------------------------------------------------------
450
Normalize(double ** siteDLs)451 double DLCell::Normalize(double** siteDLs)
452 {
453 double biggest = NEG_MAX;
454 long cat, bin;
455 for(cat = 0; cat < m_ncats; ++cat)
456 {
457 for(bin = 0; bin < m_nbins; ++bin)
458 {
459 if (siteDLs[cat][bin] > biggest) biggest = siteDLs[cat][bin];
460 }
461 }
462
463 for(cat = 0; cat < m_ncats; ++cat)
464 {
465 for(bin = 0; bin < m_nbins; ++bin)
466 {
467 siteDLs[cat][bin] /= biggest;
468 }
469 }
470
471 return log(biggest);
472
473 } // Normalize
474
475 //------------------------------------------------------------------------------------
476
SetSiteDLs(long posn,double ** siteDLs)477 void DLCell::SetSiteDLs(long posn, double **siteDLs)
478 {
479 assert(IsValidPosition(posn));
480 long siteSize = m_ncats * m_nbins;
481 memcpy(m_DLs[posn][0], siteDLs[0], siteSize * sizeof(double));
482 }
483
484 //------------------------------------------------------------------------------------
485
AddToSiteDLs(long posn,double ** siteDLs)486 void DLCell::AddToSiteDLs(long posn, double **siteDLs)
487 {
488 assert(IsValidPosition(posn));
489 long cat, bin;
490 for(cat = 0; cat < m_ncats; ++cat)
491 {
492 for (bin = 0; bin < m_nbins; ++bin)
493 {
494 m_DLs[posn][cat][bin] += siteDLs[cat][bin];
495 }
496 }
497 }
498
499 //------------------------------------------------------------------------------------
500 // NullCell
501 //------------------------------------------------------------------------------------
502
NullCell()503 NullCell::NullCell()
504 : Cell()
505 {
506 // deliberately blank
507 } // NullCell constructor
508
509 //------------------------------------------------------------------------------------
510
Initialize(const StringVec1d &,const DataModel_ptr)511 void NullCell::Initialize (const StringVec1d&, const DataModel_ptr)
512 {
513 assert (false); // should never call this!
514
515 } // Initialize
516
GetStateFor(long posn,long cat) const517 DoubleVec1d NullCell::GetStateFor(long posn, long cat) const
518 {
519 throw implementation_error("No state for this data likelihood.");
520 }
521
522 //------------------------------------------------------------------------------------
523 // NucCell
524 //------------------------------------------------------------------------------------
525
NucCell(long markers,long cats)526 NucCell::NucCell(long markers, long cats)
527 : DLCell(markers, cats, BASES)
528 {
529 // deliberately left blank
530 } // NucCell constructor
531
532 //------------------------------------------------------------------------------------
533
Initialize(const StringVec1d & sequence,const DataModel_ptr trans)534 void NucCell::Initialize(const StringVec1d &sequence, const DataModel_ptr trans)
535 {
536 long posn, base, cat;
537
538 string postring;
539 vector<double> likes;
540
541 // could be OPTIMIZED
542
543 for (posn = 0; posn < m_nmarkers; ++posn)
544 {
545 postring = sequence[posn];
546 likes = trans->DataToLikes(postring);
547 for (cat = 0; cat < m_ncats; ++cat)
548 {
549 for (base = 0; base < m_nbins; ++base)
550 {
551 m_DLs[posn][cat][base] = likes[base];
552 }
553 }
554 }
555 } // Initialize
556
557 //------------------------------------------------------------------------------------
558 // DNACell
559 //------------------------------------------------------------------------------------
560
DNACell(long markers,long cats)561 DNACell::DNACell(long markers, long cats)
562 : NucCell(markers, cats)
563 {
564 // intentionally blank
565 } // DNACell constructor
566
567 //------------------------------------------------------------------------------------
568
Clone() const569 Cell *DNACell::Clone() const
570 {
571 Cell *pDLCell = new DNACell(m_nmarkers, m_ncats);
572 return pDLCell;
573 }
574
575 //------------------------------------------------------------------------------------
576 // SNPCell
577 //------------------------------------------------------------------------------------
578
SNPCell(long markers,long cats)579 SNPCell::SNPCell(long markers, long cats)
580 : NucCell(markers, cats)
581 {
582 // deliberately blank
583 } // SNPCell constructor
584
585 //------------------------------------------------------------------------------------
586
~SNPCell()587 SNPCell::~SNPCell()
588 {
589 // deliberately blank
590 }
591
592 //------------------------------------------------------------------------------------
593
Clone() const594 Cell *SNPCell::Clone() const
595 {
596 Cell *pDLCell = new SNPCell(m_nmarkers, m_ncats);
597 return pDLCell;
598 }
599
600 //------------------------------------------------------------------------------------
601
Initialize(const StringVec1d &,const DataModel_ptr trans)602 void SNPCell::Initialize(const StringVec1d&, const DataModel_ptr trans)
603 {
604 // We do not use the sequence input -- SNPCell::Initialize should
605 // only be used to initialize invariant cells. We keep the first
606 // argument for conformity with the base class interface.
607
608 long categ, invar, base;
609
610 // In the m_DLs array, the basic idea is to set all entries to 0.0
611 // except for the entries where the invariant equals the base,
612 // where they are set to 1.0.
613 // HOWEVER,
614 // since we've now got a data uncertainty model, each of the
615 // invariant bases needs to go through the error correction
616 // transformation first.
617
618 for (invar = 0; invar < INVARIANTS; ++invar)
619 {
620 vector<double> likes = trans->DataToLikes(SINGLEBASES[invar]);
621 for (categ = 0; categ < m_ncats; ++categ)
622 {
623 for (base = 0; base < m_nbins; ++base)
624 {
625 m_DLs[invar][categ][base] = likes[base];
626 }
627 }
628 }
629 }
630
631 //------------------------------------------------------------------------------------
632 // AlleleCell
633 //------------------------------------------------------------------------------------
634
AlleleCell(long markers,long cats,long bins)635 AlleleCell::AlleleCell(long markers, long cats, long bins)
636 : DLCell(markers, cats, bins)
637 {
638 // deliberately blank
639 } // AlleleCell constructor
640
641 //------------------------------------------------------------------------------------
642
Clone() const643 Cell *AlleleCell::Clone() const
644 {
645 Cell *pDLCell = new AlleleCell(m_nmarkers, m_ncats, m_nbins);
646 return pDLCell;
647 } // Clone
648
649 //------------------------------------------------------------------------------------
650
Initialize(const StringVec1d & sequence,const DataModel_ptr trans)651 void AlleleCell::Initialize(const StringVec1d &sequence, const DataModel_ptr trans)
652 {
653 long posn, bin, cat;
654 vector<double> likes;
655
656 // could be OPTIMIZED
657
658 for (posn = 0; posn < m_nmarkers; ++posn)
659 {
660 likes = trans->DataToLikes(sequence[posn], posn);
661 for (cat = 0; cat < m_ncats; ++cat)
662 {
663 for (bin = 0; bin < m_nbins; ++bin)
664 {
665 m_DLs[posn][cat][bin] = likes[bin];
666 }
667 }
668 }
669 } // Initialize
670
671 //------------------------------------------------------------------------------------
672 //------------------------------------------------------------------------------------
673
triplet()674 triplet::triplet()
675 : first(0),
676 second(0),
677 third(0)
678 {
679 // intentionally blank
680 } // triplet default constructor
681
682 //------------------------------------------------------------------------------------
683
triplet(long f,long s,long t)684 triplet::triplet(long f, long s, long t)
685 : first(f),
686 second(s),
687 third(t)
688 {
689 // intentionally blank
690 } // triplet constructor
691
692 //------------------------------------------------------------------------------------
693
operator <(const triplet & rhs) const694 bool triplet::operator<(const triplet& rhs) const
695 {
696 if (first != rhs.first) return (first < rhs.first);
697 if (second != rhs.second) return (second < rhs.second);
698 return (third < rhs.third);
699 } // triplet operator<
700
701 //____________________________________________________________________________________
702