1 // $Id: parameter.cpp,v 1.49 2011/10/31 22:04:51 ewalkup Exp $
2 
3 /*
4   Copyright 2002  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 #include <cassert>
12 
13 #include "parameter.h"
14 #include "forcesummary.h"
15 #include "prior.h"
16 #include "registry.h"
17 #include "mathx.h"
18 #include "random.h"
19 #include "stringx.h"
20 #include "ui_vars_prior.h"
21 #include "ui_strings.h" // for kludgy Parameter::GetUserName()
22 #include "local_build.h" // for definition of LAMARC_NEW_FEATURE_RELATIVE_SAMPLING
23 //#include "regiongammainfo.h"
24 
25 using namespace std;
26 
27 //------------------------------------------------------------------------------------
28 
29 class RegionGammaInfo;
30 
31 //------------------------------------------------------------------------------------
32 
GetMLE(long region) const33 double ResultStruct::GetMLE(long region) const
34 {
35     assert(region >= 0 && static_cast<unsigned long>(region) < mles.size());
36     return mles[region];
37 } // GetMLE
38 
39 //------------------------------------------------------------------------------------
40 
GetOverallMLE() const41 double ResultStruct::GetOverallMLE() const
42 {
43     // if there is exactly one region, we will return the regional
44     // mle as the overall mle.  If there are multiple regions and
45     // no overall mle, something is wrong.
46     assert(!(overallmle.empty() && mles.size() != 1));
47 
48     if (overallmle.empty()) return mles[0];
49     else return overallmle[0];
50 } // GetOverallMLE
51 
52 //------------------------------------------------------------------------------------
53 
GetAllMLEs() const54 DoubleVec1d ResultStruct::GetAllMLEs()  const
55 {
56     assert(!mles.empty());
57     DoubleVec1d result = mles;
58     if (!overallmle.empty()) result.push_back(overallmle[0]);
59     return result;
60 } // GetAllMLEs
61 
62 //------------------------------------------------------------------------------------
63 
GetProfile(long region) const64 const ProfileStruct& ResultStruct::GetProfile(long region) const
65 {
66     assert(region >= 0 && static_cast<unsigned long>(region) < profiles.size());
67     return profiles[region];
68 } // GetProfile
69 
70 //------------------------------------------------------------------------------------
71 
GetOverallProfile() const72 const ProfileStruct& ResultStruct::GetOverallProfile() const
73 {
74     // if there is exactly one region, we will return its profile.
75     // otherwise, it is an error to ask for overalls if there aren't
76     // any.
77 
78     assert(!(overallprofile.empty() && profiles.size() != 1));
79 
80     if (overallprofile.empty()) return profiles[0];
81     else return overallprofile[0];
82 } // GetOverallProfile
83 
84 //------------------------------------------------------------------------------------
85 
GetAllProfiles() const86 vector<ProfileStruct> ResultStruct::GetAllProfiles() const
87 {
88     assert(!profiles.empty());
89     vector<ProfileStruct> result = profiles;
90     result.push_back(overallprofile[0]);
91     return result;
92 } // GetAllProfiles
93 
GetProfileFor(double centile,long reg) const94 const ProfileLineStruct& ResultStruct::GetProfileFor(double centile,
95                                                      long reg) const
96 {
97     if (reg == registry.GetDataPack().GetNRegions())
98     {
99         return GetOverallProfile().GetProfileLine(centile);
100     }
101     return GetProfile(reg).GetProfileLine(centile);
102 }
103 
AddMLE(double mle,long region)104 void ResultStruct::AddMLE(double mle, long region)
105 {
106     //We have to replace the MLE sometimes when reading from a summary file.
107     if (region < static_cast<long>(mles.size()))
108     {
109         mles[region] = mle;
110     }
111     else
112     {
113         assert(region == static_cast<long>(mles.size()));
114         mles.push_back(mle);
115     }
116 }
117 
AddOverallMLE(double mle)118 void ResultStruct::AddOverallMLE(double mle)
119 {
120     if (overallmle.size() > 0)
121     {
122         overallmle[0] = mle;
123     }
124     else
125     {
126         overallmle.push_back(mle);
127     }
128 }
129 
130 //------------------------------------------------------------------------------------
131 //------------------------------------------------------------------------------------
132 
Parameter(const ParamStatus & status,unsigned long paramvecIndex,const string sname,const string lname,force_type thisforce,method_type meth,proftype prof,const UIVarsPrior & uiprior,double truevalue)133 Parameter::Parameter(   const ParamStatus& status,
134                         unsigned long paramvecIndex,
135                         const string sname,
136                         const string lname,
137                         force_type thisforce,
138                         method_type meth,
139                         proftype prof,
140                         const UIVarsPrior & uiprior,
141                         double truevalue
142     )
143     : m_status(status),
144       m_paramvecIndex(paramvecIndex),
145       m_shortname(sname),
146       m_name(lname),
147       m_force(thisforce),
148       m_method(meth),
149       m_profiletype(prof),
150       m_truevalue(truevalue),
151       m_prior(uiprior)
152 {
153     if (m_status.Status() == pstat_constant)
154     {
155         m_profiletype = profile_NONE;
156         m_name += " (held constant)";
157     } // The names of grouped parameters are set in registry.cpp later.
158 } // Parameter constructor
159 
160 //------------------------------------------------------------------------------------
161 
Parameter(const ParamStatus & status,unsigned long paramvecIndex)162 Parameter::Parameter(const ParamStatus& status, unsigned long paramvecIndex)
163     : m_status(status),
164       m_paramvecIndex(paramvecIndex),
165       m_method(method_PROGRAMDEFAULT),
166       m_profiletype(profile_NONE),
167       m_prior(status)
168 {
169     if (m_status.Valid())
170     {
171         throw implementation_error("Tried to create a valid parameter object using the invalid parameter constructor.");
172     }
173 } // Invalid parameter constructor
174 
175 //------------------------------------------------------------------------------------
176 
GetUserName() const177 string Parameter::GetUserName() const
178 {
179     return m_shortname;
180 } // GetUserName
181 
182 //------------------------------------------------------------------------------------
183 
IsEasilyBayesianRearrangeable() const184 bool Parameter::IsEasilyBayesianRearrangeable() const
185 {
186     // currently epoch boundary times are not casually movable
187     // and require their own seperate arranger (the EpochSizeArranger)
188     return (IsVariable() && m_force != force_DIVERGENCE);
189 } // IsEasilyBayesianRearrangeable
190 
191 //------------------------------------------------------------------------------------
192 
DrawFromPrior() const193 pair<double, double> Parameter::DrawFromPrior() const
194 {
195     return m_prior.RandomDraw();
196 } // DrawFromPrior
197 
198 //------------------------------------------------------------------------------------
199 
IsZeroTrueMin()200 bool Parameter::IsZeroTrueMin()
201 {
202     if (m_force == force_GROW)
203         return false;
204     return true;
205 }
206 
207 //------------------------------------------------------------------------------------
208 
GetPriorLikes(long region) const209 vector<centilepair> Parameter::GetPriorLikes(long region) const
210 {
211 
212     assert(IsValidParameter());
213     vector<centilepair> answer;
214 
215     const ProfileStruct& profile = m_results.GetProfile(region);
216     vector<ProfileLineStruct>::const_iterator it;
217 
218     for (it = profile.profilelines.begin();
219          it != profile.profilelines.end();
220          ++it)
221     {
222         centilepair newpair(it->percentile, it->loglikelihood);
223         answer.push_back(newpair);
224     }
225     return answer;
226 } // GetPriorLikes
227 
228 //------------------------------------------------------------------------------------
229 
GetOverallPriorLikes() const230 vector<centilepair> Parameter::GetOverallPriorLikes() const
231 {
232     assert(IsValidParameter());
233 
234     vector<centilepair> answer;
235 
236     const ProfileStruct& profile = m_results.GetOverallProfile();
237     vector<ProfileLineStruct>::const_iterator it;
238 
239     for (it = profile.profilelines.begin();
240          it != profile.profilelines.end();
241          ++it)
242     {
243         centilepair newpair(it->percentile, it->loglikelihood);
244         answer.push_back(newpair);
245     }
246     return answer;
247 } // GetOverallPriorLikes
248 
249 //------------------------------------------------------------------------------------
250 
GetProfiles(long region) const251 vector<vector<centilepair> > Parameter::GetProfiles(long region) const
252 {
253     assert(IsValidParameter());
254 
255     const ProfileStruct& profile = m_results.GetProfile(region);
256     vector<ProfileLineStruct>::const_iterator line;
257 
258     vector<vector<centilepair> > answer;
259     vector<centilepair> answerline;
260 
261     long parameter;
262     long numparams = 0;
263     // hack, sorry
264     for (line = profile.profilelines.begin(); line != profile.profilelines.end(); ++line)
265     {
266         if (!line->profparam.empty())
267         {
268             numparams = line->profparam.size();
269             break;
270         }
271     }
272 
273     // if this assert fires, profiles have been requested but none
274     // whatsoever can be found....
275     // assert(numparams > 0);
276     // LS NOTE:  In a bayesian analysis, having no profiles is expected--we
277     //  use GetCIs and GetLikes, etc. instead.  It's convenient to call this
278     //  routine anyway, so I don't have to special-case the calling code in
279     //  runreports.  Hence, commenting out the above 'assert'.
280 
281     //long numparams = profile.profilelines[0].profparam.size();
282 
283     for (parameter = 0; parameter < numparams; ++parameter)
284     {
285         for (line = profile.profilelines.begin();
286              line != profile.profilelines.end();
287              ++line)
288         {
289             double thisparam = line->profparam[parameter];
290             centilepair newpair(line->percentile, thisparam);
291             answerline.push_back(newpair);
292         }
293         answer.push_back(answerline);
294         answerline.clear();
295     }
296 
297     return answer;
298 
299 } // GetProfiles
300 
301 //------------------------------------------------------------------------------------
302 
GetOverallProfile() const303 vector<vector<centilepair> > Parameter::GetOverallProfile() const
304 {
305     assert(IsValidParameter());
306 
307     vector<vector<centilepair> > answer;
308     vector<centilepair> answerline;
309 
310     const ProfileStruct& profile = m_results.GetOverallProfile();
311     vector<ProfileLineStruct>::const_iterator line;
312 
313     long parameter;
314     // hack, sorry
315     long numparams = profile.profilelines[0].profparam.size();
316 
317     for (parameter = 0; parameter < numparams; ++parameter)
318     {
319         for (line = profile.profilelines.begin();
320              line != profile.profilelines.end();
321              ++line)
322         {
323             double thisparam = line->profparam[parameter];
324             centilepair newpair(line->percentile, thisparam);
325             answerline.push_back(newpair);
326         }
327         answer.push_back(answerline);
328         answerline.clear();
329     }
330 
331     return answer;
332 
333 } // GetOverallProfile
334 
335 //------------------------------------------------------------------------------------
336 
GetCIs(long region) const337 vector<centilepair> Parameter::GetCIs(long region) const
338 {
339     assert(IsValidParameter());
340 
341     vector<centilepair> answer;
342 
343     const ProfileStruct& profile = m_results.GetProfile(region);
344     vector<ProfileLineStruct>::const_iterator it;
345 
346     for (it = profile.profilelines.begin();
347          it != profile.profilelines.end();
348          ++it)
349     {
350         centilepair newpair(it->percentile, it->profilevalue);
351         answer.push_back(newpair);
352     }
353     return answer;
354 
355 } // GetCIs
356 
357 //------------------------------------------------------------------------------------
358 
GetOverallCIs() const359 vector<centilepair> Parameter::GetOverallCIs() const
360 {
361     assert(IsValidParameter());
362 
363     vector<centilepair> answer;
364 
365     const ProfileStruct& profile = m_results.GetOverallProfile();
366     vector<ProfileLineStruct>::const_iterator it;
367 
368     for (it = profile.profilelines.begin();
369          it != profile.profilelines.end();
370          ++it)
371     {
372         centilepair newpair(it->percentile, it->profilevalue);
373         answer.push_back(newpair);
374     }
375     return answer;
376 
377 } // GetOverallCIs
378 
CentileIsExtremeLow(double centile,long reg) const379 bool Parameter::CentileIsExtremeLow(double centile, long reg) const
380 {
381     return m_results.GetProfileFor(centile, reg).isExtremeLow;
382 }
383 
CentileIsExtremeHigh(double centile,long reg) const384 bool Parameter::CentileIsExtremeHigh(double centile, long reg) const
385 {
386     return m_results.GetProfileFor(centile, reg).isExtremeHigh;
387 }
388 
CentileHadWarning(double centile,long reg) const389 bool Parameter::CentileHadWarning(double centile, long reg) const
390 {
391     return m_results.GetProfileFor(centile, reg).maximizerWarning;
392 }
393 
394 //------------------------------------------------------------------------------------
395 
AddProfile(const ProfileStruct & prof,likelihoodtype like)396 void Parameter::AddProfile(const ProfileStruct& prof, likelihoodtype like)
397 {
398     assert(IsValidParameter());
399     switch (like)
400     {
401         case ltype_ssingle:
402             m_results.AddProfile(prof);
403             break;
404         case ltype_replicate:
405             m_results.AddProfile(prof);
406             break;
407         case ltype_region:
408             m_results.AddOverallProfile(prof);
409             break;
410         case ltype_gammaregion:
411         {
412             RegionGammaInfo *pRegionGammaInfo = registry.GetRegionGammaInfo();
413             if (!pRegionGammaInfo ||
414                 !pRegionGammaInfo->CurrentlyPerformingAnalysisOverRegions())
415             {
416                 string msg = "Parameter::AddProfile() was told to add a profile ";
417                 msg += "for an overall estimate over all genomic regions with ";
418                 msg += "gamma-distributed background mutation rates, but the ";
419                 msg += "necessary RegionGammaInfo object was not found, or was ";
420                 msg += "found in the \"off\" state, neither of which should happen.";
421                 throw implementation_error(msg);
422             }
423             m_results.AddOverallProfile(prof);
424             if (!pRegionGammaInfo->HaveProfile() && force_REGION_GAMMA == m_force)
425                 pRegionGammaInfo->AddProfile(prof);
426             break;
427         }
428     }
429 } // AddProfile
430 
431 //------------------------------------------------------------------------------------
432 //------------------------------------------------------------------------------------
433 
434 // Initialization of static variable, needs to be in .cpp
435 bool ParamVector::s_locked = false;
436 
437 //------------------------------------------------------------------------------------
438 
439 // This constructor can make a read-only ParamVector
440 
ParamVector(bool readonly)441 ParamVector::ParamVector(bool readonly)
442     : m_readonly(readonly),
443       forcesum(registry.GetForceSummary())
444 {
445     if (m_readonly)
446     {
447         parameters = forcesum.GetAllParameters();
448     }
449     else
450     {
451         assert(!s_locked);        // attempt to check out a second set of parameters!
452         parameters = forcesum.GetAllParameters();
453         s_locked = true;
454     }
455     const RegionGammaInfo *pRegionGammaInfo = registry.GetRegionGammaInfo();
456     if (pRegionGammaInfo && pRegionGammaInfo->CurrentlyPerformingAnalysisOverRegions())
457     {
458         UIVarsPrior gammaprior(force_REGION_GAMMA);
459         Parameter paramAlpha(pRegionGammaInfo->GetParamStatus(),
460                              parameters.size(),
461                              "alpha",
462                              "alpha (shape parameter for gamma over regions)",
463                              force_REGION_GAMMA,
464                              method_PROGRAMDEFAULT,
465                              pRegionGammaInfo->GetProfType(),
466                              gammaprior,
467                              FLAGDOUBLE
468                              );
469         if (pRegionGammaInfo->HaveMLE())
470         {
471             paramAlpha.AddOverallMLE(pRegionGammaInfo->GetMLE());
472             if (pRegionGammaInfo->HaveProfile())
473                 paramAlpha.AddProfile(pRegionGammaInfo->GetProfile(), ltype_gammaregion);
474         }
475         parameters.push_back(paramAlpha);
476     }
477 
478 } // ParamVector
479 
480 //------------------------------------------------------------------------------------
481 
~ParamVector()482 ParamVector::~ParamVector()
483 {
484     if (m_readonly) return;
485 
486     assert(s_locked);        // how did it get unlocked before destruction??
487     forcesum.SetAllParameters(parameters);
488     s_locked = false;
489 
490 } // ParamVector destructor
491 
492 //------------------------------------------------------------------------------------
493 
operator [](long index)494 Parameter& ParamVector::operator[](long index)
495 {
496     assert(ParamVector::s_locked);  // should only be unlocked in 'const' context
497 
498     // bounds checking
499     assert(index >= 0);
500     assert(index < static_cast<long>(parameters.size()));
501 
502     return parameters[index];
503 
504 } // ParamVector operator[]
505 
506 //------------------------------------------------------------------------------------
507 
operator [](long index) const508 const Parameter& ParamVector::operator[](long index) const
509 {
510     assert(index >= 0);
511     assert(index < static_cast<long>(parameters.size()));
512 
513     return parameters[index];
514 
515 } // ParamVector operator[] const
516 
517 //------------------------------------------------------------------------------------
518 
CheckCalcProfiles() const519 paramlistcondition ParamVector::CheckCalcProfiles() const
520 {
521     vector <Parameter> :: const_iterator pit;
522     long ok=0;
523     long nok=0;
524     for(pit=parameters.begin(); pit != parameters.end(); pit++)
525     {
526         if(pit->IsValidParameter())
527         {
528             switch(pit->GetProfileType())
529             {
530                 case profile_FIX:
531                 case profile_PERCENTILE:
532                     ok++;
533                     break;
534                 case profile_NONE:
535                     nok++;
536                     continue;
537             }
538         }
539     }
540     long sum = ok + nok;
541     if(nok==sum)
542         return paramlist_NO;
543     else
544     {
545         if(ok==sum)
546             return paramlist_YES;
547         else
548             return paramlist_MIX;
549     }
550 }
551 
552 //------------------------------------------------------------------------------------
553 
CheckCalcPProfiles() const554 paramlistcondition ParamVector::CheckCalcPProfiles() const
555 {
556     vector <Parameter> :: const_iterator pit;
557     long fok=0;
558     long pok=0;
559     long nok=0;
560     for(pit=parameters.begin(); pit != parameters.end(); pit++)
561     {
562         if(pit->IsValidParameter())
563         {
564             switch(pit->GetProfileType())
565             {
566                 case profile_FIX:
567                     fok++;
568                     break;
569                 case profile_PERCENTILE:
570                     pok++;
571                     break;
572                 case profile_NONE:
573                     nok++;
574                     continue;
575             }
576         }
577     }
578     long sum = fok + pok + nok;
579     if(nok==sum)
580         return paramlist_NO;
581     else
582     {
583         if(pok==sum)
584             return paramlist_YES;
585         else
586             return paramlist_MIX;
587     }
588 }
589 
590 //------------------------------------------------------------------------------------
591 
NumProfiledParameters() const592 long ParamVector::NumProfiledParameters() const
593 {
594     vector<Parameter>::const_iterator it;
595 
596     long result = 0;
597 
598     for (it = parameters.begin(); it != parameters.end(); ++it)
599     {
600         if (it->IsProfiled())
601             ++result;
602     }
603 
604     return result;
605 } // NumProfiledParameters
606 
607 //------------------------------------------------------------------------------------
608 
NumVariableParameters() const609 long ParamVector::NumVariableParameters() const
610 {
611     vector<Parameter>::const_iterator it;
612 
613     long result = 0;
614 
615     for (it = parameters.begin(); it != parameters.end(); ++it)
616     {
617         ParamStatus mystatus = ParamStatus(it->GetStatus());
618         if (mystatus.Varies()) ++result;
619     }
620 
621     return result;
622 } // NumVariableParameters
623 
624 //------------------------------------------------------------------------------------
625 
ChooseSampleParameterIndex(Random * randomSource) const626 long ParamVector::ChooseSampleParameterIndex(Random * randomSource) const
627 {
628 #ifdef LAMARC_NEW_FEATURE_RELATIVE_SAMPLING
629 
630     long totalWeight = 0;
631     vector<Parameter>::const_iterator it;
632     for (it = parameters.begin(); it != parameters.end(); ++it)
633     {
634         if (it->IsEasilyBayesianRearrangeable())
635         {
636             long thisRate = it->GetPrior().GetSamplingRate();
637             assert(thisRate > 0); // EWFIX -- really must handle this.
638             totalWeight += thisRate;
639         }
640     }
641     assert(totalWeight > 0);
642 
643     long weightedIndex = randomSource->Long(totalWeight);
644 
645     long remainingWeight = weightedIndex;
646     long chosenIndex = 0;
647     for (it = parameters.begin(); it != parameters.end(); ++it)
648     {
649         long thisWeight = it->GetPrior().GetSamplingRate();
650         remainingWeight -= thisWeight;
651         if (remainingWeight < 0)
652         {
653             return chosenIndex;
654         }
655         chosenIndex++;
656     }
657     assert(false);
658     return FLAGLONG;
659 
660 #else
661     long chosen;
662     while (true)
663     {
664         long chosen = randomSource->Long(size());
665         if (operator[](chosen).IsEasilyBayesianRearrangeable()) return chosen;
666         //LS DEBUG:  Potential for an infinite loop here if no parameter is set
667         // to be variable, but this should be caught when exiting the menu.
668 
669     }
670     assert(false);
671     return(FLAGLONG);
672 #endif
673 
674 }
675 
676 //____________________________________________________________________________________
677