1 // $Id: locus.h,v 1.51 2011/03/12 20:02:52 bobgian Exp $ 2 3 /* 4 Copyright 2003 Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein 5 6 This software is distributed free of charge for non-commercial use 7 and is copyrighted. Of course, we do not guarantee that the software 8 works, and are not responsible for any damage you may cause or have. 9 */ 10 11 /*************************************************************** 12 The Locus class contains data information for one locus. 13 14 The LocusVec is a managed container of Locus objects which knows 15 that some represent "fixed" and others represent "floating" loci. 16 17 NB This code distinguishes between "markers" and "sites". A marker 18 is a site for which we have data. In SNP data, for example, every base 19 pair is a site, but only the SNPs are markers. Data likelihoods are 20 calculated on markers; recombination probabilities are calculated on 21 sites. Please keep these straight! 22 23 Locus written by Mary Kuhner 2002/07/24 24 2004/09/15 Moved TipData class to its own file--Mary 25 2004/09/15 Merged in functionality of class LocusLike--Mary 26 2004/10/05 Added managed container class LocusVec--Mary 27 ****************************************************************/ 28 29 #ifndef LOCUS_H 30 #define LOCUS_H 31 32 #include <cassert> // May be needed for inline definitions. 33 #include <map> 34 #include <string> 35 #include <vector> 36 37 #include "types.h" 38 #include "vectorx.h" 39 #include "dlmodel.h" 40 #include "dlcalc.h" 41 #include "locuscell.h" 42 #include "phenotypes.h" 43 #include "tipdata.h" 44 #include "ui_vars_traitmodels.h" 45 46 //------------------------------------------------------------------------------------ 47 48 class Individual; 49 50 //------------------------------------------------------------------------------------ 51 //------------------------------------------------------------------------------------ 52 53 class Locus 54 { 55 typedef std::map<force_type,std::string> tipidtype; 56 57 private: 58 long m_index; // position in vector 59 string m_name; 60 long m_nmarkers; 61 long m_nsites; 62 long m_regionalmapposition; // numbered from start of region 63 long m_globalmapposition; // numbered in user's coordinate system 64 long m_offset; 65 bool m_movable; // If possible, this should move to phase-1 only. 66 mloc_type m_type; // And this should be phase-2. 67 bool m_defaultlocations; // phase-1 68 LongVec1d m_positions; // dim: markers 69 DataType_ptr m_pDatatype; 70 DataModel_ptr m_pDatamodel; 71 vector<TipData> m_tipdata; // dim: tips (LS DEBUG: phase 1) 72 73 LocusCell m_protoCell; 74 DLCalc_ptr m_pDLCalculator; 75 vector<LocusCell> m_tipcells; // dim: tips (LS DEBUG: phase 2) 76 rangeset m_allowedrange; 77 rangeset m_variablerange; // LS DEBUG SIM: probably don't need later. 78 rangeset m_unknownrange; // LS DEBUG SIM: probably don't need later. 79 80 DoubleVec1d m_map; 81 DoubleVec1d m_rawmap; 82 83 // For simulation purposes. 84 bool m_simulate; 85 long m_truesite; 86 std::map<long,DoubleVec1d> m_variability; 87 Phenotypes m_phenotypes; //Phase 2 (during phase 1, it lives elsewhere) 88 89 // Helper function for CalcNVariableMarkers(). 90 long CalcNVariableMarkers(tipidtype xpart) const; 91 92 93 public: 94 95 // Construction/destruction. 96 // We accept dtor, copy ctor and op=, though with some doubts about the latter two. 97 // Apparently this class is copied only to put it into a vector and the original is discarded, 98 // so sharing datatype and datamodel is not a problem. 99 Locus(long ind, bool movable, string name); 100 101 // Getters. GetIndex()102 long GetIndex() const { return m_index; }; 103 string GetName() const; GetNmarkers()104 long GetNmarkers() const { return m_nmarkers; }; 105 long GetNsites() const; GetGlobalMapposition()106 long GetGlobalMapposition() const { return m_globalmapposition; }; GetRegionalMapposition()107 long GetRegionalMapposition() const { return m_regionalmapposition; }; 108 long GetOffset() const; GetMarkerLocations()109 LongVec1d GetMarkerLocations() const { return m_positions; }; 110 LongVec1d GetUserMarkerLocations() const; GetDataType()111 data_type GetDataType() const { return m_pDatatype->GetType(); }; GetDataTypePtr()112 DataType_ptr GetDataTypePtr() const { return m_pDatatype; }; GetDataModel()113 DataModel_ptr GetDataModel() const { return m_pDatamodel; }; GetMuRate()114 double GetMuRate() const { return m_pDatamodel->GetRelMuRate(); }; GetDLCalc()115 DLCalc_ptr GetDLCalc() const { return m_pDLCalculator; }; GetProtoCell()116 LocusCell GetProtoCell() const { return m_protoCell; }; GetTipCells()117 vector<LocusCell> GetTipCells() const { return m_tipcells; }; GetTipData()118 vector<TipData>& GetTipData() { return m_tipdata; }; GetTipData()119 const vector<TipData>& GetTipData() const { return m_tipdata; }; 120 vector<TipData> GetPopulationTipData(const std::string& popname) const; 121 StringVec2d GetCrossPartitionGeneticData(tipidtype xpart) const; 122 StringVec3d GetPartitionGeneticData(force_type partname) const; 123 StringVec1d GetMarkerDataWithLabels(const std::vector<Individual>&) const;// dim: ntips GetNTips()124 long GetNTips() const { return m_tipdata.size(); }; 125 long GetNTips(tipidtype xpart) const; GetAllowedRange()126 rangeset GetAllowedRange() const {return m_allowedrange;}; GetMappingInfo()127 DoubleVec1d GetMappingInfo() const {return m_map;}; GetRawMappingInfo()128 DoubleVec1d GetRawMappingInfo() const {return m_rawmap;}; GetShouldSimulate()129 bool GetShouldSimulate() const {return m_simulate;}; 130 131 // Helper for F84 Model base freqs setting, called by Region::CountNBases(). 132 DoubleVec1d CountNNucleotides() const; 133 134 bool MultiSampleIndividuals() const; 135 bool SiteInLocus(long site) const; 136 long SiteToMarker(long site) const; 137 std::pair<long, long> GetSiteSpan() const; 138 std::pair<long, long> GetGlobalScaleSiteSpan() const; 139 bool IsMovable() const; //Phase 1 140 bool IsMoving() const; //Phase 2 IsUsingDefaultLocations()141 bool IsUsingDefaultLocations() const {return m_defaultlocations;}; GetAnalysisType()142 mloc_type GetAnalysisType() const {return m_type;}; 143 double GetPerBaseErrorRate() const; 144 145 // Setters. SetIndex(long index)146 void SetIndex(long index) {assert(m_movable); m_index = index;}; SetName(string newname)147 void SetName(string newname) { m_name = newname; }; 148 void SetNmarkers(long val); // throws! SetNsites(long val)149 void SetNsites(long val) { m_nsites = val; }; 150 void SetGlobalMapposition(long site); 151 void SetRegionalMapposition(long site); 152 void SetOffset(long val); 153 void SetPositions(const LongVec1d& pos); 154 void SetPositions(); // set to default 155 void SetDataType(DataType_ptr dt); 156 void SetDataModelOnce(DataModel_ptr dm); SetTipData(const TipData & td)157 void SetTipData(const TipData& td) { m_tipdata.push_back(td); }; 158 void SetEmptyTipData(vector<TipData> td); SetTrueSite(long site)159 void SetTrueSite(long site) { m_truesite = site; }; 160 void SetAllowedRange(rangeset rs, long regoffset); SetMappingInfo(DoubleVec1d map)161 void SetMappingInfo(DoubleVec1d map) {m_map = map;}; SetRawMappingInfo(DoubleVec1d rawmap)162 void SetRawMappingInfo(DoubleVec1d rawmap) {m_rawmap = rawmap;}; 163 void SetAnalysisType(mloc_type type); SetShouldSimulate(bool sim)164 void SetShouldSimulate(bool sim) {m_simulate = sim;}; 165 166 void SetVariableRange(rangeset rs); SetPhenotypes(Phenotypes p)167 void SetPhenotypes(Phenotypes p) {m_phenotypes = p;}; 168 169 // Workers. 170 void Setup(const vector<Individual>& individuals); 171 LongVec1d CalcNVariableMarkers() const; // dim: xparts 172 double CalculateDataLikelihood(Tree& tree, bool moving) const; 173 void AddUniqueNamesTo(std::set<string>& popnames) const; 174 175 // Helper function for Region::RemovePartitionFromLoci. 176 void RemovePartitionFromTipDatas(force_type forcename); 177 178 // Validators. 179 bool IsValidLocus(string& errorString) const; 180 StringVec1d ReportMappingInfo(long regoffset, bool isshort=false) const; 181 bool IsDuplicateTipName(const string& newname) const; 182 183 // XML. 184 StringVec1d MakeTraitXML(long nspaces, long regoffset) const; 185 186 // Simulation. 187 void SetNewMappositionIfMoving(); 188 void SimulateData(Tree& tree, long nsites); 189 void CopyDataFrom(Locus& original, Tree& tree); 190 void MakePhenotypesFor(IndVec& individuals); 191 void SaveState(DoubleVec1d& state, long marker, string label); 192 void RandomizeHalf(Tree& tree, bool swath); ClearVariability()193 void ClearVariability() {m_variability.clear();}; 194 195 // Helper functions for simulations. 196 bool IsNighInvariant() const; 197 bool IsNighInvariant(long marker) const; 198 bool IsCompletelyInvariant(long marker) const; 199 long ChooseVariableSiteFrom(rangeset rset); 200 rangeset GetVariableRange(); 201 rangeset CalculateVariableRange() const; 202 rangeset CalculateCompleteVariableRange() const; 203 rangeset GetVariableAndUnknownRange() const; 204 long GetVariabilityOfUnknownRange() const; 205 long GetVariabilityOfKnownRange() const; GetUnknownRange()206 rangeset GetUnknownRange() const {return m_unknownrange;}; 207 rangeset GetKnownRange() const; ClearUnknownRange()208 void ClearUnknownRange() {m_unknownrange.clear();}; 209 StringVec1d CreateDataModelReport(string regionname) const; 210 void WriteMapping(string regname, long regoffset) const; 211 212 // Debugging functions. 213 void PrintVariability(); 214 void PrintOnesAndZeroesForVariableSites(); 215 216 // For XML reporting. 217 std::set<std::pair<double, long int> > MakeOrderedSites(long int regoffset) const; 218 rangeset GetTopSites (std::set<std::pair<double, long int> > orderedsites, double percLimit) const; 219 rangeset GetBestSites(std::set<std::pair<double, long int> > orderedsites) const; 220 }; 221 222 #endif // LOCUS_H 223 224 //____________________________________________________________________________________ 225