1 // $Id: locus.h,v 1.51 2011/03/12 20:02:52 bobgian Exp $
2 
3 /*
4   Copyright 2003  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 /***************************************************************
12  The Locus class contains data information for one locus.
13 
14  The LocusVec is a managed container of Locus objects which knows
15  that some represent "fixed" and others represent "floating" loci.
16 
17  NB  This code distinguishes between "markers" and "sites".  A marker
18  is a site for which we have data.  In SNP data, for example, every base
19  pair is a site, but only the SNPs are markers.  Data likelihoods are
20  calculated on markers; recombination probabilities are calculated on
21  sites.  Please keep these straight!
22 
23  Locus written by Mary Kuhner 2002/07/24
24  2004/09/15 Moved TipData class to its own file--Mary
25  2004/09/15 Merged in functionality of class LocusLike--Mary
26  2004/10/05 Added managed container class LocusVec--Mary
27 ****************************************************************/
28 
29 #ifndef LOCUS_H
30 #define LOCUS_H
31 
32 #include <cassert>                      // May be needed for inline definitions.
33 #include <map>
34 #include <string>
35 #include <vector>
36 
37 #include "types.h"
38 #include "vectorx.h"
39 #include "dlmodel.h"
40 #include "dlcalc.h"
41 #include "locuscell.h"
42 #include "phenotypes.h"
43 #include "tipdata.h"
44 #include "ui_vars_traitmodels.h"
45 
46 //------------------------------------------------------------------------------------
47 
48 class Individual;
49 
50 //------------------------------------------------------------------------------------
51 //------------------------------------------------------------------------------------
52 
53 class Locus
54 {
55     typedef std::map<force_type,std::string> tipidtype;
56 
57   private:
58     long            m_index;            // position in vector
59     string          m_name;
60     long            m_nmarkers;
61     long            m_nsites;
62     long            m_regionalmapposition; // numbered from start of region
63     long            m_globalmapposition; // numbered in user's coordinate system
64     long            m_offset;
65     bool            m_movable;          // If possible, this should move to phase-1 only.
66     mloc_type       m_type;             // And this should be phase-2.
67     bool            m_defaultlocations; // phase-1
68     LongVec1d       m_positions;        // dim: markers
69     DataType_ptr    m_pDatatype;
70     DataModel_ptr   m_pDatamodel;
71     vector<TipData> m_tipdata;          // dim: tips (LS DEBUG:  phase 1)
72 
73     LocusCell       m_protoCell;
74     DLCalc_ptr      m_pDLCalculator;
75     vector<LocusCell> m_tipcells;       // dim: tips  (LS DEBUG:  phase 2)
76     rangeset        m_allowedrange;
77     rangeset        m_variablerange;    // LS DEBUG SIM:  probably don't need later.
78     rangeset        m_unknownrange;     // LS DEBUG SIM:  probably don't need later.
79 
80     DoubleVec1d     m_map;
81     DoubleVec1d     m_rawmap;
82 
83     // For simulation purposes.
84     bool            m_simulate;
85     long            m_truesite;
86     std::map<long,DoubleVec1d> m_variability;
87     Phenotypes      m_phenotypes;       //Phase 2 (during phase 1, it lives elsewhere)
88 
89     // Helper function for CalcNVariableMarkers().
90     long            CalcNVariableMarkers(tipidtype xpart) const;
91 
92 
93   public:
94 
95     // Construction/destruction.
96     // We accept dtor, copy ctor and op=, though with some doubts about the latter two.
97     // Apparently this class is copied only to put it into a vector and the original is discarded,
98     // so sharing datatype and datamodel is not a problem.
99     Locus(long ind, bool movable, string name);
100 
101     // Getters.
GetIndex()102     long          GetIndex()          const { return m_index; };
103     string        GetName()           const;
GetNmarkers()104     long          GetNmarkers()       const { return m_nmarkers; };
105     long          GetNsites()         const;
GetGlobalMapposition()106     long     GetGlobalMapposition()   const { return m_globalmapposition; };
GetRegionalMapposition()107     long     GetRegionalMapposition() const { return m_regionalmapposition; };
108     long          GetOffset()         const;
GetMarkerLocations()109     LongVec1d     GetMarkerLocations()     const { return m_positions; };
110     LongVec1d     GetUserMarkerLocations() const;
GetDataType()111     data_type     GetDataType()       const { return m_pDatatype->GetType(); };
GetDataTypePtr()112     DataType_ptr  GetDataTypePtr()    const { return m_pDatatype; };
GetDataModel()113     DataModel_ptr GetDataModel()      const { return m_pDatamodel; };
GetMuRate()114     double        GetMuRate()         const { return m_pDatamodel->GetRelMuRate(); };
GetDLCalc()115     DLCalc_ptr    GetDLCalc()         const { return m_pDLCalculator; };
GetProtoCell()116     LocusCell     GetProtoCell()      const { return m_protoCell; };
GetTipCells()117     vector<LocusCell> GetTipCells()   const { return m_tipcells; };
GetTipData()118     vector<TipData>& GetTipData()           { return m_tipdata; };
GetTipData()119     const vector<TipData>& GetTipData() const { return m_tipdata; };
120     vector<TipData> GetPopulationTipData(const std::string& popname) const;
121     StringVec2d   GetCrossPartitionGeneticData(tipidtype xpart) const;
122     StringVec3d   GetPartitionGeneticData(force_type partname) const;
123     StringVec1d   GetMarkerDataWithLabels(const std::vector<Individual>&) const;// dim: ntips
GetNTips()124     long          GetNTips()          const { return m_tipdata.size(); };
125     long          GetNTips(tipidtype xpart) const;
GetAllowedRange()126     rangeset      GetAllowedRange() const {return m_allowedrange;};
GetMappingInfo()127     DoubleVec1d   GetMappingInfo() const {return m_map;};
GetRawMappingInfo()128     DoubleVec1d   GetRawMappingInfo() const {return m_rawmap;};
GetShouldSimulate()129     bool          GetShouldSimulate() const {return m_simulate;};
130 
131     // Helper for F84 Model base freqs setting, called by Region::CountNBases().
132     DoubleVec1d   CountNNucleotides() const;
133 
134     bool          MultiSampleIndividuals() const;
135     bool          SiteInLocus(long site) const;
136     long          SiteToMarker(long site) const;
137     std::pair<long, long> GetSiteSpan() const;
138     std::pair<long, long> GetGlobalScaleSiteSpan() const;
139     bool          IsMovable() const;    //Phase 1
140     bool          IsMoving() const;     //Phase 2
IsUsingDefaultLocations()141     bool          IsUsingDefaultLocations() const {return m_defaultlocations;};
GetAnalysisType()142     mloc_type     GetAnalysisType() const {return m_type;};
143     double        GetPerBaseErrorRate() const;
144 
145     // Setters.
SetIndex(long index)146     void SetIndex(long index) {assert(m_movable); m_index = index;};
SetName(string newname)147     void SetName(string newname)           { m_name = newname; };
148     void SetNmarkers(long val);         // throws!
SetNsites(long val)149     void SetNsites(long val)               { m_nsites = val; };
150     void SetGlobalMapposition(long site);
151     void SetRegionalMapposition(long site);
152     void SetOffset(long val);
153     void SetPositions(const LongVec1d& pos);
154     void SetPositions();                // set to default
155     void SetDataType(DataType_ptr dt);
156     void SetDataModelOnce(DataModel_ptr dm);
SetTipData(const TipData & td)157     void SetTipData(const TipData& td)     { m_tipdata.push_back(td); };
158     void SetEmptyTipData(vector<TipData> td);
SetTrueSite(long site)159     void SetTrueSite(long site)     { m_truesite = site; };
160     void SetAllowedRange(rangeset rs, long regoffset);
SetMappingInfo(DoubleVec1d map)161     void SetMappingInfo(DoubleVec1d map) {m_map = map;};
SetRawMappingInfo(DoubleVec1d rawmap)162     void SetRawMappingInfo(DoubleVec1d rawmap) {m_rawmap = rawmap;};
163     void SetAnalysisType(mloc_type type);
SetShouldSimulate(bool sim)164     void SetShouldSimulate(bool sim) {m_simulate = sim;};
165 
166     void SetVariableRange(rangeset rs);
SetPhenotypes(Phenotypes p)167     void SetPhenotypes(Phenotypes p) {m_phenotypes = p;};
168 
169     // Workers.
170     void            Setup(const vector<Individual>& individuals);
171     LongVec1d       CalcNVariableMarkers() const; // dim: xparts
172     double          CalculateDataLikelihood(Tree& tree, bool moving) const;
173     void            AddUniqueNamesTo(std::set<string>& popnames) const;
174 
175     // Helper function for Region::RemovePartitionFromLoci.
176     void            RemovePartitionFromTipDatas(force_type forcename);
177 
178     // Validators.
179     bool            IsValidLocus(string& errorString) const;
180     StringVec1d     ReportMappingInfo(long regoffset, bool isshort=false) const;
181     bool            IsDuplicateTipName(const string& newname) const;
182 
183     // XML.
184     StringVec1d MakeTraitXML(long nspaces, long regoffset) const;
185 
186     // Simulation.
187     void SetNewMappositionIfMoving();
188     void SimulateData(Tree& tree, long nsites);
189     void CopyDataFrom(Locus& original, Tree& tree);
190     void MakePhenotypesFor(IndVec& individuals);
191     void SaveState(DoubleVec1d& state, long marker, string label);
192     void RandomizeHalf(Tree& tree, bool swath);
ClearVariability()193     void ClearVariability() {m_variability.clear();};
194 
195     // Helper functions for simulations.
196     bool            IsNighInvariant() const;
197     bool            IsNighInvariant(long marker) const;
198     bool            IsCompletelyInvariant(long marker) const;
199     long            ChooseVariableSiteFrom(rangeset rset);
200     rangeset        GetVariableRange();
201     rangeset        CalculateVariableRange() const;
202     rangeset        CalculateCompleteVariableRange() const;
203     rangeset        GetVariableAndUnknownRange() const;
204     long            GetVariabilityOfUnknownRange() const;
205     long            GetVariabilityOfKnownRange() const;
GetUnknownRange()206     rangeset        GetUnknownRange() const {return m_unknownrange;};
207     rangeset        GetKnownRange() const;
ClearUnknownRange()208     void            ClearUnknownRange() {m_unknownrange.clear();};
209     StringVec1d     CreateDataModelReport(string regionname) const;
210     void            WriteMapping(string regname, long regoffset) const;
211 
212     // Debugging functions.
213     void            PrintVariability();
214     void            PrintOnesAndZeroesForVariableSites();
215 
216     // For XML reporting.
217     std::set<std::pair<double, long int> >  MakeOrderedSites(long int regoffset) const;
218     rangeset        GetTopSites (std::set<std::pair<double, long int> > orderedsites, double percLimit) const;
219     rangeset        GetBestSites(std::set<std::pair<double, long int> > orderedsites) const;
220 };
221 
222 #endif // LOCUS_H
223 
224 //____________________________________________________________________________________
225