1 // $Id: haplotypes.cpp,v 1.12 2011/03/07 06:08:49 bobgian Exp $
2 
3 /*
4   Copyright 2006  Lucian Smith, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 #include <cassert>
12 #include <iostream>                     // debug only
13 
14 #include "local_build.h"
15 
16 #include "errhandling.h"
17 #include "haplotypes.h"
18 #include "locus.h"
19 #include "locuscell.h"
20 #include "registry.h"
21 #include "region.h"
22 #include "stringx.h"
23 #include "xml_strings.h"
24 
25 using std::multiset;
26 using std::cout;
27 using std::endl;
28 
29 //------------------------------------------------------------------------------------
30 
Haplotypes(long regnum,string lname)31 Haplotypes::Haplotypes(long regnum, string lname)
32     : m_regionnum(regnum),
33       m_locusname(lname),
34       m_haplotype_alleles(),
35       m_penetrances(),
36       m_haplotype_dlcells(),
37       m_current_hapindex(FLAGLONG)
38 {
39     // intentionally blank
40 }
41 
42 //------------------------------------------------------------------------------------
43 
Haplotypes(Haplotypes oldhaps,bool clear)44 Haplotypes::Haplotypes(Haplotypes oldhaps, bool clear)
45     : m_regionnum(oldhaps.m_regionnum),
46       m_locusname(oldhaps.m_locusname),
47       m_haplotype_alleles(),
48       m_penetrances(),
49       m_haplotype_dlcells(),
50       m_current_hapindex(FLAGLONG)
51 {
52     assert(clear == true);
53 }
54 
55 //------------------------------------------------------------------------------------
56 
ConvertAllelesToDLCells()57 void Haplotypes::ConvertAllelesToDLCells()
58 {
59     assert(m_haplotype_alleles.size() > 0);
60     assert(m_haplotype_alleles.size() == m_penetrances.size());
61     assert(m_haplotype_dlcells.size() == 0);
62 
63     for (unsigned long hap=0; hap<m_haplotype_alleles.size(); hap++)
64     {
65         vector<LocusCell> resolution;
66         for (unsigned long cell=0; cell<m_haplotype_alleles[hap].size(); cell++)
67         {
68             StringVec1d onecell;
69             onecell.push_back(m_haplotype_alleles[hap][cell]);
70             const Locus& locus=registry.GetDataPack().GetRegion(m_regionnum).GetLocus(m_locusname);
71             resolution.push_back(locus.GetDataTypePtr()->CreateInitializedDLCell(locus, onecell));
72         }
73         //Now we multiply final LocusCell by the appropriate penetrance.  We
74         // could choose any of them if we wanted to, but choose the last so that
75         // the CollapseHaplotypeDLs() routine doesn't have to worry about
76         // penetrances.
77         resolution[resolution.size()-1] *= m_penetrances[hap];
78         m_haplotype_dlcells.push_back(resolution);
79     }
80     // Combine haplotype resolutions which share their identity at a tip
81     //  by simply adding the DLCells at the other tip.
82     CollapseHaplotypeDLs();
83 }
84 
85 //------------------------------------------------------------------------------------
86 
CollapseHaplotypeDLs()87 void Haplotypes::CollapseHaplotypeDLs()
88 {
89     vector<vector<LocusCell> >::iterator dlList = m_haplotype_dlcells.begin();
90     for (;dlList != m_haplotype_dlcells.end();dlList++)
91     {
92         vector<vector<LocusCell> >::iterator dlComp = dlList;
93         dlComp++;
94         for (;dlComp != m_haplotype_dlcells.end();)
95         {
96             bool allbutlastmatch = true;
97             //Actually, the last can match, too, but that should never happen.
98             for (size_t allele=0; allele<((*dlList).size()-1); allele++)
99             {
100                 if (!((*dlList)[allele] == (*dlComp)[allele]))
101                 {
102                     allbutlastmatch = false;
103                 }
104             }
105             if (allbutlastmatch)
106             {
107                 //We can collapse them into one DLcell
108                 (*dlList)[(*dlList).size()-1] += (*dlComp)[(*dlComp).size()-1];
109                 //Now delete the compared one.
110                 dlComp = m_haplotype_dlcells.erase(dlComp);
111             }
112             else
113             {
114                 dlComp++;
115             }
116         }
117     }
118 }
119 
120 //------------------------------------------------------------------------------------
121 
AddHaplotype(StringVec1d alleles,double penetrance)122 void Haplotypes::AddHaplotype(StringVec1d alleles, double penetrance)
123 {
124     if (m_haplotype_alleles.size() > 0)
125     {
126         if (alleles.size() != m_haplotype_alleles[0].size())
127         {
128             string msg = "The haplotype resolution \"";
129             for (unsigned long i=0; i<alleles.size(); i++)
130             {
131                 msg += alleles[i] + " ";
132             }
133             msg += "\" has a different number of alleles than the haplotype "
134                 "resolution \"";
135             for (unsigned long i=0; i<m_haplotype_alleles[0].size(); i++)
136             {
137                 msg += m_haplotype_alleles[0][i] + " ";
138             }
139             msg += "\".  Remember that spaces are not allowed in allele names.  Also, if you have samples"
140                 " with multiple ploidies (if you have samples from an X chromosome, say) each phenotype must match"
141                 " a set of genotypes of the same ploidy -- even if the phenotype of 'X0' matches the phenotype"
142                 " of 'XX', they must be defined separately.";
143             throw data_error(msg);
144         }
145     }
146     for (size_t hap=0; hap<m_haplotype_alleles.size(); hap++)
147     {
148         if (m_haplotype_alleles[hap] == alleles)
149         {
150             return;
151         }
152     }
153     m_haplotype_alleles.push_back(alleles);
154     m_penetrances.push_back(penetrance);
155 }
156 
157 //------------------------------------------------------------------------------------
158 
AddHaplotype(multiset<string> alleles,double penetrance)159 void Haplotypes::AddHaplotype(multiset<string> alleles, double penetrance)
160 {
161     StringVec2d allAlleles = SetToVecs(alleles);
162     for (size_t alleleVec=0; alleleVec != allAlleles.size(); alleleVec++)
163     {
164         AddHaplotype(allAlleles[alleleVec], penetrance);
165     }
166 }
167 
168 //------------------------------------------------------------------------------------
169 
ChooseNewHaplotypes()170 vector<LocusCell> Haplotypes::ChooseNewHaplotypes()
171 {
172     if (m_haplotype_dlcells.size() == 0)
173     {
174         ConvertAllelesToDLCells();
175     }
176     if (m_haplotype_dlcells.size() == 1)
177     {
178         return m_haplotype_dlcells[0];
179     }
180     //Choose a new haplotype index
181     long newindex = m_current_hapindex;
182     while (newindex == m_current_hapindex)
183     {
184         newindex = registry.GetRandom().Long(m_haplotype_dlcells.size());
185     }
186 
187     m_current_hapindex = newindex;
188     return m_haplotype_dlcells[newindex];
189 }
190 
191 //------------------------------------------------------------------------------------
192 
ChooseRandomHaplotypes()193 vector<LocusCell> Haplotypes::ChooseRandomHaplotypes()
194 {
195     if (m_haplotype_dlcells.size() == 0)
196     {
197         ConvertAllelesToDLCells();
198     }
199     //Choose a new haplotype index
200     m_current_hapindex = registry.GetRandom().Long(m_haplotype_dlcells.size());
201     return m_haplotype_dlcells[m_current_hapindex];
202 }
203 
204 //------------------------------------------------------------------------------------
205 
ChooseFirstHaplotypes()206 vector<LocusCell> Haplotypes::ChooseFirstHaplotypes()
207 {
208     if (m_haplotype_dlcells.size() == 0)
209     {
210         ConvertAllelesToDLCells();
211     }
212     //Choose haplotype zero.
213     m_current_hapindex = 0;
214     return m_haplotype_dlcells[m_current_hapindex];
215 }
216 
217 //------------------------------------------------------------------------------------
218 
ChooseNextHaplotypes()219 vector<LocusCell> Haplotypes::ChooseNextHaplotypes()
220 {
221     assert(m_haplotype_dlcells.size() != 0);
222     //Choose the next haplotype index
223     if (static_cast<unsigned long>(m_current_hapindex) == m_haplotype_dlcells.size()-1)
224     {
225         //Already at the last one.
226         vector<LocusCell> blankcells;
227         return blankcells;
228     }
229     m_current_hapindex++;
230     return m_haplotype_dlcells[m_current_hapindex];
231 }
232 
233 //------------------------------------------------------------------------------------
234 
GetAlleles() const235 StringVec1d Haplotypes::GetAlleles() const
236 {
237     StringVec1d retvec;
238     for (unsigned long res=0; res<m_haplotype_alleles.size(); res++)
239     {
240         for (unsigned long allele=0; allele<m_haplotype_alleles[res].size(); allele++)
241         {
242             retvec.push_back(m_haplotype_alleles[res][allele]);
243         }
244     }
245     return retvec;
246 }
247 
248 //------------------------------------------------------------------------------------
249 
GetMarkerData() const250 string Haplotypes::GetMarkerData() const
251 {
252     assert(m_haplotype_alleles.size() == m_penetrances.size());
253     string markerdata;
254     for (unsigned long hapres=0; hapres<m_haplotype_alleles.size(); hapres++)
255     {
256         markerdata += ToString(m_haplotype_alleles[hapres]) + "  ";
257         if (m_penetrances[hapres] < 1)
258         {
259             markerdata += "(" + ToString(m_penetrances[hapres]) + ") ";
260         }
261     }
262     return markerdata;
263 }
264 
265 //------------------------------------------------------------------------------------
266 
GetHaplotypesXML(long nspaces) const267 StringVec1d Haplotypes::GetHaplotypesXML(long nspaces) const
268 {
269     string spaces(nspaces, ' ');
270     string spaces2(nspaces+2, ' ');
271 
272     StringVec1d retvec;
273     for (unsigned long hap=0; hap<m_penetrances.size(); hap++)
274     {
275         retvec.push_back(spaces + MakeTag(xmlstr::XML_TAG_HAPLOTYPES));
276         retvec.push_back(spaces2 + MakeTag(xmlstr::XML_TAG_PENETRANCE) + " "
277                          + ToString(m_penetrances[hap]) + " "
278                          + MakeCloseTag(xmlstr::XML_TAG_PENETRANCE));
279         retvec.push_back(spaces2 + MakeTag(xmlstr::XML_TAG_ALLELES)
280                          + ToString(m_haplotype_alleles[hap]) + " "
281                          + MakeCloseTag(xmlstr::XML_TAG_ALLELES));
282         retvec.push_back(spaces + MakeCloseTag(xmlstr::XML_TAG_HAPLOTYPES));
283     }
284     return retvec;
285 }
286 
287 //------------------------------------------------------------------------------------
288 
MultipleHaplotypes() const289 bool Haplotypes::MultipleHaplotypes()  const
290 {
291     if (m_haplotype_dlcells.size() > 0)
292     {
293         return (m_haplotype_dlcells.size() > 1); //we're in phase 2
294     }
295     else
296     {
297         return (m_haplotype_alleles.size() > 1); //still in phase 1
298     }
299 }
300 
301 //------------------------------------------------------------------------------------
302 
SetToVecs(multiset<string> stringSet) const303 StringVec2d Haplotypes::SetToVecs(multiset<string> stringSet) const
304 {
305     StringVec2d retvecs;
306     for (multiset<string>::iterator newstring=stringSet.begin();
307          newstring != stringSet.end(); newstring++)
308     {
309         multiset<string> partialSet = stringSet;
310         partialSet.erase(partialSet.find(*newstring));
311         if (partialSet.size() == 0)
312         {
313             StringVec1d strings;
314             strings.push_back(*newstring);
315             retvecs.push_back(strings);
316             return retvecs;
317         }
318         StringVec2d partialVecs = SetToVecs(partialSet);
319         for (StringVec2d::iterator partialVec = partialVecs.begin();
320              partialVec != partialVecs.end(); partialVec++)
321         {
322             (*partialVec).push_back(*newstring);
323             retvecs.push_back(*partialVec);
324         }
325     }
326     return retvecs;
327 }
328 
329 //------------------------------------------------------------------------------------
330 // Debugging function.
331 
PrintCellsAndAlleles() const332 void Haplotypes::PrintCellsAndAlleles() const
333 {
334     cout << "Here are the original strings:" << endl;
335     for (size_t set=0; set<m_haplotype_alleles.size(); set++)
336     {
337         cout << ToString(m_haplotype_alleles[set]) << endl;
338     }
339     cout << "And here are the corresponding DLCells.  They should match!" << endl;
340     for (size_t set=0; set<m_haplotype_dlcells.size(); set++)
341     {
342         for (size_t allele=0; allele<m_haplotype_dlcells[set].size(); allele++)
343         {
344             cout << registry.GetDataPack().GetRegion(m_regionnum).GetLocus(m_locusname).GetDataModel()->
345                 CellToData(m_haplotype_dlcells[set][allele][0], 0)
346                  << " ";
347         }
348     }
349     cout << endl;
350     cout << "And finally, the DLCells as raw data." << endl;
351     for (size_t set=0; set<m_haplotype_dlcells.size(); set++)
352     {
353         for (size_t allele=0; allele<m_haplotype_dlcells[set].size(); allele++)
354         {
355             cout << m_haplotype_dlcells[set][allele][0]->DLsToString(0,0);
356         }
357     }
358     cout << endl;
359 }
360 
361 //____________________________________________________________________________________
362