1 // $Id: haplotypes.cpp,v 1.12 2011/03/07 06:08:49 bobgian Exp $
2
3 /*
4 Copyright 2006 Lucian Smith, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 #include <cassert>
12 #include <iostream> // debug only
13
14 #include "local_build.h"
15
16 #include "errhandling.h"
17 #include "haplotypes.h"
18 #include "locus.h"
19 #include "locuscell.h"
20 #include "registry.h"
21 #include "region.h"
22 #include "stringx.h"
23 #include "xml_strings.h"
24
25 using std::multiset;
26 using std::cout;
27 using std::endl;
28
29 //------------------------------------------------------------------------------------
30
Haplotypes(long regnum,string lname)31 Haplotypes::Haplotypes(long regnum, string lname)
32 : m_regionnum(regnum),
33 m_locusname(lname),
34 m_haplotype_alleles(),
35 m_penetrances(),
36 m_haplotype_dlcells(),
37 m_current_hapindex(FLAGLONG)
38 {
39 // intentionally blank
40 }
41
42 //------------------------------------------------------------------------------------
43
Haplotypes(Haplotypes oldhaps,bool clear)44 Haplotypes::Haplotypes(Haplotypes oldhaps, bool clear)
45 : m_regionnum(oldhaps.m_regionnum),
46 m_locusname(oldhaps.m_locusname),
47 m_haplotype_alleles(),
48 m_penetrances(),
49 m_haplotype_dlcells(),
50 m_current_hapindex(FLAGLONG)
51 {
52 assert(clear == true);
53 }
54
55 //------------------------------------------------------------------------------------
56
ConvertAllelesToDLCells()57 void Haplotypes::ConvertAllelesToDLCells()
58 {
59 assert(m_haplotype_alleles.size() > 0);
60 assert(m_haplotype_alleles.size() == m_penetrances.size());
61 assert(m_haplotype_dlcells.size() == 0);
62
63 for (unsigned long hap=0; hap<m_haplotype_alleles.size(); hap++)
64 {
65 vector<LocusCell> resolution;
66 for (unsigned long cell=0; cell<m_haplotype_alleles[hap].size(); cell++)
67 {
68 StringVec1d onecell;
69 onecell.push_back(m_haplotype_alleles[hap][cell]);
70 const Locus& locus=registry.GetDataPack().GetRegion(m_regionnum).GetLocus(m_locusname);
71 resolution.push_back(locus.GetDataTypePtr()->CreateInitializedDLCell(locus, onecell));
72 }
73 //Now we multiply final LocusCell by the appropriate penetrance. We
74 // could choose any of them if we wanted to, but choose the last so that
75 // the CollapseHaplotypeDLs() routine doesn't have to worry about
76 // penetrances.
77 resolution[resolution.size()-1] *= m_penetrances[hap];
78 m_haplotype_dlcells.push_back(resolution);
79 }
80 // Combine haplotype resolutions which share their identity at a tip
81 // by simply adding the DLCells at the other tip.
82 CollapseHaplotypeDLs();
83 }
84
85 //------------------------------------------------------------------------------------
86
CollapseHaplotypeDLs()87 void Haplotypes::CollapseHaplotypeDLs()
88 {
89 vector<vector<LocusCell> >::iterator dlList = m_haplotype_dlcells.begin();
90 for (;dlList != m_haplotype_dlcells.end();dlList++)
91 {
92 vector<vector<LocusCell> >::iterator dlComp = dlList;
93 dlComp++;
94 for (;dlComp != m_haplotype_dlcells.end();)
95 {
96 bool allbutlastmatch = true;
97 //Actually, the last can match, too, but that should never happen.
98 for (size_t allele=0; allele<((*dlList).size()-1); allele++)
99 {
100 if (!((*dlList)[allele] == (*dlComp)[allele]))
101 {
102 allbutlastmatch = false;
103 }
104 }
105 if (allbutlastmatch)
106 {
107 //We can collapse them into one DLcell
108 (*dlList)[(*dlList).size()-1] += (*dlComp)[(*dlComp).size()-1];
109 //Now delete the compared one.
110 dlComp = m_haplotype_dlcells.erase(dlComp);
111 }
112 else
113 {
114 dlComp++;
115 }
116 }
117 }
118 }
119
120 //------------------------------------------------------------------------------------
121
AddHaplotype(StringVec1d alleles,double penetrance)122 void Haplotypes::AddHaplotype(StringVec1d alleles, double penetrance)
123 {
124 if (m_haplotype_alleles.size() > 0)
125 {
126 if (alleles.size() != m_haplotype_alleles[0].size())
127 {
128 string msg = "The haplotype resolution \"";
129 for (unsigned long i=0; i<alleles.size(); i++)
130 {
131 msg += alleles[i] + " ";
132 }
133 msg += "\" has a different number of alleles than the haplotype "
134 "resolution \"";
135 for (unsigned long i=0; i<m_haplotype_alleles[0].size(); i++)
136 {
137 msg += m_haplotype_alleles[0][i] + " ";
138 }
139 msg += "\". Remember that spaces are not allowed in allele names. Also, if you have samples"
140 " with multiple ploidies (if you have samples from an X chromosome, say) each phenotype must match"
141 " a set of genotypes of the same ploidy -- even if the phenotype of 'X0' matches the phenotype"
142 " of 'XX', they must be defined separately.";
143 throw data_error(msg);
144 }
145 }
146 for (size_t hap=0; hap<m_haplotype_alleles.size(); hap++)
147 {
148 if (m_haplotype_alleles[hap] == alleles)
149 {
150 return;
151 }
152 }
153 m_haplotype_alleles.push_back(alleles);
154 m_penetrances.push_back(penetrance);
155 }
156
157 //------------------------------------------------------------------------------------
158
AddHaplotype(multiset<string> alleles,double penetrance)159 void Haplotypes::AddHaplotype(multiset<string> alleles, double penetrance)
160 {
161 StringVec2d allAlleles = SetToVecs(alleles);
162 for (size_t alleleVec=0; alleleVec != allAlleles.size(); alleleVec++)
163 {
164 AddHaplotype(allAlleles[alleleVec], penetrance);
165 }
166 }
167
168 //------------------------------------------------------------------------------------
169
ChooseNewHaplotypes()170 vector<LocusCell> Haplotypes::ChooseNewHaplotypes()
171 {
172 if (m_haplotype_dlcells.size() == 0)
173 {
174 ConvertAllelesToDLCells();
175 }
176 if (m_haplotype_dlcells.size() == 1)
177 {
178 return m_haplotype_dlcells[0];
179 }
180 //Choose a new haplotype index
181 long newindex = m_current_hapindex;
182 while (newindex == m_current_hapindex)
183 {
184 newindex = registry.GetRandom().Long(m_haplotype_dlcells.size());
185 }
186
187 m_current_hapindex = newindex;
188 return m_haplotype_dlcells[newindex];
189 }
190
191 //------------------------------------------------------------------------------------
192
ChooseRandomHaplotypes()193 vector<LocusCell> Haplotypes::ChooseRandomHaplotypes()
194 {
195 if (m_haplotype_dlcells.size() == 0)
196 {
197 ConvertAllelesToDLCells();
198 }
199 //Choose a new haplotype index
200 m_current_hapindex = registry.GetRandom().Long(m_haplotype_dlcells.size());
201 return m_haplotype_dlcells[m_current_hapindex];
202 }
203
204 //------------------------------------------------------------------------------------
205
ChooseFirstHaplotypes()206 vector<LocusCell> Haplotypes::ChooseFirstHaplotypes()
207 {
208 if (m_haplotype_dlcells.size() == 0)
209 {
210 ConvertAllelesToDLCells();
211 }
212 //Choose haplotype zero.
213 m_current_hapindex = 0;
214 return m_haplotype_dlcells[m_current_hapindex];
215 }
216
217 //------------------------------------------------------------------------------------
218
ChooseNextHaplotypes()219 vector<LocusCell> Haplotypes::ChooseNextHaplotypes()
220 {
221 assert(m_haplotype_dlcells.size() != 0);
222 //Choose the next haplotype index
223 if (static_cast<unsigned long>(m_current_hapindex) == m_haplotype_dlcells.size()-1)
224 {
225 //Already at the last one.
226 vector<LocusCell> blankcells;
227 return blankcells;
228 }
229 m_current_hapindex++;
230 return m_haplotype_dlcells[m_current_hapindex];
231 }
232
233 //------------------------------------------------------------------------------------
234
GetAlleles() const235 StringVec1d Haplotypes::GetAlleles() const
236 {
237 StringVec1d retvec;
238 for (unsigned long res=0; res<m_haplotype_alleles.size(); res++)
239 {
240 for (unsigned long allele=0; allele<m_haplotype_alleles[res].size(); allele++)
241 {
242 retvec.push_back(m_haplotype_alleles[res][allele]);
243 }
244 }
245 return retvec;
246 }
247
248 //------------------------------------------------------------------------------------
249
GetMarkerData() const250 string Haplotypes::GetMarkerData() const
251 {
252 assert(m_haplotype_alleles.size() == m_penetrances.size());
253 string markerdata;
254 for (unsigned long hapres=0; hapres<m_haplotype_alleles.size(); hapres++)
255 {
256 markerdata += ToString(m_haplotype_alleles[hapres]) + " ";
257 if (m_penetrances[hapres] < 1)
258 {
259 markerdata += "(" + ToString(m_penetrances[hapres]) + ") ";
260 }
261 }
262 return markerdata;
263 }
264
265 //------------------------------------------------------------------------------------
266
GetHaplotypesXML(long nspaces) const267 StringVec1d Haplotypes::GetHaplotypesXML(long nspaces) const
268 {
269 string spaces(nspaces, ' ');
270 string spaces2(nspaces+2, ' ');
271
272 StringVec1d retvec;
273 for (unsigned long hap=0; hap<m_penetrances.size(); hap++)
274 {
275 retvec.push_back(spaces + MakeTag(xmlstr::XML_TAG_HAPLOTYPES));
276 retvec.push_back(spaces2 + MakeTag(xmlstr::XML_TAG_PENETRANCE) + " "
277 + ToString(m_penetrances[hap]) + " "
278 + MakeCloseTag(xmlstr::XML_TAG_PENETRANCE));
279 retvec.push_back(spaces2 + MakeTag(xmlstr::XML_TAG_ALLELES)
280 + ToString(m_haplotype_alleles[hap]) + " "
281 + MakeCloseTag(xmlstr::XML_TAG_ALLELES));
282 retvec.push_back(spaces + MakeCloseTag(xmlstr::XML_TAG_HAPLOTYPES));
283 }
284 return retvec;
285 }
286
287 //------------------------------------------------------------------------------------
288
MultipleHaplotypes() const289 bool Haplotypes::MultipleHaplotypes() const
290 {
291 if (m_haplotype_dlcells.size() > 0)
292 {
293 return (m_haplotype_dlcells.size() > 1); //we're in phase 2
294 }
295 else
296 {
297 return (m_haplotype_alleles.size() > 1); //still in phase 1
298 }
299 }
300
301 //------------------------------------------------------------------------------------
302
SetToVecs(multiset<string> stringSet) const303 StringVec2d Haplotypes::SetToVecs(multiset<string> stringSet) const
304 {
305 StringVec2d retvecs;
306 for (multiset<string>::iterator newstring=stringSet.begin();
307 newstring != stringSet.end(); newstring++)
308 {
309 multiset<string> partialSet = stringSet;
310 partialSet.erase(partialSet.find(*newstring));
311 if (partialSet.size() == 0)
312 {
313 StringVec1d strings;
314 strings.push_back(*newstring);
315 retvecs.push_back(strings);
316 return retvecs;
317 }
318 StringVec2d partialVecs = SetToVecs(partialSet);
319 for (StringVec2d::iterator partialVec = partialVecs.begin();
320 partialVec != partialVecs.end(); partialVec++)
321 {
322 (*partialVec).push_back(*newstring);
323 retvecs.push_back(*partialVec);
324 }
325 }
326 return retvecs;
327 }
328
329 //------------------------------------------------------------------------------------
330 // Debugging function.
331
PrintCellsAndAlleles() const332 void Haplotypes::PrintCellsAndAlleles() const
333 {
334 cout << "Here are the original strings:" << endl;
335 for (size_t set=0; set<m_haplotype_alleles.size(); set++)
336 {
337 cout << ToString(m_haplotype_alleles[set]) << endl;
338 }
339 cout << "And here are the corresponding DLCells. They should match!" << endl;
340 for (size_t set=0; set<m_haplotype_dlcells.size(); set++)
341 {
342 for (size_t allele=0; allele<m_haplotype_dlcells[set].size(); allele++)
343 {
344 cout << registry.GetDataPack().GetRegion(m_regionnum).GetLocus(m_locusname).GetDataModel()->
345 CellToData(m_haplotype_dlcells[set][allele][0], 0)
346 << " ";
347 }
348 }
349 cout << endl;
350 cout << "And finally, the DLCells as raw data." << endl;
351 for (size_t set=0; set<m_haplotype_dlcells.size(); set++)
352 {
353 for (size_t allele=0; allele<m_haplotype_dlcells[set].size(); allele++)
354 {
355 cout << m_haplotype_dlcells[set][allele][0]->DLsToString(0,0);
356 }
357 }
358 cout << endl;
359 }
360
361 //____________________________________________________________________________________
362