// $Id: gc_phase_info.cpp,v 1.13 2011/03/08 19:22:00 bobgian Exp $

/*
Copyright 2002 Mary Kuhner, Jon Yamato, and Joseph Felsenstein

This software is distributed free of charge for non-commercial use
and is copyrighted. Of course, we do not guarantee that the software
works, and are not responsible for any damage you may cause or have.
*/
10
11 #include <cassert>
12
13 #include "gc_data.h"
14 #include "gc_phase_err.h"
15 #include "gc_phase_info.h"
16 #include "gc_strings.h"
17 #include "gc_strings_phase.h"
18 #include "wx/log.h"
19
20 //------------------------------------------------------------------------------------
21
gcPhaseRecord()22 gcPhaseRecord::gcPhaseRecord()
23 :
24 m_phaseSource(phaseSource_NONE_SET),
25 m_fileName(wxEmptyString),
26 m_hasLineNumber(false),
27 m_lineNumber(0),
28 m_individual(wxEmptyString),
29 m_sampleCountIfNoSamples(0)
30 {
31 assert(m_samples.IsEmpty());
32 assert(m_unphasedInfo.empty());
33 }
34
~gcPhaseRecord()35 gcPhaseRecord::~gcPhaseRecord()
36 {
37 }
38
39 void
AddPhenotypeId(size_t phenoId)40 gcPhaseRecord::AddPhenotypeId(size_t phenoId)
41 {
42 m_phenotypeIds.insert(phenoId);
43 }
44
45 const gcIdSet &
GetPhenotypeIds() const46 gcPhaseRecord::GetPhenotypeIds() const
47 {
48 return m_phenotypeIds;
49 }
50
51 void
MergePhenotypeIds(const gcPhaseRecord & otherRec)52 gcPhaseRecord::MergePhenotypeIds(const gcPhaseRecord & otherRec)
53 {
54 m_phenotypeIds.insert(otherRec.m_phenotypeIds.begin(),otherRec.m_phenotypeIds.end());
55 }
56
57 gcPhaseSource
GetPhaseSource() const58 gcPhaseRecord::GetPhaseSource() const
59 {
60 return m_phaseSource;
61 }
62
63 wxString
GetDescriptiveName() const64 gcPhaseRecord::GetDescriptiveName() const
65 {
66 wxString indName = gcstr::unknown;
67 wxString lineName = gcstr::unknown;
68 wxString fileName = gcstr::unknown;
69 wxString sizeName = wxString::Format("%ld",(long)GetSampleCount());
70 if(HasIndividual())
71 {
72 indName = GetIndividual();
73 }
74 if(HasFileName())
75 {
76 fileName = GetFileName();
77 }
78 if(HasLineNumber())
79 {
80 lineName = wxString::Format("%ld",(long)(GetLineNumber()));
81 }
82 wxString samplesString = "";
83 wxArrayString sampleNames = GetSamples();
84 for(size_t i=0; i < sampleNames.Count(); i++)
85 {
86 if(i != 0)
87 {
88 samplesString += ", ";
89 }
90 samplesString += wxString::Format("\"%s\"",sampleNames[i].c_str());
91 }
92
93 switch(m_phaseSource)
94 {
95 case phaseSource_NONE_SET:
96 // EWFIX -- better reply
97 return gcstr::unknown;
98 break;
99 case phaseSource_PHASE_FILE:
100 return wxString::Format(gcstr_phase::descPhaseFile,indName.c_str(),lineName.c_str(),fileName.c_str(),samplesString.c_str());
101 break;
102 case phaseSource_MULTI_PHASE_SAMPLE:
103 return wxString::Format(gcstr_phase::descMultiPhase,sizeName.c_str(),lineName.c_str(),fileName.c_str(),samplesString.c_str());
104 break;
105 case phaseSource_FILE_ADJACENCY:
106 return wxString::Format(gcstr_phase::descFileAdjacency,lineName.c_str(),fileName.c_str(),samplesString.c_str());
107 break;
108 case phaseSource_COUNT:
109 assert(false);
110 return gcstr::unknown;
111 break;
112 }
113 assert(false);
114 return wxT("");
115 }
116
117 bool
HasFileName() const118 gcPhaseRecord::HasFileName() const
119 {
120 return (! (m_fileName.IsEmpty()));
121 }
122
123 const wxString &
GetFileName() const124 gcPhaseRecord::GetFileName() const
125 {
126 assert(HasFileName());
127 return m_fileName;
128 }
129
130 bool
HasLineNumber() const131 gcPhaseRecord::HasLineNumber() const
132 {
133 return m_hasLineNumber;
134 }
135
136 size_t
GetLineNumber() const137 gcPhaseRecord::GetLineNumber() const
138 {
139 return m_lineNumber;
140 }
141
142 void
SetLineNumber(size_t lineNumber)143 gcPhaseRecord::SetLineNumber(size_t lineNumber)
144 {
145 m_hasLineNumber = true;
146 m_lineNumber = lineNumber;
147 }
148
149 bool
HasIndividual() const150 gcPhaseRecord::HasIndividual() const
151 {
152 return (! (m_individual.IsEmpty()));
153 }
154
155 const wxString &
GetIndividual() const156 gcPhaseRecord::GetIndividual() const
157 {
158 assert(HasIndividual());
159 return m_individual;
160 }
161
162 bool
HasSamples() const163 gcPhaseRecord::HasSamples() const
164 {
165 return (! (m_samples.IsEmpty()));
166 }
167
168 const wxArrayString &
GetSamples() const169 gcPhaseRecord::GetSamples() const
170 {
171 assert(HasSamples());
172 return m_samples;
173 }
174
175 size_t
GetSampleCount() const176 gcPhaseRecord::GetSampleCount() const
177 {
178 if(! HasSamples())
179 {
180 return m_sampleCountIfNoSamples;
181 }
182 return m_samples.Count();
183 }
184
185 bool
HasAnyZeroes() const186 gcPhaseRecord::HasAnyZeroes() const
187 {
188 for(gcIndPhaseInfo::const_iterator i = m_unphasedInfo.begin(); i != m_unphasedInfo.end(); i++)
189 {
190 const gcUnphasedMarkers & markers = (*i).second;
191 if(markers.HasZero()) return true;
192 }
193 return false;
194 }
195
196 void
AddUnphased(wxString locusName,const gcUnphasedMarkers & unphased)197 gcPhaseRecord::AddUnphased(wxString locusName, const gcUnphasedMarkers & unphased)
198 {
199 gcIndPhaseInfo::iterator iter = m_unphasedInfo.find(locusName);
200 if(iter == m_unphasedInfo.end())
201 {
202 m_unphasedInfo[locusName] = unphased;
203 }
204 else
205 {
206 gcUnphasedMarkers & oldUnphased = (*iter).second;
207 oldUnphased.Merge(unphased);
208 }
209 }
210
211 bool
HasUnphased(wxString locusName) const212 gcPhaseRecord::HasUnphased(wxString locusName) const
213 {
214 gcIndPhaseInfo::const_iterator iter = m_unphasedInfo.find(locusName);
215 return (iter != m_unphasedInfo.end());
216 }
217
218 const gcUnphasedMarkers &
GetUnphased(wxString locusName) const219 gcPhaseRecord::GetUnphased(wxString locusName) const
220 {
221 assert(HasUnphased(locusName));
222 gcIndPhaseInfo::const_iterator iter = m_unphasedInfo.find(locusName);
223 const gcUnphasedMarkers & markers = (*iter).second;
224 return markers;
225 }
226
227 wxArrayString
GetUnphasedLocusNames() const228 gcPhaseRecord::GetUnphasedLocusNames() const
229 {
230 wxArrayString names;
231 for(gcIndPhaseInfo::const_iterator i = m_unphasedInfo.begin(); i != m_unphasedInfo.end(); i++)
232 {
233 const wxString & name = (*i).first;
234 names.Add(name);
235 }
236 return names;
237 }
238
239 bool
operator ==(const gcPhaseRecord & rec) const240 gcPhaseRecord::operator==(const gcPhaseRecord& rec) const
241 {
242 if (GetPhaseSource() != rec.GetPhaseSource()) return false;
243
244 if (HasFileName() != rec.HasFileName()) return false;
245 if (GetFileName() != rec.GetFileName()) return false;
246
247 if (HasIndividual() != rec.HasIndividual()) return false;
248 if (GetIndividual() != rec.GetIndividual()) return false;
249
250 if (HasSamples() != rec.HasSamples()) return false;
251 if (GetSamples() != rec.GetSamples()) return false;
252 if (GetSampleCount() != rec.GetSampleCount()) return false;
253
254 wxArrayString locusNames = GetUnphasedLocusNames();
255 wxArrayString recLocusNames = rec.GetUnphasedLocusNames();
256 if (locusNames.Count() != recLocusNames.Count()) return false;
257 for(size_t i=0; i < locusNames.Count(); i++)
258 {
259 wxString locusName = locusNames[i];
260
261 if(HasUnphased(locusName) != rec.HasUnphased(locusName)) return false;
262 if(GetUnphased(locusName) != rec.GetUnphased(locusName)) return false;
263 }
264
265 return true;
266 }
267
268 bool
operator !=(const gcPhaseRecord & rec) const269 gcPhaseRecord::operator!=(const gcPhaseRecord & rec) const
270 {
271 return !(operator==(rec));
272 }
273
274 void
DebugDump(wxString prefix) const275 gcPhaseRecord::DebugDump(wxString prefix) const
276 {
277 wxString indName = "";
278 if(HasIndividual())
279 {
280 indName = GetIndividual();
281 }
282 wxLogDebug("%s%s:",prefix.c_str(),indName.c_str());
283
284 wxLogDebug("%s%sphasesource: %s",prefix.c_str(),gcstr::indent.c_str(),
285 ToWxString(GetPhaseSource()).c_str());
286
287 if( !(HasSamples()) )
288 {
289 wxLogDebug("%s%ssampleCount: %d",prefix.c_str(),gcstr::indent.c_str(),
290 (int)GetSampleCount());
291 }
292 else
293 {
294 const wxArrayString & samples = GetSamples();
295 for(size_t i = 0 ; i < samples.Count(); i++)
296 {
297 wxLogDebug("%s%ssample:%s", prefix.c_str(),gcstr::indent.c_str(),
298 samples[i].c_str());
299 }
300 }
301 wxLogDebug("%s%sfilename: %s",prefix.c_str(),gcstr::indent.c_str(),
302 GetFileName().c_str());
303
304 wxLogDebug("%s%sphenoIds: %s",prefix.c_str(),gcstr::indent.c_str(),GetPhenotypeIds().AsString().c_str());
305
306 wxArrayString locusNames = GetUnphasedLocusNames();
307 for(size_t i=0; i < locusNames.Count(); i++)
308 {
309 wxString locusName = locusNames[i];
310
311 if(HasUnphased(locusName))
312 {
313 wxLogDebug("%s%sunphased markers for %s: %s",
314 prefix.c_str(),
315 gcstr::indent.c_str(),
316 locusName.c_str(),
317 GetUnphased(locusName).AsString().c_str());
318 }
319
320 }
321
322 }
323
324 gcPhaseRecord
MakeAdjacentPhaseRecord(wxString fileName,size_t lineNumber,wxArrayString samples)325 gcPhaseRecord::MakeAdjacentPhaseRecord( wxString fileName,
326 size_t lineNumber,
327 wxArrayString samples)
328 {
329 gcPhaseRecord newRec;
330 newRec.m_phaseSource = phaseSource_FILE_ADJACENCY;
331 newRec.m_fileName = fileName;
332 newRec.SetLineNumber(lineNumber);
333 newRec.m_samples = samples;
334 return newRec;
335 }
336
337 gcPhaseRecord
MakeAllelicPhaseRecord(wxString fileName,size_t lineNumber,wxString individualName,size_t numSamples)338 gcPhaseRecord::MakeAllelicPhaseRecord( wxString fileName,
339 size_t lineNumber,
340 wxString individualName,
341 size_t numSamples)
342 {
343 gcPhaseRecord newRec;
344 newRec.m_phaseSource = phaseSource_MULTI_PHASE_SAMPLE;
345 newRec.m_fileName = fileName;
346 newRec.SetLineNumber(lineNumber);
347 newRec.m_individual = individualName;
348 newRec.m_sampleCountIfNoSamples = numSamples;
349 assert(newRec.m_samples.IsEmpty());
350 return newRec;
351 }
352
353 gcPhaseRecord *
MakeFullPhaseRecord(wxString fileName,size_t lineNumber,wxString individualName,wxArrayString samples)354 gcPhaseRecord::MakeFullPhaseRecord( wxString fileName,
355 size_t lineNumber,
356 wxString individualName,
357 wxArrayString samples)
358 {
359 gcPhaseRecord * newRec = new gcPhaseRecord();
360 newRec->m_phaseSource = phaseSource_PHASE_FILE;
361 newRec->m_fileName = fileName;
362 newRec->SetLineNumber(lineNumber);
363 newRec->m_individual = individualName;
364 newRec->m_samples = samples;
365 return newRec;
366 }
367
368 //------------------------------------------------------------------------------------
369
gcPhaseInfo()370 gcPhaseInfo::gcPhaseInfo()
371 {
372 };
373
~gcPhaseInfo()374 gcPhaseInfo::~gcPhaseInfo()
375 {
376 };
377
378 bool
AddRecordIndividual(const gcPhaseRecord & rec)379 gcPhaseInfo::AddRecordIndividual(const gcPhaseRecord & rec)
380 {
381 if(rec.HasIndividual())
382 // nothing to do if it doesn't
383 {
384 const wxString & indName = rec.GetIndividual();
385 if(HasIndividualRecord(indName))
386 // need to merge info or complain if not possible
387 {
388 bool didReplace = MergeIndividualRecs(GetIndividualRecord(indName),rec);
389 return didReplace;
390 }
391 else
392 {
393 m_fromIndividual.insert(recordPair(indName,rec));
394 return true;
395 }
396 }
397 return false;
398 }
399
400 bool
AddRecordSample(const gcPhaseRecord & rec)401 gcPhaseInfo::AddRecordSample(const gcPhaseRecord & rec)
402 {
403
404 bool addedAnything = false;
405 // the samples half
406 if(rec.HasSamples())
407 {
408 wxArrayString samples = rec.GetSamples();
409 bool anyPresent = false;
410
411 for(size_t i=0; i < samples.Count(); i++)
412 {
413 if(HasSampleRecord(samples[i]))
414 // checking that if this sample name already occurs, it
415 // occurs in the same configuration
416 {
417 anyPresent = true;
418 const gcPhaseRecord & oldRecord = GetSampleRecord(samples[i]);
419 assert(oldRecord.HasSamples());
420 if(oldRecord.GetSamples() != samples)
421 {
422 throw gc_phase_mismatch(oldRecord,rec);
423 }
424 }
425 }
426
427 if(anyPresent == false)
428 {
429 for(size_t i=0; i < samples.Count(); i++)
430 {
431 m_fromSample[samples[i]] = rec;
432 addedAnything = true;
433 }
434 }
435 }
436 return addedAnything;
437 }
438
439 bool
MergeIndividualRecs(const gcPhaseRecord & oldRec,const gcPhaseRecord & newRec)440 gcPhaseInfo::MergeIndividualRecs( const gcPhaseRecord & oldRec,
441 const gcPhaseRecord & newRec)
442 {
443 assert(oldRec.HasIndividual());
444 assert(newRec.HasIndividual());
445 assert(oldRec.GetIndividual() == newRec.GetIndividual());
446
447 if(oldRec.GetSampleCount() != newRec.GetSampleCount())
448 {
449 throw gc_phase_mismatch(oldRec,newRec);
450 }
451
452 if(oldRec.HasSamples())
453 {
454 if(newRec.HasSamples())
455 // need to make sure they match
456 {
457 if(oldRec.GetSamples() != newRec.GetSamples())
458 {
459 throw gc_phase_mismatch(oldRec,newRec);
460 }
461 }
462 }
463 else
464 {
465 if(newRec.HasSamples())
466 // need to replace old rec with this rec
467 {
468 gcPhaseRecord replacement = newRec;
469 replacement.MergePhenotypeIds(oldRec);
470 m_fromIndividual[newRec.GetIndividual()] = replacement;
471 return true;
472 }
473 }
474 gcPhaseRecord replacement = oldRec;
475 replacement.MergePhenotypeIds(newRec);
476 m_fromIndividual[oldRec.GetIndividual()] = replacement;
477 return false;
478
479 }
480
481 void
AddRecord(const gcPhaseRecord & phaseRecord)482 gcPhaseInfo::AddRecord(const gcPhaseRecord & phaseRecord)
483 {
484 bool addedI = AddRecordIndividual(phaseRecord);
485 bool addedS = AddRecordSample(phaseRecord);
486
487 assert( (phaseRecord.GetPhaseSource() != phaseSource_PHASE_FILE)
488 || (addedI == addedS) );
489
490 #ifdef NDEBUG // Silence compiler warning if variables not used.
491 (void)addedI;
492 (void)addedS;
493 #endif // NDEBUG
494 }
495
496 void
AddRecords(const gcPhaseInfo & rs)497 gcPhaseInfo::AddRecords(const gcPhaseInfo & rs)
498 {
499 for(stringToRecord::const_iterator i = rs.m_fromIndividual.begin();
500 i != rs.m_fromIndividual.end();
501 i++)
502 {
503 const gcPhaseRecord & rec = (*i).second;
504 AddRecord(rec);
505 }
506 for(stringToRecord::const_iterator i = rs.m_fromSample.begin();
507 i != rs.m_fromSample.end();
508 i++)
509 {
510 const gcPhaseRecord & rec = (*i).second;
511 AddRecord(rec);
512 }
513 }
514
515 bool
HasIndividualRecord(wxString name) const516 gcPhaseInfo::HasIndividualRecord(wxString name) const
517 {
518 stringToRecord::const_iterator iter = m_fromIndividual.find(name);
519 return (iter != m_fromIndividual.end());
520 }
521
522 const gcPhaseRecord &
GetIndividualRecord(wxString name) const523 gcPhaseInfo::GetIndividualRecord(wxString name) const
524 {
525 stringToRecord::const_iterator iter = m_fromIndividual.find(name);
526 assert (iter != m_fromIndividual.end());
527 return (*iter).second;
528 }
529
530 bool
HasSampleRecord(wxString name) const531 gcPhaseInfo::HasSampleRecord(wxString name) const
532 {
533 stringToRecord::const_iterator iter = m_fromSample.find(name);
534 return (iter != m_fromSample.end());
535 }
536
537 const gcPhaseRecord &
GetSampleRecord(wxString name) const538 gcPhaseInfo::GetSampleRecord(wxString name) const
539 {
540 stringToRecord::const_iterator iter = m_fromSample.find(name);
541 assert (iter != m_fromSample.end());
542 return (*iter).second;
543 }
544
545 void
DebugDump(wxString prefix) const546 gcPhaseInfo::DebugDump(wxString prefix) const
547 {
548 wxLogDebug("%sIndividual phase records:",prefix.c_str());
549 for(stringToRecord::const_iterator i=m_fromIndividual.begin(); i != m_fromIndividual.end(); i++)
550 {
551 (*i).second.DebugDump(prefix+gcstr::indent);
552 }
553
554 wxLogDebug("%sSample phase records:",prefix.c_str());
555 for(stringToRecord::const_iterator i=m_fromSample.begin(); i != m_fromSample.end(); i++)
556 {
557 (*i).second.DebugDump(prefix+gcstr::indent);
558 }
559 }
560
561 const stringToRecord &
GetIndividualRecords() const562 gcPhaseInfo::GetIndividualRecords() const
563 {
564 return m_fromIndividual;
565 }
566
567 bool
HasAnyZeroes() const568 gcPhaseInfo::HasAnyZeroes() const
569 {
570 for(stringToRecord::const_iterator i=m_fromIndividual.begin(); i != m_fromIndividual.end(); i++)
571 {
572 const gcPhaseRecord & rec = (*i).second;
573 if(rec.HasAnyZeroes()) return true;
574 }
575
576 for(stringToRecord::const_iterator i=m_fromSample.begin(); i != m_fromSample.end(); i++)
577 {
578 const gcPhaseRecord & rec = (*i).second;
579 if(rec.HasAnyZeroes()) return true;
580 }
581 return false;
582 }
583
584 //____________________________________________________________________________________
585