1 // $Id: gc_phase_info.cpp,v 1.13 2011/03/08 19:22:00 bobgian Exp $
2 
3 /*
4   Copyright 2002  Mary Kuhner, Jon Yamato, and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 #include <cassert>
12 
13 #include "gc_data.h"
14 #include "gc_phase_err.h"
15 #include "gc_phase_info.h"
16 #include "gc_strings.h"
17 #include "gc_strings_phase.h"
18 #include "wx/log.h"
19 
20 //------------------------------------------------------------------------------------
21 
gcPhaseRecord()22 gcPhaseRecord::gcPhaseRecord()
23     :
24     m_phaseSource(phaseSource_NONE_SET),
25     m_fileName(wxEmptyString),
26     m_hasLineNumber(false),
27     m_lineNumber(0),
28     m_individual(wxEmptyString),
29     m_sampleCountIfNoSamples(0)
30 {
31     assert(m_samples.IsEmpty());
32     assert(m_unphasedInfo.empty());
33 }
34 
~gcPhaseRecord()35 gcPhaseRecord::~gcPhaseRecord()
36 {
37 }
38 
39 void
AddPhenotypeId(size_t phenoId)40 gcPhaseRecord::AddPhenotypeId(size_t phenoId)
41 {
42     m_phenotypeIds.insert(phenoId);
43 }
44 
45 const gcIdSet &
GetPhenotypeIds() const46 gcPhaseRecord::GetPhenotypeIds() const
47 {
48     return m_phenotypeIds;
49 }
50 
51 void
MergePhenotypeIds(const gcPhaseRecord & otherRec)52 gcPhaseRecord::MergePhenotypeIds(const gcPhaseRecord & otherRec)
53 {
54     m_phenotypeIds.insert(otherRec.m_phenotypeIds.begin(),otherRec.m_phenotypeIds.end());
55 }
56 
57 gcPhaseSource
GetPhaseSource() const58 gcPhaseRecord::GetPhaseSource() const
59 {
60     return m_phaseSource;
61 }
62 
63 wxString
GetDescriptiveName() const64 gcPhaseRecord::GetDescriptiveName() const
65 {
66     wxString indName = gcstr::unknown;
67     wxString lineName = gcstr::unknown;
68     wxString fileName = gcstr::unknown;
69     wxString sizeName = wxString::Format("%ld",(long)GetSampleCount());
70     if(HasIndividual())
71     {
72         indName = GetIndividual();
73     }
74     if(HasFileName())
75     {
76         fileName = GetFileName();
77     }
78     if(HasLineNumber())
79     {
80         lineName = wxString::Format("%ld",(long)(GetLineNumber()));
81     }
82     wxString samplesString = "";
83     wxArrayString sampleNames = GetSamples();
84     for(size_t i=0; i < sampleNames.Count(); i++)
85     {
86         if(i != 0)
87         {
88             samplesString += ", ";
89         }
90         samplesString += wxString::Format("\"%s\"",sampleNames[i].c_str());
91     }
92 
93     switch(m_phaseSource)
94     {
95         case phaseSource_NONE_SET:
96             // EWFIX -- better reply
97             return gcstr::unknown;
98             break;
99         case phaseSource_PHASE_FILE:
100             return wxString::Format(gcstr_phase::descPhaseFile,indName.c_str(),lineName.c_str(),fileName.c_str(),samplesString.c_str());
101             break;
102         case phaseSource_MULTI_PHASE_SAMPLE:
103             return wxString::Format(gcstr_phase::descMultiPhase,sizeName.c_str(),lineName.c_str(),fileName.c_str(),samplesString.c_str());
104             break;
105         case phaseSource_FILE_ADJACENCY:
106             return wxString::Format(gcstr_phase::descFileAdjacency,lineName.c_str(),fileName.c_str(),samplesString.c_str());
107             break;
108         case phaseSource_COUNT:
109             assert(false);
110             return gcstr::unknown;
111             break;
112     }
113     assert(false);
114     return wxT("");
115 }
116 
117 bool
HasFileName() const118 gcPhaseRecord::HasFileName() const
119 {
120     return (! (m_fileName.IsEmpty()));
121 }
122 
123 const wxString &
GetFileName() const124 gcPhaseRecord::GetFileName() const
125 {
126     assert(HasFileName());
127     return m_fileName;
128 }
129 
130 bool
HasLineNumber() const131 gcPhaseRecord::HasLineNumber() const
132 {
133     return m_hasLineNumber;
134 }
135 
136 size_t
GetLineNumber() const137 gcPhaseRecord::GetLineNumber() const
138 {
139     return m_lineNumber;
140 }
141 
142 void
SetLineNumber(size_t lineNumber)143 gcPhaseRecord::SetLineNumber(size_t lineNumber)
144 {
145     m_hasLineNumber = true;
146     m_lineNumber = lineNumber;
147 }
148 
149 bool
HasIndividual() const150 gcPhaseRecord::HasIndividual() const
151 {
152     return (! (m_individual.IsEmpty()));
153 }
154 
155 const wxString &
GetIndividual() const156 gcPhaseRecord::GetIndividual() const
157 {
158     assert(HasIndividual());
159     return m_individual;
160 }
161 
162 bool
HasSamples() const163 gcPhaseRecord::HasSamples() const
164 {
165     return (! (m_samples.IsEmpty()));
166 }
167 
168 const wxArrayString &
GetSamples() const169 gcPhaseRecord::GetSamples() const
170 {
171     assert(HasSamples());
172     return m_samples;
173 }
174 
175 size_t
GetSampleCount() const176 gcPhaseRecord::GetSampleCount() const
177 {
178     if(! HasSamples())
179     {
180         return m_sampleCountIfNoSamples;
181     }
182     return m_samples.Count();
183 }
184 
185 bool
HasAnyZeroes() const186 gcPhaseRecord::HasAnyZeroes() const
187 {
188     for(gcIndPhaseInfo::const_iterator i = m_unphasedInfo.begin(); i != m_unphasedInfo.end(); i++)
189     {
190         const gcUnphasedMarkers & markers = (*i).second;
191         if(markers.HasZero()) return true;
192     }
193     return false;
194 }
195 
196 void
AddUnphased(wxString locusName,const gcUnphasedMarkers & unphased)197 gcPhaseRecord::AddUnphased(wxString locusName, const gcUnphasedMarkers & unphased)
198 {
199     gcIndPhaseInfo::iterator iter = m_unphasedInfo.find(locusName);
200     if(iter == m_unphasedInfo.end())
201     {
202         m_unphasedInfo[locusName] = unphased;
203     }
204     else
205     {
206         gcUnphasedMarkers & oldUnphased = (*iter).second;
207         oldUnphased.Merge(unphased);
208     }
209 }
210 
211 bool
HasUnphased(wxString locusName) const212 gcPhaseRecord::HasUnphased(wxString locusName) const
213 {
214     gcIndPhaseInfo::const_iterator iter = m_unphasedInfo.find(locusName);
215     return (iter != m_unphasedInfo.end());
216 }
217 
218 const gcUnphasedMarkers &
GetUnphased(wxString locusName) const219 gcPhaseRecord::GetUnphased(wxString locusName) const
220 {
221     assert(HasUnphased(locusName));
222     gcIndPhaseInfo::const_iterator iter = m_unphasedInfo.find(locusName);
223     const gcUnphasedMarkers & markers = (*iter).second;
224     return markers;
225 }
226 
227 wxArrayString
GetUnphasedLocusNames() const228 gcPhaseRecord::GetUnphasedLocusNames() const
229 {
230     wxArrayString names;
231     for(gcIndPhaseInfo::const_iterator i = m_unphasedInfo.begin(); i != m_unphasedInfo.end(); i++)
232     {
233         const wxString & name = (*i).first;
234         names.Add(name);
235     }
236     return names;
237 }
238 
239 bool
operator ==(const gcPhaseRecord & rec) const240 gcPhaseRecord::operator==(const gcPhaseRecord& rec) const
241 {
242     if (GetPhaseSource() != rec.GetPhaseSource()) return false;
243 
244     if (HasFileName() != rec.HasFileName()) return false;
245     if (GetFileName() != rec.GetFileName()) return false;
246 
247     if (HasIndividual() != rec.HasIndividual()) return false;
248     if (GetIndividual() != rec.GetIndividual()) return false;
249 
250     if (HasSamples() != rec.HasSamples()) return false;
251     if (GetSamples() != rec.GetSamples()) return false;
252     if (GetSampleCount() != rec.GetSampleCount()) return false;
253 
254     wxArrayString locusNames = GetUnphasedLocusNames();
255     wxArrayString recLocusNames = rec.GetUnphasedLocusNames();
256     if (locusNames.Count() != recLocusNames.Count()) return false;
257     for(size_t i=0; i < locusNames.Count(); i++)
258     {
259         wxString locusName = locusNames[i];
260 
261         if(HasUnphased(locusName) != rec.HasUnphased(locusName)) return false;
262         if(GetUnphased(locusName) != rec.GetUnphased(locusName)) return false;
263     }
264 
265     return true;
266 }
267 
268 bool
operator !=(const gcPhaseRecord & rec) const269 gcPhaseRecord::operator!=(const gcPhaseRecord & rec) const
270 {
271     return !(operator==(rec));
272 }
273 
274 void
DebugDump(wxString prefix) const275 gcPhaseRecord::DebugDump(wxString prefix) const
276 {
277     wxString indName = "";
278     if(HasIndividual())
279     {
280         indName = GetIndividual();
281     }
282     wxLogDebug("%s%s:",prefix.c_str(),indName.c_str());
283 
284     wxLogDebug("%s%sphasesource: %s",prefix.c_str(),gcstr::indent.c_str(),
285                ToWxString(GetPhaseSource()).c_str());
286 
287     if( !(HasSamples()) )
288     {
289         wxLogDebug("%s%ssampleCount: %d",prefix.c_str(),gcstr::indent.c_str(),
290                    (int)GetSampleCount());
291     }
292     else
293     {
294         const wxArrayString & samples = GetSamples();
295         for(size_t i = 0 ; i < samples.Count(); i++)
296         {
297             wxLogDebug("%s%ssample:%s", prefix.c_str(),gcstr::indent.c_str(),
298                        samples[i].c_str());
299         }
300     }
301     wxLogDebug("%s%sfilename: %s",prefix.c_str(),gcstr::indent.c_str(),
302                GetFileName().c_str());
303 
304     wxLogDebug("%s%sphenoIds: %s",prefix.c_str(),gcstr::indent.c_str(),GetPhenotypeIds().AsString().c_str());
305 
306     wxArrayString locusNames = GetUnphasedLocusNames();
307     for(size_t i=0; i < locusNames.Count(); i++)
308     {
309         wxString locusName = locusNames[i];
310 
311         if(HasUnphased(locusName))
312         {
313             wxLogDebug("%s%sunphased markers for %s: %s",
314                        prefix.c_str(),
315                        gcstr::indent.c_str(),
316                        locusName.c_str(),
317                        GetUnphased(locusName).AsString().c_str());
318         }
319 
320     }
321 
322 }
323 
324 gcPhaseRecord
MakeAdjacentPhaseRecord(wxString fileName,size_t lineNumber,wxArrayString samples)325 gcPhaseRecord::MakeAdjacentPhaseRecord( wxString        fileName,
326                                         size_t          lineNumber,
327                                         wxArrayString   samples)
328 {
329     gcPhaseRecord newRec;
330     newRec.m_phaseSource = phaseSource_FILE_ADJACENCY;
331     newRec.m_fileName = fileName;
332     newRec.SetLineNumber(lineNumber);
333     newRec.m_samples = samples;
334     return newRec;
335 }
336 
337 gcPhaseRecord
MakeAllelicPhaseRecord(wxString fileName,size_t lineNumber,wxString individualName,size_t numSamples)338 gcPhaseRecord::MakeAllelicPhaseRecord(  wxString        fileName,
339                                         size_t          lineNumber,
340                                         wxString        individualName,
341                                         size_t          numSamples)
342 {
343     gcPhaseRecord newRec;
344     newRec.m_phaseSource = phaseSource_MULTI_PHASE_SAMPLE;
345     newRec.m_fileName = fileName;
346     newRec.SetLineNumber(lineNumber);
347     newRec.m_individual = individualName;
348     newRec.m_sampleCountIfNoSamples = numSamples;
349     assert(newRec.m_samples.IsEmpty());
350     return newRec;
351 }
352 
353 gcPhaseRecord *
MakeFullPhaseRecord(wxString fileName,size_t lineNumber,wxString individualName,wxArrayString samples)354 gcPhaseRecord::MakeFullPhaseRecord(     wxString        fileName,
355                                         size_t          lineNumber,
356                                         wxString        individualName,
357                                         wxArrayString   samples)
358 {
359     gcPhaseRecord * newRec = new gcPhaseRecord();
360     newRec->m_phaseSource = phaseSource_PHASE_FILE;
361     newRec->m_fileName = fileName;
362     newRec->SetLineNumber(lineNumber);
363     newRec->m_individual = individualName;
364     newRec->m_samples = samples;
365     return newRec;
366 }
367 
368 //------------------------------------------------------------------------------------
369 
gcPhaseInfo()370 gcPhaseInfo::gcPhaseInfo()
371 {
372 };
373 
~gcPhaseInfo()374 gcPhaseInfo::~gcPhaseInfo()
375 {
376 };
377 
378 bool
AddRecordIndividual(const gcPhaseRecord & rec)379 gcPhaseInfo::AddRecordIndividual(const gcPhaseRecord & rec)
380 {
381     if(rec.HasIndividual())
382         // nothing to do if it doesn't
383     {
384         const wxString & indName = rec.GetIndividual();
385         if(HasIndividualRecord(indName))
386             // need to merge info or complain if not possible
387         {
388             bool didReplace = MergeIndividualRecs(GetIndividualRecord(indName),rec);
389             return didReplace;
390         }
391         else
392         {
393             m_fromIndividual.insert(recordPair(indName,rec));
394             return true;
395         }
396     }
397     return false;
398 }
399 
400 bool
AddRecordSample(const gcPhaseRecord & rec)401 gcPhaseInfo::AddRecordSample(const gcPhaseRecord & rec)
402 {
403 
404     bool addedAnything = false;
405     // the samples half
406     if(rec.HasSamples())
407     {
408         wxArrayString samples = rec.GetSamples();
409         bool anyPresent = false;
410 
411         for(size_t i=0; i < samples.Count(); i++)
412         {
413             if(HasSampleRecord(samples[i]))
414                 // checking that if this sample name already occurs, it
415                 // occurs in the same configuration
416             {
417                 anyPresent = true;
418                 const gcPhaseRecord & oldRecord = GetSampleRecord(samples[i]);
419                 assert(oldRecord.HasSamples());
420                 if(oldRecord.GetSamples() != samples)
421                 {
422                     throw gc_phase_mismatch(oldRecord,rec);
423                 }
424             }
425         }
426 
427         if(anyPresent == false)
428         {
429             for(size_t i=0; i < samples.Count(); i++)
430             {
431                 m_fromSample[samples[i]] = rec;
432                 addedAnything = true;
433             }
434         }
435     }
436     return addedAnything;
437 }
438 
439 bool
MergeIndividualRecs(const gcPhaseRecord & oldRec,const gcPhaseRecord & newRec)440 gcPhaseInfo::MergeIndividualRecs(   const gcPhaseRecord & oldRec,
441                                     const gcPhaseRecord & newRec)
442 {
443     assert(oldRec.HasIndividual());
444     assert(newRec.HasIndividual());
445     assert(oldRec.GetIndividual() == newRec.GetIndividual());
446 
447     if(oldRec.GetSampleCount() != newRec.GetSampleCount())
448     {
449         throw gc_phase_mismatch(oldRec,newRec);
450     }
451 
452     if(oldRec.HasSamples())
453     {
454         if(newRec.HasSamples())
455             // need to make sure they match
456         {
457             if(oldRec.GetSamples() != newRec.GetSamples())
458             {
459                 throw gc_phase_mismatch(oldRec,newRec);
460             }
461         }
462     }
463     else
464     {
465         if(newRec.HasSamples())
466             // need to replace old rec with this rec
467         {
468             gcPhaseRecord replacement = newRec;
469             replacement.MergePhenotypeIds(oldRec);
470             m_fromIndividual[newRec.GetIndividual()] = replacement;
471             return true;
472         }
473     }
474     gcPhaseRecord replacement = oldRec;
475     replacement.MergePhenotypeIds(newRec);
476     m_fromIndividual[oldRec.GetIndividual()] = replacement;
477     return false;
478 
479 }
480 
481 void
AddRecord(const gcPhaseRecord & phaseRecord)482 gcPhaseInfo::AddRecord(const gcPhaseRecord & phaseRecord)
483 {
484     bool addedI = AddRecordIndividual(phaseRecord);
485     bool addedS = AddRecordSample(phaseRecord);
486 
487     assert(     (phaseRecord.GetPhaseSource() != phaseSource_PHASE_FILE)
488                 ||  (addedI == addedS) );
489 
490 #ifdef NDEBUG  // Silence compiler warning if variables not used.
491     (void)addedI;
492     (void)addedS;
493 #endif // NDEBUG
494 }
495 
496 void
AddRecords(const gcPhaseInfo & rs)497 gcPhaseInfo::AddRecords(const gcPhaseInfo & rs)
498 {
499     for(stringToRecord::const_iterator i = rs.m_fromIndividual.begin();
500         i != rs.m_fromIndividual.end();
501         i++)
502     {
503         const gcPhaseRecord & rec = (*i).second;
504         AddRecord(rec);
505     }
506     for(stringToRecord::const_iterator i = rs.m_fromSample.begin();
507         i != rs.m_fromSample.end();
508         i++)
509     {
510         const gcPhaseRecord & rec = (*i).second;
511         AddRecord(rec);
512     }
513 }
514 
515 bool
HasIndividualRecord(wxString name) const516 gcPhaseInfo::HasIndividualRecord(wxString name) const
517 {
518     stringToRecord::const_iterator iter = m_fromIndividual.find(name);
519     return (iter != m_fromIndividual.end());
520 }
521 
522 const gcPhaseRecord &
GetIndividualRecord(wxString name) const523 gcPhaseInfo::GetIndividualRecord(wxString name) const
524 {
525     stringToRecord::const_iterator iter = m_fromIndividual.find(name);
526     assert (iter != m_fromIndividual.end());
527     return (*iter).second;
528 }
529 
530 bool
HasSampleRecord(wxString name) const531 gcPhaseInfo::HasSampleRecord(wxString name) const
532 {
533     stringToRecord::const_iterator iter = m_fromSample.find(name);
534     return (iter != m_fromSample.end());
535 }
536 
537 const gcPhaseRecord &
GetSampleRecord(wxString name) const538 gcPhaseInfo::GetSampleRecord(wxString name) const
539 {
540     stringToRecord::const_iterator iter = m_fromSample.find(name);
541     assert (iter != m_fromSample.end());
542     return (*iter).second;
543 }
544 
545 void
DebugDump(wxString prefix) const546 gcPhaseInfo::DebugDump(wxString prefix) const
547 {
548     wxLogDebug("%sIndividual phase records:",prefix.c_str());
549     for(stringToRecord::const_iterator i=m_fromIndividual.begin(); i != m_fromIndividual.end(); i++)
550     {
551         (*i).second.DebugDump(prefix+gcstr::indent);
552     }
553 
554     wxLogDebug("%sSample phase records:",prefix.c_str());
555     for(stringToRecord::const_iterator i=m_fromSample.begin(); i != m_fromSample.end(); i++)
556     {
557         (*i).second.DebugDump(prefix+gcstr::indent);
558     }
559 }
560 
561 const stringToRecord &
GetIndividualRecords() const562 gcPhaseInfo::GetIndividualRecords() const
563 {
564     return m_fromIndividual;
565 }
566 
567 bool
HasAnyZeroes() const568 gcPhaseInfo::HasAnyZeroes() const
569 {
570     for(stringToRecord::const_iterator i=m_fromIndividual.begin(); i != m_fromIndividual.end(); i++)
571     {
572         const gcPhaseRecord & rec = (*i).second;
573         if(rec.HasAnyZeroes()) return true;
574     }
575 
576     for(stringToRecord::const_iterator i=m_fromSample.begin(); i != m_fromSample.end(); i++)
577     {
578         const gcPhaseRecord & rec = (*i).second;
579         if(rec.HasAnyZeroes()) return true;
580     }
581     return false;
582 }
583 
584 //____________________________________________________________________________________
585