1 // $Id: gc_datastore_export.cpp,v 1.78 2012/05/11 23:27:53 ewalkup Exp $
2 
3 /*
4   Copyright 2002  Mary Kuhner, Jon Yamato, and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 #include <cassert>
12 //#include <iostream>
13 //using namespace std;
14 
15 #include "gc_data.h"
16 #include "gc_data_missing_err.h"
17 #include "gc_datastore.h"
18 #include "gc_default.h"
19 #include "gc_errhandling.h"
20 #include "gc_exportable.h"
21 #include "gc_genotype_resolution.h"
22 #include "gc_individual.h"
23 #include "gc_individual_err.h"
24 #include "gc_parse_block.h"
25 #include "gc_parse_sample.h"
26 #include "gc_phase.h"
27 #include "gc_population.h"
28 #include "gc_locus.h"
29 #include "gc_locus_err.h"
30 #include "gc_region.h"
31 #include "gc_sequential_data.h"
32 #include "gc_strings.h"
33 #include "gc_strings_locus.h"
34 #include "gc_structure_maps.h"
35 #include "gc_structures.h"
36 #include "gc_trait.h"
37 #include "gc_types.h"
38 #include "tinyxml.h"
39 #include "xml_strings.h"
40 #include "wx/log.h"
41 #include "wx/string.h"
42 
43 //------------------------------------------------------------------------------------
44 
45 gcUnphasedMarkers *
CheckPhaseMarkers(const gcUnphasedMarkers * phaseInfo,wxString indName,const gcLocus & locusRef,bool anyZeroes) const46 GCDataStore::CheckPhaseMarkers( const gcUnphasedMarkers * phaseInfo,
47                                 wxString            indName,
48                                 const gcLocus &     locusRef,
49                                 bool                anyZeroes) const
50 {
51     assert(locusRef.HasLength());
52 
53     if(phaseInfo->NumMarkers() > 0)
54     {
55         long smallest = anyZeroes ? 0 : 1;
56         if(locusRef.HasOffset()) smallest = locusRef.GetOffset();
57 
58         long largest = smallest + locusRef.GetLength() - 1;
59         if(smallest < 0 && largest >= 0 && (!anyZeroes)) largest++;
60 
61         if(phaseInfo->Smallest() < smallest)
62         {
63             throw gc_phase_too_small(   phaseInfo->Smallest(),
64                                         indName,
65                                         smallest,
66                                         locusRef.GetName());
67         }
68 
69         if(phaseInfo->Largest() > largest)
70         {
71             throw gc_phase_too_large(   phaseInfo->Largest(),
72                                         indName,
73                                         largest,
74                                         locusRef.GetName());
75         }
76     }
77 
78     phaseInfo->CheckAgainstLocations(locusRef);
79 
80     gcUnphasedMarkers * newMarkers = new gcUnphasedMarkers(*phaseInfo);
81     if(!anyZeroes)
82     {
83         newMarkers->ShiftNegsUp();
84     }
85     return newMarkers;
86 }
87 
88 void
AddDefaultPhaseElem(TiXmlElement * individualElement) const89 GCDataStore::AddDefaultPhaseElem(TiXmlElement * individualElement) const
90 {
91     TiXmlElement * phaseElement = new TiXmlElement( xmlstr::XML_TAG_PHASE.c_str());
92     individualElement->LinkEndChild(phaseElement);
93     phaseElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
94                                 xmlstr::XML_ATTRVALUE_UNKNOWN.c_str());
95     TiXmlText * phaseText = new TiXmlText("");
96     phaseElement->LinkEndChild(phaseText);
97 }
98 
99 void
AddPhaseElem(TiXmlElement * individualElem,const GCIndividual & indi,const gcLocus & locusRef) const100 GCDataStore::AddPhaseElem(TiXmlElement * individualElem, const GCIndividual & indi, const gcLocus & locusRef) const
101 // EWFIX.P3.BUG.537 -- FIXED ?? need to do this at the locus level as well ??
102 // EWFIX.P4 -- add a locus name attribute in a later release ??
103 {
104     const gcUnphasedMarkers * phaseInfo = indi.GetUnphased(locusRef);
105     if(phaseInfo == NULL || phaseInfo->NumMarkers() == 0)
106     {
107         if (locusRef.HasUnphasedMarkers())
108         {
109             phaseInfo = locusRef.GetUnphasedMarkers();
110         }
111     }
112 
113     if(phaseInfo == NULL || phaseInfo->NumMarkers() == 0)
114     {
115         AddDefaultPhaseElem(individualElem);
116     }
117     else
118     {
119         // can throw
120         bool anyZeroes = GetStructures().AnyZeroes();   // EWFIX.P4 -- wasteful
121         gcUnphasedMarkers * newPhaseInfo = CheckPhaseMarkers(phaseInfo,
122                                                              indi.GetName(),
123                                                              locusRef,
124                                                              anyZeroes);
125 
126         TiXmlElement * phaseElement = new TiXmlElement( xmlstr::XML_TAG_PHASE.c_str());
127         individualElem->LinkEndChild(phaseElement);
128 
129         phaseElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
130                                     xmlstr::XML_ATTRVALUE_UNKNOWN.c_str());
131 
132         TiXmlText * phaseText = new TiXmlText(newPhaseInfo->AsString().c_str());
133         delete newPhaseInfo;
134         phaseElement->LinkEndChild(phaseText);
135     }
136 }
137 
138 void
AddDataBlockElem(TiXmlElement * sampleElement,const GCSequentialData & seqData,const gcLocus & locusRef,size_t siteIndex) const139 GCDataStore::AddDataBlockElem(TiXmlElement * sampleElement, const GCSequentialData & seqData, const gcLocus & locusRef, size_t siteIndex) const
140 {
141     gcSpecificDataType dataType = locusRef.GetDataType();
142     if(dataType == sdatatype_NONE_SET)
143     {
144         wxString msg = wxString::Format(gcerr::missingDataTypeForLocus,locusRef.GetName().c_str());
145         gui_error g(msg.c_str());
146         throw g;
147     }
148 
149     wxString dataTypeString = ToWxString(dataType);
150     wxString dataString = " "+seqData.GetData(siteIndex)+" ";
151 
152     TiXmlElement * dataBlockElement = new TiXmlElement( xmlstr::XML_TAG_DATABLOCK.c_str());
153     sampleElement->LinkEndChild(dataBlockElement);
154     dataBlockElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
155                                     dataTypeString.c_str());
156 
157     TiXmlText * data = new TiXmlText(dataString);
158     dataBlockElement->LinkEndChild(data);
159 
160 }
161 
162 void
AddDataBlockElem(TiXmlElement * sampleElem,const GCSequentialData & seqData,const gcLocus & locusRef) const163 GCDataStore::AddDataBlockElem(TiXmlElement * sampleElem, const GCSequentialData & seqData, const gcLocus & locusRef ) const
164 {
165     gcSpecificDataType dataType = locusRef.GetDataType();
166     if(dataType == sdatatype_NONE_SET)
167     {
168         wxString msg = wxString::Format(gcerr::missingDataTypeForLocus,locusRef.GetName().c_str());
169         gui_error g(msg.c_str());
170         throw g;
171     }
172 
173     TiXmlElement * dataBlockElement = new TiXmlElement( xmlstr::XML_TAG_DATABLOCK.c_str());
174     sampleElem->LinkEndChild(dataBlockElement);
175 
176     wxString dataTypeString = ToWxString(dataType);
177     dataBlockElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
178                                     dataTypeString.c_str());
179 
180     wxString dataString = wxString::Format(" %s ",seqData.GetData().c_str());
181     TiXmlText * data = new TiXmlText(dataString);
182     dataBlockElement->LinkEndChild(data);
183 
184 }
185 
186 void
AddSampleElem(TiXmlElement * individualElem,const GCIndividual & individual,const gcRegion & regionRef,const gcLocus & locusRef,size_t siteIndex,size_t hapIndex) const187 GCDataStore::AddSampleElem(TiXmlElement * individualElem, const GCIndividual & individual, const gcRegion & regionRef, const gcLocus & locusRef, size_t siteIndex, size_t hapIndex) const
188 {
189 
190     const gcSample * sample = individual.GetSample(hapIndex);
191     assert(!locusRef.GetLinked());
192     const GCSequentialData & data = sample->GetData(&locusRef);
193 
194     TiXmlElement * sampleElement = new TiXmlElement( xmlstr::XML_TAG_SAMPLE.c_str());
195     individualElem->LinkEndChild(sampleElement);
196     sampleElement->SetAttribute(xmlstr::XML_ATTRTYPE_NAME.c_str(),
197                                 sample->GetLabel().c_str());
198 
199     AddDataBlockElem(sampleElement,data,locusRef,siteIndex);
200 }
201 
202 void
AddSampleElem(TiXmlElement * individualElem,const GCIndividual & individual,const gcRegion & regionRef,size_t hapIndex) const203 GCDataStore::AddSampleElem(TiXmlElement * individualElem, const GCIndividual & individual, const gcRegion & regionRef, size_t hapIndex) const
204 {
205     const gcSample * sample = individual.GetSample(hapIndex);
206 
207     TiXmlElement * sampleElement = new TiXmlElement( xmlstr::XML_TAG_SAMPLE.c_str());
208     individualElem->LinkEndChild(sampleElement);
209     sampleElement->SetAttribute(xmlstr::XML_ATTRTYPE_NAME.c_str(),
210                                 sample->GetLabel().c_str());
211 
212     constObjVector loci = m_structures.GetConstDisplayableLinkedLociInMapOrderFor(regionRef.GetId());
213     for(constObjVector::const_iterator i = loci.begin(); i != loci.end(); i++)
214     {
215         const gcLocus * locP = dynamic_cast<const gcLocus*>(*i);
216         assert(locP != NULL);
217 
218         const gcLocus & locusRef = *locP;
219         if(locusRef.GetLinked())
220         {
221             try
222             {
223                 const GCSequentialData & data = sample->GetData(locP);
224                 AddDataBlockElem(sampleElement,data,locusRef);
225             }
226             catch (const gc_data_error& e)
227                 // for gc_sample_missing_locus_data
228             {
229                 // EWFIX.P3 -- bad formatting because string e.what()
230                 // contains quote characters
231                 wxString msg = wxString::Format(gcerr::inIndividual,
232                                                 individual.GetName().c_str(),
233                                                 e.what());
234                 throw gc_data_error (msg.c_str());
235             }
236         }
237     }
238 }
239 
240 void
AddGenoResolutionElem(TiXmlElement * individualElem,const gcPhenotype & pheno) const241 GCDataStore::AddGenoResolutionElem(TiXmlElement * individualElem, const gcPhenotype & pheno) const
242 {
243     size_t traitId = pheno.GetTraitId();
244     const gcTraitInfo & trait = GetStructures().GetTrait(traitId);
245     wxString traitName = trait.GetName();
246 
247     TiXmlElement * genoResoElement = new TiXmlElement( xmlstr::XML_TAG_GENOTYPE_RESOLUTIONS.c_str());
248     individualElem->LinkEndChild(genoResoElement);
249 
250     TiXmlElement * traitNameElement = new TiXmlElement( xmlstr::XML_TAG_TRAIT_NAME.c_str());
251     genoResoElement->LinkEndChild(traitNameElement);
252 
253     TiXmlText * traitNameTextElement = new TiXmlText(traitName.c_str());
254     traitNameElement->LinkEndChild(traitNameTextElement);
255 
256     const std::vector<gcHapProbability> & haps = pheno.GetHapProbabilities();
257     std::vector<gcHapProbability>::const_iterator i;
258     for(i = haps.begin(); i != haps.end(); i++)
259     {
260         const gcHapProbability & hapProb = *i;
261         double relProb = hapProb.GetPenetrance();
262         const gcIdVec & alleleIds = hapProb.GetAlleleIds();
263 
264         TiXmlElement * hapElement = new TiXmlElement( xmlstr::XML_TAG_HAPLOTYPES.c_str());
265         genoResoElement->LinkEndChild(hapElement);
266 
267         TiXmlElement * relProbElem = new TiXmlElement( xmlstr::XML_TAG_PENETRANCE.c_str());
268         hapElement->LinkEndChild(relProbElem);
269 
270         TiXmlText * probText = new TiXmlText(wxString::Format("%f",relProb).c_str());
271         relProbElem->LinkEndChild(probText);
272 
273         TiXmlElement * allelesElement = new TiXmlElement( xmlstr::XML_TAG_ALLELES.c_str());
274         hapElement->LinkEndChild(allelesElement);
275 
276         wxString alleles = wxEmptyString;
277         for(size_t j=0; j < alleleIds.size(); j++)
278 
279         {
280             if(j == 0)
281             {
282                 alleles += " ";
283             }
284             const gcTraitAllele & traitAllele = GetStructures().GetAllele(alleleIds[j]);
285             alleles += traitAllele.GetName();
286             alleles += " ";
287         }
288 
289         TiXmlText * alleleText = new TiXmlText(alleles.c_str());
290         allelesElement->LinkEndChild(alleleText);
291     }
292 }
293 
294 void
AddIndividualElem(TiXmlElement * popElem,const GCIndividual & individual,const gcRegion & regionRef,const gcLocus & locusRef,size_t siteIndex) const295 GCDataStore::AddIndividualElem(TiXmlElement * popElem, const GCIndividual & individual, const gcRegion & regionRef, const gcLocus & locusRef, size_t siteIndex) const
296 {
297     wxString indName = individual.GetName();
298 
299     TiXmlElement * individualElement = new TiXmlElement( xmlstr::XML_TAG_INDIVIDUAL.c_str());
300     popElem->LinkEndChild(individualElement);
301     individualElement->SetAttribute(    xmlstr::XML_ATTRTYPE_NAME.c_str(),
302                                         indName.c_str());
303 
304     // NOTE : we don't put phase info in because this is a single unlinked site
305     for(size_t hapIndex=0; hapIndex < individual.GetNumSamples(); hapIndex++)
306     {
307         AddSampleElem(individualElement,individual,regionRef,locusRef,siteIndex,hapIndex);
308     }
309 }
310 
311 void
AddIndividualElem(TiXmlElement * popElem,const GCIndividual & individual,const gcRegion & regionRef) const312 GCDataStore::AddIndividualElem(TiXmlElement * popElem, const GCIndividual & individual, const gcRegion & regionRef) const
313 {
314     wxString indName = individual.GetName();
315 
316     TiXmlElement * individualElement = new TiXmlElement( xmlstr::XML_TAG_INDIVIDUAL.c_str());
317     popElem->LinkEndChild(individualElement);
318     individualElement->SetAttribute(    xmlstr::XML_ATTRTYPE_NAME.c_str(),
319                                         indName.c_str());
320 
321     const gcIdSet & phenoIds = individual.GetPhenotypeIds();
322     for(gcIdSet::const_iterator i=phenoIds.begin(); i!=phenoIds.end(); i++)
323     {
324         const gcPhenotype & pheno = GetStructures().GetPhenotype(*i);
325         AddGenoResolutionElem(individualElement,pheno);
326     }
327 
328     if(individual.GetNumSamples() > 1)
329         // to put in phase elem only
330     {
331         gcIdVec locIds = GetStructures().GetLocusIdsForRegionByMapPosition(regionRef.GetId());
332 
333         for(gcIdVec::const_iterator i = locIds.begin(); i != locIds.end(); i++)
334         {
335             size_t locusId = (*i);
336             const gcLocus & locusRef = m_structures.GetLocus(locusId);
337             if(locusRef.GetLinked())
338             {
339                 AddPhaseElem(individualElement,individual,locusRef);
340             }
341         }
342 
343     }
344 
345     for(size_t hapIndex=0; hapIndex < individual.GetNumSamples(); hapIndex++)
346     {
347         AddSampleElem(individualElement,individual,regionRef,hapIndex);
348     }
349 }
350 
351 void
AddPanelElem(TiXmlElement * popElem,const gcPanel & panel) const352 GCDataStore::AddPanelElem(TiXmlElement * popElem, const gcPanel & panel) const
353 {
354     TiXmlElement * panelElement = new TiXmlElement( xmlstr::XML_TAG_PANEL.c_str());
355     popElem->LinkEndChild(panelElement);
356     panelElement->SetAttribute(xmlstr::XML_ATTRTYPE_NAME.c_str(), (panel.GetName()).c_str());
357 
358     TiXmlElement * sizeElement = new TiXmlElement( xmlstr::XML_TAG_PANELSIZE.c_str());
359     panelElement->LinkEndChild(sizeElement);
360     wxString sizeString = wxString::Format(" %ld ",panel.GetNumPanels());
361     TiXmlText * sizeText = new TiXmlText(sizeString);
362     sizeElement->LinkEndChild(sizeText);
363 }
364 
365 TiXmlElement *
MakeBlockElem(const gcLocus & locusRef,bool requireMapPosition) const366 GCDataStore::MakeBlockElem(const gcLocus & locusRef, bool requireMapPosition) const
367 {
368     TiXmlElement * blockElement  = new TiXmlElement( xmlstr::XML_TAG_BLOCK.c_str());
369     blockElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
370                                 locusRef.GetName().c_str());
371 
372     if(locusRef.HasOffset())
373     {
374         wxString offsetString = wxString::Format(" %ld ",locusRef.GetOffset());
375 
376         TiXmlElement * offsetElement  = new TiXmlElement( xmlstr::XML_TAG_OFFSET.c_str());
377         TiXmlText * offsetText = new TiXmlText(offsetString);
378         offsetElement->LinkEndChild(offsetText);
379         blockElement->LinkEndChild(offsetElement);
380     }
381 
382     long mapPosition = 1;
383     if(requireMapPosition && !locusRef.HasMapPosition())
384     {
385         wxString msg = wxString::Format(gcerr::locusWithoutMapPosition,
386                                         locusRef.GetName().c_str());
387         gui_error g(msg);
388         throw g;
389     }
390     if(locusRef.HasMapPosition())
391     {
392         mapPosition = locusRef.GetMapPosition();
393     }
394     wxString mapPosString = wxString::Format(" %ld ",mapPosition);
395     TiXmlText * mapPosText = new TiXmlText(mapPosString);
396     TiXmlElement * mapPositionElement  = new TiXmlElement( xmlstr::XML_TAG_MAP_POSITION.c_str());
397     mapPositionElement->LinkEndChild(mapPosText);
398     blockElement->LinkEndChild(mapPositionElement);
399 
400     TiXmlElement * lengthElement  = new TiXmlElement( xmlstr::XML_TAG_LENGTH.c_str());
401     if(locusRef.HasTotalLength())
402     {
403         if( ! (locusRef.GetDataType() == sdatatype_DNA && locusRef.GetLength() == locusRef.GetNumMarkers()))
404         {
405             wxString lengthString = wxString::Format(" %ld ",(long)locusRef.GetLength());
406             TiXmlText * lengthText = new TiXmlText(lengthString);
407             lengthElement->LinkEndChild(lengthText);
408             blockElement->LinkEndChild(lengthElement);
409         }
410     }
411     else
412     {
413         if(locusRef.GetDataType() != sdatatype_DNA && locusRef.GetNumMarkers() > 1)
414         {
415             delete lengthElement;
416             wxString msg = wxString::Format(gcerr::missingLengthForLocus,locusRef.GetName().c_str());
417             gui_error g(msg.c_str());
418             throw g;
419         }
420     }
421 
422     if(locusRef.HasLocations())
423     {
424         TiXmlElement * locationsElement  = new TiXmlElement( xmlstr::XML_TAG_LOCATIONS.c_str());
425         wxString locationsString = locusRef.GetLocationsAsString();
426         TiXmlText * locationsText = new TiXmlText(locationsString);
427         locationsElement->LinkEndChild(locationsText);
428         blockElement->LinkEndChild(locationsElement);
429     }
430 
431     return blockElement;
432 }
433 
434 TiXmlElement *
MakeBlockElemWithMapPosition(const gcLocus & locusRef) const435 GCDataStore::MakeBlockElemWithMapPosition(const gcLocus & locusRef) const
436 {
437     return MakeBlockElem(locusRef,true);
438 }
439 
440 TiXmlElement *
MakeBlockElemWithoutMapPosition(const gcLocus & locusRef) const441 GCDataStore::MakeBlockElemWithoutMapPosition(const gcLocus & locusRef) const
442 {
443     return MakeBlockElem(locusRef,false);
444 }
445 
446 void
AddEffectivePopSizeElem(TiXmlElement * regionElem,const gcRegion & regionRef) const447 GCDataStore::AddEffectivePopSizeElem(TiXmlElement * regionElem, const gcRegion & regionRef) const
448 {
449     if(regionRef.HasEffectivePopulationSize())
450     {
451         double psize = regionRef.GetEffectivePopulationSize();
452         TiXmlElement * effPopSizeElem = new TiXmlElement( xmlstr::XML_TAG_EFFECTIVE_POPSIZE.c_str());
453         regionElem->LinkEndChild(effPopSizeElem);
454         TiXmlText * popSizeText = new TiXmlText(wxString::Format("%f",psize).c_str());
455         effPopSizeElem->LinkEndChild(popSizeText);
456     }
457 }
458 
459 #if 0  // Potentially DEAD CODE (bobgian, Feb 2010)
460 void
461 GCDataStore::AddSpacingElemSingle(TiXmlElement * regionElem, const gcLocus & locusRef) const
462 {
463     TiXmlElement * spacingElement = new TiXmlElement( xmlstr::XML_TAG_SPACING.c_str());
464     regionElem->LinkEndChild(spacingElement);
465     TiXmlElement * blockElement = MakeBlockElemWithoutMapPosition(locusRef);
466     spacingElement->LinkEndChild(blockElement);
467 }
468 #endif
469 
470 #if 0 // EWFIX.REMOVE
471 TiXmlElement *
472 GCDataStore::MakeSpacingElemMulti(const constObjVector & loci, const gcRegion & regionRef) const
473 {
474     // check that loci don't overlap
475     GetStructures().VerifyLocusSeparations(regionRef);
476 
477     TiXmlElement * spacingElement = new TiXmlElement( xmlstr::XML_TAG_SPACING.c_str());
478 
479     long nextAllowable = gcdefault::badMapPosition;
480     wxString lastLocusName;
481 
482     for(constObjVector::const_iterator iter = loci.begin(); iter != loci.end(); iter++)
483     {
484         const gcLocus * locP = dynamic_cast<const gcLocus*>(*iter);
485         if(locP == NULL)
486         {
487             wxString msg = wxString::Format(gcerr::corruptedDisplayableLociInMapOrder);
488             gc_implementation_error g(msg.c_str());
489             throw g;
490         }
491 
492         if(!locP->HasMapPosition())
493         {
494             wxString msg = wxString::Format(gcerr::locusWithoutMapPosition,
495                                             locP->GetName().c_str());
496             gui_error g(msg);
497             throw g;
498         }
499         if(!locP->HasTotalLength() && ! (locP->GetDataType() == sdatatype_DNA))
500         {
501             wxString msg = wxString::Format(gcerr::locusWithoutLength,
502                                             locP->GetName().c_str());
503             gui_error g(msg);
504             throw g;
505         }
506         long mapPosition = locP->GetMapPosition();
507         size_t length = locP->GetLength();
508 
509         if(iter != loci.begin())
510         {
511             if(mapPosition < nextAllowable)
512             {
513                 wxString msg = wxString::Format(gcerr::locusOverlap,
514                                                 lastLocusName.c_str(),
515                                                 nextAllowable-1,
516                                                 locP->GetName().c_str(),
517                                                 mapPosition);
518                 gui_error g(msg);
519                 throw g;
520             }
521         }
522 
523         nextAllowable = mapPosition + length;
524         lastLocusName = locP->GetName();
525 
526         TiXmlElement * blockElement = MakeBlockElemWithMapPosition(*locP);
527         spacingElement->LinkEndChild(blockElement);
528     }
529 
530     return spacingElement;
531 }
532 #endif
533 
534 void
AddSpacingElem(TiXmlElement * regionElem,const gcRegion & regionRef) const535 GCDataStore::AddSpacingElem(TiXmlElement * regionElem, const gcRegion & regionRef) const
536 {
537     constObjVector loci = GetStructures().GetConstDisplayableLinkedLociInMapOrderFor(regionRef.GetId());
538 
539     bool needMapInfo = (loci.size() > 1);
540 
541     bool anyZeroes = GetStructures().AnyZeroes();   // EWFIX.P4 -- wasteful
542 
543     TiXmlElement * spacingElement = new TiXmlElement( xmlstr::XML_TAG_SPACING.c_str());
544     regionElem->LinkEndChild(spacingElement);
545 
546     bool havePrevious = false;
547     long lastExtent = 0;
548     size_t lastLocusId = 0;
549 
550     for(constObjVector::const_iterator i=loci.begin(); i != loci.end(); i++)
551     {
552         const gcLocus * locusP = dynamic_cast<const gcLocus *>(*i);
553         assert(locusP != NULL);
554 
555         assert(locusP->GetLinked());
556 
557         TiXmlElement * blockElement  = new TiXmlElement( xmlstr::XML_TAG_BLOCK.c_str());
558         blockElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
559                                     locusP->GetName().c_str());
560 
561         long start = gcdefault::badMapPosition;
562         //////////////////////////////////////////////////////
563         // map position
564         if(! locusP->HasMapPosition())
565         {
566             if(needMapInfo)
567             {
568                 // EWFIX.P3 -- should use specific error
569                 throw gc_locus_err(wxString::Format(gcerr::locusWithoutMapPosition,
570                                                     locusP->GetName().c_str()));
571             }
572         }
573         else
574         {
575             start = locusP->GetMapPosition();
576             if(!anyZeroes && (start < 0))
577             {
578                 start++;
579             }
580             TiXmlElement * mapPositionElement  = new TiXmlElement( xmlstr::XML_TAG_MAP_POSITION.c_str());
581             wxString mapPosString = wxString::Format(" %ld ",start);
582             TiXmlText * mapPosText = new TiXmlText(mapPosString);
583             mapPositionElement->LinkEndChild(mapPosText);
584             blockElement->LinkEndChild(mapPositionElement);
585         }
586 
587         //////////////////////////////////////////////////////
588         // length
589         if(locusP->HasTotalLength())
590         {
591             TiXmlElement * lengthElement  = new TiXmlElement( xmlstr::XML_TAG_LENGTH.c_str());
592             wxString lengthString = wxString::Format(" %ld ",(long)locusP->GetTotalLength());
593             TiXmlText * lengthText = new TiXmlText(lengthString);
594             lengthElement->LinkEndChild(lengthText);
595             blockElement->LinkEndChild(lengthElement);
596         }
597 
598         if(havePrevious)
599         {
600             if(start <= lastExtent)
601             {
602                 long end = locusP->GetMapPosition()+locusP->GetLength()-1;
603                 long lastStart = GetStructures().GetLocus(lastLocusId).GetMapPosition();
604                 throw gc_locus_overlap(locusP->GetName(),start,end,
605                                        GetStructures().GetLocus(lastLocusId).GetName(),lastStart,lastExtent);
606             }
607         }
608 
609         if(needMapInfo)
610         {
611             havePrevious = true;
612             size_t length = locusP->GetLength();
613             long stop = start + length;
614             stop -= 1;    // EWFIX explain why
615             lastExtent = stop;
616             lastLocusId = locusP->GetId();
617         }
618 
619         //////////////////////////////////////////////////////
620         // offset
621         long offset = 1;
622         if(locusP->HasOffset())
623         {
624             offset = locusP->GetOffset();
625 
626             if(!anyZeroes && (offset < 1))
627             {
628                 offset++;
629             }
630             start += offset;
631             TiXmlElement * offsetElement  = new TiXmlElement( xmlstr::XML_TAG_OFFSET.c_str());
632             wxString offsetString = wxString::Format(" %ld ",offset);
633             TiXmlText * offsetText = new TiXmlText(offsetString);
634             offsetElement->LinkEndChild(offsetText);
635             blockElement->LinkEndChild(offsetElement);
636         }
637         else
638         {
639             if(locusP->GetDataType() == sdatatype_SNP && locusP->HasLocations())
640             {
641                 // EWFIX.P3 -- should use specific error
642                 throw gc_locus_err(wxString::Format(gcerr_locus::offsetMissingSnpLocations,locusP->GetName().c_str()));
643             }
644 
645             if(needMapInfo)
646             {
647                 bool lengthIsMarkers = false;
648                 if(locusP->HasNumMarkers())
649                 {
650                     if(locusP->HasLength())
651                     {
652                         lengthIsMarkers = (locusP->GetNumMarkers() == locusP->GetLength());
653                     }
654                     else
655                     {
656                         lengthIsMarkers = (locusP->GetNumMarkers() == 1);
657                     }
658                 }
659                 if(!lengthIsMarkers)
660                 {
661                     throw gc_locus_err(wxString::Format(gcerr_locus::offsetMissingMultiSegment,locusP->GetName().c_str()));
662                 }
663             }
664 
665         }
666 
667         //////////////////////////////////////////////////////
668         // locations
669         if(locusP->HasLocations())
670         {
671             std::vector<long> locations = locusP->GetLocations();
672             wxString locationString = " ";
673 
674             if(locusP->HasNumMarkers())
675             {
676                 size_t numLocs = locations.size();
677                 size_t numMarkers = locusP->GetNumMarkers();
678                 if(numLocs != numMarkers)
679                 {
680                     throw gc_wrong_location_count(  (long)numLocs,
681                                                     (long)numMarkers,
682                                                     locusP->GetName());
683                 }
684             }
685 
686             for(size_t i = 0; i < locations.size(); i++)
687             {
688                 long loc = locations[i];
689 
690                 // EWFIX.P3 -- mouse cut and paste from similar
691                 // calculations in phase item
692                 long smallest = anyZeroes ? 0 : 1;
693                 if(locusP->HasOffset()) smallest = locusP->GetOffset();
694 
695                 long largest = smallest + locusP->GetLength() - 1;
696                 if(smallest < 0 && largest >= 0 && (!anyZeroes)) largest++;
697 
698                 if(loc < smallest)
699                 {
700                     throw gc_location_too_small(loc,
701                                                 smallest,
702                                                 locusP->GetName());
703                 }
704 
705                 if(loc > largest)
706                 {
707                     throw gc_location_too_large(loc,
708                                                 largest,
709                                                 locusP->GetName());
710                 }
711 
712                 if(!anyZeroes && loc < 1)
713                 {
714                     loc++;
715                 }
716                 locationString += wxString::Format("%ld ",loc);
717             }
718 
719             TiXmlElement * locationElement  = new TiXmlElement( xmlstr::XML_TAG_LOCATIONS.c_str());
720             TiXmlText * locationText = new TiXmlText(locationString);
721             locationElement->LinkEndChild(locationText);
722             blockElement->LinkEndChild(locationElement);
723         }
724         else
725             // when num markers > 1
726             // dna -- required if length > markers
727             // snp -- required for recom
728             // msat/kallele -- required for recom
729         {
730             if(locusP->GetNumMarkers() > 1)
731             {
732                 if(locusP->GetDataType() == sdatatype_DNA)
733                 {
734                     if(locusP->HasTotalLength())
735                     {
736                         if(locusP->GetTotalLength() > locusP->GetNumMarkers())
737                         {
738                             throw gc_locus_err(wxString::Format(gcerr_locus::dnaBigLengthNeedsLocations,locusP->GetName().c_str()));
739                         }
740                     }
741                 }
742                 else
743                 {
744                     bool continueOn = guiQuestionBatchLog(wxString::Format(gcstr::locationsForRecom,locusP->GetName().c_str()),gcstr::abandonExport,gcstr::continueExport);
745                     if(!continueOn)
746                     {
747                         throw gc_abandon_export();
748                     }
749                 }
750             }
751         }
752 
753         spacingElement->LinkEndChild(blockElement);
754     }
755 }
756 
757 void
AddTraitsElem(TiXmlElement * regionElem,const GCTraitInfoSet & traits) const758 GCDataStore::AddTraitsElem(TiXmlElement * regionElem, const GCTraitInfoSet & traits) const
759 {
760     if(!traits.empty())
761     {
762         TiXmlElement * traitsElement = new TiXmlElement( xmlstr::XML_TAG_TRAITS.c_str());
763         regionElem->LinkEndChild(traitsElement);
764 
765         for(GCTraitInfoSet::const_iterator i=traits.begin(); i!=traits.end(); i++)
766         {
767             const gcTraitInfo & traitInfo = m_structures.GetTrait(*i);
768             TiXmlText * name = new TiXmlText(traitInfo.GetName());
769 
770             TiXmlElement * nameElement = new TiXmlElement( xmlstr::XML_TAG_NAME.c_str());
771             nameElement->LinkEndChild(name);
772 
773             TiXmlElement * traitElement = new TiXmlElement( xmlstr::XML_TAG_TRAIT.c_str());
774             traitElement->LinkEndChild(nameElement);
775 
776             traitsElement->LinkEndChild(traitElement);
777         }
778     }
779 }
780 
781 TiXmlElement *
MakePopulationElemForUnlinkedLocus(const gcNameResolvedInfo & info,const gcPopulation & popRef,const gcRegion & regionRef,const gcLocus & locusRef,size_t siteIndex) const782 GCDataStore::MakePopulationElemForUnlinkedLocus(const gcNameResolvedInfo & info, const gcPopulation & popRef, const gcRegion & regionRef, const gcLocus & locusRef, size_t siteIndex) const
783 {
784     TiXmlElement * popElement = new TiXmlElement( xmlstr::XML_TAG_POPULATION.c_str());
785     popElement->SetAttribute(   xmlstr::XML_ATTRTYPE_NAME.c_str(),
786                                 popRef.GetName().c_str());
787 
788     std::vector<const GCIndividual*> individuals = info.GetIndividuals();
789     for(size_t individualIndex=0; individualIndex < individuals.size(); individualIndex++)
790     {
791         assert(individuals[individualIndex] != NULL);
792         const GCIndividual & individual = *(individuals[individualIndex]);
793         AddIndividualElem(popElement,individual,regionRef,locusRef,siteIndex);
794     }
795 
796     return popElement;
797 }
798 
799 void
AddPopulationElemForRegion(TiXmlElement * regionElem,const gcNameResolvedInfo & info,const gcRegion & regionRef,const gcPopulation & popRef) const800 GCDataStore::AddPopulationElemForRegion(TiXmlElement * regionElem, const gcNameResolvedInfo & info, const gcRegion & regionRef, const gcPopulation & popRef) const
801 {
802     TiXmlElement * popElement = new TiXmlElement( xmlstr::XML_TAG_POPULATION.c_str());
803     regionElem->LinkEndChild(popElement);
804     popElement->SetAttribute(   xmlstr::XML_ATTRTYPE_NAME.c_str(),
805                                 popRef.GetName().c_str());
806 
807     std::vector<const GCIndividual*> individuals = info.GetIndividuals();
808     for(size_t index=0; index < individuals.size(); index++)
809     {
810         assert(individuals[index] != NULL);
811         const GCIndividual & individual = *(individuals[index]);
812         AddIndividualElem(popElement,individual,info.GetRegionRef());
813     }
814 
815     if (m_structures.GetPanelsState())
816     {
817         if (m_structures.HasPanel(regionRef.GetId(), popRef.GetId()))
818         {
819             const gcPanel &panelRef =  m_structures.GetPanel(regionRef.GetId(), popRef.GetId());
820             if (panelRef.GetBlessed() && (panelRef.GetNumPanels() > 0))
821             {
822                 // only add > 0  panel counts
823                 AddPanelElem(popElement, panelRef);
824             }
825         }
826     }
827 }
828 
829 void
AddRegionElem(TiXmlElement * data,const gcExportable & expo,const gcRegion & regionRef) const830 GCDataStore::AddRegionElem(TiXmlElement * data, const gcExportable & expo, const gcRegion & regionRef) const
831 {
832     // create the named region element
833     TiXmlElement * regionElement = new TiXmlElement( xmlstr::XML_TAG_REGION.c_str());
834     data->LinkEndChild(regionElement);
835     regionElement->SetAttribute(    xmlstr::XML_ATTRTYPE_NAME.c_str(),
836                                     regionRef.GetName().c_str());
837 
838     AddEffectivePopSizeElem(regionElement,regionRef);
839     AddSpacingElem(regionElement,regionRef);
840     AddTraitsElem(regionElement,regionRef.GetTraitInfoSet());
841 
842 #if 0  // Potentially DEAD CODE (bobgian, Feb 2010)
843     TiXmlElement * effPopSizeElem = MakeEffectivePopSizeElem(regionRef);
844     if(effPopSizeElem != NULL)
845     {
846         regionElement->LinkEndChild(effPopSizeElem);
847     }
848 
849     TiXmlElement * spacingElement = MakeSpacingElem(regionRef);
850     regionElement->LinkEndChild(spacingElement);
851 
852     const GCTraitInfoSet & traits = regionRef.GetTraitInfoSet();
853     if(!traits.empty())
854     {
855         TiXmlElement * traitsElement = MakeTraitsElem(traits);
856         regionElement->LinkEndChild(traitsElement);
857     }
858 #endif
859 
860     // write out info for each population
861     constObjVector pops = m_structures.GetConstDisplayablePops();
862     for(constObjVector::const_iterator iter = pops.begin(); iter != pops.end(); iter++)
863     {
864         const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
865         assert(popP != NULL);
866         const gcNameResolvedInfo & info = expo.GetInfo(*popP,regionRef);
867         AddPopulationElemForRegion(regionElement,info,regionRef,*popP);
868     }
869 }
870 
871 void
AddDivergenceElem(TiXmlElement * forces) const872 GCDataStore::AddDivergenceElem(TiXmlElement * forces) const
873 {
874     wxLogVerbose("In AddDivergenceElem");  // JMDBG
875     // create divergence element
876     TiXmlElement * divergence = new TiXmlElement( xmlstr::XML_TAG_DIVERGENCE.c_str());
877     forces->LinkEndChild(divergence);
878 
879     // boilerplate
880     TiXmlElement * prior = new TiXmlElement( xmlstr::XML_TAG_PRIOR.c_str());
881     prior->SetAttribute(xmlstr::XML_ATTRTYPE_TYPE.c_str(), "linear");
882     divergence->LinkEndChild(prior);
883 
884     TiXmlElement * paramindex = new TiXmlElement( xmlstr::XML_TAG_PARAMINDEX.c_str());
885     wxString dataString = " default ";
886     TiXmlText * data = new TiXmlText(dataString);
887     paramindex->LinkEndChild(data);
888     prior->LinkEndChild(paramindex);
889 
890     TiXmlElement * lower = new TiXmlElement( xmlstr::XML_TAG_PRIORLOWERBOUND.c_str());
891     dataString = " 0.0 ";
892     data = new TiXmlText(dataString);
893     lower->LinkEndChild(data);
894     prior->LinkEndChild(lower);
895 
896     TiXmlElement * upper = new TiXmlElement( xmlstr::XML_TAG_PRIORUPPERBOUND.c_str());
897     dataString = " 0.01 ";
898     data = new TiXmlText(dataString);
899     upper->LinkEndChild(data);
900     prior->LinkEndChild(upper);
901     wxLogVerbose("    boilerplate out");  // JMDBG
902 
903     // variable values
904     gcDisplayOrder parids = m_structures.GetParentIds();
905 
906     TiXmlElement * method = new TiXmlElement( xmlstr::XML_TAG_METHOD.c_str());
907     dataString = " ";
908     for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
909     {
910         dataString += " USER ";
911     }
912     data = new TiXmlText(dataString);
913     method->LinkEndChild(data);
914     divergence->LinkEndChild(method);
915 
916     TiXmlElement * startval = new TiXmlElement( xmlstr::XML_TAG_START_VALUES.c_str());
917     float sval = 0;
918     dataString = " ";
919     for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
920     {
921         sval += 0.002;
922         dataString += wxString::Format(_T(" %f "),sval);
923     }
924     data = new TiXmlText(dataString);
925     startval->LinkEndChild(data);
926     divergence->LinkEndChild(startval);
927     wxLogVerbose("    start values out");  // JMDBG
928 
929     TiXmlElement * poptree = new TiXmlElement( xmlstr::XML_TAG_POPTREE.c_str());
930     divergence->LinkEndChild(poptree);
931 
932     for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
933     {
934         TiXmlElement * epochb = new TiXmlElement( xmlstr::XML_TAG_EPOCH_BOUNDARY.c_str());
935 
936         TiXmlElement * newpop = new TiXmlElement( xmlstr::XML_TAG_NEWPOP.c_str());
937         const gcParent & parent = m_structures.GetParent(*iter);
938         wxLogVerbose("    parentid: %i child1id: %i child2id: %i",(int)*iter, (int)parent.GetChild1Id(), (int)parent.GetChild2Id());  // JMDBG
939 
940         dataString = " ";
941          if (m_structures.IsPop(parent.GetChild1Id()))
942         {
943             dataString += m_structures.GetPop(parent.GetChild1Id()).GetName().c_str();
944         }
945         else
946         {
947             dataString += m_structures.GetParent(parent.GetChild1Id()).GetName().c_str();
948         }
949 
950         dataString += " ";
951         if (m_structures.IsPop(parent.GetChild2Id()))
952         {
953             dataString += m_structures.GetPop(parent.GetChild2Id()).GetName().c_str();
954         }
955         else
956         {
957             dataString += m_structures.GetParent(parent.GetChild2Id()).GetName().c_str();
958         }
959         dataString += " ";
960 
961         data = new TiXmlText(dataString);
962         newpop->LinkEndChild(data);
963         epochb->LinkEndChild(newpop);
964 
965         TiXmlElement * ancestor = new TiXmlElement( xmlstr::XML_TAG_ANCESTOR.c_str());
966         dataString = " ";
967         dataString += parent.GetName().c_str();
968         dataString += " ";
969         data = new TiXmlText(dataString);
970         ancestor->LinkEndChild(data);
971         epochb->LinkEndChild(ancestor);
972 
973         poptree->LinkEndChild(epochb);
974     }
975     wxLogVerbose("    done");  // JMDBG
976 }
977 
978 void
AddMigrationElem(TiXmlElement * forces)979 GCDataStore::AddMigrationElem(TiXmlElement * forces)
980 {
981     wxLogVerbose("In AddMigrationElem");  // JMDBG
982     //wxLogMessage("In AddMigrationElem");  // JMDBG
983     //cout << "In AddMigrationElem" << endl;
984 
985 
986     // create migration element
987     TiXmlElement * mig;
988     if (m_structures.GetDivMigMatrixDefined())
989     {
990         wxLogVerbose("Divergence Migration Matrix being output");  // JMDBG
991         //cout << "Divergence Migration Matrix being output" << endl;
992         mig = new TiXmlElement( xmlstr::XML_TAG_DIVMIG.c_str());
993     }
994     else if (m_structures.GetMigMatrixDefined())
995     {
996         wxLogVerbose("Migration Matrix being output");  // JMDBG
997         //cout << "Migration Matrix being output" << endl;
998         mig = new TiXmlElement( xmlstr::XML_TAG_MIGRATION.c_str());
999     }
1000     else
1001     {
1002         // neither a migration or divergence migration matrix exist (batch converter case)
1003         // so create the default migration matrix
1004         wxLogVerbose("No Migration Matrix exists so creating it");  // JMDBG
1005         //cout << "No Migration Matrix exists so creating it" << endl;
1006 
1007         if ( m_structures.GetDivergenceState())
1008         {
1009             mig = new TiXmlElement( xmlstr::XML_TAG_DIVMIG.c_str());
1010             //cout << "Divergence Migration Matrix being output" << endl;
1011         }
1012         else
1013         {
1014             mig = new TiXmlElement( xmlstr::XML_TAG_MIGRATION.c_str());
1015             //cout << "Migration Matrix being output" << endl;
1016         }
1017         m_structures.MakeMigrationMatrix();
1018         /*
1019         gcDisplayOrder popids = m_structures.GetDisplayablePopIds();
1020 
1021         for(gcDisplayOrder::const_iterator iter = popids.begin(); iter != popids.end(); iter++)
1022         {
1023             for(gcDisplayOrder::const_iterator jter = popids.begin(); jter != popids.end(); jter++)
1024             {
1025                 if (*jter!=*iter)
1026                 {
1027                     gcMigration Mig1 = m_structures.MakeMigration(true, *iter, *jter );
1028                     gcMigration Mig2 = m_structures.MakeMigration(true, *jter, *iter );
1029                 }
1030             }
1031         }
1032         */
1033     }
1034     forces->LinkEndChild(mig);
1035 
1036     wxString startString = " ";
1037     wxString methodString = " ";
1038     wxString profilesString = " ";
1039     wxString constraintsString = " ";
1040 
1041     constObjVector popsToDisplay =  m_structures.GetConstDisplayablePops();
1042     if (popsToDisplay.size() > 0)
1043     {
1044 
1045        // build full migration matrix data in a row by row manner
1046         const gcDisplayOrder popids = m_structures.GetDisplayablePopIds();
1047         const gcDisplayOrder parids = m_structures.GetParentIds();
1048 
1049         // populations first
1050         for(gcDisplayOrder::const_iterator iter = popids.begin(); iter != popids.end(); iter++)
1051         {
1052             for(gcDisplayOrder::const_iterator jter = popids.begin(); jter != popids.end(); jter++)
1053             {
1054                 if(m_structures.HasMigration(*iter, *jter))
1055                 {
1056                     // population / population pairs
1057                     const gcMigration mig = m_structures.GetMigration(*iter, *jter);
1058                     startString += mig.GetStartValueString();
1059                     methodString += mig.GetMethodString();
1060                     profilesString += mig.GetProfileAsString();
1061                     constraintsString += mig.GetConstraintString();
1062                 }
1063                 else
1064                  {
1065                     // doesn't exist so not allowed
1066                     startString += "0";
1067                     methodString += gcstr_mig::migmethodUser;
1068                     profilesString += gcstr_mig::migprofileNone ;
1069                     constraintsString += gcstr_mig::migconstraintInvalid;
1070                 }
1071                 startString += " ";
1072                 methodString += " ";
1073                 profilesString += " ";
1074                 constraintsString += " ";
1075             }
1076 
1077             if (parids.size() > 0)
1078             {
1079                 // population / parent pairs
1080                 for(gcDisplayOrder::const_iterator kter = parids.begin(); kter != parids.end(); kter++)
1081                 {
1082                     if(m_structures.HasMigration(*iter, *kter))
1083                     {
1084                         // migration exists, get values
1085                         const gcMigration mig = m_structures.GetMigration(*iter, *kter);
1086                         startString += mig.GetStartValueString();
1087                         methodString += mig.GetMethodString();
1088                         profilesString += mig.GetProfileAsString();
1089                         constraintsString += mig.GetConstraintString();
1090                     }
1091                     else
1092                     {
1093                         // doesn't exist so not allowed
1094                         startString += "0";
1095                         methodString += gcstr_mig::migmethodUser;
1096                         profilesString += gcstr_mig::migprofileNone ;
1097                         constraintsString += gcstr_mig::migconstraintInvalid;
1098                     }
1099                     startString += " ";
1100                     methodString += " ";
1101                     profilesString += " ";
1102                     constraintsString += " ";
1103                 }
1104             }
1105 
1106             // spacers
1107         }
1108 
1109         // now parents
1110         if (parids.size() > 0)
1111         {
1112             for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
1113             {
1114                 for(gcDisplayOrder::const_iterator jter = popids.begin(); jter != popids.end(); jter++)
1115                 {
1116                     if(m_structures.HasMigration(*iter, *jter))
1117                     {
1118                         // parent / population pairs
1119                         const gcMigration mig = m_structures.GetMigration(*iter, *jter);
1120                         startString += mig.GetStartValueString();
1121                         methodString += mig.GetMethodString();
1122                         profilesString += mig.GetProfileAsString();
1123                         constraintsString += mig.GetConstraintString();
1124                     }
1125                     else
1126                     {
1127                         // doesn't exist so not allowed
1128                         startString += "0";
1129                         methodString += gcstr_mig::migmethodUser;
1130                         profilesString += gcstr_mig::migprofileNone ;
1131                         constraintsString += gcstr_mig::migconstraintInvalid;
1132                     }
1133                     startString += " ";
1134                     methodString += " ";
1135                     profilesString += " ";
1136                     constraintsString += " ";
1137                 }
1138                 // parent / parent pairs
1139                 for(gcDisplayOrder::const_iterator kter = parids.begin(); kter != parids.end(); kter++)
1140                 {
1141                     if(m_structures.HasMigration(*iter, *kter))
1142                     {
1143                         // migration exists, get values
1144                         const gcMigration mig = m_structures.GetMigration(*iter, *kter);
1145                         startString += mig.GetStartValueString();
1146                         methodString += mig.GetMethodString();
1147                         profilesString += mig.GetProfileAsString();
1148                         constraintsString += mig.GetConstraintString();
1149                     }
1150                     else
1151                     {
1152                         // doesn't exist so not allowed
1153                         startString += "0";
1154                         methodString += gcstr_mig::migmethodUser;
1155                         profilesString += gcstr_mig::migprofileNone ;
1156                         constraintsString += gcstr_mig::migconstraintInvalid;
1157                     }
1158                     startString += " ";
1159                     methodString += " ";
1160                     profilesString += " ";
1161                     constraintsString += " ";
1162                 }
1163             }
1164         }
1165     }
1166 
1167     TiXmlElement * stvals = new TiXmlElement( xmlstr::XML_TAG_START_VALUES.c_str());
1168     TiXmlText * data = new TiXmlText(startString);
1169     stvals->LinkEndChild(data);
1170     mig->LinkEndChild(stvals);
1171 
1172     TiXmlElement * method = new TiXmlElement( xmlstr::XML_TAG_METHOD.c_str());
1173     data = new TiXmlText(methodString);
1174     method->LinkEndChild(data);
1175     mig->LinkEndChild(method);
1176 
1177     TiXmlElement * maxevts = new TiXmlElement( xmlstr::XML_TAG_MAX_EVENTS.c_str());
1178     wxString dataString = " 10000 ";
1179     data = new TiXmlText(dataString);
1180     maxevts->LinkEndChild(data);
1181     mig->LinkEndChild(maxevts);
1182 
1183     TiXmlElement * profiles = new TiXmlElement( xmlstr::XML_TAG_PROFILES.c_str());
1184     data = new TiXmlText(profilesString);
1185     profiles->LinkEndChild(data);
1186     mig->LinkEndChild(profiles);
1187 
1188     TiXmlElement * constraints = new TiXmlElement( xmlstr::XML_TAG_CONSTRAINTS.c_str());
1189     data = new TiXmlText(constraintsString);
1190     constraints->LinkEndChild(data);
1191     mig->LinkEndChild(constraints);
1192 
1193     TiXmlElement * prior = new TiXmlElement( xmlstr::XML_TAG_PRIOR.c_str());
1194     prior->SetAttribute(xmlstr::XML_ATTRTYPE_TYPE.c_str(), "linear");
1195     mig->LinkEndChild(prior);
1196 
1197     TiXmlElement * paramindex = new TiXmlElement( xmlstr::XML_TAG_PARAMINDEX.c_str());
1198     dataString = " default ";
1199     data = new TiXmlText(dataString);
1200     paramindex->LinkEndChild(data);
1201     prior->LinkEndChild(paramindex);
1202 
1203     TiXmlElement * lower = new TiXmlElement( xmlstr::XML_TAG_PRIORLOWERBOUND.c_str());
1204     dataString = " 0.0 ";
1205     data = new TiXmlText(dataString);
1206     lower->LinkEndChild(data);
1207     prior->LinkEndChild(lower);
1208 
1209     TiXmlElement * upper = new TiXmlElement( xmlstr::XML_TAG_PRIORUPPERBOUND.c_str());
1210     dataString = " 100.0 ";
1211     data = new TiXmlText(dataString);
1212     upper->LinkEndChild(data);
1213     prior->LinkEndChild(upper);
1214     wxLogVerbose("    xml out");  // JMDBG
1215 }
1216 
1217 wxString
MakeUnlinkedName(wxString regionName,size_t index) const1218 GCDataStore::MakeUnlinkedName(wxString regionName, size_t index) const
1219 {
1220     return wxString::Format(gcstr::regionNameUnlinked,regionName.c_str(),(int)index);
1221 }
1222 
1223 TiXmlElement *
MakeUnlinkedSite(const gcExportable & expo,const gcRegion & regionRef,const gcLocus & locus,size_t index) const1224 GCDataStore::MakeUnlinkedSite(const gcExportable & expo, const gcRegion & regionRef, const gcLocus & locus, size_t index) const
1225 {
1226     TiXmlElement * regionElement = new TiXmlElement( xmlstr::XML_TAG_REGION.c_str());
1227     regionElement->SetAttribute(    xmlstr::XML_ATTRTYPE_NAME.c_str(),
1228                                     MakeUnlinkedName(regionRef.GetName(),index).c_str());
1229 
1230     // EWFIX.P3 -- need to fix spacing info ??
1231 
1232     constObjVector pops = m_structures.GetConstDisplayablePops();
1233     for(constObjVector::const_iterator iter=pops.begin(); iter != pops.end(); iter++)
1234     {
1235         const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
1236         assert(popP != NULL);
1237 
1238         const gcNameResolvedInfo & info = expo.GetInfo(*popP,regionRef);
1239         TiXmlElement * popElement =
1240             MakePopulationElemForUnlinkedLocus(info,*popP,regionRef,locus,index);
1241         if(popElement != NULL)
1242         {
1243             regionElement->LinkEndChild(popElement);
1244         }
1245     }
1246     return regionElement;
1247 }
1248 
1249 std::vector<TiXmlElement*>
MakeUnlinkedRegionElems(const gcExportable & expo,const gcRegion & regionRef) const1250 GCDataStore::MakeUnlinkedRegionElems(const gcExportable & expo, const gcRegion & regionRef) const
1251 {
1252     std::vector<TiXmlElement*> elements;
1253 
1254     // make sure that we are accessing these by order of creation -- a good choice for
1255     // batch converter and for loci within a single input file. Less clear how good
1256     // this is if users have been munging about in the gui
1257     gcIdVec locIds = GetStructures().GetLocusIdsForRegionByCreation(regionRef.GetId());
1258 
1259     for(gcIdVec::const_iterator i = locIds.begin(); i != locIds.end(); i++)
1260     {
1261         size_t locusId = *i;
1262         const gcLocus & locus = m_structures.GetLocus(locusId);
1263         if(!locus.GetLinked())
1264         {
1265             for(size_t index=0; index < locus.GetNumMarkers(); index++)
1266             {
1267                 TiXmlElement * unlinkedElem = MakeUnlinkedSite(expo,regionRef,locus,index);
1268                 elements.push_back(unlinkedElem);
1269             }
1270         }
1271     }
1272 
1273     return elements;
1274 }
1275 
1276 
1277 TiXmlDocument *
ExportFile()1278 GCDataStore::ExportFile()
1279 {
1280     TiXmlDocument * docP = new TiXmlDocument();
1281     try
1282     {
1283         TiXmlDeclaration * decl = new TiXmlDeclaration( "1.0", "", "" );
1284         docP->LinkEndChild( decl );
1285 
1286         if(!(m_commentString.IsEmpty()))
1287         {
1288             TiXmlComment * comment = new TiXmlComment();
1289             comment->SetValue(m_commentString.c_str());
1290             docP->LinkEndChild(comment);
1291         }
1292 
1293         TiXmlElement * lamarc = new TiXmlElement( xmlstr::XML_TAG_LAMARC.c_str() );
1294         docP->LinkEndChild( lamarc );
1295         lamarc->SetAttribute( xmlstr::XML_ATTRTYPE_VERSION.c_str(),VERSION);
1296 
1297         TiXmlElement * format = new TiXmlElement( xmlstr::XML_TAG_FORMAT.c_str() );
1298         lamarc->LinkEndChild( format );
1299         TiXmlElement * coordMunge = new TiXmlElement( xmlstr::XML_TAG_CONVERT_OUTPUT.c_str() );
1300         format->LinkEndChild( coordMunge );
1301         bool anyZeroes = GetStructures().AnyZeroes();   // EWFIX.P4 -- wasteful
1302         TiXmlText * doConvertText = new TiXmlText(anyZeroes ? "false" : "true");
1303         coordMunge->LinkEndChild(doConvertText);
1304 
1305         // add migration if more than one population
1306         if (m_structures.GetPopCount() > 1)
1307         {
1308             TiXmlElement * forces = new TiXmlElement( xmlstr::XML_TAG_FORCES.c_str() );
1309             lamarc->LinkEndChild( forces );
1310             AddMigrationElem(forces);
1311 
1312             if (m_structures.GetDivergenceState())
1313             {
1314                 AddDivergenceElem(forces);
1315             }
1316         }
1317 
1318         TiXmlElement * data = new TiXmlElement( xmlstr::XML_TAG_DATA.c_str() );
1319         lamarc->LinkEndChild( data );
1320 
1321         const gcExportable exportable = BuildExportableData();
1322 
1323         // do all regions that contain linked data
1324         constObjVector regions = m_structures.GetConstDisplayableRegions();
1325         for(constObjVector::const_iterator gIter = regions.begin();
1326             gIter != regions.end(); gIter++)
1327         {
1328             const gcRegion * regionP = dynamic_cast<const gcRegion*>(*gIter);
1329             assert(regionP != NULL);
1330             if(m_structures.RegionHasAnyLinkedLoci(regionP->GetId()))
1331             {
1332                 AddRegionElem(data,exportable,*regionP);
1333             }
1334         }
1335 
1336         // do all regions that contain unlinked data
1337         for(constObjVector::const_iterator gIter = regions.begin();
1338             gIter != regions.end(); gIter++)
1339         {
1340             const gcRegion * regionP = dynamic_cast<const gcRegion*>(*gIter);
1341             assert(regionP != NULL);
1342             if(m_structures.RegionHasAnyUnLinkedLoci(regionP->GetId()))
1343             {
1344                 std::vector<TiXmlElement *> regionElems = MakeUnlinkedRegionElems(exportable,*regionP);
1345                 for(std::vector<TiXmlElement*>::iterator i=regionElems.begin(); i != regionElems.end(); i++)
1346                 {
1347                     data->LinkEndChild( *i );
1348                 }
1349             }
1350         }
1351     }
1352     catch(const gc_ex & e)
1353     {
1354         delete docP;
1355         throw;
1356     }
1357 
1358     return docP;
1359 }
1360 
1361 void
WriteExportedData(TiXmlDocument * docP)1362 GCDataStore::WriteExportedData(TiXmlDocument * docP)
1363 {
1364     docP->SaveFile( m_outfileName.c_str() );
1365 }
1366 
1367 void
ThrowLocusWithoutDataType(wxString locusName,wxString regionName) const1368 GCDataStore::ThrowLocusWithoutDataType(wxString locusName, wxString regionName) const
1369 {
1370     wxString msg = wxString::Format(gcerr::locusWithoutDataType,
1371                                     locusName.c_str(),
1372                                     regionName.c_str());
1373     gui_error g(msg);
1374     throw g;
1375 }
1376 
1377 GCIndividual &
makeOrFetchInd(wxString indName,std::map<wxString,GCIndividual * > & nameDict,gcNameResolvedInfo & info) const1378 GCDataStore::makeOrFetchInd(    wxString indName,
1379                                 std::map<wxString,GCIndividual*> & nameDict,
1380                                 gcNameResolvedInfo & info) const
1381 {
1382     if(nameDict.find(indName) == nameDict.end())
1383     {
1384         GCIndividual * ind = new GCIndividual(indName,info.GetRegionRef());
1385         info.AddIndividual(ind);
1386         nameDict[indName] = ind;
1387     }
1388     std::map<wxString,GCIndividual*>::iterator iter = nameDict.find(indName);
1389     assert(iter != nameDict.end());
1390 
1391     GCIndividual & ind = *((*iter).second);
1392     return ind;
1393 }
1394 
1395 bool
AddLocusData(GCIndividual & ind,const gcLocus & locus,GCParseSample & sample) const1396 GCDataStore::AddLocusData(GCIndividual & ind,
1397                           const gcLocus & locus,
1398                           GCParseSample & sample) const
1399 {
1400     size_t samples = sample.GetSequencesPerLabel();
1401     bool foundAnything = false;
1402     wxString sampleName = sample.GetLabel();
1403     for(size_t i=0; i < samples; i++)
1404     {
1405         if(samples > 1 || (sampleName == ind.GetName()))
1406         {
1407             // EWFIX.P4.BUG.564  -- need to preserve information about how we made
1408             // this name so we can do better error reporting
1409             sampleName = wxString::Format("%s_%d",sample.GetLabel().c_str(),(int)i);
1410         }
1411         ind.AddSample(sampleName,locus,&(sample.GetData(i)));
1412         foundAnything = true;
1413     }
1414     return foundAnything;
1415 }
1416 
1417 bool
AddLocusData(GCIndividual & ind,const gcLocus & locus,GCParseSample & sample,const wxArrayString & sampleNames) const1418 GCDataStore::AddLocusData(GCIndividual & ind,
1419                           const gcLocus & locus,
1420                           GCParseSample & sample,
1421                           const wxArrayString & sampleNames) const
1422 {
1423     size_t numNames = sampleNames.Count();
1424     size_t numSamples = sample.GetSequencesPerLabel();
1425     bool foundAnything = false;
1426     if((numNames != 0) && (numNames != numSamples))
1427     {
1428         // EWFIX.P4.BUG.564 -- use more data to make a better message
1429         throw gc_ind_wrong_sample_count(ind);
1430     }
1431     for(size_t i=0; i < numSamples; i++)
1432     {
1433         wxString sampleName = "";
1434         if(numNames != 0) sampleName = sampleNames[i];
1435         ind.AddSample(sampleName,locus,&(sample.GetData(i)));
1436         foundAnything = true;
1437     }
1438     return foundAnything;
1439 }
1440 
1441 bool
FillExportInfo(gcNameResolvedInfo & info,gcPhaseInfo & phaseInfo) const1442 GCDataStore::FillExportInfo(gcNameResolvedInfo & info, gcPhaseInfo & phaseInfo) const
1443 {
1444     bool regionFoundAnything = false;
1445     std::map<wxString,GCIndividual*> nameDict;
1446 
1447     size_t popId    = info.GetPopRef().GetId();
1448     size_t regionId = info.GetRegionRef().GetId();
1449 
1450     // get list of loci for region
1451     constObjVector loci = m_structures.GetConstDisplayableLociInMapOrderFor(regionId);
1452 
1453     for(constObjVector::const_iterator lIter = loci.begin();
1454         lIter != loci.end(); lIter++)
1455     {
1456         const gcLocus * locusP = dynamic_cast<const gcLocus*>(*lIter);
1457         assert(locusP != NULL);
1458 
1459         wxString locusName = locusP->GetName();
1460 
1461         gcSpecificDataType dtype = locusP->GetDataType();
1462         if(dtype == sdatatype_NONE_SET)
1463         {
1464             ThrowLocusWithoutDataType(locusP->GetName(),info.GetRegionRef().GetName());
1465         }
1466 
1467         size_t locusId = locusP->GetId();
1468         constBlockVector blocks = GetBlocks(popId,locusId);
1469         for(constBlockVector::const_iterator bIter = blocks.begin();
1470             bIter != blocks.end(); bIter++)
1471         {
1472             assert(*bIter != NULL);
1473             bool locusFoundAnything = false;
1474 
1475             const GCParseBlock & block = **bIter;
1476 
1477             const GCParseSamples & samples = block.GetSamples();
1478 
1479             // EWFIX.P3 --  what I really need to do here is
1480             // to have an individual name and a sample name for each
1481             // member data item within each sample
1482             for(size_t i=0; i < samples.size(); i++)
1483             {
1484                 GCParseSample & sample = *(samples[i]);
1485 
1486                 const wxString & thisName = sample.GetLabel();
1487                 bool isInd = phaseInfo.HasIndividualRecord(thisName);
1488                 bool isSam = phaseInfo.HasSampleRecord(thisName);
1489 
1490                 if( (isInd) && (isSam) )
1491                 {
1492                     const gcPhaseRecord & rec1 = phaseInfo.GetIndividualRecord(thisName);
1493                     const gcPhaseRecord & rec2 = phaseInfo.GetSampleRecord(thisName);
1494                     throw gc_both_individual_and_sample(thisName,rec1,rec2);
1495                 }
1496 
1497                 if (isInd)
1498                 {
1499                     GCIndividual & ind = makeOrFetchInd(thisName,nameDict,info);
1500                     const gcPhaseRecord & rec = phaseInfo.GetIndividualRecord(thisName);
1501 
1502                     // get phenotype names
1503                     // find those that apply to this region info.GetRegionRef();
1504                     // get geno resolutions for them
1505                     const gcIdSet & phenoIds = rec.GetPhenotypeIds();
1506                     const GCTraitInfoSet & traits = info.GetRegionRef().GetTraitInfoSet();
1507                     for(gcIdSet::const_iterator i=phenoIds.begin(); i != phenoIds.end(); i++)
1508                     {
1509                         size_t phenotypeId = *i;
1510                         for(GCTraitInfoSet::const_iterator j=traits.begin(); j != traits.end(); j++)
1511                         {
1512                             const gcTraitInfo & traitRef = GetStructures().GetTrait(*j);
1513                             const gcPhenotype & phenoRef = GetStructures().GetPhenotype(phenotypeId);
1514                             if(traitRef.HasPhenotype(phenoRef))
1515                             {
1516                                 ind.AddPhenotype(phenoRef);
1517                             }
1518                         }
1519                     }
1520 
1521                     if(rec.HasUnphased(locusName))
1522                     {
1523                         ind.AddPhase(*locusP,rec.GetUnphased(locusName));
1524                     }
1525                     if(rec.HasSamples())
1526                     {
1527                         const wxArrayString & sampleNames = rec.GetSamples();
1528                         locusFoundAnything = AddLocusData(ind,*locusP,sample,sampleNames) || locusFoundAnything;
1529                     }
1530                     else
1531                     {
1532                         locusFoundAnything = AddLocusData(ind,*locusP,sample) || locusFoundAnything;
1533                     }
1534                 }
1535 
1536                 if (isSam)
1537                 {
1538                     const gcPhaseRecord & rec = phaseInfo.GetSampleRecord(thisName);
1539                     wxString indName;
1540                     if(rec.HasIndividual())
1541                     {
1542                         indName = rec.GetIndividual();
1543                     }
1544                     else
1545                     {
1546                         indName = gcdefault::createdIndividualPrefix;
1547                         assert(rec.HasSamples());
1548                         const wxArrayString & samples = rec.GetSamples();
1549                         for(size_t i = 0; i < samples.Count(); i++)
1550                         {
1551                             indName = wxString::Format("%s:%s",
1552                                                        indName.c_str(),
1553                                                        samples[i].c_str());
1554                         }
1555                     }
1556                     GCIndividual & ind = makeOrFetchInd(indName,nameDict,info);
1557                     // EWFIX.P3 -- refactor -- appears above
1558                     // get phenotype names
1559                     // find those that apply to this region info.GetRegionRef();
1560                     // get geno resolutions for them
1561                     const gcIdSet & phenoIds = rec.GetPhenotypeIds();
1562                     const GCTraitInfoSet & traits = info.GetRegionRef().GetTraitInfoSet();
1563                     for(gcIdSet::const_iterator i=phenoIds.begin(); i != phenoIds.end(); i++)
1564                     {
1565                         size_t phenotypeId = *i;
1566                         for(GCTraitInfoSet::const_iterator j=traits.begin(); j != traits.end(); j++)
1567                         {
1568                             const gcTraitInfo & traitRef = GetStructures().GetTrait(*j);
1569                             const gcPhenotype & phenoRef = GetStructures().GetPhenotype(phenotypeId);
1570                             if(traitRef.HasPhenotype(phenoRef))
1571                             {
1572                                 ind.AddPhenotype(phenoRef);
1573                             }
1574                         }
1575                     }
1576                     locusFoundAnything = AddLocusData(ind,*locusP,sample) || locusFoundAnything;
1577                     if(rec.HasUnphased(locusName))
1578                     {
1579                         ind.AddPhase(*locusP,rec.GetUnphased(locusName));
1580                     }
1581                 }
1582 
1583                 if(!isInd && !isSam)
1584                     // doesn't have any phase information
1585                 {
1586                     GCIndividual & ind = makeOrFetchInd(thisName,nameDict,info);
1587                     locusFoundAnything = AddLocusData(ind,*locusP,sample) || locusFoundAnything;
1588                 }
1589             }
1590 
1591 #if 0  // Potentially DEAD CODE (bobgian, Feb 2010)
1592             // At the moment, this cannot happen without
1593             // causing another error (either missing_pop_region or
1594             // missing_sample).
1595             if(!locusFoundAnything)
1596                 // EWFIX.P3.BUG.511 -- eventually this restriction should
1597                 // be relaxed, but at the moment we require all pop,locus
1598                 // pairs have data
1599             {
1600                 throw gc_data_missing_pop_locus(info.GetPopRef().GetName(),locusP->GetName());
1601             }
1602 #endif
1603             regionFoundAnything = regionFoundAnything || locusFoundAnything;
1604         }
1605     }
1606     if(!regionFoundAnything)
1607         // EWFIX.P3.BUG.511 -- eventually this restriction should
1608         // be relaxed, but at the moment we require all pop,locus
1609         // pairs have data
1610     {
1611         throw gc_data_missing_pop_region(info.GetPopRef().GetName(),info.GetRegionRef().GetName());
1612     }
1613     return regionFoundAnything;
1614 }
1615 
1616 gcExportable
BuildExportableData() const1617 GCDataStore::BuildExportableData() const
1618 {
1619     bool foundAnything = false;
1620     gcExportable exportable;
1621 
1622     constObjVector regions  = m_structures.GetConstDisplayableRegions();
1623     constObjVector pops     = m_structures.GetConstDisplayablePops();
1624 
1625     gcPhaseInfo * phaseInfo = BuildPhaseInfo();
1626     assert(phaseInfo != NULL);
1627 
1628     try
1629     {
1630         DiagnosePhaseInfoProblems(*phaseInfo);
1631 
1632         for(constObjVector::const_iterator gIter = regions.begin();
1633             gIter != regions.end(); gIter++)
1634         {
1635             const gcRegion * regionP = dynamic_cast<const gcRegion*>(*gIter);
1636             assert(regionP != NULL);
1637             bool regionFoundAnything = false;
1638             for(constObjVector::const_iterator iter = pops.begin(); iter != pops.end(); iter++)
1639             {
1640                 const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
1641                 assert(popP != NULL);
1642 
1643                 gcNameResolvedInfo * info = new gcNameResolvedInfo(*popP,*regionP);
1644                 try
1645                 {
1646                     bool localFoundAnything = FillExportInfo(*info,*phaseInfo);
1647                     foundAnything |= localFoundAnything;
1648                     regionFoundAnything |= localFoundAnything;
1649                     gcPopRegionPair infoPair(popP,regionP);
1650                     assert(exportable.find(infoPair) == exportable.end());
1651                     exportable[infoPair] = info;
1652                 }
1653                 catch(const gc_ex & e)
1654                 {
1655                     delete info;
1656                     throw;
1657                 }
1658             }
1659             if(!regionFoundAnything)
1660             {
1661                 throw gui_error(wxString::Format(gcerr::regionNoData,regionP->GetName().c_str()));
1662             }
1663         }
1664     }
1665     catch(const gc_ex & e)
1666     {
1667         delete phaseInfo;
1668         throw;
1669     }
1670 
1671     delete phaseInfo;
1672 
1673     if(!foundAnything)
1674     {
1675         gui_error g(gcerr::noDataFound);
1676         throw g;
1677     }
1678 
1679     // EWFIX.P3 -- we should gather info about ghost populations,
1680     // and other unexpected data gaps
1681 
1682     return exportable;
1683 }
1684 
1685 //____________________________________________________________________________________
1686